package jp.ac.osaka_u.ist.sel.similarity.register;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Set;
import java.util.regex.Pattern;

import jp.ac.osaka_u.ist.sel.similarity.commons.Settings;
import jp.ac.osaka_u.ist.sel.similarity.commons.SupportedLanguage;
import jp.ac.osaka_u.ist.sel.similarity.database.ColumnName;
import jp.ac.osaka_u.ist.sel.similarity.register.database.FileRegisterDAOManager;
import jp.ac.osaka_u.ist.sel.similarity.register.database.InsertionDomainDAO;
import jp.ac.osaka_u.ist.sel.similarity.register.database.InsertionFileDAO;

import org.apache.log4j.Logger;

/**
 * Registers information about each file of a domain into the database.
 * <p>
 * Typical use: construct with a {@link FileRegisterDAOManager}, call
 * {@link #setDomainName(String)}, then {@link #registerFileSet(Set)}.
 * Files whose path matches one of the NG words supplied by
 * {@link Settings#getNGWords()} are skipped.
 *
 * @author ysk-ssk
 *
 */
public class FileInfoRegister {
    protected final Logger _log = Logger.getLogger("FileInfoRegister");
    protected String _domainName = null;
    protected InsertionDomainDAO _domainDAO;
    protected InsertionFileDAO _fileDAO;

    /** Case-insensitive pattern; a path that fully matches it is not registered. */
    private final Pattern _ignoreFileRegularExpression;

    /**
     * Creates a register without DAOs. {@link #_domainDAO} and
     * {@link #_fileDAO} stay {@code null}; a subclass using this constructor
     * has to provide them (or override the methods that use them).
     */
    protected FileInfoRegister() {
        _ignoreFileRegularExpression = buildIgnorePattern();
    }

    /**
     * Creates a register that uses the domain and file DAOs of the given manager.
     *
     * @param daoManager source of the domain DAO and the file DAO; must not be {@code null}
     */
    public FileInfoRegister(FileRegisterDAOManager daoManager) {
        _domainDAO = daoManager.getDomainDAO();
        _fileDAO = daoManager.getFileDAO();
        _ignoreFileRegularExpression = buildIgnorePattern();
    }

    /**
     * Builds the ignore pattern from the NG words in {@link Settings}.
     * <p>
     * Each NG word {@code w} contributes the alternative {@code (.*w.*)}, so a
     * full match of the pattern means "the path contains at least one NG word".
     * The words are inserted verbatim, i.e. they are interpreted as regular
     * expression fragments. With no NG words the pattern is empty and only
     * matches the empty string.
     *
     * @return the compiled, case-insensitive ignore pattern
     */
    private static Pattern buildIgnorePattern() {
        StringBuilder regex = new StringBuilder();
        for (String ngWord : Settings.getInstance().getNGWords()) {
            if (regex.length() > 0) {
                regex.append('|');
            }
            regex.append("(.*").append(ngWord).append(".*)");
        }
        return Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE);
    }

    /**
     * Sets the name of the domain that subsequently registered files belong to.
     *
     * @param domainName the domain name; must not be {@code null}
     * @return {@code true} if the name was accepted; {@code false} if it is
     *         longer than the length reported for {@link ColumnName#NAME}, in
     *         which case the current domain name is left unchanged
     * @throws IOException declared for callers; not thrown by the code in this method itself
     */
    public boolean setDomainName(String domainName) throws IOException {
        assert(domainName != null);
        if (domainName.length() > ColumnName.getColumnLength(ColumnName.NAME)) {
            return false;
        }
        _domainName = domainName;
        return true;
    }

    /**
     * Registers the current domain (if necessary) and then every readable file
     * in the given set. Paths that cannot be read are logged and skipped.
     *
     * @param filePathSet paths of the files to register
     * @throws SQLException if a database operation fails
     */
    public void registerFileSet(Set<String> filePathSet) throws SQLException {
        registerNewDomain();
        for (String filePath : filePathSet) {
            File file = new File(filePath);
            if (!file.canRead()) {
                // Reported through the logger like every other diagnostic of this class.
                _log.warn("Invalid path: " + filePath);
                continue;
            }
            registerFile(file, SupportedLanguage.getFileExtension(filePath));
        }
    }

    /**
     * Inserts the current domain name unless the domain DAO already reports a
     * positive id for it. A failed insertion is logged as an error; no
     * exception is raised for it.
     *
     * @throws SQLException if a database operation fails
     */
    protected void registerNewDomain() throws SQLException {
        assert(_domainName != null);
        int domainId = _domainDAO.getId(_domainName);
        if (domainId <= 0 && !_domainDAO.insert(_domainName)) {
            _log.error("Can not insert:\t" + _domainName);
        }
    }

    /**
     * Registers a single file under the current domain.
     * <p>
     * The file is skipped (with a log message) when its path cannot be
     * obtained from {@link InsertionFileDAO#getPath(File)}, when the path
     * matches the ignore pattern, or when the path is longer than the length
     * reported for {@link ColumnName#PATH}.
     *
     * @param file the file to register
     * @param ext  the file extension stored together with the file
     * @throws SQLException if a database operation fails
     */
    protected void registerFile(File file, String ext) throws SQLException {
        String filePath = InsertionFileDAO.getPath(file);
        if (filePath == null) {
            _log.error("Can not get the path.\t" + _domainName + "\t:" + file);
            return;
        }
        // matches() (not find()) is required: every alternative is wrapped in ".*",
        // and an empty pattern must not match ordinary paths.
        if (_ignoreFileRegularExpression.matcher(filePath).matches()) {
            if (_log.isDebugEnabled()) {
                _log.debug("NG:\t" + filePath);
            }
            return;
        }
        if (filePath.length() > ColumnName.getColumnLength(ColumnName.PATH)) {
            if (_log.isDebugEnabled()) {
                _log.debug("too long path:\t" + filePath);
            }
            return;
        }
        long fileSize = file.length();
        if (!_fileDAO.insert(filePath, ext, fileSize, _domainDAO.getId(_domainName))) {
            _log.info("Can not insert:\t" + filePath);
        }
    }

}
