package jp.ac.osaka_u.ist.sel.similarity.hash.tokenizer;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.SQLException;

import jp.ac.osaka_u.ist.sel.similarity.commons.Settings;
import jp.ac.osaka_u.ist.sel.similarity.commons.StringValidator;
import jp.ac.osaka_u.ist.sel.similarity.database.ColumnName;
import jp.ac.osaka_u.ist.sel.similarity.hash.database.InsertionTokenInfoDAO;
import jp.ac.osaka_u.ist.sel.similarity.hash.database.SelectionFileDAO;
import jp.ac.osaka_u.ist.sel.similarity.hash.parser.TokenStream;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

/**
 * Tokenizes source code files, computes an MD5 digest over the resulting
 * token sequence and, when a working path is configured, preserves the token
 * dump in that directory and registers it in the database.
 * <p>
 * How a file is tokenized (for example comment removal or normalization) is
 * decided by the {@link TokenStream} that the concrete subclass supplies
 * through {@link #getTokenStream(Reader)}.
 *
 * @author ysk-ssk
 *
 */
public abstract class TokenInfoRegister {
    private final Logger _log = Logger.getLogger("TokenInfoRegister");
    /** Directory in which token dumps are preserved; {@code null} disables dumping. */
    private final String _tokenDir;
    /** DAO used to register a preserved token dump. */
    private final InsertionTokenInfoDAO _tokenDAO;

    /**
     * Creates a register whose dump directory is the working path taken from
     * {@link Settings}.
     *
     * @param tokenDAO DAO that receives the token-file records
     */
    public TokenInfoRegister(InsertionTokenInfoDAO tokenDAO) {
        super();
        _tokenDir = Settings.getInstance().getWorkingPath();
        _tokenDAO = tokenDAO;
    }

    /**
     * Makes sure the directory that will contain {@code path} exists,
     * creating any missing ancestor directories as well.
     * <p>
     * A failure is only logged here; the subsequent attempt to open the file
     * for writing reports the problem to the caller.
     *
     * @param path path of the file that is about to be written
     */
    private void makeDirToPreserve(String path) {
        File parent = (new File(path)).getParentFile();
        if (parent == null) {
            // the path has no directory component, so there is nothing to create
            return;
        }
        // mkdirs() returns false when the directory already exists, hence the extra check
        if (!parent.mkdirs() && !parent.exists()) {
            _log.error("Can not make a directory. " + parent);
            return;
        }
        if (_log.isDebugEnabled()) {
            _log.debug("mkdir: " + parent);
        }
    }

    /**
     * Wraps the given reader in the token stream used to tokenize a source file.
     *
     * @param reader reader positioned at the start of the source file
     * @return token stream whose {@code next()} returns {@code null} once exhausted
     */
    protected abstract TokenStream getTokenStream(Reader reader);

    /**
     * Tokenizes a source file and returns the MD5 digest of its token sequence.
     * <p>
     * Every token is terminated with a newline before it is hashed. When a
     * token directory is configured, the same text is written to
     * {@code <tokenDir>/<fileId>} and, if the file has enough tokens, that
     * dump is registered through the DAO.
     *
     * @param fileId     identifier of the file; also used as the dump file name
     * @param sourcePath path of the source file to tokenize
     * @return the MD5 digest, or {@code null} if the source file does not
     *         exist, tokenizing fails, or the number of tokens is below
     *         {@code Settings.getMinNOT()}
     * @throws IOException if the source or the dump file cannot be opened or closed
     * @throws NoSuchAlgorithmException if no MD5 implementation is available
     * @throws SQLException if registering the dump fails
     */
    public byte[] calculateHash(int fileId, String sourcePath) throws IOException, NoSuchAlgorithmException, SQLException {
        // reconstruct file path; without a token dir only the hash is computed
        String tokenPath = null;
        if (_tokenDir != null) {
            tokenPath = _tokenDir + "/" + fileId;
        }
        // begin check
        File sourceFile = new File(sourcePath);
        if (!sourceFile.exists()) {
            _log.error("No such file:\t" + sourcePath);
            return null;
        }

        FileOutputStream fWriter = null;
        OutputStreamWriter bWriter = null;
        int count = 0;
        byte[] digest;
        // The reader is opened immediately before the try so that every later
        // failure (token stream creation, directory creation, opening the
        // dump) still releases it in the finally block.
        FileReader fileReader = new FileReader(sourceFile);
        try {
            TokenStream readStream = this.getTokenStream(fileReader);
            // if dump mode, output a parsed source code.
            if (tokenPath != null) {
                makeDirToPreserve(tokenPath);
                fWriter = new FileOutputStream(tokenPath);
                bWriter = new OutputStreamWriter(fWriter, StringValidator._UTF_8);
            }

            MessageDigest md = MessageDigest.getInstance("MD5");
            String token = null;
            try {
                while ((token = readStream.next()) != null) {
                    token += '\n';
                    // NOTE(review): getBytes() encodes with the platform default
                    // charset whereas the dump is written with
                    // StringValidator._UTF_8, so for non-ASCII tokens the digest
                    // can differ between JVMs whose default charset is not UTF-8.
                    // Left unchanged because switching would alter the returned
                    // digests; confirm before changing.
                    byte[] tokenByte = token.getBytes();
                    md.update(tokenByte, 0, tokenByte.length);
                    if (bWriter != null) {
                        bWriter.write(token);
                    }
                    count++;
                }
            } catch (Throwable e) {
                // Deliberately broad: any failure while tokenizing (including
                // a failed write of the dump) skips this file instead of
                // aborting the caller.
                _log.error("Parse error:\t" + sourcePath);
                _log.error(readStream.getLine() + "," + readStream.getColumn() + " before '" + token + "'");
                _log.error(e.getMessage());
                // keep the error log terse, but do not lose the cause entirely
                if (_log.isDebugEnabled()) {
                    _log.debug("Parse error detail:\t" + sourcePath, e);
                }
                return null;
            }
            digest = md.digest();
        } finally {
            // nested so that a failing close() cannot prevent the remaining closes
            try {
                if (bWriter != null) {
                    bWriter.close();
                }
            } finally {
                try {
                    if (fWriter != null) {
                        fWriter.close();
                    }
                } finally {
                    fileReader.close();
                }
            }
        }
        if (_log.isDebugEnabled()) {
            if (tokenPath != null) {
                _log.debug("token:\t" + tokenPath);
            }
        }
        // return null if (current number of tokens) < (minimum number of tokens)
        if (count < Settings.getInstance().getMinNOT()) {
            return null;
        }
        // register the file into the database
        if (tokenPath != null) {
            registerToken(fileId, tokenPath, count);
        }
        return digest;
    }

    /**
     * Records a preserved token dump (path, token count and size in bytes) in
     * the database. Paths longer than the PATH column and rejected inserts
     * are logged and otherwise ignored.
     *
     * @param fileId     identifier of the tokenized file
     * @param tokenPath  path of the token dump
     * @param tokenCount number of tokens written to the dump
     * @throws SQLException if the DAO reports a database error
     */
    private void registerToken(int fileId, String tokenPath, int tokenCount) throws SQLException {
        if (tokenPath.length() > ColumnName.getColumnLength(ColumnName.PATH)) {
            _log.error("too long path:\t" + tokenPath);
            return;
        }
        File file = new File(tokenPath);
        long fileSize = file.length();
        if (!_tokenDAO.insert(fileId, tokenPath, tokenCount, fileSize)) {
            _log.error("Can not register:\t" + tokenPath);
            return;
        }
        if (_log.isDebugEnabled()) {
            _log.debug("register:\t" + tokenPath);
        }
    }

}
