/*
    docfbe.cpp    June 6, 2004.

    Copyright (C) 2003-2004 CFbE Research Group,
    Software Engineering Laboratory,
    Graduate School of Information Science,
    Nara Institute of Science and Technology,
    All rights reserved.

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, (at your option) or
    any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with GNU Emacs; see the file COPYING.  If not, write to the
    Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/
//---------------------------------------------------------------------------
#pragma hdrstop
//---------------------------------------------------------------------------
#include <Classes.hpp>
#include <SysUtils.hpp>
#include <stdio.h>

#include "EDocfbeError.h"
#include "TCFbEToolkit.h"
#include "TCFbECommandLineParser.h"
#include "TCFbEDataLoader.h"
#include "TCFbEOutputCash.h"
#include "TCFbEOutputFormatter.h"
#include "TCFbECsvOutputFormatter.h"
#include "TCFbERecommendationOutputFormatter.h"

#include "TCFbEDataSetEx.h"
#include "TCFbESimilarityComputationAlgorithm.h"
#include "TCFbEPredictionAlgorithm.h"
//---------------------------------------------------------------------------
#pragma argsused
//---------------------------------------------------------------------------
// Usage/help text. main() writes it to stderr when the command-line parser
// reports HelpEnabled. Every row is padded with spaces so the text forms an
// 80-column block (see the column ruler below).
AnsiString helpStr
    //            0         1         2         3         4         5         6         7
    //            01234567890123456789012345678901234567890123456789012345678901234567890123456789
    = AnsiString("USAGE:                                                                         \n")
    + AnsiString("  docfbe [OPTIONS] -l=FILENAMES -e=FILENAMES -s=ALGORITHM -p=ALGORITHM           \n")
    + AnsiString("\n")
    + AnsiString("where,                                                                         \n")
    + AnsiString("  -l, --learning=FILENAMES   Learning Data filenames separated by comma.       \n")
    + AnsiString("  -e, --estimating=FILENAMES Estimating Data filenames separated by comma.     \n")
    + AnsiString("  -s, --similarity=ALGORITHM Similarity computation algorithm.                 \n")
    + AnsiString("  -p, --prediction=ALGORITHM Prediction algorithm.                             \n")
    + AnsiString("\n")
    + AnsiString("OPTIONS are specified as:                                                      \n")
    + AnsiString("  -h, --help                 Display this help and exit.                       \n")
    + AnsiString("  -n, --normalize=METHOD     Method to normalize values. METHOD is specified as\n")
    + AnsiString("                             value | normalize | standardize | order           \n")
    + AnsiString("  -i, --itembased            Apply item-based collaborative filtering.         \n")
    + AnsiString("  -ns,--neighbors-size=SIZE  Neighbors size; i.e., how much similar rows will  \n")
    + AnsiString("                             be used for estimating.                           \n")
    + AnsiString("  -d, --default=VALUE        Default value is put instead of missing values.   \n")
    + AnsiString("  -if,--inverse-frequency    Inverse Case Frequency is applied.                \n")
    + AnsiString("  -ca,--case-amplifier=VALUE Case Amplification is applied.                    \n")
    + AnsiString("  -t, --target=LABEL[,LABEL] Target columns to estimate. Unspecified denotes   \n")
    + AnsiString("                             all unknown columns will be estimated. Or \"*\"   \n")
    + AnsiString("                             indicates all columns will be estimated.          \n")
    + AnsiString("  -m, --merge=BASEDATA       Base dataset in merging data.  BASEDATA is        \n")
    + AnsiString("                             specified as: none | both | learning | estimating \n")
    + AnsiString("  -dn,--disp-neighbors       Display neighbors with the results.               \n")
    + AnsiString("  -ds,--disp-similarities    Display neighbors' similarities with the results. \n")
    + AnsiString("  -dv,--disp-values          Display neighbors' values with the results.       \n")
    + AnsiString("  -dd,--disp-distribution    Display distribution of the similarities.         \n")
    + AnsiString("  -r, --recommendation       Output as recommendation form.                    \n")
    + AnsiString("  -o, --output=FILENAME      File name for outputting the results.             \n")
    + AnsiString("\n")
    + AnsiString("Similarity computation algorithm is one of the following terms:                \n")
    + AnsiString("  CosineSimilarity                                                             \n")
    + AnsiString("  AdjustedCosineSimilarityWithAverage                                          \n")
    + AnsiString("  AdjustedCosineSimilarityWithMedian                                           \n")
    // A stray backslash used to follow this name; "\ " is not a valid escape
    // sequence, so it has been replaced by a plain space.
    + AnsiString("  CorrelationCoefficientWithAverage                                            \n")
    + AnsiString("  CorrelationCoefficientWithMedian                                             \n")
    + AnsiString("  RankCorrelation                                                              \n")
    + AnsiString("  DistanceSimilarityWithAverage                                                \n")
    + AnsiString("  DistanceSimilarityWithMedian                                                 \n")
    + AnsiString("\n")
    + AnsiString("Prediction algorithm is one of the following terms:                            \n")
    + AnsiString("  WeightedSum                                                                  \n")
    + AnsiString("  AdjustedWeightedSumWithAverageOfColumn                                       \n")
    + AnsiString("  AdjustedWeightedSumWithMedianOfColumn                                        \n")
    + AnsiString("  AdjustedWeightedSumWithAverageOfNeighbors                                    \n")
    + AnsiString("  AdjustedWeightedSumWithMedianOfNeighbors                                     \n")
    + AnsiString("  AdjustedWeightedSumWithAverageOfRow                                          \n")
    + AnsiString("  AdjustedWeightedSumWithMedianOfRow                                           \n")
    + AnsiString("  AmplifiedWeightedSumWithAveragedMultiplier                                   \n")
    + AnsiString("  AmplifiedWeightedSumWithMedianOfMultiplier                                   \n")
    + AnsiString("\n")
    + AnsiString("EXAMPLES:                                                                      \n")
    + AnsiString("  docfbe -l=learning.csv -e=estimating.csv -s=CosineSimilarity -p=WeightedSum  \n")
    + AnsiString("\n")
    + AnsiString("  docfbe --learning=learning.csv --estimating=estimating.csv --similarity=Cosin\n")
    + AnsiString("  eSimilarity --prediction=WeightedSum                                         \n")
    + AnsiString("\n")
    + AnsiString("  docfbe -n=normalize -i -ns=10 -t=BUG,EFFORT -dn -ds -dd -o=output.txt -l=lear\n")
    + AnsiString("  ning.csv -e=estimating.csv -s=CosineSimilarity -p=WeightedSum                \n")
    + AnsiString("\n")
    + AnsiString("  docfbe --normalize=normalize --itembased --neighbors-size=10 --target=BUG,EFF\n")
    + AnsiString("  ORT --disp-neighbors --disp-similarities --disp-distribution --output=output.\n")
    + AnsiString("  txt --learning=learning.csv --estimating=estimating.csv --similarity=CosineS\n")
    + AnsiString("  imilarity --prediction=WeightedSum                                           \n");

//---------------------------------------------------------------------------
// Returns the "FileVersion" string stored in the version resource of the
// file named by FileName.
//
// The value is looked up in the string table for language/code page
// "041103A4" only. An empty string is returned when the file has no version
// resource, when the buffer cannot be allocated, or when that string table
// has no FileVersion entry.
AnsiString __fastcall GetExeVersion(AnsiString FileName)
{
    AnsiString version;

    DWORD dwReserved = 0;
    const DWORD dwSize = ::GetFileVersionInfoSize(FileName.c_str(), &dwReserved);
    if (dwSize == 0) {
        // No version resource (or the file could not be read).
        return version;
    }

    LPVOID lpBuffer = ::HeapAlloc(::GetProcessHeap(), HEAP_ZERO_MEMORY, dwSize);
    if (lpBuffer == NULL) {
        return version;
    }

    // Read the resource from the same file whose size was queried above;
    // previously the size came from FileName but the data from
    // Application->ExeName.
    if (::GetFileVersionInfo(FileName.c_str(), 0, dwSize, lpBuffer)) {
        LPVOID lpStr = NULL;
        UINT dwLength = 0;

        // lpStr is only meaningful when VerQueryValue succeeds, so the result
        // must be checked before the pointer is dereferenced.
        if (::VerQueryValue(lpBuffer, "\\StringFileInfo\\041103A4\\FileVersion", &lpStr, &dwLength)
                && lpStr != NULL && dwLength > 0) {
            // lpStr points into lpBuffer; copy it before the buffer is freed.
            version = static_cast<LPTSTR>(lpStr);
        }
    }

    ::HeapFree(::GetProcessHeap(), 0, lpBuffer);
    return version;
}

//---------------------------------------------------------------------------
// Builds the copyright banner: the program title followed by the version
// string of the running executable, then the copyright holder lines.
AnsiString __fastcall GetCopyrightStr(void)
{
    AnsiString banner("Collaborative Filtering based Estimation (CFbE) Program ");

    banner += GetExeVersion(Application->ExeName);
    banner += "\n";
    banner += "Copyright (C) 2003-2004 CFbE Research Group,\n";
    banner += "Software Engineering Laboratory,\n";
    banner += "Graduate School of Information Science,\n";
    banner += "Nara Institute of Science and Technology, All rights reserved.\n";

    return banner;
}

//---------------------------------------------------------------------------
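//     Usage
//         docfbe.exe help
//         docfbe.exe (params) (options)
//
//     (params): the following options are specified (required)
//         training=<learning data file name>
//         test=<test data file name>
//         target=<label of the metric to predict>
//         nsize=<Neighborhood Size>
//         similarity=<similarity computation algorithm>
//         prediction=<predicted value computation algorithm>
//
//     (options)
//         nlabel=<metric label>
//         nsims
//         sdstrb
//         output=<output file>     If output is specified, the results are written to that file;
//                                  otherwise the results are written to standard output.
//
//     Command line example
//         >docfbe.exe training=learningdata_sample.csv test=testdata_sample.csv target="BUG" nsize=10 similarity=NormalizedValueBasedCosineSimilarity prediction=SimpleWeightedSum nlabel=FILE nsims sdstrb output=test.csv
//
//     NOTE(review): the target/nlabel values in the example were Japanese labels
//     that were lost to an encoding error; "BUG" and "FILE" are stand-ins.
//     NOTE(review): this key=value syntax differs from the -l/-e/-s/-p options
//     printed by helpStr in this file; confirm which one is current.
//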
int main(int argc, char* argv[])
{
    // Rs[Cg\
    fprintf(stderr, "\n======================================================================\n");
    fprintf(stderr, "%s", GetCopyrightStr());
    fprintf(stderr, "======================================================================\n");
            
    try {         
        //------------------------------------------------------------
        // R}hC߂
        TCFbECommandLineParser* Parser;
        try {
            Parser = new TCFbECommandLineParser(argc, argv);
        } catch (Exception& e) {
            throw EDocfbeError(-13, "Commandline Parsing", e.Message);
        }

        if (Parser->ErrorCode != 0) {    // G[
            throw EDocfbeError(Parser->ErrorCode, Parser->ErrorParameter, "");
        }

        if (Parser->HelpEnabled) {    // wv\
            fprintf(stderr, "%s", helpStr.c_str());

            // I
            TCFbEToolkit::DeleteToolkit();

            fprintf(stderr, "======================================================================\n");
            return 0;
        }

        fprintf(stderr, "CF based Estimation is started.\n\n");
        
        //------------------------------------------------------------
        // f[^
        TCFbEDataLoader*    DataLoader;
        try {
            DataLoader = new TCFbEDataLoader(Parser, true);
        } catch (Exception &e) {
            delete Parser;
            throw EDocfbeError(-13, "Data Loading", e.Message);
        }

        if (DataLoader->ErrorCode != 0) {    // G[
            throw EDocfbeError(DataLoader->ErrorCode, DataLoader->ErrorParameter, "");
        }

        //------------------------------------------------------------
        // tB^OsCΏۃgNX\

        TCFbEOutputCash*    OutputCash;
        try {
            OutputCash = new TCFbEOutputCash(Parser, DataLoader, true);
        } catch (Exception &e) {
            delete Parser;
            delete DataLoader;
            throw EDocfbeError(-13, "Estimating", e.Message);
        }           

        //------------------------------------------------------------
        // \ʂt@CCႵ́CWo͂ɏo
        TCFbEOutputFormatter*    Formatter;
        try {
            if (Parser->Recommendation) {
                Formatter = new TCFbERecommendationOutputFormatter(Parser, OutputCash, true);
            } else {
                Formatter = new TCFbECsvOutputFormatter(Parser, OutputCash, true);
            }            
        } catch (Exception &e) {
            delete OutputCash;
            delete Parser;
            delete DataLoader;
            throw EDocfbeError(-13, "Output Formatting", e.Message);
        }


        if (Parser->OutputFileName.IsEmpty()) {    // Wo͂ɏo
            fprintf(stdout, "%s\n", Formatter->GetOutputString().c_str());

        } else {    // t@Cɕۑ
            try {
                Formatter->SaveOutputStringTo(Parser->OutputFileName);
            } catch (EFCreateError &efce) {
                throw EDocfbeError(-12, Parser->OutputFileName, efce.Message);
            }
        }

        delete Formatter;
        delete OutputCash;
        delete Parser;
        delete DataLoader;

        fprintf(stderr, "CF based Estimation is successfully completed.\n");

    } catch (EDocfbeError &ede) {    // ُIO󂯎 catch
        fprintf(stderr, "\n\nCF based Estimation terminated unsuccessfully with the following error.");
        fprintf(stderr, "\n\nError Code %d %s\n\n", ede.ErrorCode, ede.Message);
        fprintf(stderr, "======================================================================\n");
        return ede.ErrorCode;
        
    } catch (...) {
        throw;
    }

    // I
    TCFbEToolkit::DeleteToolkit();

    fprintf(stderr, "======================================================================\n");
    
    return 0;
}

//---------------------------------------------------------------------------
