/*
    TCFbEDataLoader.cpp    June 6, 2004.

    Copyright (C) 2003-2004 CFbE Research Group,
    Software Engineering Laboratory,
    Graduate School of Information Science,
    Nara Institute of Science and Technology,
    All rights reserved.

    This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2, (at your option) or
    any later version.

    This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; see the file COPYING.  If not, write to the
    Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/
//---------------------------------------------------------------------------
#pragma hdrstop

#include "TCFbEDataLoader.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
//---------------------------------------------------------------------------
// Constructor
// Builds the learning and estimating data sets named on the command line.
//
// Parser    : supplies LearningDataFileNameList, EstimatingDataFileNameList,
//             MergeBase, TargetColumnLabelList and NeighborsSize.
// IsVerbose : when true, progress messages are written to stderr.
//
// Failures are reported through ErrorCode / ErrorParameter followed by an
// early return:
//    -7  a learning data file could not be opened     (parameter: file name)
//    -8  an estimating data file could not be opened  (parameter: file name)
//    -9  the data sets are inconsistent and Parser->MergeBase is tmbNone
//        (parameter: both file lists)
//   -10  TargetColumnCheck() returned tmtEmpty        (parameter: column label)
//   -11  TargetColumnCheck() returned tmtTooSparsity  (parameter: column label)
//
// Whenever a data set is released in here, the member pointer is reset to
// NULL so that the destructor cannot delete the same object a second time.
//
// NOTE(review): ErrorCode is not assigned on the success path; presumably it
// is initialised elsewhere -- confirm against the class declaration.
__fastcall TCFbEDataLoader::TCFbEDataLoader(TCFbECommandLineParser* Parser, bool IsVerbose)
{
    // Start from a known state: every exit path must leave the destructor
    // with pointers that are either NULL or still owned.
    this->LearningData = NULL;
    this->EstimatingData = NULL;

    //------------------------------------------------------------
    // Load the learning (training) data.
    if (IsVerbose) {
        fprintf(stderr, "(1/4) Reading the following learning data.\n");
    }

    this->LearningData = new TCFbEDataSet();

    for (int k = 0; k < Parser->LearningDataFileNameList->Count; k++) {
        TFileName    CurrentFileName = Parser->LearningDataFileNameList->Strings[k];

        if (IsVerbose) {
            fprintf(stderr, "  %s\n", CurrentFileName.c_str());
        }

        TStringList* LearningDataStringList = new TStringList();

        try {
            LearningDataStringList->LoadFromFile(CurrentFileName);
        } catch (EFOpenError&) {
            delete LearningDataStringList;
            delete this->LearningData;
            this->LearningData = NULL;    // keep the destructor from deleting it again

            this->ErrorCode = -7;
            this->ErrorParameter = CurrentFileName;
            return;
        }

        // Parse the file into a temporary data set; the raw lines are not
        // needed afterwards, whether or not parsing succeeded.
        TCFbEDataSet*    BufferDataSet = NULL;
        try {
            BufferDataSet = new TCFbEDataSet(LearningDataStringList);
        } __finally {
            delete LearningDataStringList;
        }

        // Merge the temporary data set into the accumulated learning data.
        // __finally releases the buffer even if the merge throws.
        try {
            this->LearningData->FillColumns(BufferDataSet);
            for (int i = 0; i < BufferDataSet->NumberOfRows; i++) {
                this->LearningData->UpdateRow(BufferDataSet, i);
            }
        } __finally {
            delete BufferDataSet;
        }
    }

    if (IsVerbose) {
        fprintf(stderr, "    ..... completed: %d column(s) and %d row(s).\n\n", LearningData->NumberOfColumns, LearningData->NumberOfRows);
    }

    //------------------------------------------------------------
    // Load the estimating (test) data.
    if (this->CompareStringList(Parser->LearningDataFileNameList, Parser->EstimatingDataFileNameList)) {
        // Learning and estimating data come from the same files:
        // share the single data set instead of reading it twice.
        if (IsVerbose) {
            fprintf(stderr, "(2/4) Estimating data is equal to learning data.\n\n");
        }
        this->EstimatingData = this->LearningData;

    } else {
        // Learning and estimating data come from different files.
        if (IsVerbose) {
            fprintf(stderr, "(2/4) Reading the following estimating data.\n");
        }

        this->EstimatingData = new TCFbEDataSet();

        for (int k = 0; k < Parser->EstimatingDataFileNameList->Count; k++) {
            TFileName    CurrentFileName = Parser->EstimatingDataFileNameList->Strings[k];

            if (IsVerbose) {
                fprintf(stderr, "  %s\n", CurrentFileName.c_str());
            }

            TStringList* EstimatingDataStringList = new TStringList();

            try {
                EstimatingDataStringList->LoadFromFile(CurrentFileName);
            } catch (EFOpenError&) {
                delete EstimatingDataStringList;
                delete this->EstimatingData;
                delete this->LearningData;
                this->EstimatingData = NULL;
                this->LearningData = NULL;

                this->ErrorCode = -8;
                this->ErrorParameter = CurrentFileName;
                return;
            }

            TCFbEDataSet*    BufferDataSet = NULL;
            try {
                BufferDataSet = new TCFbEDataSet(EstimatingDataStringList);
            } __finally {
                delete EstimatingDataStringList;
            }

            try {
                this->EstimatingData->FillColumns(BufferDataSet);
                for (int i = 0; i < BufferDataSet->NumberOfRows; i++) {
                    this->EstimatingData->UpdateRow(BufferDataSet, i);
                }
            } __finally {
                delete BufferDataSet;
            }
        }

        if (IsVerbose) {
            fprintf(stderr, "    ..... completed: %d column(s) and %d row(s).\n\n", EstimatingData->NumberOfColumns, EstimatingData->NumberOfRows);
        }

        // Consistency check between the estimating and the learning data.
        if (!EstimatingData->IsConsistentWith(LearningData)) {
            if (Parser->MergeBase == tmbNone) {
                // No merge policy was requested: report the mismatch.
                delete this->LearningData;
                delete this->EstimatingData;
                this->LearningData = NULL;
                this->EstimatingData = NULL;

                this->ErrorCode = -9;
                this->ErrorParameter = Parser->LearningDataFileNameList->Text.Trim() + "\" and \"" + Parser->EstimatingDataFileNameList->Text.Trim();
                return;

            // Otherwise merge the data according to the requested base.
            } else if (Parser->MergeBase == tmbLearningData) {
                this->AdoptData(this->LearningData, this->EstimatingData);
                if (IsVerbose) {
                    fprintf(stderr, "    ..... Estimating Data are adapted to Learning Data.\n\n");
                }

            } else if (Parser->MergeBase == tmbEstimatingData) {
                this->AdoptData(this->EstimatingData, this->LearningData);
                if (IsVerbose) {
                    fprintf(stderr, "    ..... Learning Data are adapted to Estimating Data.\n\n");
                }

            } else {
                // Any other merge base: fill each data set with the columns
                // of the other one.
                this->LearningData->FillColumns(this->EstimatingData);
                this->EstimatingData->FillColumns(this->LearningData);
                if (IsVerbose) {
                    fprintf(stderr, "    ..... Data are merged each other: %d column(s) and %d row(s).\n\n", EstimatingData->NumberOfColumns, EstimatingData->NumberOfRows);
                }
            }
        }
    }

    //------------------------------------------------------------
    // Check the state of every requested target column in the learning data
    // (a target list of "*" skips this check).
    if (Parser->TargetColumnLabelList->Text.Trim() != "*") {
        for (int j = 0; j < Parser->TargetColumnLabelList->Count; j++) {
            try {
                // The check itself sits inside the try block so that the
                // cleanup below also covers an exception raised by it.
                TTargetColumnType TargetColumnType = this->TargetColumnCheck(Parser->TargetColumnLabelList->Strings[j], Parser->NeighborsSize);

                if (TargetColumnType == tmtEmpty) {
                    // No data exists to base an estimate on.
                    this->ErrorCode = -10;
                    this->ErrorParameter = Parser->TargetColumnLabelList->Strings[j];
                    return;

                } else if (TargetColumnType == tmtTooSparsity) {
                    // Too little data exists to base an estimate on.
                    this->ErrorCode = -11;
                    this->ErrorParameter = Parser->TargetColumnLabelList->Strings[j];
                    return;
                }

            } catch (...) {
                // EstimatingData may alias LearningData; delete the shared
                // object once, then clear both members before rethrowing.
                if (this->EstimatingData != this->LearningData) {
                    delete this->EstimatingData;
                }
                delete this->LearningData;
                this->EstimatingData = NULL;
                this->LearningData = NULL;
                throw;
            }
        }
    }

    return;
}

//---------------------------------------------------------------------------
// Releases the data sets held by the loader.
// EstimatingData can point at the same object as LearningData (the
// constructor shares one data set when both file lists match), so that
// object must be deleted exactly once.
__fastcall TCFbEDataLoader::~TCFbEDataLoader()
{
    // Decide about aliasing before anything is deleted, so that no pointer
    // value is inspected after the object it referred to has been destroyed.
    const bool    SharesDataSet = (this->EstimatingData == this->LearningData);

    delete this->EstimatingData;
    if (!SharesDataSet) {
        delete this->LearningData;
    }

    this->EstimatingData = NULL;
    this->LearningData = NULL;
}

//---------------------------------------------------------------------------
// Below: definitions of the protected methods
//---------------------------------------------------------------------------
// Returns what kind of data the estimation target column holds
// Classifies how usable one target column of the learning data is.
//
// TargetColumnLabel : label looked up in LearningData->ColumnLabelList.
// NeighborsSize     : when positive, the minimum number of enabled entries
//                     the column must have; otherwise the minimum is 2.
//
// Returns tmtEmpty when the learning data has no rows, the label is not
// found, or no row has the column enabled; tmtTooSparsity when fewer enabled
// entries exist than the minimum; tmtPredictable otherwise.
TTargetColumnType __fastcall TCFbEDataLoader::TargetColumnCheck(AnsiString TargetColumnLabel, const int NeighborsSize)
{
    TCFbEDataSet* const    Learning = this->LearningData;
    const int    RowCount = Learning->NumberOfRows;

    if (RowCount == 0) {
        return tmtEmpty;
    }

    const int    ColumnIndex = Learning->ColumnLabelList->IndexOf(TargetColumnLabel);
    if (ColumnIndex == -1) {
        return tmtEmpty;
    }

    // Count the rows in which the target column is enabled.
    int    EnabledCount = 0;
    for (int Row = 0; Row < RowCount; ++Row) {
        if (Learning->EnabledByIndex[Row][ColumnIndex]) {
            ++EnabledCount;
        }
    }

    if (EnabledCount == 0) {
        return tmtEmpty;
    }

    // A positive NeighborsSize sets the threshold itself; otherwise at least
    // two enabled entries are demanded.
    const int    RequiredCount = (NeighborsSize > 0) ? NeighborsSize : 2;

    return (EnabledCount < RequiredCount) ? tmtTooSparsity : tmtPredictable;
}

//---------------------------------------------------------------------------
// Adapts AdoptedDataSet so that it matches BaseDataSet
// Rebuilds AdoptedDataSet on top of the column layout of BaseDataSet.
//
// BaseDataSet    : data set whose column labels define the new layout; it is
//                  only read.
// AdoptedDataSet : in/out. Its rows are transferred with UpdateRow() into a
//                  new data set that has the columns of BaseDataSet; the old
//                  object is then deleted and the reference is pointed at the
//                  new one.
//
// If building the replacement throws, the half-built data set is released and
// AdoptedDataSet is left untouched before the exception propagates.
void __fastcall TCFbEDataLoader::AdoptData(TCFbEDataSet* BaseDataSet, TCFbEDataSet*& AdoptedDataSet)
{
    TCFbEDataSet*    NewDataSet = new TCFbEDataSet();

    try {
        // Recreate the columns of the base data set, in the same order.
        for (int j = 0; j < BaseDataSet->NumberOfColumns; j++) {
            NewDataSet->AddColumn(BaseDataSet->ColumnLabelList->Strings[j]);
        }

        // Transfer every row of the data set being adapted.
        for (int i = 0; i < AdoptedDataSet->NumberOfRows; i++) {
            NewDataSet->UpdateRow(AdoptedDataSet, i);
        }
    } catch (...) {
        delete NewDataSet;    // do not leak the replacement on failure
        throw;
    }

    // Swap only after the replacement is complete.
    delete AdoptedDataSet;
    AdoptedDataSet = NewDataSet;
}

//---------------------------------------------------------------------------
// Compares StringList0 with StringList1.
// Returns true  if StringList0 and StringList1 are the same,
// returns false if StringList0 and StringList1 differ.
// Tells whether two string lists hold the same entries, ignoring order.
//
// StringList0, StringList1 : the lists to compare; neither is modified.
//
// Returns true when both lists have the same Count and every entry of each
// list is found (via IndexOf) in the other one; false otherwise.
//
// The containment test runs in both directions on purpose: checking only
// that StringList0 is contained in StringList1 would treat {a, a} and {a, b}
// as equal, because the counts match and "a" is present in the second list.
bool __fastcall TCFbEDataLoader::CompareStringList(TStrings* StringList0, TStrings* StringList1)
{
    const int    EntryCount = StringList0->Count;

    if (EntryCount != StringList1->Count) {
        return false;
    }

    for (int k = 0; k < EntryCount; k++) {
        if (StringList1->IndexOf(StringList0->Strings[k]) < 0) {
            return false;
        }
        if (StringList0->IndexOf(StringList1->Strings[k]) < 0) {
            return false;
        }
    }

    return true;
}

//---------------------------------------------------------------------------

