<?php

require_once("tiny_segmenter.php");

/**
 * Naive-Bayes-style text categoriser backed by an SQLite3 database.
 *
 * Training data is kept in three tables:
 *   - wordcount    (category, word, count)  how often a word was seen in a category
 *   - wordcountall (word, count)            how often a word was seen in any category
 *   - catcount     (category, count)        how often a category was trained
 * A fourth table, classifier_work, is a scratch table used while classifying.
 *
 * The constructor takes an exclusive flock() on a lock file; close() releases it.
 */
class NaiveBayes
{

  /// SQLite3 connection
  public $con;
  /// Stream handle of the lock file (null once close() has released it)
  public $lock;

  /**
   * Acquire the lock file, open the database and create the schema if missing.
   *
   * Paths come from readConfig("LOCKDIR") and readConfig("METADIR").
   * Terminates the script with die() when the lock file cannot be opened or locked.
   */
  function __construct()
  {
    $lockpath = readConfig("LOCKDIR") . "bayses.lock";
    $this->lock = fopen($lockpath, 'w');
    if ($this->lock === false) {
      // Without this check flock() would be handed `false` instead of a stream.
      die("can't open bayses.lock");
    }
    flock($this->lock, LOCK_EX) or die("can't lock bayses.lock");

    $this->con = new SQLite3(readConfig("METADIR") . 'bayes.sqlite');

    /* Per-category word totals
       wordcount
        category   category name
        word       word
        count      number of times the word appeared in the category
    */
    $this->con->exec(
      "create table if not exists wordcount  ".
      " (category text, word text, count integer, PRIMARY KEY(category,word)) ");

    // Reverse index so that lookups by word (as done when classifying) can use it.
    $this->con->exec(
      "create index if not exists rwords on wordcount (word,category) ");

    /* Word totals over all categories
       wordcountall
        word       word
        count      number of times the word appeared
    */
    $this->con->exec(
      "create table if not exists wordcountall  ".
      " (word text, count integer, PRIMARY KEY(word)) ");

    /* Category totals
       catcount
        category   category name
        count      number of times the category appeared
    */
    $this->con->exec(
      "create table if not exists catcount   ".
      " (category text, count integer, PRIMARY KEY(category)) ");

    /* Work table used while estimating a category */
    $this->con->exec(
      "create table if not exists classifier_work ".
      " (id INTEGER PRIMARY KEY AUTOINCREMENT, word text) "
    );

    $this->con->exec("pragma synchronous=off;");
    $this->con->exec("pragma journal_mode=memory;");
  }

  /**
   * Release resources: close the database, then unlock and close the lock file.
   *
   * Safe to call more than once; the lock handle is only released the first time.
   */
  function close(){
    $this->con->close();
    if (is_resource($this->lock)) {
      flock($this->lock, LOCK_UN);
      fclose($this->lock);
    }
    $this->lock = null;
  }

  /**
   * Split a string into a list of words.
   *
   * "<br>", CR and LF are replaced by spaces first. mecab_split() is used when
   * that function exists; otherwise TinySegmenterarray::segment() is used.
   *
   * @param string $sentence text to split
   * @return mixed whatever the selected splitter returns (iterated as a word list by callers)
   */
  function getwords($sentence){
    $sentence = str_replace(array("<br>", "\r", "\n"), " ", $sentence);

    if (function_exists('mecab_split')) {
      return mecab_split($sentence);
    }

    $spliter = new TinySegmenterarray();
    return $spliter->segment($sentence, 'UTF-8');
  }

//-----------------------------------------------
//train (learning)

  /**
   * Add one to the `count` column of the row identified by $keys,
   * inserting the row with count 1 when it does not exist yet.
   *
   * @param string $table table name; concatenated into the SQL text, so pass trusted constants only
   * @param array  $keys  column name => value identifying the row (values are bound as text)
   */
  private function _increment($table, $keys){
    $columns      = array_keys($keys);
    $conditions   = array();
    $placeholders = array();
    foreach ($columns as $column) {
      $conditions[]   = $column . "=:" . $column;
      $placeholders[] = ":" . $column;
    }

    $update = $this->con->prepare(
      "update " . $table .
      "   set count=(count+1)" .
      " where " . implode(" and ", $conditions));
    foreach ($keys as $column => $value) {
      $update->bindValue(":" . $column, $value, SQLITE3_TEXT);
    }
    $update->execute();

    // changes() reports the rows touched by the UPDATE; non-zero means the row existed.
    if ($this->con->changes() > 0) {
      return;
    }

    $insert = $this->con->prepare(
      "insert into " . $table .
      " (" . implode(",", $columns) . ",count)" .
      " values(" . implode(",", $placeholders) . ",1)");
    foreach ($keys as $column => $value) {
      $insert->bindValue(":" . $column, $value, SQLITE3_TEXT);
    }
    $insert->execute();
  }

  /**
   * Increment wordcount.count for (category, word) and wordcountall.count for word.
   *
   * @param string $category category the word was seen in
   * @param string $word     the word
   */
  function _wordcountup($category, $word){
    $this->_increment("wordcount", array("category" => $category, "word" => $word));
    $this->_increment("wordcountall", array("word" => $word));
  }

  /**
   * Increment catcount.count for the category.
   *
   * @param string $category category name
   */
  function _catcountup($category){
    $this->_increment("catcount", array("category" => $category));
  }

  /**
   * Learn: record one occurrence of $category and of every word of $sentence in it.
   *
   * All counter updates run inside a single transaction.
   *
   * @param string $category category the sentence belongs to
   * @param string $sentence training text
   */
  function train($category, $sentence ){
    $words = $this->getwords($sentence);

    //start transaction
    $this->con->exec("BEGIN DEFERRED;");

    $this->_catcountup($category);
    foreach ($words as $word) {
      $this->_wordcountup($category, $word);
    }

    $this->con->exec("COMMIT;");
  }

//-----------------------------------------------
//classifier (category estimation)

  /**
   * Insert each word as one row of the classifier_work table.
   *
   * @param mixed $words iterable list of words
   */
  function _add_work($words){
    $stmt = $this->con->prepare(
      "insert into classifier_work ".
      " (word) values(:word) ");

    // The same prepared statement is re-bound and re-executed for every word.
    foreach ($words as $word) {
      $stmt->bindValue(":word", $word, SQLITE3_TEXT);
      $stmt->execute();
    }
  }

  /**
   * Estimate candidate categories for a sentence.
   *
   * For every (sentence word, category) pair found in wordcount the per-word
   * score is max(wordcount.count / wordcountall.count - 0.1, 0) * 10/9; a
   * category's score is the sum of the squares of its per-word scores.
   *
   * @param string $sentence text to classify
   * @param int    $limit    maximum number of candidates to return
   * @return array rows with keys "category" and "score", highest score first
   */
  function classifier_list($sentence, $limit=5){
    $words = $this->getwords($sentence);

    $result = array();

    // The work rows only need to live for the duration of the query below,
    // so they are written inside a transaction that is rolled back afterwards.
    $this->con->exec("BEGIN DEFERRED;");
    $this->_add_work($words);

    $stmt = $this->con->prepare(
      " select ".
      "   MT.category category,".
      "   sum(MT.score*MT.score) score".
      " from(select ".
      "     wc.category category,".
      "     max(1.0*wc.count/vocab.count-0.1,0)*10/9 score".
      "   from  classifier_work work ".
      "   inner join wordcount wc".
      "      on wc.word    = work.word ".
      "   inner join wordcountall vocab".
      "      on vocab.word = work.word) MT".
      " group by MT.category".
      " order by score DESC".
      " limit :limit"
    );
    $stmt->bindValue(":limit", $limit, SQLITE3_INTEGER);
    $cur = $stmt->execute();

    while ($row = $cur->fetchArray(SQLITE3_ASSOC)) {
      $result[] = $row;
    }

    // NOTE(review): this delete looks redundant while the transaction is active,
    // since the ROLLBACK discards the inserted rows anyway; kept as in the original.
    $this->con->exec("delete from classifier_work");
    $this->con->exec("ROLLBACK;");

    return $result;
  }

  /**
   * Return the single best category for a sentence.
   *
   * @param string $sentence text to classify
   * @return string category name, or "" when no category matched
   */
  function classifier($sentence)
  {
    $cat = $this->classifier_list($sentence, 1);
    if (count($cat) > 0) {
      return $cat[0]["category"];
    }
    return "";
  }
}

//----main----
/*
$con = new NaiveBayes();

//学習
$con->train("figure","フィギュアスレ深夜");
$con->train("pucho","どうして土曜日も虹裏ですか？ 他にすることはないのですか？");


//カテゴリ分類
var_dump($con->classifier_list("どうして土曜日も虹裏ですか"));

$con->close();
//*/


