<?php

require_once("naivebayes.php");

/************************************************************
NaiveBayesでの分類をurl別にキャッシュし
重い分類処理の高速化を狙う
*************************************************************/

/**
 * Per-URL cache in front of the NaiveBayes classifier.
 *
 * Classification results are serialized and stored in a SQLite table keyed by
 * URL, so that an already-classified URL can be answered without running the
 * classifier again. An exclusive file lock is taken in the constructor and
 * held until close() is called.
 */
class NaiveBayesCache
{
  /** @var object|false Lazily created NaiveBayes engine; false until engine() is first called. */
  public $_engine = false;
  /** @var SQLite3|null Connection to the cache database; null after close(). */
  public $con;
  /** @var resource|null Handle of the lock file; null after close(). */
  public $lock;

  /**
   * Takes the exclusive lock, opens the cache database and makes sure the
   * cache table exists. Terminates the script if the lock cannot be taken.
   */
  public function __construct()
  {
    $lockpath = readConfig("LOCKDIR") . "bayses_cache.lock";
    $this->lock = fopen($lockpath, 'w');
    // fopen() returns false on failure; passing that to flock() is a TypeError
    // on PHP 8, so treat "cannot open" the same as "cannot lock".
    if ($this->lock === false || !flock($this->lock, LOCK_EX)) {
      die("can't lock bayses_cache.lock");
    }

    $this->con = new SQLite3(readConfig("METADIR") .'/bayes_cache.sqlite');

    // DDL produces no rows, so exec() is used rather than query().
    $this->con->exec(
      "create table if not exists bayescache  ".
      "(id INTEGER PRIMARY KEY AUTOINCREMENT,".
      " url text UNIQUE,".
      " category text) ");
    $this->con->exec(
      "create index if not exists urlidx on bayescache(url)");
    // Durability is traded for speed: this is only a cache.
    $this->con->exec("pragma synchronous=off;");
    $this->con->exec("pragma journal_mode=memory;");
  }

  /**
   * Prepares $sql, binds every entry of $params as text and executes it.
   *
   * @param string $sql    SQL text with named placeholders.
   * @param array  $params Map of placeholder name (e.g. ":url") to value.
   * @return bool True when the statement was prepared and executed successfully.
   */
  private function _exec_bound($sql, $params)
  {
    $stmt = $this->con->prepare($sql);
    if ($stmt === false) {
      return false;
    }
    foreach ($params as $name => $value) {
      $stmt->bindValue($name, $value, SQLITE3_TEXT);
    }
    $ok = ($stmt->execute() !== false);
    $stmt->close();
    return $ok;
  }

  /**
   * Looks up the stored (serialized) category for a URL.
   *
   * @param string $url Cache key.
   * @return string|false The stored category text, or false when there is no entry.
   */
  public function _get_cache($url)
  {
    $stmt = $this->con->prepare(
      "select category from bayescache ".
      " where url=:url ");
    if ($stmt === false) {
      return false;
    }
    $stmt->bindValue(":url", $url, SQLITE3_TEXT);
    $cursor = $stmt->execute();
    if ($cursor === false) {
      return false;
    }

    $row = $cursor->fetchArray(SQLITE3_NUM);
    $cursor->finalize();

    return $row ? $row[0] : false;
  }

  /**
   * Stores the category for a URL, replacing any existing entry.
   *
   * The existence check and the write happen inside one IMMEDIATE transaction:
   * an UPDATE is attempted first and an INSERT is issued only when the UPDATE
   * touched no row. The transaction is rolled back if a statement fails.
   *
   * @param string $url      Cache key.
   * @param string $category Serialized classification result.
   * @return void
   */
  public function _put_cache($url, $category)
  {
    $params = array(":url" => $url, ":cat" => $category);

    $this->con->exec("BEGIN IMMEDIATE;");

    // update an existing entry
    $ok = $this->_exec_bound(
      "update bayescache ".
      "   set category = :cat".
      " where url=:url ",
      $params);

    if ($ok && $this->con->changes() === 0) {
      // no entry yet: insert
      $ok = $this->_exec_bound(
        "insert into bayescache ".
        " (url,category)".
        " values(:url,:cat) ",
        $params);
    }

    $this->con->exec($ok ? "COMMIT;" : "ROLLBACK;");
  }

  /**
   * Deletes every cache entry whose URL is not in $live_urls, then compacts
   * the database file.
   *
   * URLs are compared as exact strings (the previous loose comparison treated
   * numeric-looking strings such as "1e3" and "1000" as equal).
   *
   * @param array $live_urls URLs whose entries must be kept.
   * @return void
   */
  public function optimize($live_urls)
  {
    // Hash set for O(1) membership tests instead of in_array() per row.
    $alive = array();
    foreach ($live_urls as $live) {
      $alive[(string)$live] = true;
    }

    // Collect the entries that are not on the live list. Deleting is deferred
    // until the read cursor has been released.
    $deletes = array();
    $cur = $this->con->query("select url from bayescache");
    while ($arr = $cur->fetchArray(SQLITE3_NUM)) {
      $url = $arr[0];
      if (!isset($alive[$url])) {
        $deletes[] = $url;
      }
    }
    $cur->finalize();

    if ($deletes) {
      // One transaction for the whole batch instead of one per row.
      $this->con->exec("BEGIN IMMEDIATE;");
      $stmt = $this->con->prepare(
        "delete from bayescache ".
        " where url =:url ");
      foreach ($deletes as $url) {
        $stmt->bindValue(":url", $url, SQLITE3_TEXT);
        $stmt->execute();
      }
      $stmt->close();
      $this->con->exec("COMMIT;");
    }

    // VACUUM cannot run inside a transaction, so it comes after the COMMIT.
    $this->con->exec("VACUUM");
  }

  /**
   * Returns the NaiveBayes engine, creating it on first use.
   *
   * @return object The shared NaiveBayes instance.
   */
  public function engine()
  {
    if (!$this->_engine) {
      $this->_engine = new NaiveBayes();
    }
    return $this->_engine;
  }

  /**
   * Releases the engine, the database connection and the file lock.
   * Safe to call more than once.
   *
   * @return void
   */
  public function close()
  {
    if ($this->_engine) {
      $this->_engine->close();
      $this->_engine = false;
    }
    // Close the database before giving up the lock that protects it.
    if ($this->con) {
      $this->con->close();
      $this->con = null;
    }
    if (is_resource($this->lock)) {
      flock($this->lock, LOCK_UN);
      fclose($this->lock);
    }
    $this->lock = null;
  }

  /**
   * Trains the engine with a sentence, then re-classifies that sentence and
   * stores the fresh result in the cache under $url.
   *
   * @param string $category Category the sentence belongs to.
   * @param string $sentence Training text.
   * @param string $url      Cache key for the re-classified result.
   * @return void
   */
  public function train($category, $sentence, $url)
  {
    $bayes = $this->engine();
    $bayes->train($category, $sentence);
    // re-classify after training so the cached entry reflects the new model
    $category = $bayes->classifier_list($sentence);
    $this->_put_cache($url, serialize($category));
  }

  /**
   * Returns the classification for a sentence, served from the cache when an
   * entry for $url exists and $force is false; otherwise the engine is run
   * and its result is cached.
   *
   * @param string $sentence Text to classify.
   * @param string $url      Cache key.
   * @param bool   $force    When true, skip the cache lookup and re-classify.
   * @return mixed Whatever the engine's classifier_list() returned (possibly from cache).
   */
  public function classifier($sentence, $url, $force=false)
  {
    if (!$force) {
      // The lookup is skipped entirely when a re-classification is forced.
      $cache = $this->_get_cache($url);
      if ($cache) {
        return unserialize($cache);
      }
    }

    $category = $this->engine()->classifier_list($sentence);
    $this->_put_cache($url, serialize($category));
    return $category;
  }
}

//---- main ------
/*

$bayes = new FutaBayes();

//学習
$bayes->train("悪魔のリドル",
 "悪魔のリドルスレ定時っていうんだぜ コミックス３巻&ＮＴ発売中！  13話ネタバレはお控え願います トーク部分はオッケーです",
 "http://may.2chan.net/b/res/301188074.htm");
$bayes->train("悪魔のリドル",
 "悪魔のリドルスレ定時ボンバーガール コミックス３巻&NT発売中！  13話ネタバレはお控え願います トーク部分はオッケーです！",
 "http://may.2chan.net/b/res/301018982.htm");
$bayes->train("グラップラーバキ",
 "宮元武蔵さんってこのエテ公にやられたんじゃなかったっけ",
 "http://may.2chan.net/b/res/301201971.htm");

//調査
echo $bayes->classifier(
 "悪魔のリドル",
 "http://may.2chan.net/b/res/301018982.htm") . "\n";
echo $bayes->classifier(
 "宮元武蔵",
 "anyany") . "\n";
//*/

