
// 郵便番号データは
// http://www.post.japanpost.jp/zipcode/download.html
// から download して、
// $ nkf -Sw -Lu として、utf-8, 半角カナ -> 全角カナ 変換して使う。
//
// http://localhost:28017/test/zipcodes/?limit=-5 とすれば、最初の5 件を確認できる。
//

package mongodb

import scala.io.Source
import com.mongodb._
import com.osinka.mongodb._
import com.osinka.mongodb.Preamble._

/**
 * Command-line importer that loads a zip-code CSV file into a MongoDB collection.
 *
 * Every run drops the target collection first and then inserts one document per
 * CSV record, so the collection always reflects the current contents of the file.
 */
object importZip {

    /** Path of the CSV file to import. */
    val FILE_PATH: String = "./data/zipcode/1.csv"
    /** Port of the MongoDB server on localhost. */
    val DB_PORT: Int = 27017
    /** Name of the database that holds the collection. */
    val DB_NAME: String = "test"
    /** Name of the collection that is dropped and refilled by the import. */
    val COLLECTION_NAME: String = "zipcodes"

    /** Number of comma-separated fields that make up one complete record. */
    private val FIELD_COUNT = 15

    /**
     * Removes one pair of enclosing double quotes, e.g. `"foo"` becomes `foo`.
     *
     * @param line the raw field text
     * @return the text without its enclosing quotes; input shorter than two
     *         characters or not quoted at both ends is returned unchanged
     */
    def strip(line: String): String =
        if (line.length >= 2 && line.charAt(0) == '"' && line.charAt(line.length - 1) == '"')
            line.substring(1, line.length - 1)
        else
            line

    /**
     * Entry point: runs the import and prints the elapsed wall-clock time in seconds.
     *
     * @param args command-line arguments (unused)
     */
    def main(args: Array[String]): Unit = {
        val start = System.nanoTime()
        action()
        val end = System.nanoTime()
        println("time(s):" + (end - start) / 1e9)
    }

    /**
     * Builds the document for one record.
     *
     * Fields 0-8 are stored as strings with their enclosing quotes removed,
     * fields 9-14 are parsed as integers.
     *
     * @param fields the split record; must contain at least FIELD_COUNT entries
     * @return the document as a key/value map
     * @throws NumberFormatException if one of fields 9-14 is not an integer
     */
    private def toDocument(fields: Array[String]): Map[String, Any] =
        Map[String, Any](
            "jis" -> strip(fields(0)),
            "zip_old" -> strip(fields(1)),
            "zip" -> strip(fields(2)),
            "addr1_kana" -> strip(fields(3)),
            "addr2_kana" -> strip(fields(4)),
            "addr3_kana" -> strip(fields(5)),
            "addr1" -> strip(fields(6)),
            "addr2" -> strip(fields(7)),
            "addr3" -> strip(fields(8)),
            "c1" -> Integer.parseInt(fields(9)),
            "c2" -> Integer.parseInt(fields(10)),
            "c3" -> Integer.parseInt(fields(11)),
            "c4" -> Integer.parseInt(fields(12)),
            "c5" -> Integer.parseInt(fields(13)),
            "c6" -> Integer.parseInt(fields(14))
        )

    /**
     * Drops the target collection and re-imports every record of FILE_PATH into it.
     *
     * Progress is printed every 10000 records and the total count at the end.
     * Both the MongoDB connection and the input file are closed when the import
     * finishes, including when it fails with an exception.
     */
    def action(): Unit = {
        val connection = new Mongo("localhost", DB_PORT)
        try {
            val dbColl = connection.getDB(DB_NAME).getCollection(COLLECTION_NAME)

            // Remove the existing data so that a re-run does not duplicate records.
            dbColl.drop()
            val zipcodes = dbColl.asScala

            var count = 0
            // Text of a record seen so far whose lines do not yet add up to FIELD_COUNT fields.
            var pending = ""

            // The file header states that the data is converted to UTF-8 before use,
            // so decode it as UTF-8 instead of relying on the platform default charset.
            val source = Source.fromFile(FILE_PATH, "UTF-8")
            try {
                source.getLines().foreach { rawLine =>
                    val line = rawLine.trim
                    val own = line.split(",")

                    // Handle records that are split across several lines.
                    val fields =
                        if (own.size == FIELD_COUNT) {
                            // A complete line stands on its own; earlier fragments are discarded.
                            own
                        } else {
                            println(rawLine)
                            println(own.size)
                            // Join the raw text before splitting, so that a break in the
                            // middle of a field (or right before a comma) does not
                            // introduce a spurious extra field.
                            (pending + line).split(",")
                        }

                    if (fields.size >= FIELD_COUNT) {
                        val zip = toDocument(fields)
                        zipcodes << zip

                        pending = ""
                        count += 1
                        if (count % 10000 == 0) println("done ... " + count)
                    } else {
                        pending += line
                    }
                }
            } finally {
                source.close()
            }

            // Report leftover fragments instead of dropping them silently.
            if (pending.length > 0) {
                Console.err.println("incomplete trailing record ignored: " + pending)
            }
            println("count=" + count)
        } finally {
            connection.close()
        }
    }
}

