/*
 * Canonicalizer.scala
 *
 * Utility for producing the canonical (C14N) form of an XML document,
 * built on the Apache XML Security library.
 */

package ixbrl

import java.io.ByteArrayInputStream

import javax.xml.parsers.DocumentBuilder
import javax.xml.parsers.DocumentBuilderFactory
import org.w3c.dom.Document

import org.apache.xml.security.c14n._

/**
 * Produces the canonical form (Canonical XML 1.0, comments retained) of XML
 * text, using the Apache XML Security library.
 */
object Canonicalizer {

    // The Apache XML Security library has to be initialised before a
    // canonicalizer instance can be requested from it; placing the call in
    // the object body runs it once, when this object is first accessed.
    org.apache.xml.security.Init.init()

    /** Algorithm URI requested from the library: Canonical XML 1.0 with comments. */
    private val C14nWithComments =
        "http://www.w3.org/TR/2001/REC-xml-c14n-20010315#WithComments"

    /**
     * Demo entry point: canonicalizes a small sample document and prints the
     * result to standard output.
     *
     * @param args command-line arguments (unused)
     */
    def main(args:Array[String]) : Unit = {

        val input = "" +
        "<!DOCTYPE doc [<!ATTLIST e9 attr CDATA \"default\">]>\n" +
        "<!-- Comment 2 --><doc>\n" +
        "   <e1   />\n" +
        "   <e2   ></e2>\n" +
        "   <e3    name = \"elem3\"   id=\"elem3\"    />\n" +
        "   <e4    name=\"elem4\"   id=\"elem4\"    ></e4>\n" +
        "   <e5 a:attr=\"out\" b:attr=\"sorted\" attr2=\"all\" attr=\"I'm\"\n" +
        "       xmlns:b=\"http://www.ietf.org\"\n" +
        "       xmlns:a=\"http://www.w3.org\"\n" +
        "       xmlns=\"http://example.org\"/>\n" +
        "   <e6 xmlns=\"\" xmlns:a=\"http://www.w3.org\">\n" +
        "       <e7 xmlns=\"http://www.ietf.org\">\n" +
        "           <e8 xmlns=\"\" xmlns:a=\"http://www.w3.org\">\n" +
        "               <e9 xmlns=\"\" xmlns:a=\"http://www.ietf.org\"/>\n" +
        "               <text>&#169;</text>\n" +
        "           </e8>\n" +
        "       </e7>\n" +
        "   </e6>\n" +
        "</doc><!-- Comment 3 -->\n"

        Console.println(canonicalize(input))
    }

    /**
     * Parses `input` as XML and returns its canonical form as a string.
     *
     * The parser is namespace-aware and validating; validation problems are
     * handed to an `IgnoreAllErrorHandler`, so they are meant to be discarded
     * rather than reported.
     *
     * @param input the XML document text
     * @return the canonicalized document, comments included
     */
    def canonicalize(input:String):String = {
        val factory = DocumentBuilderFactory.newInstance()
        factory.setNamespaceAware(true)
        factory.setValidating(true)

        val builder = factory.newDocumentBuilder()

        // this is to throw away all validation warnings
        builder.setErrorHandler(
            new org.apache.xml.security.utils.IgnoreAllErrorHandler())

        // The input is already decoded text, so feed the parser characters
        // instead of bytes. Going through getBytes() with the platform
        // default charset corrupts non-ASCII content whenever that charset
        // differs from the encoding the parser assumes (the one named in the
        // XML declaration, or UTF-8 when there is none).
        val source = new org.xml.sax.InputSource(new java.io.StringReader(input))
        val doc = builder.parse(source)

        val c14n = org.apache.xml.security.c14n.Canonicalizer.getInstance(
            C14nWithComments)
        val outputBytes = c14n.canonicalizeSubtree(doc)

        // Canonical XML is serialized as UTF-8, so decode it as UTF-8 rather
        // than with whatever the platform default charset happens to be.
        new String(outputBytes, "UTF-8")
    }
}
