/*
 * This software is distributed under following license based on modified BSD
 * style license.
 * ----------------------------------------------------------------------
 * 
 * Copyright 2009 The Nimbus2 Project. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer. 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE NIMBUS PROJECT ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 * NO EVENT SHALL THE NIMBUS PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * The views and conclusions contained in the software and documentation are
 * those of the authors and should not be interpreted as representing official
 * policies, either expressed or implied, of the Nimbus2 Project.
 */
package jp.ossc.nimbus.util.converter;

import java.io.*;
import java.util.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.w3c.dom.*;
import org.xml.sax.*;

import org.cyberneko.html.parsers.DOMParser;

/**
 * Converter between a DOM {@link Document} and an HTML byte stream.<p>
 * HTML is parsed with the NekoHTML {@link DOMParser}; a DOM is serialized
 * through a JAXP {@link Transformer}, optionally driven by an XSL file.<br>
 * 
 * @author M.Takata
 */
public class DOMHTMLConverter implements StreamStringConverter, Serializable{
    
    private static final long serialVersionUID = -6085930913740530834L;
    
    /**
     * Convert type constant selecting the DOM to HTML direction.<p>
     * Alias of the inherited <code>OBJECT_TO_STREAM</code> constant.
     */
    public static final int DOM_TO_HTML = OBJECT_TO_STREAM;
    
    /**
     * Convert type constant selecting the HTML to DOM direction.<p>
     * Alias of the inherited <code>STREAM_TO_OBJECT</code> constant.
     */
    public static final int HTML_TO_DOM = STREAM_TO_OBJECT;
    
    /**
     * Lookup table used to translate a configured encoding name before it is
     * handed to the parser or the transformer.<p>
     * Note that, despite the constant's name, the keys are the Java-style
     * encoding names (e.g. <code>SJIS</code>, <code>MS932</code>) and the
     * values are the IANA-style charset names (e.g. <code>SHIFT_JIS</code>,
     * <code>WINDOWS-31J</code>). An encoding name that has no entry here is
     * used as it is.<br>
     * The map is a plain mutable {@link HashMap} that is populated once by the
     * static initializer below.
     */
    public static final Map<String, String> IANA2JAVA_ENCODING_MAP = new HashMap<String, String>();
    
    static{
        IANA2JAVA_ENCODING_MAP.put("ISO8859_1", "ISO-8859-1");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_2", "ISO-8859-2");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_3", "ISO-8859-3");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_4", "ISO-8859-4");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_5", "ISO-8859-5");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_6", "ISO-8859-6");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_7", "ISO-8859-7");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_8", "ISO-8859-8");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_9", "ISO-8859-9");
        IANA2JAVA_ENCODING_MAP.put("ISO8859_15", "ISO-8859-15");
        IANA2JAVA_ENCODING_MAP.put("Big5", "BIG5");
        IANA2JAVA_ENCODING_MAP.put("CP037", "EBCDIC-CP-US");
        IANA2JAVA_ENCODING_MAP.put("CP273", "IBM273");
        IANA2JAVA_ENCODING_MAP.put("CP277", "EBCDIC-CP-DK");
        IANA2JAVA_ENCODING_MAP.put("CP278", "EBCDIC-CP-FI");
        IANA2JAVA_ENCODING_MAP.put("CP280", "EBCDIC-CP-IT");
        IANA2JAVA_ENCODING_MAP.put("CP284", "EBCDIC-CP-ES");
        IANA2JAVA_ENCODING_MAP.put("CP285", "EBCDIC-CP-GB");
        IANA2JAVA_ENCODING_MAP.put("CP290", "EBCDIC-JP-KANA");
        IANA2JAVA_ENCODING_MAP.put("CP297", "EBCDIC-CP-FR");
        IANA2JAVA_ENCODING_MAP.put("CP420", "EBCDIC-CP-AR1");
        IANA2JAVA_ENCODING_MAP.put("CP424", "EBCDIC-CP-HE");
        IANA2JAVA_ENCODING_MAP.put("CP437", "IBM437");
        IANA2JAVA_ENCODING_MAP.put("CP500", "EBCDIC-CP-CH");
        IANA2JAVA_ENCODING_MAP.put("CP775", "IBM775");
        IANA2JAVA_ENCODING_MAP.put("CP850", "IBM850");
        IANA2JAVA_ENCODING_MAP.put("CP852", "IBM852");
        IANA2JAVA_ENCODING_MAP.put("CP855", "IBM855");
        IANA2JAVA_ENCODING_MAP.put("CP857", "IBM857");
        IANA2JAVA_ENCODING_MAP.put("CP858", "IBM00858");
        IANA2JAVA_ENCODING_MAP.put("CP860", "IBM860");
        IANA2JAVA_ENCODING_MAP.put("CP861", "IBM861");
        IANA2JAVA_ENCODING_MAP.put("CP862", "IBM862");
        IANA2JAVA_ENCODING_MAP.put("CP863", "IBM863");
        IANA2JAVA_ENCODING_MAP.put("CP864", "IBM864");
        IANA2JAVA_ENCODING_MAP.put("CP865", "IBM865");
        IANA2JAVA_ENCODING_MAP.put("CP866", "IBM866");
        IANA2JAVA_ENCODING_MAP.put("CP868", "IBM868");
        IANA2JAVA_ENCODING_MAP.put("CP869", "IBM869");
        IANA2JAVA_ENCODING_MAP.put("CP870", "EBCDIC-CP-ROECE");
        IANA2JAVA_ENCODING_MAP.put("CP871", "EBCDIC-CP-IS");
        IANA2JAVA_ENCODING_MAP.put("CP918", "EBCDIC-CP-AR2");
        IANA2JAVA_ENCODING_MAP.put("CP924", "IBM00924");
        IANA2JAVA_ENCODING_MAP.put("CP1026", "IBM1026");
        IANA2JAVA_ENCODING_MAP.put("Cp01140", "IBM01140");
        IANA2JAVA_ENCODING_MAP.put("Cp01141", "IBM01141");
        IANA2JAVA_ENCODING_MAP.put("Cp01142", "IBM01142");
        IANA2JAVA_ENCODING_MAP.put("Cp01143", "IBM01143");
        IANA2JAVA_ENCODING_MAP.put("Cp01144", "IBM01144");
        IANA2JAVA_ENCODING_MAP.put("Cp01145", "IBM01145");
        IANA2JAVA_ENCODING_MAP.put("Cp01146", "IBM01146");
        IANA2JAVA_ENCODING_MAP.put("Cp01147", "IBM01147");
        IANA2JAVA_ENCODING_MAP.put("Cp01148", "IBM01148");
        IANA2JAVA_ENCODING_MAP.put("Cp01149", "IBM01149");
        IANA2JAVA_ENCODING_MAP.put("EUCJIS", "EUC-JP");
        IANA2JAVA_ENCODING_MAP.put("GB2312", "GB2312");
        IANA2JAVA_ENCODING_MAP.put("ISO2022KR", "ISO-2022-KR");
        IANA2JAVA_ENCODING_MAP.put("ISO2022CN", "ISO-2022-CN");
        IANA2JAVA_ENCODING_MAP.put("JIS", "ISO-2022-JP");
        IANA2JAVA_ENCODING_MAP.put("KOI8_R", "KOI8-R");
        IANA2JAVA_ENCODING_MAP.put("KSC5601", "EUC-KR");
        IANA2JAVA_ENCODING_MAP.put("GB18030", "GB18030");
        IANA2JAVA_ENCODING_MAP.put("GBK", "GBK");
        IANA2JAVA_ENCODING_MAP.put("SJIS", "SHIFT_JIS");
        IANA2JAVA_ENCODING_MAP.put("MS932", "WINDOWS-31J");
        IANA2JAVA_ENCODING_MAP.put("UTF8", "UTF-8");
        IANA2JAVA_ENCODING_MAP.put("Unicode", "UTF-16");
        IANA2JAVA_ENCODING_MAP.put("UnicodeBig", "UTF-16BE");
        IANA2JAVA_ENCODING_MAP.put("UnicodeLittle", "UTF-16LE");
        IANA2JAVA_ENCODING_MAP.put("JIS0201", "X0201");
        IANA2JAVA_ENCODING_MAP.put("JIS0208", "X0208");
        IANA2JAVA_ENCODING_MAP.put("JIS0212", "ISO-IR-159");
        IANA2JAVA_ENCODING_MAP.put("CP1047", "IBM1047");
    }
    
    /**
     * Convert type.<p>
     * Either {@link #DOM_TO_HTML} or {@link #HTML_TO_DOM}.
     */
    protected int convertType;
    
    /**
     * Character encoding used when converting DOM to HTML.<p>
     */
    protected String characterEncodingToStream;
    
    /**
     * Character encoding used when converting HTML to DOM.<p>
     */
    protected String characterEncodingToObject;
    
    /**
     * Path of the XSL file used when converting DOM to HTML.<p>
     */
    protected String xslFilePath;
    
    /**
     * Flag that tells whether DOM parsing is performed synchronously.<p>
     * The default is false, i.e. parsing is not synchronized.<br>
     */
    protected boolean isSynchronizedDomParse;
    
    /**
     * Creates a converter that converts DOM to HTML.<p>
     */
    public DOMHTMLConverter(){
        this(DOM_TO_HTML);
    }
    
    /**
     * Creates a converter of the specified convert type.<p>
     *
     * @param type convert type
     * @see #DOM_TO_HTML
     * @see #HTML_TO_DOM
     */
    public DOMHTMLConverter(int type){
        convertType = type;
    }
    
    /**
     * Sets the convert type.<p>
     *
     * @param type convert type
     * @see #getConvertType()
     * @see #DOM_TO_HTML
     * @see #HTML_TO_DOM
     */
    @Override
    public void setConvertType(int type){
        convertType = type;
    }
    
    /**
     * Gets the convert type.<p>
     *
     * @return convert type
     * @see #setConvertType(int)
     */
    public int getConvertType(){
        return convertType;
    }
    
    /**
     * Sets the character encoding used when converting DOM to HTML.<p>
     * 
     * @param encoding character encoding
     */
    @Override
    public void setCharacterEncodingToStream(String encoding){
        characterEncodingToStream = encoding;
    }
    
    /**
     * Gets the character encoding used when converting DOM to HTML.<p>
     * 
     * @return character encoding
     */
    public String getCharacterEncodingToStream(){
        return characterEncodingToStream;
    }
    
    /**
     * Sets the character encoding used when converting HTML to DOM.<p>
     * 
     * @param encoding character encoding
     */
    @Override
    public void setCharacterEncodingToObject(String encoding){
        characterEncodingToObject = encoding;
    }
    
    /**
     * Gets the character encoding used when converting HTML to DOM.<p>
     * 
     * @return character encoding
     */
    public String getCharacterEncodingToObject(){
        return characterEncodingToObject;
    }
    
    /**
     * Sets the path of the XSL file used when converting DOM to HTML.<p>
     *
     * @param path path of the XSL file
     */
    public void setXSLFilePath(String path){
        xslFilePath = path;
    }
    
    /**
     * Gets the path of the XSL file used when converting DOM to HTML.<p>
     *
     * @return path of the XSL file
     */
    public String getXSLFilePath(){
        return xslFilePath;
    }
    
    /**
     * Sets whether DOM parsing is performed synchronously.<p>
     * The default is false, i.e. parsing is not synchronized.<br>
     * 
     * @param isSync true to synchronize parsing
     */
    public void setSynchronizedDomParse(boolean isSync){
        isSynchronizedDomParse = isSync;
    }
    
    /**
     * Tells whether DOM parsing is performed synchronously.<p>
     * 
     * @return true if parsing is synchronized
     */
    public boolean isSynchronizedDomParse(){
        return isSynchronizedDomParse;
    }
    
    /**
     * Converts the specified object according to the convert type.<p>
     * With {@link #DOM_TO_HTML} the object is passed to
     * {@link #convertToStream(Object)}. With {@link #HTML_TO_DOM} the object
     * must be a {@link File} or an {@link InputStream} containing HTML.
     *
     * @param obj object to convert
     * @return converted object, or null if the specified object is null
     * @exception ConvertException if the input type or the convert type is
     *            invalid, or if the conversion fails
     */
    @Override
    public Object convert(Object obj) throws ConvertException{
        if(obj == null){
            return null;
        }
        switch(convertType){
        case DOM_TO_HTML:
            return convertToStream(obj);
        case HTML_TO_DOM:
            if(obj instanceof File){
                return toDOM((File)obj);
            }else if(obj instanceof InputStream){
                return toDOM((InputStream)obj);
            }else{
                throw new ConvertException(
                    "Invalid input type : " + obj.getClass()
                );
            }
        default:
            throw new ConvertException(
                "Invalid convert type : " + convertType
            );
        }
    }
    
    /**
     * Converts a {@link Document} to an HTML stream.<p>
     *
     * @param obj DOM document
     * @return HTML stream
     * @exception ConvertException if the specified object is not a
     *            {@link Document} (including null), or if the conversion fails
     */
    @Override
    public InputStream convertToStream(Object obj) throws ConvertException{
        if(obj instanceof Document){
            return toHTML((Document)obj);
        }else{
            // Guard the null case so that the caller gets the documented
            // ConvertException instead of a NullPointerException.
            throw new ConvertException(
                "Invalid input type : " + (obj == null ? null : obj.getClass())
            );
        }
    }
    
    /**
     * Converts an HTML stream to a {@link Document}.<p>
     *
     * @param is HTML stream
     * @return DOM document
     * @exception ConvertException if the conversion fails
     */
    @Override
    public Object convertToObject(InputStream is) throws ConvertException{
        return toDOM(is);
    }
    
    /**
     * Translates a configured encoding name through
     * {@link #IANA2JAVA_ENCODING_MAP}.<p>
     *
     * @param encoding configured encoding name
     * @return the mapped name, or the specified name itself when the map has
     *         no entry for it
     */
    private static String resolveEncoding(String encoding){
        final String mapped = IANA2JAVA_ENCODING_MAP.get(encoding);
        return mapped == null ? encoding : mapped;
    }
    
    /**
     * Parses the HTML read from the specified stream into a DOM.<p>
     * When a character encoding for HTML to DOM conversion is configured, it
     * is translated through {@link #IANA2JAVA_ENCODING_MAP} and set on the
     * input source. This method does not close the stream itself.
     *
     * @param is HTML stream
     * @return parsed DOM document
     * @exception ConvertException if parsing or reading fails
     */
    protected Document toDOM(InputStream is) throws ConvertException{
        DOMParser parser = new DOMParser();
        try{
            final InputSource inputSource = new InputSource(is);
            if(characterEncodingToObject != null){
                inputSource.setEncoding(
                    resolveEncoding(characterEncodingToObject)
                );
            }
            if(isSynchronizedDomParse){
                // The lock is the parser's Class object, so it is shared by
                // every converter instance that has this flag enabled.
                final Object lock = parser.getClass();
                synchronized(lock){
                    parser.parse(inputSource);
                }
            }else{
                parser.parse(inputSource);
            }
            return parser.getDocument();
        }catch(SAXException e){
            throw new ConvertException(e);
        }catch (IOException e){
            throw new ConvertException(e);
        }
    }
    
    /**
     * Parses the specified HTML file into a DOM.<p>
     * The file stream opened here is always closed before this method
     * returns, whether parsing succeeds or fails.
     *
     * @param file HTML file
     * @return parsed DOM document
     * @exception ConvertException if the file cannot be opened, or if parsing
     *            fails
     */
    protected Document toDOM(File file) throws ConvertException{
        InputStream is = null;
        try{
            is = new FileInputStream(file);
            return toDOM(is);
        }catch(IOException e){
            throw new ConvertException(e);
        }finally{
            if(is != null){
                try{
                    is.close();
                }catch(IOException ignored){
                    // The document has already been built (or the parse
                    // failure is already propagating); a failure to close the
                    // stream must not mask that outcome.
                }
            }
        }
    }
    
    /**
     * Serializes the specified DOM into an in-memory HTML stream.<p>
     * When an XSL file path is configured, the transformer is created from a
     * {@link StreamSource} built with that path; otherwise a transformer
     * without a stylesheet is used. When a character encoding for DOM to HTML
     * conversion is configured, it is translated through
     * {@link #IANA2JAVA_ENCODING_MAP} and set as the output encoding.
     *
     * @param document DOM document
     * @return stream over the serialized bytes
     * @exception ConvertException if the transformer cannot be created, or if
     *            the transformation fails
     */
    protected InputStream toHTML(Document document) throws ConvertException{
        try{
            final TransformerFactory tFactory
                 = TransformerFactory.newInstance();
            Transformer transformer = null;
            if(xslFilePath == null){
                transformer = tFactory.newTransformer();
            }else{
                transformer = tFactory.newTransformer(
                    new StreamSource(xslFilePath)
                );
            }
            if(characterEncodingToStream != null){
                transformer.setOutputProperty(
                    OutputKeys.ENCODING,
                    resolveEncoding(characterEncodingToStream)
                );
            }
            // The whole output is buffered in memory and handed back as a
            // stream over that buffer.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            transformer.transform(
                new DOMSource(document),
                new StreamResult(baos)
            );
            return new ByteArrayInputStream(baos.toByteArray());
        }catch(TransformerFactoryConfigurationError e){
            throw new ConvertException(e);
        }catch(TransformerConfigurationException e){
            throw new ConvertException(e);
        }catch(TransformerException e){
            throw new ConvertException(e);
        }
    }
}
