/* ----- BEGIN LICENSE BLOCK -----
 * Version: MPL 1.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the Kagetaka Libraries.
 *
 * The Initial Developer of the Original Code is Hizuya Atsuzaki
 * Portions created by the Initial Developer are Copyright (C) 2003
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s): Hizuya Atsuzaki <hizuya@hizlab.net>
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ----- END LICENSE BLOCK ----- */
package net.hizlab.kagetaka.build;

import net.hizlab.kagetaka.Reporter;
import net.hizlab.kagetaka.Resource;
import net.hizlab.kagetaka.rendering.Document;
import net.hizlab.kagetaka.token.EndToken;
import net.hizlab.kagetaka.token.MetaAttribute;
import net.hizlab.kagetaka.token.MiscToken;
import net.hizlab.kagetaka.token.StartToken;
import net.hizlab.kagetaka.token.TextToken;
import net.hizlab.kagetaka.token.Token;
import net.hizlab.kagetaka.token.TokenManager;
import net.hizlab.kagetaka.token.TokenTypes;
import net.hizlab.kagetaka.util.Charset;
import net.hizlab.kagetaka.util.ContentType;

import net.fclabs.util.Queue;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

/**
 * Parser that reads a tag-structured data file from an input stream and
 * hands its contents back one {@link Token} at a time.
 * <p>
 * When the document does not already carry a usable encoding, the parser
 * starts decoding with <code>Charset.getDefaultEncoding()</code> and keeps the
 * tokens it produces in a look-ahead queue.  If an <code>&lt;?xml ... ?&gt;</code>
 * processing instruction or a <code>META</code> <code>content-type</code>
 * declaration names a charset, the stream is rewound to its start and decoded
 * again with that charset; otherwise the encoding is committed when the end of
 * the head, a body token, or the end of input is reached, and the queued
 * tokens are returned in order.
 * 
 * @author  <A HREF="mailto:hizuya@hizlab.net">Hizuya Atsuzaki</A>
 * @version $Revision: 1.4 $
 */
public class TagParser
	implements InputStreamParser
{
	/** Buffer size used for the buffered input stream and the readers. */
	private static final int INPUT_BUFFER = 8192;
	
	/** Number of bytes that may be read before the stream can no longer be rewound. */
	private static final int MARK_LIMIT   = INPUT_BUFFER * 4;
	
	private Document            document      = null;   // document being built
	private InputStream         is            = null;   // input stream handed to setInputStream
	private BufferedInputStream bis           = null;   // rewindable wrapper; only set while the encoding is undecided
	private TagReader           tagReader     = null;   // tag reader
	private String              encoding      = null;   // encoding name
	private Reporter            reporter      = null;   // reporter used by this parser and its tokens
	private CacheReporter       cacheReporter = null;   // caching reporter; non-null only until the encoding is committed
	
	private String  lastElementName = "html"; // name of the element seen most recently
	
	private boolean commitEncoding  = false;  // true once the encoding has been decided
	private Queue   tokenQueue      = null;   // tokens read ahead before the encoding was decided
	
	/**
	 * Creates a parser.  An input stream must be supplied with
	 * {@link #setInputStream} before {@link #next} is called.
	 */
	public TagParser()
	{
	}
	
	/**
	 * Sets the input stream to parse.
	 * <p>
	 * If the document already names a supported encoding it is used right
	 * away.  Otherwise the stream is wrapped so that it can be rewound,
	 * decoding starts with <code>Charset.getDefaultEncoding()</code>, and
	 * reports are routed through a {@link CacheReporter} until the encoding
	 * is committed.
	 * 
	 * @param     document document information
	 * @param     is input stream of the data to parse
	 * @param     reporter reporter that receives warnings and information
	 * 
	 * @exception IllegalStateException if an input stream was already set
	 */
	public void setInputStream(Document document, InputStream is, Reporter reporter)
	{
		if (this.is != null)
			throw new IllegalStateException("Already set InputStream");
		
		// Read the encoding before touching any field so that a failure here
		// does not leave the parser half initialised.
		String declared = document.getEncoding();
		
		this.document = document;
		this.is       = is;
		this.encoding = declared;
		this.reporter = reporter;
		
		BufferedReader br = null;
		if (declared != null)
			br = createBufferedReader(is, declared);
		
		if (br != null) {
			this.commitEncoding = true;
		} else {
			// The encoding is not decided yet: read speculatively from a
			// rewindable stream.
			this.bis = new BufferedInputStream(is, INPUT_BUFFER);
			this.bis.mark(MARK_LIMIT);
			br = createBufferedReader(bis, Charset.getDefaultEncoding());
			if (br == null) {
				// The default encoding is not supported by this runtime;
				// fall back to the platform default, which always yields a reader.
				br = createBufferedReader(bis, null);
			}
			this.tokenQueue    = new Queue(20);
			this.cacheReporter = new CacheReporter(reporter);
			this.reporter      = cacheReporter;
		}
		
		// Use the field, not the parameter: during the speculative scan the
		// reader must report through the caching reporter like everything else.
		this.tagReader = new TagReader(br, this.reporter);
	}
	
	/**
	 * Creates a buffered reader over the stream.
	 * 
	 * @param     is stream to read from
	 * @param     encoding encoding name, or <code>null</code> to use the
	 *            platform default
	 * 
	 * @return    the reader, or <code>null</code> if the named encoding is
	 *            not supported (a warning is reported in that case)
	 */
	private BufferedReader createBufferedReader(InputStream is, String encoding)
	{
		if (encoding == null)
			return new BufferedReader(new InputStreamReader(is), INPUT_BUFFER);
		
		try {
			BufferedReader br = new BufferedReader(new InputStreamReader(is, encoding), INPUT_BUFFER);
			document.setEncoding(encoding);
			return br;
		} catch (UnsupportedEncodingException e) {
			addWarning("charset.warning.invalid",
			           new String[]{encoding}, null);
			return null;
		}
	}
	
	/**
	 * Tells whether this runtime can decode the named encoding, without
	 * touching any stream.
	 */
	private static boolean isSupportedEncoding(String encoding)
	{
		try {
			// Decoding an empty array only performs the charset lookup.
			new String(new byte[0], encoding);
			return true;
		} catch (UnsupportedEncodingException e) {
			return false;
		}
	}
	
	/**
	 * Returns the name of this parser.
	 * 
	 * @return    parser name
	 */
	public String getParserName()
	{
		return Resource.getMessage("tagparser.name", null);
	}
	
	/**
	 * Returns the description of this parser.
	 * 
	 * @return    parser description
	 */
	public String getParserDescription()
	{
		return Resource.getMessage("tagparser.description", null);
	}
	
	/**
	 * Parses the data and returns the next token.
	 * 
	 * @return    the next token,
	 *            or <code>null</code> when no token is left
	 * 
	 * @exception ParseException if a parse error occurs
	 * @exception IOException    if an IO error occurs
	 * @exception IllegalStateException if no input stream has been set
	 */
	public synchronized Token next()
		throws ParseException, IOException
	{
		if (this.is == null)
			throw new IllegalStateException("No InputStream yet");
		
		// Once the encoding is committed, return the tokens read ahead first.
		if (commitEncoding && tokenQueue != null) {
			Token queued = (Token)tokenQueue.get();
			if (queued != null)
				return queued;
			tokenQueue = null;
		}
		
		for (;;) {
			Tag tag = tagReader.readTag();
			if (tag == null) {
				// End of input: whatever encoding was in use is final.
				if (!commitEncoding)
					commitInPlace();
				if (tokenQueue != null)
					return (Token)tokenQueue.get();
				return null;
			}
			
			Token token = null;
			switch (tag.getType()) {
			case Tag.TAG:
				// A tag without a name refers to the most recent element.
				if (tag.getElement() == null) {
					tag.setElement(lastElementName);
				} else {
					lastElementName = tag.getElement();
				}
				
				if (tag.isEndTag()) {
					token = createEndToken(tag);
				} else {
					token = createStartToken(tag);
				}
				break;
			case Tag.TEXT:
				token = createTextToken(tag);
				break;
			default:
				// <?xml ... ?> seen while the encoding is still undecided.
				if (!commitEncoding && isXmlDeclaration(tag)) {
					if (resetEncoding((String)tag.getAttribute().get("encoding"), tag)) {
						// The stream was rewound; the declaration will be
						// read again with the new encoding.
						continue;
					}
					// Not rewound: keep the declaration as an ordinary token.
				}
				token = createMiscToken(tag);
			}
			
			// No token for this tag: read the next one.
			if (token == null)
				continue;
			
			int type = token.getType();
			
			// Tokens whose content must not be parsed take it verbatim.
			int mode = TokenTypes.isContent(type);
			if (mode != TokenTypes.CONTENT_NORMAL) {
				String name    = token.getName();
				String content = tagReader.readContent(name, (mode == TokenTypes.CONTENT_PAIRTOKEN));
				((StartToken)token).setContent(content);
			}
			
			if (commitEncoding) {
				if (tokenQueue == null)
					return token;
				// The encoding was committed in place during this call while
				// tokens were still queued: keep document order.
				tokenQueue.put(token);
				return (Token)tokenQueue.get();
			}
			
			// Encoding still undecided: queue the token and look for a decision.
			tokenQueue.put(token);
			if (type == TokenTypes.META) {
				// A META may declare the charset.
				checkCharSet(token, tag);
				if (commitEncoding && tokenQueue != null) {
					// Committed without rewinding: hand out the queued tokens.
					return (Token)tokenQueue.get();
				}
				// Otherwise either nothing was decided (keep scanning) or the
				// stream was rewound (start over with the new reader).
			} else if (type == TokenTypes.HEAD_END ||
			           TokenTypes.isBody(type)) {
				// The head is over: the current encoding is final.
				commitInPlace();
				Token head = (Token)tokenQueue.get();
				if (head != null)
					return head;
			}
		}
	}
	
	/** Tells whether the tag is an <code>xml</code> processing instruction with attributes. */
	private boolean isXmlDeclaration(Tag tag)
	{
		return tag.getType() == Tag.PI
		    && "xml".equals(tag.getElement())
		    && tag.getAttribute() != null;
	}
	
	/**
	 * Commits the encoding currently in use without rewinding the stream.
	 * The look-ahead queue is left untouched so that its tokens are still
	 * returned.
	 */
	private void commitInPlace()
	{
		if (cacheReporter != null) {
			// NOTE(review): assumes restart(false) releases the reports cached
			// so far, as the original HEAD_END path relied on - confirm
			// against CacheReporter.
			cacheReporter.restart(false);
			cacheReporter = null;
		}
		commitEncoding = true;
	}
	
	/**
	 * Creates the start token for a tag.
	 * 
	 * @return    the token, or <code>null</code> if the element is unknown
	 *            or the token manager returns none
	 */
	private Token createStartToken(Tag tag)
		throws ParseException
	{
		// Resolve the token type.  The name is upper-cased with a fixed locale
		// so the lookup does not depend on the default locale.
		String fieldName = tag.getElement().toUpperCase(java.util.Locale.ENGLISH) + "_START";
		int    tokenType = TokenTypes.getType(fieldName);
		
		if (tokenType == TokenTypes.UNKNOWN) {
			addWarning("tagparser.warning.tag.unknownstart",
			           new String[]{tag.getElement()}, tag);
			return null;
		}
		
		StartToken token = TokenManager.createStartToken(document,
		                                                 reporter,
		                                                 tag.getLineNumber  (),
		                                                 tag.getColumnNumber(),
		                                                 tokenType,
		                                                 false);
		if (token == null)
			return null;
		
		token.initAttribute(tag.getAttribute());
		
		// An empty-type element that the tag does not mark as empty is
		// reported at INFO level, not as a warning.
		if (TokenTypes.isEmpty(tokenType) && !tag.isEmpty()) {
			reporter.report(Reporter.INFO,
			                Resource.getMessage("tagparser.warning.tag.notempty", new String[]{lastElementName}),
			                tag.getLineNumber  (),
			                tag.getColumnNumber());
		}
		
		return token;
	}
	
	/**
	 * Creates the end token for a tag.
	 * 
	 * @return    the token, or <code>null</code> if the element has no end
	 *            token (a warning is reported)
	 */
	private Token createEndToken(Tag tag)
		throws ParseException
	{
		String upperName = tag.getElement().toUpperCase(java.util.Locale.ENGLISH);
		int    tokenType = TokenTypes.getType(upperName + "_END");
		
		if (tokenType == TokenTypes.UNKNOWN) {
			// Distinguish "known element without an end token" from
			// "unknown element".
			String key = (TokenTypes.getType(upperName + "_START") != TokenTypes.UNKNOWN
			              ? "tagparser.warning.tag.notend"
			              : "tagparser.warning.tag.unknownend");
			addWarning(key, new String[]{tag.getElement()}, tag);
			return null;
		}
		
		return TokenManager.createEndToken(document,
		                                   reporter,
		                                   tag.getLineNumber  (),
		                                   tag.getColumnNumber(),
		                                   tokenType,
		                                   false);
	}
	
	/** Creates a text token from the tag's text. */
	private Token createTextToken(Tag tag)
		throws ParseException
	{
		return new TextToken(document,
		                     reporter,
		                     tag.getLineNumber  (),
		                     tag.getColumnNumber(),
		                     tag.getText        ());
	}
	
	/**
	 * Creates a token for a DTD, comment, processing instruction or other
	 * miscellaneous tag.  Any other tag type yields a token of type
	 * <code>TokenTypes.UNKNOWN</code>.
	 */
	private Token createMiscToken(Tag tag)
		throws ParseException
	{
		int type = TokenTypes.UNKNOWN;
		switch (tag.getType()) {
		case Tag.DTD    : type = TokenTypes.DTD    ; break;
		case Tag.COMMENT: type = TokenTypes.COMMENT; break;
		case Tag.PI     : type = TokenTypes.PI     ; break;
		case Tag.MISC   : type = TokenTypes.MISC   ; break;
		}
		
		return new MiscToken(document,
		                     reporter,
		                     tag.getLineNumber  (),
		                     tag.getColumnNumber(),
		                     tag.getInnerText   (),
		                     type);
	}
	
	/**
	 * Switches the encoding if the META token is a <code>content-type</code>
	 * declaration that carries a <code>charset</code> parameter.
	 */
	private void checkCharSet(Token token, Tag tag)
		throws IOException
	{
		if (token.getType() != TokenTypes.META)
			return;
		
		MetaAttribute meta = (MetaAttribute)((StartToken)token).getAttribute();
		if (meta == null)
			return;
		
		String httpEquiv = meta.getHttpEquiv();
		if (httpEquiv == null || !httpEquiv.equalsIgnoreCase("content-type"))
			return;
		
		String content = meta.getContent();
		if (content == null)
			return;
		
		String charset = null;
		try {
			charset = (new ContentType(content)).getParameter("charset");
		} catch (java.text.ParseException ignored) {
			// A malformed content value simply declares no charset.
		}
		if (charset == null)
			return;
		
		resetEncoding(charset, tag);
	}
	
	/**
	 * Switches to the declared charset by rewinding the stream and decoding
	 * it again from the start.
	 * <p>
	 * If the charset is missing, unknown or unsupported, nothing changes and
	 * the speculative scan goes on.  If the stream can no longer be rewound,
	 * the encoding in use is committed as it is and the tokens read so far
	 * are kept.
	 * 
	 * @param     charset declared charset name, may be <code>null</code>
	 * @param     tag tag that carried the declaration
	 * 
	 * @return    <code>true</code> if the stream was rewound and the tokens
	 *            read so far were discarded; <code>false</code> otherwise
	 */
	private boolean resetEncoding(String charset, Tag tag)
		throws IOException
	{
		if (charset == null)
			return false;
		
		String encoding = Charset.toEncoding(charset);
		// Check support before rewinding so that an unusable declaration
		// leaves the current reader and queue intact.
		if (encoding == null || !isSupportedEncoding(encoding)) {
			addWarning("charset.warning.invalid",
			           new String[]{charset}, null);
			return false;
		}
		
		try {
			bis.reset();
		} catch (IOException e) {
			// Too much was read to rewind.  Carry on with the reader in use
			// and keep the tokens already read instead of dropping them.
			addWarning("charset.warning.reset",
			           new String[]{charset}, tag);
			commitInPlace();
			return false;
		}
		
		BufferedReader br = createBufferedReader(bis, encoding);
		if (br == null) {
			// Not expected after the support check above; never give the tag
			// reader a null reader.
			br = createBufferedReader(bis, null);
		}
		this.tagReader = new TagReader(br, reporter);
		
		// Everything read so far will be produced again from the rewound stream.
		this.tokenQueue.clear();
		this.tokenQueue     = null;
		this.cacheReporter.restart(true);
		this.cacheReporter  = null;
		this.encoding       = encoding;
		this.commitEncoding = true;
		
		reporter.report(Reporter.INFO,
		                Resource.getMessage("charset.info.encoding", new String[]{encoding}),
		                tag.getLineNumber  (),
		                tag.getColumnNumber());
		return true;
	}
	
	/**
	 * Returns the encoding name recorded for the data being read.
	 * 
	 * @return    encoding name, or <code>null</code> if none is recorded
	 */
	public String getEncoding()
	{
		return encoding;
	}
	
	/**
	 * Closes the parser's streams.  A failure to close is reported as a
	 * warning and does not prevent the other stream from being closed.
	 */
	public void close()
	{
		try {
			if (bis != null)
				bis.close();
		} catch (IOException e) {
			addWarning("tagparser.warning.stream.close",
			           new String[]{e.toString()}, null);
		}
		try {
			if (is != null)
				is.close();
		} catch (IOException e) {
			addWarning("tagparser.warning.stream.close",
			           new String[]{e.toString()}, null);
		}
	}
	
	/**
	 * Reports a warning.
	 * 
	 * @param     key resource key of the message
	 * @param     args message arguments
	 * @param     tag tag that supplies the position, or <code>null</code>
	 *            to report position 0, 0
	 */
	private void addWarning(String key, String[] args, Tag tag)
	{
		int line   = (tag != null ? tag.getLineNumber  () : 0);
		int column = (tag != null ? tag.getColumnNumber() : 0);
		reporter.report(Reporter.WARNING,
		                Resource.getMessage(key, args),
		                line,
		                column);
	}
}
