/*******************************************************************************
 * ManjyuRss
 * Copyright (C) 2012 Toshiki IGA
 * 
 * This library is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this library.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
/*******************************************************************************
 * Copyright (c) 2012 Toshiki IGA and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *      Toshiki IGA - initial API and implementation
 *******************************************************************************/
/*******************************************************************************
 * Copyright 2012 Toshiki IGA and others.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package org.manjyu.rss;

import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.logging.Logger;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamSource;

import org.manjyu.rss.util.ManjyuRssDateUtil;
import org.manjyu.rss.vo.ManjyuRssCategory;
import org.manjyu.rss.vo.ManjyuRssChannel;
import org.manjyu.rss.vo.ManjyuRssCloud;
import org.manjyu.rss.vo.ManjyuRssEnclosure;
import org.manjyu.rss.vo.ManjyuRssImage;
import org.manjyu.rss.vo.ManjyuRssItem;
import org.manjyu.rss.vo.ManjyuRssSource;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * RSS 2.0 Parser.
 * <p>
 * Subclasses receive the parsed feed through the two callbacks
 * {@link #processChannel(ManjyuRssChannel)} and
 * {@link #processItem(ManjyuRssItem)}; {@link #parse(InputStream)} drives the
 * XML parsing and invokes them via a {@code ManjyuRssParserHandler}.
 * 
 * @author Toshiki Iga
 */
public abstract class ManjyuRssParser {
	/** Name of the logger used for diagnostic messages of this library. */
	private static final String LOGGER_NAME = "org.manjyu.rss";

	/** Xerces feature: whether the external DTD subset is loaded by a non-validating parser. */
	private static final String FEATURE_LOAD_EXTERNAL_DTD = "http://apache.org/xml/features/nonvalidating/load-external-dtd";

	/** SAX2 standard feature: whether external general entities are resolved. */
	private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES = "http://xml.org/sax/features/external-general-entities";

	/** SAX2 standard feature: whether external parameter entities are resolved. */
	private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES = "http://xml.org/sax/features/external-parameter-entities";

	/**
	 * User code entry point to read channel.
	 * 
	 * @param channel the channel information collected by the handler.
	 * @throws IOException if the user code fails to process the channel.
	 */
	protected abstract void processChannel(final ManjyuRssChannel channel) throws IOException;

	/**
	 * User code entry point to read each item.
	 * 
	 * @param item the item that has just been read completely.
	 * @throws IOException if the user code fails to process the item.
	 */
	protected abstract void processItem(final ManjyuRssItem item) throws IOException;

	/**
	 * Parses the given stream as an RSS document and fires the
	 * {@code processChannel} / {@code processItem} callbacks while reading.
	 * <p>
	 * An {@link XMLReader} is preferred because it allows parser settings to
	 * be controlled. If no XMLReader can be created, an identity
	 * {@link Transformer} is used instead to feed the SAX handler.
	 * <p>
	 * This method does not close {@code inStream}.
	 * 
	 * @param inStream stream of the RSS XML document.
	 * @throws IOException if reading or parsing the XML fails, or if a
	 *             callback fails (SAX and transformer errors are wrapped).
	 */
	public void parse(final InputStream inStream) throws IOException {
		final ManjyuRssParserHandler handler = new ManjyuRssParserHandler(this);

		// Prefer XMLReader to control XML parser settings.
		XMLReader reader;
		try {
			reader = XMLReaderFactory.createXMLReader();
		} catch (SAXException exCreateXMLReader) {
			////////////////////////////////////////////////
			// In case of simple parser

			// Some environments (i.e. JDK 1.4.2?) fail to create an XMLReader:
			// XMLReaderFactory.createXMLReader() sometimes raises
			// "org.xml.sax.SAXException: System property org.xml.sax.driver
			// not specified".
			Logger.getLogger(LOGGER_NAME).fine("Fail to get XMLReader instance: " + exCreateXMLReader.toString());

			parseWithTransformer(inStream, handler);
			return;
		}

		////////////////////////////////////////////////
		// In case of XMLReader

		// Feeds are untrusted input: avoid reading the external DTD and do not
		// resolve external entities (XXE). DOCTYPE declarations themselves
		// stay allowed. All settings are best effort; a parser that does not
		// know or support a feature must not make parsing fail.
		disableFeature(reader, FEATURE_LOAD_EXTERNAL_DTD, "Fail to avoid reading external DTD defs: ");
		disableFeature(reader, FEATURE_EXTERNAL_GENERAL_ENTITIES, "Fail to disable external general entities: ");
		disableFeature(reader, FEATURE_EXTERNAL_PARAMETER_ENTITIES, "Fail to disable external parameter entities: ");

		reader.setContentHandler(handler);

		try {
			reader.parse(new InputSource(inStream));
		} catch (SAXException ex) {
			throw new IOException("Error occured processing XML parse: " + ex.toString(), ex);
		}
	}

	/**
	 * Fallback parsing route: pushes the stream through an identity
	 * transformation whose SAX events are delivered to the handler.
	 * <p>
	 * NOTE(review): no DTD / external entity restrictions are applied on this
	 * route; confirm whether hardening is needed for the environments that
	 * end up here.
	 * 
	 * @param inStream stream of the RSS XML document.
	 * @param handler receiver of the SAX events.
	 * @throws IOException if the transformation (XML parsing) fails.
	 */
	private static void parseWithTransformer(final InputStream inStream, final ContentHandler handler)
			throws IOException {
		final TransformerFactory tf = TransformerFactory.newInstance();
		try {
			final Transformer transformer = tf.newTransformer();
			transformer.transform(new StreamSource(inStream), new SAXResult(handler));
		} catch (TransformerException e2) {
			throw new IOException("Fail to parse XML: " + e2.toString(), e2);
		}
	}

	/**
	 * Turns a parser feature off, logging (at FINEST) instead of failing when
	 * the parser does not recognize or does not support the feature.
	 * 
	 * @param reader the reader to configure.
	 * @param featureName fully qualified URI of the feature to switch off.
	 * @param failureMessage log message prefix used when the feature cannot be set.
	 */
	private static void disableFeature(final XMLReader reader, final String featureName, final String failureMessage) {
		try {
			reader.setFeature(featureName, false);
		} catch (SAXNotRecognizedException exNotRecognized) {
			Logger.getLogger(LOGGER_NAME).finest(failureMessage + exNotRecognized.toString());
		} catch (org.xml.sax.SAXNotSupportedException exNotSupported) {
			Logger.getLogger(LOGGER_NAME).finest(failureMessage + exNotSupported.toString());
		}
	}
}

/**
 * SAX {@link ContentHandler} that collects RSS 2.0 (and some RDF/RSS 1.0)
 * elements into value objects and hands them to the owning
 * {@link ManjyuRssParser}.
 * <p>
 * The channel is handed to {@code processChannel} when the first item starts
 * or when the channel element ends, whichever comes first; each item is
 * handed to {@code processItem} when its element ends.
 */
class ManjyuRssParserHandler implements ContentHandler {
	/** Dublin Core namespace used by RDF/RSS 1.0 feeds. */
	private static final String NS_DC = "http://purl.org/dc/elements/1.1/";

	/** RSS 1.0 namespace. */
	private static final String NS_RSS10 = "http://purl.org/rss/1.0/";

	/** RDF syntax namespace. */
	private static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";

	/** Name of the logger used for diagnostic messages of this library. */
	private static final String LOGGER_NAME = "org.manjyu.rss";

	/** Parser whose callbacks are fired. */
	protected ManjyuRssParser parser;

	/** Channel being read; null before the channel starts and after it has been fired. */
	protected ManjyuRssChannel channel = null;

	/** Item being read; null outside of an item element. */
	protected ManjyuRssItem item = null;

	/** Category (or dc:subject) being read; null outside of such an element. */
	protected ManjyuRssCategory category = null;

	/** Image being read; null outside of an image element. */
	protected ManjyuRssImage image = null;

	/** True while inside a textInput element, whose children are not implemented. */
	protected boolean inTextInput = false;

	/** Buffer of the character data of the element currently being read. */
	protected CharArrayWriter charactersWriter = new CharArrayWriter();

	/**
	 * @param targetParser parser that receives the channel and item callbacks.
	 */
	ManjyuRssParserHandler(final ManjyuRssParser targetParser) {
		this.parser = targetParser;
	}

	@Override
	public void setDocumentLocator(final Locator locator) {
		// not used.
	}

	@Override
	public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
		// not used.
	}

	@Override
	public void endPrefixMapping(final String prefix) throws SAXException {
		// not used.
	}

	@Override
	public void startDocument() throws SAXException {
		// not used.
	}

	@Override
	public void endDocument() throws SAXException {
		// not used.
	}

	/**
	 * Creates the value object for container elements and reads the values
	 * that are carried by attributes (category, cloud, image, enclosure,
	 * source). The character buffer is cleared on every element start.
	 * 
	 * @throws SAXException wrapping the IOException of {@code processChannel}
	 *             when the first item fires the channel.
	 */
	@Override
	public void startElement(final String uri, final String localName, final String qName, final Attributes atts)
			throws SAXException {
		charactersWriter.reset();

		if ("channel".equals(localName)) {
			// [rss]
			channel = new ManjyuRssChannel();
		} else if ("item".equals(localName)) {
			// [channel]
			// The first item closes the channel header: fire channel processing.
			fireChannel();
			item = new ManjyuRssItem();
		} else if ("category".equals(localName)) {
			// [channel]
			// [item]
			// channel and item both set the domain attribute.
			category = new ManjyuRssCategory();
			category.setDomain(atts.getValue("domain"));
		} else if ("subject".equals(localName) && NS_DC.equals(uri)) {
			///////////////////////////////
			// for RDF/RSS 1.0 

			// [channel]
			// [item]
			if (category == null) {
				category = new ManjyuRssCategory();
			}
		} else if ("cloud".equals(localName)) {
			// [channel]
			if (channel != null) {
				final ManjyuRssCloud cloud = new ManjyuRssCloud();
				channel.setCloud(cloud);
				cloud.setDomain(atts.getValue("domain"));
				cloud.setPort(atts.getValue("port"));
				cloud.setPath(atts.getValue("path"));
				cloud.setRegisterProcedure(atts.getValue("registerProcedure"));
				// The protocol attribute must not overwrite registerProcedure.
				cloud.setProtocol(atts.getValue("protocol"));
			}
		} else if ("image".equals(localName)) {
			// [channel]
			image = new ManjyuRssImage();

			if (NS_RSS10.equals(uri)) {
				///////////////////////////////
				// for RDF/RSS 1.0 
				image.setUrl(atts.getValue(NS_RDF, "resource"));
				image.setLink(image.getUrl());
			}
		} else if ("textInput".equals(localName)) {
			// [channel]
			// Remember the context so that its title/link/description children
			// are not mistaken for the channel's own elements.
			inTextInput = true;
		} else if ("enclosure".equals(localName)) {
			// [item]
			if (item != null) {
				if (item.getEnclosure() == null) {
					item.setEnclosure(new ManjyuRssEnclosure());
				}
				final ManjyuRssEnclosure enclosure = item.getEnclosure();
				enclosure.setUrl(atts.getValue("url"));
				enclosure.setLength(atts.getValue("length"));
				enclosure.setType(atts.getValue("type"));
			}
		} else if ("source".equals(localName)) {
			// [item]
			if (item != null) {
				if (item.getSource() == null) {
					item.setSource(new ManjyuRssSource());
				}
				item.getSource().setUrl(atts.getValue("url"));
			}
		}
	}

	/**
	 * Stores the buffered character data of the element that just ended into
	 * the value object of the current context, and fires the channel / item
	 * callbacks when those elements end.
	 * <p>
	 * For title, link and description the innermost open context wins
	 * (image, then item, then channel); children of textInput are ignored.
	 * 
	 * @throws SAXException wrapping an IOException raised by a callback or by
	 *             date parsing.
	 */
	@Override
	public void endElement(final String uri, final String localName, final String qName) throws SAXException {
		final String characters = charactersWriter.toString();

		if ("channel".equals(localName)) {
			// fire channel processing (no-op if the first item already fired it).
			fireChannel();
		} else if ("item".equals(localName)) {
			// [item]
			fireItem();
		} else if ("title".equals(localName)) {
			if (inTextInput) {
				// textInput is not implemented; do not clobber the channel.
			} else if (image != null) {
				// [image]
				image.setTitle(characters);
			} else if (item != null) {
				// [item]
				item.setTitle(characters);
			} else if (channel != null) {
				// [channel]
				channel.setTitle(characters);
			}
		} else if ("link".equals(localName)) {
			if (inTextInput) {
				// textInput is not implemented; do not clobber the channel.
			} else if (image != null) {
				// [image]
				image.setLink(characters);
			} else if (item != null) {
				// [item]
				item.setLink(characters);
			} else if (channel != null) {
				// [channel]
				if (localName.equals(qName)) {
					// only unprefixed link: avoid being overridden by atom:link
					channel.setLink(characters);
				}
			}
		} else if ("description".equals(localName)) {
			if (inTextInput) {
				// textInput is not implemented; do not clobber the channel.
			} else if (image != null) {
				// [image]
				image.setDescription(characters);
			} else if (item != null) {
				// [item]
				item.setDescription(characters);
			} else if (channel != null) {
				// [channel]
				channel.setDescription(characters);
			}
		} else if ("language".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setLanguage(characters);
			}
		} else if ("copyright".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setCopyright(characters);
			}
		} else if ("managingEditor".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setManagingEditor(characters);
			}
		} else if ("webMaster".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setWebMaster(characters);
			}
		} else if ("pubDate".equals(localName)) {
			// [channel]
			// [item]
			try {
				applyPubDate(ManjyuRssDateUtil.rfc822String2Date(characters));
			} catch (IOException e) {
				throw new SAXException(e);
			}
		} else if ("lastBuildDate".equals(localName)) {
			try {
				final Date date = ManjyuRssDateUtil.rfc822String2Date(characters);
				// [channel]
				if (channel != null) {
					channel.setLastBuildDate(date);
				}
			} catch (IOException e) {
				throw new SAXException(e);
			}
		} else if ("category".equals(localName)) {
			// [channel]
			// [item]
			finishCategory(characters);
		} else if ("subject".equals(localName) && NS_DC.equals(uri)) {
			///////////////////////////////
			// for RDF/RSS 1.0 

			// [channel]
			// [item]
			finishCategory(characters);
		} else if ("generator".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setGenerator(characters);
			}
		} else if ("docs".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setDocs(characters);
			}
		} else if ("ttl".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setTtl(characters);
			}
		} else if ("image".equals(localName)) {
			// [channel]
			if (channel != null) {
				channel.setImage(image);
			}
			image = null;
		} else if ("rating".equals(localName)) {
			// [channel]
			// TODO Not implemented!
		} else if ("textInput".equals(localName)) {
			// [channel]
			// TODO Not implemented!
			inTextInput = false;
		} else if ("skipHours".equals(localName)) {
			// [channel]
			// TODO Not implemented!
		} else if ("skipDays".equals(localName)) {
			// [channel]
			// TODO Not implemented!
		} else if ("author".equals(localName)) {
			// [item]
			if (item != null) {
				item.setAuthor(characters);
			}
		} else if ("comments".equals(localName)) {
			// [item]
			if (item != null) {
				item.setComments(characters);
			}
		} else if ("guid".equals(localName)) {
			// [item]
			if (item != null) {
				item.setGuid(characters);
			}
		} else if ("source".equals(localName)) {
			// [item]
			if (item != null) {
				if (item.getSource() == null) {
					item.setSource(new ManjyuRssSource());
				}
				item.getSource().setCharacters(characters);
			}
		} else if ("url".equals(localName)) {
			// [image]
			if (image != null) {
				image.setUrl(characters);
			}
		} else if ("width".equals(localName)) {
			// [image]
			if (image != null) {
				image.setWidth(characters);
			}
		} else if ("height".equals(localName)) {
			// [image]
			if (image != null) {
				image.setHeight(characters);
			}
		} else if ("date".equals(localName) && NS_DC.equals(uri)) {
			///////////////////////////////
			// for RDF/RSS 1.0 

			// [channel]
			// [item]
			try {
				applyPubDate(ManjyuRssDateUtil.w3cdtfString2Date(characters));
			} catch (IOException e) {
				throw new SAXException(e);
			}
		} else if ("rights".equals(localName) && NS_DC.equals(uri)) {
			///////////////////////////////
			// for RDF/RSS 1.0 

			// [channel]
			if (channel != null) {
				channel.setCopyright(characters);
			}
		} else if ("publisher".equals(localName) && NS_DC.equals(uri)) {
			///////////////////////////////
			// for RDF/RSS 1.0 

			// [channel]
			if (channel != null) {
				channel.setWebMaster(characters);
			}
		} else if ("creator".equals(localName) && NS_DC.equals(uri)) {
			///////////////////////////////
			// for RDF/RSS 1.0 

			// [channel]
			// TODO Note: ignore channel's creator

			// [item]
			if (item != null) {
				item.setAuthor(characters);
			}
		} else if ("li".equals(localName) || "Seq".equals(localName) || "items".equals(localName)
				|| "RDF".equals(localName)) {
			///////////////////////////////
			// for RDF/RSS 1.0

			// ignore RSS 1.0 structural tags
		} else {
			Logger.getLogger(LOGGER_NAME).fine(
					"Skipped endElement tag: " + localName + " [" + uri + "]: value=" + characters);
		}

		charactersWriter.reset();
	}

	@Override
	public void characters(final char[] ch, final int start, final int length) throws SAXException {
		charactersWriter.write(ch, start, length);
	}

	@Override
	public void ignorableWhitespace(final char[] ch, final int start, final int length) throws SAXException {
		charactersWriter.write(ch, start, length);
	}

	@Override
	public void processingInstruction(final String target, final String data) throws SAXException {
		// not used.
	}

	@Override
	public void skippedEntity(final String name) throws SAXException {
		// not used.
	}

	/**
	 * Hands the current channel to the parser and forgets it, so that it is
	 * fired only once. Does nothing when there is no current channel.
	 */
	private void fireChannel() throws SAXException {
		if (channel == null) {
			return;
		}
		try {
			parser.processChannel(channel);
		} catch (IOException ex) {
			throw new SAXException(ex);
		}
		channel = null;
	}

	/**
	 * Hands the current item to the parser and forgets it. Does nothing when
	 * there is no current item.
	 */
	private void fireItem() throws SAXException {
		if (item == null) {
			return;
		}
		try {
			parser.processItem(item);
		} catch (IOException ex) {
			throw new SAXException(ex);
		}
		item = null;
	}

	/** Sets the publication date on the current channel and/or item. */
	private void applyPubDate(final Date date) {
		// [channel]
		if (channel != null) {
			channel.setPubDate(date);
		}
		// [item]
		if (item != null) {
			item.setPubDate(date);
		}
	}

	/**
	 * Completes the category created in startElement and adds it to the
	 * current channel and/or item.
	 * <p>
	 * The category may already be gone when category elements are nested (the
	 * inner end tag consumes it); in that case the outer end tag is ignored
	 * instead of failing with a NullPointerException.
	 */
	private void finishCategory(final String characters) {
		if (category == null) {
			return;
		}
		category.setCharacters(characters);

		// [channel]
		if (channel != null) {
			channel.getCategoryList().add(category);
		}
		// [item]
		if (item != null) {
			item.getCategoryList().add(category);
		}
		category = null;
	}
}
