/////////////////////////////////////////////////////////////////////////////
/*
  Copyright 2001-2,2004 Ronald S. Burkey.
  Latex support Copyright 2001 Joe Cherry.

  This file is part of GutenMark.

  GutenMark is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  GutenMark is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with GutenMark; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

  Filename:	OutputHtml.c 
  Purpose:	Writes the output HTML file.
  Mods:		01/13/02 RSB	Split off from AutoMark.c.
  		01/18/02 RSB	Added NoPrefatory and PageBreaks.
		08/08/02 RSB	Accounted for --title and --author.
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <zlib.h>
#include "AutoMark.h"

// HTML character-entity names for the 64 character codes 192 through 255,
// indexed by (code - 192).  Most entries are accented letters, but the
// range also contains "times" (215) and "divide" (247).
static const char *DiacriticalNames[64] = {
  // 192-199
  "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
  // 200-207
  "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
  // 208-215
  "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
  // 216-223
  "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
  // 224-231
  "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
  // 232-239
  "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
  // 240-247
  "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
  // 248-255
  "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
};

//--------------------------------------------------------------------------
// Outputs a single character (from the text) to the HTML output file.
// This is more useful in some cases than directly outputting the character,
// because automatic conversion to HTML character entities is done:
//
//      '<', '>', '&'   Always written as entities (numeric if ForceNumeric
//                      is non-zero, named otherwise).
//      0..127          Written unchanged.
//      192..255        Written as a named entity from DiacriticalNames[],
//                      unless ForceNumeric is non-zero.
//      anything else   Written as a decimal numeric entity, "&#N;".
//
// ForceNumeric         Non-zero to use only numeric entities.
// OutputFile           Stream that receives the character or entity.
// c                    Character code to write.

static void
OutputCharacter (int ForceNumeric, FILE * OutputFile, int c)
{
  // An 8-bit character that reached us through a plain (possibly signed)
  // char is sign-extended to a negative int.  Left alone it would satisfy
  // the "c < 128" test below and be written as a raw byte rather than as
  // an entity, so map it back into 0..255 first.
  // NOTE(review): the type of MarkupRecord.Insert is not visible from this
  // file; confirm whether callers can really pass negative values.
  if (c < 0)
    c = (unsigned char) c;

  // The three characters that are HTML syntax are always escaped.
  switch (c)
    {
    case '<':
      fputs (ForceNumeric ? "&#60;" : "&lt;", OutputFile);
      return;
    case '>':
      fputs (ForceNumeric ? "&#62;" : "&gt;", OutputFile);
      return;
    case '&':
      fputs (ForceNumeric ? "&#38;" : "&amp;", OutputFile);
      return;
    }

  if (c < 128)
    putc (c, OutputFile);
  else if (!ForceNumeric && c >= 192 && c <= 255)
    fprintf (OutputFile, "&%s;", DiacriticalNames[c - 192]);
  else
    fprintf (OutputFile, "&#%d;", c);
}

//--------------------------------------------------------------------------
// Handles just the output, after all analysis has been completed.
//
// Writes the HTML prologue (DOCTYPE, <head>, <title>), then walks
// Dataset->InputFile one character at a time while reading markup records
// from Dataset->MarkupFile.  Every record whose Offset equals the current
// input offset is acted on (usually by emitting a tag or an entity) before
// the input character itself is written.  Finally the document is closed.
//
// If Dataset->YesHeader is zero, markup records with offsets below
// Dataset->TextStart are skipped and reading of the input starts at
// Dataset->TextStart; a MarkGutenbergEnder record then ends the output.
//
// OutputFile   Stream that receives the HTML.
// Dataset      Analysis results; InputFile and MarkupFile are repositioned
//              and read by this function.
//
// Always returns 0.  (I/O errors are not currently detected here.)

int
OutputHtml (FILE * OutputFile, AnalysisDataset * Dataset)
{
  int ErrorCode, c, Remove, AtEnd;
  MarkupRecord Mark;
  unsigned long Offset;
  char s[256];
  int InParagraph = 0, CharsInLine = 0;
  int InPreformatted = 0, LastNewline = 1;
  int AfterBreak = 0;
  // Never assigned in this function, so the "original file name" sentence
  // of the prefatory note is currently never written.
  char *InputFilename = NULL;
  int JumpedPastHeader = 0;
  int InPrefatoryArea = 0;
  int HeaderCount = 0;

  // Print output header.
  fprintf
    (OutputFile, "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 3.2//EN\">\n");
  fprintf (OutputFile, "<html>\n\n");
  fprintf (OutputFile, "<head>\n");

  // The document title.  If the user has given us one, then we use it.
  // Otherwise, we attempt to deduce it.
  if (Dataset->TitleBuffer[0])
    {
      // Bounded copies:  the title and author are truncated rather than
      // being allowed to overrun s[].
      snprintf (s, sizeof (s), "%s", Dataset->TitleBuffer);
      if (Dataset->AuthorBuffer[0])
        {
          // Used is at least 1 here, because TitleBuffer is non-empty.
          size_t Used = strlen (s);
          // Separate title and author with a comma, unless the title
          // already ends in punctuation.  The cast keeps ispunct() defined
          // for characters above 127 when char is signed.
          snprintf (s + Used, sizeof (s) - Used, "%s%s",
                    ispunct ((unsigned char) s[Used - 1]) ? "" : ",",
                    Dataset->AuthorBuffer);
        }
    }
  else
    {
      // Use the first two lines of the input file as the raw title.
      size_t Length;
      fseek (Dataset->InputFile, 0, SEEK_SET);
      // Keep s[] a valid string even if the input is empty or unreadable.
      if (fgets (s, (int) (sizeof (s) - 1), Dataset->InputFile) == NULL)
        s[0] = '\0';
      Length = strlen (s);
      if (fgets (s + Length, (int) (sizeof (s) - Length - 1),
                 Dataset->InputFile) == NULL)
        s[Length] = '\0';
      NormalizeTitle (s, sizeof (s));
    }
  fprintf (OutputFile, "<title>%s</title>\n", s);
  fprintf (OutputFile, "</head>\n\n");
  fprintf (OutputFile, "<body>\n");
  fprintf (OutputFile, "<!-- Short-line cutoffs are %u and %u -->\n\n",
           Dataset->ShortLineSize, Dataset->ReallyShortLineSize);

  // Print marked-up text.  This is basically a matter 
  // of merging the markup file with the input file.
  fseek (Dataset->MarkupFile, 0, SEEK_SET);
  fseek (Dataset->InputFile, 0, SEEK_SET);
  if (Dataset->YesHeader)
    {
      // Start at the very beginning, with the first markup record.
      ErrorCode = fread (&Mark, sizeof (Mark), 1, Dataset->MarkupFile);
      if (ErrorCode != 1)
        Mark.Type = MarkNoMoreMarks;
      Offset = 0;
    }
  else
    {
      // Discard the markup records that precede TextStart, and start
      // reading the input there.
      do
        {
          ErrorCode = fread (&Mark, sizeof (Mark), 1, Dataset->MarkupFile);
          if (ErrorCode != 1)
            {
              Mark.Type = MarkNoMoreMarks;
              break;
            }
        }
      while (Mark.Offset < Dataset->TextStart);
      fseek (Dataset->InputFile, Dataset->TextStart, SEEK_SET);
      Offset = Dataset->TextStart;
    }
  for (Remove = AtEnd = 0;
       (c = fgetc (Dataset->InputFile)) != EOF;
       Offset++, CharsInLine++)
    {
      if (!isspace (c))
        AfterBreak = 0;
      // The code following the loop jumps back to this label once, after
      // end of input, with AtEnd set.
    LastShot:
      // Act on every markup record attached to the current offset.  The
      // type is tested first so that Mark.Offset is not examined when no
      // record was read successfully.
      while (Mark.Type != MarkNoMoreMarks && Offset == Mark.Offset)
        {
          switch (Mark.Type)
            {
            case MarkTolower:
              putc (DiacriticalTolower (c), OutputFile);
              Remove = 1;
              break;
            case MarkToupper:
              putc (DiacriticalToupper (c), OutputFile);
              Remove = 1;
              break;
            case MarkBeginSmartQuote:
              CharsInLine += 6;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#8220;");
              else
                fprintf (OutputFile, "&ldquo;");
              Remove = 1;
              break;
            case MarkEndSmartQuote:
              CharsInLine += 6;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#8221;");
              else
                fprintf (OutputFile, "&rdquo;");
              Remove = 1;
              break;
            case MarkLsquo:
              CharsInLine += 6;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#8216;");
              else
                fprintf (OutputFile, "&lsquo;");
              Remove = 1;
              break;
            case MarkRsquo:
              CharsInLine += 6;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#8217;");
              else
                fprintf (OutputFile, "&rsquo;");
              Remove = 1;
              break;
            case MarkRemoveChar:
              Remove = 1;
              break;
            case MarkInsertChar:
              OutputCharacter (Dataset->ForceNumeric, OutputFile,
                               Mark.Insert);
              break;
            case MarkNbsp:
              CharsInLine += 5;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#160;");
              else
                fprintf (OutputFile, "&nbsp;");
              break;
            case MarkBeginJustifiedParagraph:
              InParagraph = 1;
              CharsInLine = 0;
              if (Dataset->NoJustify)
                {
                  CharsInLine += 2;
                  fprintf (OutputFile, "<p>");
                }
              else
                {
                  CharsInLine += 18;
                  fprintf (OutputFile, "<p align=\"justify\">");
                }
              break;
            case MarkBeginRaggedParagraph:
              InParagraph = 1;
              CharsInLine = 2;
              fprintf (OutputFile, "<p>");
              break;
            case MarkBeginCenteredParagraph:
              InParagraph = 1;
              CharsInLine = 17;
              fprintf (OutputFile, "<p align=\"center\">");
              break;
            case MarkEndParagraph:
              InParagraph = 0;
              fprintf (OutputFile, "</p>");
              break;
            case MarkBeginItalics:
              CharsInLine += 2;
              fprintf (OutputFile, "<i>");
              break;
            case MarkEndItalics:
              CharsInLine += 3;
              fprintf (OutputFile, "</i>");
              break;
            case MarkBeginBold:
              CharsInLine += 2;
              fprintf (OutputFile, "<b>");
              break;
            case MarkEndBold:
              CharsInLine += 3;
              fprintf (OutputFile, "</b>");
              break;
            case MarkBeginUnderline:
              CharsInLine += 2;
              fprintf (OutputFile, "<u>");
              break;
            case MarkEndUnderline:
              CharsInLine += 3;
              fprintf (OutputFile, "</u>");
              break;
            case MarkBreak:
              CharsInLine = 0;
              AfterBreak = 1;
              fprintf (OutputFile, "<br>\n");
              break;
            case MarkBlockquote:	// I think, not used.
              InParagraph = 1;
              CharsInLine = 12;
              fprintf (OutputFile, "<blockquote>");
              break;
            case MarkEndBlockquote:	// I think, not used.
              InParagraph = 0;
              fprintf (OutputFile, "</blockquote>");
              break;
            case MarkHeader1:
              if (HeaderCount == 0)
                {
                  // Close the NOPRINT <DIV> only if one was really opened
                  // by MarkEndOfGutenbergHeader; otherwise a stray
                  // </DIV> would be written.
                  if (Dataset->NoPrefatory && InPrefatoryArea)
                    fprintf (OutputFile, "</DIV>\n");
                }
              else
                {
                  if (Dataset->PageBreaks)
                    fprintf (OutputFile, "<!--NewPage-->\n");
                }
              InPrefatoryArea = 0;
              HeaderCount++;
              fprintf (OutputFile, "<h1>");
              break;
            case MarkEndHeader1:
              fprintf (OutputFile, "</h1>");
              break;
            case MarkBeginSubtitle:
              fprintf (OutputFile, "<p><b>");
              break;
            case MarkEndSubtitle:
              fprintf (OutputFile, "</b></p>");
              break;
            case MarkBeginTable:
              InPreformatted = 1;
              LastNewline = 0;
              fprintf (OutputFile, "<pre>");
              break;
            case MarkEndTable:
              InPreformatted = 0;
              fprintf (OutputFile, "</pre>");
              break;
            case MarkInsertMdash:
              CharsInLine += 6;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#8212;");
              else
                fprintf (OutputFile, "&mdash;");
              break;
            case MarkInsertNdash:
              CharsInLine += 6;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#8211;");
              else
                fprintf (OutputFile, "&ndash;");
              break;
            case MarkSoftHyphen:
              CharsInLine += 4;
              if (Dataset->ForceNumeric)
                fprintf (OutputFile, "&#173;");
              else
                fprintf (OutputFile, "&shy;");
              break;
            case MarkJumpPastGutenbergHeader:
              JumpedPastHeader = 1;
              fprintf (OutputFile,
                       "<h1>Project Gutenberg Fine Print</h1>\n<pre>\n");
              break;
            case MarkEndOfGutenbergHeader:
              if (JumpedPastHeader)
                fprintf (OutputFile, "\n</pre>\n\n");
              if (Dataset->LowestNonPrefatoryLine > 0)
                {
                  // With NoPrefatory the prefatory material is wrapped in
                  // a NOPRINT <DIV>; otherwise its heading is counted like
                  // any other header.
                  if (Dataset->NoPrefatory)
                    fprintf (OutputFile, "<DIV class=NOPRINT>\n");
                  else
                    HeaderCount++;
                  InPrefatoryArea = 1;
                  fprintf (OutputFile, "<h1>Prefatory Materials</h1>\n\n");
                  fprintf (OutputFile,
                           "<blockquote><i><font size=\"-1\">\n"
                           "This is a modified etext created by <b>GutenMark</b> "
                           "software.\n");
                  if (InputFilename != NULL)
                    fprintf (OutputFile,
                             "&nbsp; The original text was contained in a "
                             "computer file named \"%s.\"\n", InputFilename);
                  fprintf (OutputFile,
                           "&nbsp; Any comments below about etext "
                           "preparation refer to the\n"
                           "<u>original</u>, and not to this "
                           "modified version of the etext.&nbsp;\n"
                           "No individuals named below bear "
                           "responsibility for changes to the text.\n"
                           "</font></i></blockquote>\n\n");
                }
              break;
            case MarkGutenbergEnder:
              // When the header is being kept (YesHeader), output simply
              // continues; otherwise it stops here.
              if (!Dataset->YesHeader)
                AtEnd = 1;
              break;
            case MarkNoMoreMarks:
              break;
            }
          // Fetch the next markup record.
          ErrorCode = fread (&Mark, sizeof (Mark), 1, Dataset->MarkupFile);
          if (ErrorCode != 1)
            Mark.Type = MarkNoMoreMarks;
        }
      if (AtEnd)
        break;
      if (Remove)
        // A markup record replaced or deleted this input character.
        Remove = 0;
      else
        {
          // Carriage returns are dropped, as is whitespace directly
          // following a <br>.
          if (c != '\r' && !(AfterBreak && isspace (c)))
            {
              if (InParagraph)
                {
                  // Inside a paragraph the line structure is ours:  break
                  // at whitespace once the line exceeds HTML_LENGTH, and
                  // turn the input's own newlines into spaces.
                  if (isspace (c) && CharsInLine > HTML_LENGTH)
                    {
                      c = '\n';
                      CharsInLine = 0;
                    }
                  else if (c == '\n')
                    c = ' ';
                }
              if (c == '\n')
                {
                  // Outside <pre>, suppress newlines beyond two in a row.
                  if (LastNewline > 1 && !InPreformatted)
                    continue;
                  LastNewline++;
                }
              else
                LastNewline = 0;
              OutputCharacter (Dataset->ForceNumeric, OutputFile, c);
            }
        }
    }

  // What this does is to allow us to process any remaining markups that
  // are supposed to occur at the very end of the input (such as </p>).  
  // Setting AtEnd first makes the loop body exit right after the markups.
  if (AtEnd == 0)
    {
      AtEnd = 1;
      goto LastShot;
    }

  // Last stage:  Close the output HTML stream.          
  fprintf (OutputFile, "\n</body>\n");
  fprintf (OutputFile, "</html>\n");

  return (0);
}
