// @(#) $Id: fix-mime-charset.cc,v 1.19 2003/11/26 00:12:28 balu Exp $

#include "config.h"
#include <string>
#include <iostream>
#include "input.h"
#include "output.h"
#include "parser.h"
#include "decoder.h"
#include "detector.h"
#include "charsetlist.h"
#if HAVE_ARGP_H
#include <argp.h>
#else
#include "argp-local.h"
#endif

using namespace std;

// Source of the lines being processed. Allocated in init() as an InputFile
// built on fileList.
Input *input;
// Destination of every processed line. Besides putLine() it is used below
// for suspend()/resume() and for replace() at positions obtained from tell().
Output output;

// Settings filled in by parseArg() from the command line.

// -m: treat the input as a mailbox instead of a single message (see run()).
bool mailbox=false;
// -f: hand the detector defaultCharset instead of the charset declared in
// Content-Type (see processPlain()).
bool alwaysOverride=false;
// -c: charset used when constructing ContentType and, with -f, as the
// detector's starting charset.
string defaultCharset="us-ascii";
// -O: only messages whose charset is in this list are processed.
// NOTE(review): the constructor flag presumably is what contains() answers
// while the list is still empty - confirm in charsetlist.h.
CharsetList onlyCharsets(true);
// -I: messages whose charset is in this list are passed through untouched.
CharsetList ignoreCharsets(false);
// -B: also descend into multipart/signed messages (see processMessage()).
bool breakSignature=false;
// -l: language code handed to the detector.
string lang="ru";

// Positional command-line arguments, collected by parseArg().
FileList fileList;

// Globals with names reserved by argp: version string for --version and the
// bug-report address shown in --help.
const char *argp_program_version="fix-mime-charset " VERSION;
const char *argp_program_bug_address="<balu@migdal.ru>";

void showLanguages()
{
const Detector::LangList &langs=Detector::getLanguages();
cout<<"Available languages:"<<endl
    <<endl;
for(Detector::LangList::const_iterator i=langs.begin();i!=langs.end();i++)
   cout<<"  "<<i->code<<" - "<<i->name<<endl;
cout<<endl
    <<"Detector is "<<Detector::getDetectorName()<<"."<<endl;
exit(0);
}

// argp callback: applies one parsed option or positional argument to the
// global settings.
//
//   key   - option character or an ARGP_KEY_* code
//   arg   - option argument, or the positional argument for ARGP_KEY_ARG
//   state - argp parser state; only argv[0] is read, for the error message
//
// Always returns 0, also for keys that are not handled here.
// -l with a language the detector rejects prints an error and exits with
// status 1; -L does not return because showLanguages() exits.
error_t parseArg(int key,char *arg,argp_state *state)
{
if(key=='B')
  breakSignature=true;
else if(key=='c')
  defaultCharset=arg;
else if(key=='f')
  alwaysOverride=true;
else if(key=='I')
  ignoreCharsets+=arg;
else if(key=='l')
  {
  lang=arg;
  if(!Detector::isLanguageValid(lang))
    {
    cerr<<state->argv[0]<<": unknown language ("<<lang<<")"<<endl;
    exit(1);
    }
  }
else if(key=='L')
  showLanguages();
else if(key=='m')
  mailbox=true;
else if(key=='O')
  onlyCharsets+=arg;
else if(key==ARGP_KEY_ARG)
  fileList.push_back(arg);

return 0;
}

void initArgs(int argc,char *argv[])
{
static const argp_option options[]=
     {
      {"break-signature",'B',0,0,
        "Fix charset in multipart/signed messages though this will break"
        " the signature"},
      {"charset",'c',"charset",0,
        "Default charset to be used when autodetection fails"},
      {"force-override",'f',0,0,
        "Always override Content-Type charset"},
      {"ignore-charsets",'I',"charset,...",0,
        "Ignore messages with given charsets"},
      {"lang",'l',"language",0,
        "Language for autodetection (default is \"ru\")"},
      {"show-langs",'L',0,0,
        "Give list of available languages"},
      {"mailbox",'m',0,0,
        "Process mailbox (default is to process an individual message)"},
      {"only-charsets",'O',"charset,...",0,
        "Process messages with given charsets only"},
      {0}
     };
static const argp aargp={options,
                         &parseArg,
                         "[file...]",
                         "Fix incorrect charset specifications in"
                         " MIME messages"};

argp_parse(&aargp,argc,argv,0,0,0);
}

void init(int argc,char *argv[])
{
initArgs(argc,argv);
ios::sync_with_stdio(false);
input=new InputFile(&fileList);
}

// Copies the body of a text/plain part to the output while feeding it to a
// charset detector, then corrects the Content-Type header if the detected
// charset differs from the declared one.
//
//   input    - remaining lines of the part (the body)
//   encoding - Content-Transfer-Encoding value; "quoted-printable" and
//              "base64" are decoded before detection, anything else is
//              scanned as is
//   header   - the Content-Type header as found in the message; a length of
//              0 is treated as "header absent"
//   ct       - parsed Content-Type; its "charset" parameter is updated when
//              the detector disagrees
//   eoh      - output position of the blank line ending the headers
//
// Returns the declared charset when the part is skipped because of the
// -O / -I lists, otherwise whatever the detector reports.
string processPlain(Input *input,const string &encoding,const Header &header,
                    ContentType &ct,int eoh)
{
string charset=ct["charset"];

// Charsets excluded by -O / -I: pass the body through untouched.
const bool wanted=onlyCharsets.contains(charset) &&
                  !ignoreCharsets.contains(charset);
if(!wanted)
  {
  while(*input)
       output.putLine(input->nextLine());
  return charset;
  }

Detector *detector=Detector::defaultDetector(alwaysOverride ? defaultCharset
                                                            : charset,
                                             lang);

int enc=TE_IDENT;
if(encoding=="quoted-printable")
  enc=TE_QUOTED_PRINTABLE;
else if(encoding=="base64")
  enc=TE_BASE64;

// The output receives the original line; only the detector sees the
// decoded text.
while(*input)
     {
     string line=input->nextLine();
     detector->scan(decode(line,enc));
     output.putLine(line);
     }
charset=detector->getCharset();

// Nothing detected, or the declaration was already right.
if(charset.empty() || isSameCharset(charset,ct["charset"]))
  {
  delete detector;
  return charset;
  }

ct["charset"]=charset;
if(header.getLength()==0)
  {
  // No Content-Type header in the message: put one (plus a transfer
  // encoding, if that is absent too) in place of the blank line at eoh and
  // re-add the blank line after it.
  Output::Replacement added;
  added.push_back(ct.getHeader());
  if(encoding.empty())
    {
    string cte("Content-Transfer-Encoding: ");
    cte+=detector->isAscii() ? "7bit" : "8bit";
    added.push_back(cte);
    }
  added.push_back("");
  output.replace(eoh,1,added);
  }
else
  output.replace(header.getPosition(),header.getLength(),ct.getHeader());

delete detector;
return charset;
}

// Number of raw bytes whose encoded form takes n characters when every
// 3 bytes become 4 characters; a trailing incomplete group of characters is
// not counted.
inline int decodedLength(int n)
{
const int groups=n/4;
return groups*3;
}

// Returns the smaller of two ints.
inline int min(int a,int b)
{
if(b<a)
  return b;
return a;
}

void processSubject(const Header &subject,const string &charset)
{
if(subject.isMissing())
  return;
if(!onlyCharsets.contains(charset) || ignoreCharsets.contains(charset))
  return;
string body=decodeHeader(subject.getBody());

Detector *detector=Detector::defaultDetector(charset,lang);
detector->scan(body);
if(detector->isAscii() || detector->getCharset().empty())
  return;
int i=0,j=0;
for(string::iterator p=body.begin();p!=body.end() && i<=75;p++,i++)
   {
   if((unsigned char)*p>=0x80)
     break;
   if(*p==' ')
     j=i;
   }
   
Output::Replacement r;
int clen=detector->getCharset().length()+7;
string s=body.substr(0,j);
int pad=74-j-clen;
if(pad>=4)
  {
  pad=decodedLength(pad);
  s+=" =?"+detector->getCharset()+"?B?"+encode(body.substr(j+1,pad))+"?=";
  j+=pad+1;
  r.push_back(s);
  }
while(j<body.length())
     {
     pad=min(body.length()-j,decodedLength(73-clen));
     r.push_back(" =?"+detector->getCharset()+"?B?"+encode(body.substr(j,pad))+
                 "?=");
     j+=pad;
     }
output.replace(subject.getPosition(),subject.getLength(),r);
delete detector;
}

string processMessage(Input *input);

// Processes the body of a multipart entity: copies the preamble, runs
// processMessage() on each part, and copies whatever follows the closing
// delimiter.
//
//   input    - remaining lines of the multipart body
//   boundary - value of the Content-Type "boundary" parameter
//   subject  - Subject header of the enclosing message; it is rewritten with
//              the charset of the first part that reports one, or with an
//              empty charset if no part does
//
// Returns the charset reported by the last part that reported a non-empty
// one (empty if none did). Calls output.resume() exactly once.
string processMultipart(Input *input,const string &boundary,
                        const Header &subject)
{
const string delimiter="--"+boundary;
const string terminator=delimiter+"--";
string line,charset;
bool unblocked=false;

// Preamble: copy everything up to and including the first delimiter line.
while(*input)
     {
     line=input->nextLine();
     output.putLine(line);
     if(line.compare(0,delimiter.length(),delimiter)==0)
       break;
     }

// One iteration per part; the filter limits processMessage() to that part.
do
  {
  BoundaryFilter part(input,boundary);
  const string partCharset=processMessage(&part);
  if(!partCharset.empty())
    charset=partCharset;
  if(!partCharset.empty() && !unblocked)
    {
    // Done here rather than after the loop so that the output is unblocked
    // as soon as possible.
    processSubject(subject,charset);
    output.resume();
    unblocked=true;
    }
  // The delimiter line that ended the part.
  line=input->nextLine();
  output.putLine(line);
  }
while(*input && line.compare(0,terminator.length(),terminator)!=0);

if(!unblocked)
  {
  processSubject(subject,charset);
  output.resume();
  }

// Epilogue.
while(*input)
     output.putLine(input->nextLine());
return charset;
}

// Processes one message or MIME part: copies its headers to the output
// while remembering Subject, Content-Type and Content-Transfer-Encoding,
// then handles the body according to the content type.
//
//   input - lines of the message, headers first
//
// Returns the charset determined for the message; empty when the input
// ended within the headers or the content type is not handled.
//
// The output is suspended on entry and resumed once the Subject header has
// been dealt with; for multipart bodies processMultipart() does that.
string processMessage(Input *input)
{
string enc;
Header subject,contentType;
Header *header=NULL;      // header that continuation lines are appended to

output.suspend();
while(*input)
     {
     string s=input->nextLine();
     if(s=="")
       break;
     if(header!=NULL)
       {
       // s is not empty here. The cast is required: isspace() is undefined
       // for negative values, which is what 8-bit bytes are when char is
       // signed.
       if(isspace(static_cast<unsigned char>(s[0])))
         *header+=s.substr(1);
       else
         header=NULL;
       }
     if(header==NULL)
       {
       string c=getHeaderName(s);
       if(c=="content-type")
         header=&contentType;
       else if(c=="subject")
         header=&subject;
       else if(c=="content-transfer-encoding")
         enc=getTransferEncoding(s);
       if(header!=NULL)
         {
         header->setPosition(output.tell());
         *header=s;
         }
       }
     output.putLine(s);
     }
if(!*input)
  {
  // Nothing left after the headers: there is no body to examine.
  // NOTE(review): if the loop stopped at a blank line that was the last
  // line of the input, that blank line is not written - confirm against
  // the Input implementation.
  processSubject(subject,"");
  output.resume();
  return "";
  }

// The blank line ending the headers; processPlain() may replace it in order
// to insert a Content-Type header.
int eoh=output.tell();
output.putLine("");

ContentType ct(contentType,defaultCharset);
string charset;
if(ct.getFullType()=="text/plain")
  {
  charset=processPlain(input,enc,contentType,ct,eoh);
  processSubject(subject,charset);
  output.resume();
  }
else if(ct.getType()=="multipart" &&
        (breakSignature || ct.getSubtype()!="signed"))
  {
  // processMultipart() rewrites the subject and resumes the output itself.
  charset=processMultipart(input,ct["boundary"],subject);
  }
else if(ct.getFullType()=="message/rfc822")
  {
  charset=processMessage(input);
  processSubject(subject,charset);
  output.resume();
  }
else
  {
  // Any other content type, and multipart/signed without -B: copy the body
  // unchanged; the Subject header is not touched.
  output.resume();
  while(*input)
       output.putLine(input->nextLine());
  }
return charset;
}

void run()
{
if(mailbox)
  while(*input)
       {
       output.putLine(input->nextLine());
       Input *from=new FromFilter(input);
       processMessage(from);
       delete from;
       }
else
  processMessage(input);
}

// Shutdown hook called by main() after run(). Currently performs no work;
// in particular the Input allocated in init() is not released here.
void done()
{
return;
}

// Entry point: set up from the command line, process the input, shut down.
// Exits with status 0 when processing runs to completion.
int main(int argc,char *argv[])
{
init(argc,argv);   // command line, stream setup, input
run();             // single message or mailbox
done();            // shutdown hook
return 0;
}
