/*
 * Decompiled with CFR 0.152.
 */
package org.jpedal.examples.text.extractheadlines;

import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.StringTokenizer;
import org.jpedal.PdfDecoder;
import org.jpedal.examples.text.ExtractTextInRectangle;
import org.jpedal.examples.text.extractheadlines.HeadlineConfiguration;
import org.jpedal.examples.text.extractheadlines.Output;
import org.jpedal.exception.PdfException;
import org.jpedal.exception.PdfSecurityException;
import org.jpedal.grouping.PdfGroupingAlgorithms;
import org.jpedal.utils.LogWriter;

public class ExtractHeadlines
extends ExtractTextInRectangle {
    private static final boolean debug = false;
    Output currentOutput = new Output();
    private String configDir = "config" + System.getProperty("file.separator");
    HeadlineConfiguration config = new HeadlineConfiguration(this.configDir);
    private static String testFile = "timesPDFS";
    String[] sectionTokens = null;
    private int[] x1;
    private int[] x2;
    private int[] y1;
    private int[] y2;

    public static void main(String[] args) {
        showMessages = false;
        if (showMessages) {
            System.out.println("Simple demo to extract text objects");
        }
        String file_name = testFile;
        if (args.length == 1) {
            file_name = args[0];
            System.out.println("File :" + file_name);
        } else {
            System.out.println("Please call with Filename");
            System.exit(1);
        }
        File pdf_file = new File(file_name);
        if (!pdf_file.exists()) {
            System.out.println("File " + file_name + " not found");
        }
        ExtractHeadlines text1 = new ExtractHeadlines(file_name);
    }

    private String extractSection(String extractedText) {
        if (showMessages) {
            System.out.println(extractedText);
        }
        if (extractedText == null) {
            return null;
        }
        HashMap<String, String> sections = new HashMap<String, String>();
        int sectionTokenCount = this.sectionTokens.length;
        int i = 0;
        while (i < sectionTokenCount) {
            sections.put(this.sectionTokens[i], "x");
            ++i;
        }
        Object pageNumber = null;
        String section = null;
        String currentToken = null;
        StringTokenizer tokens = new StringTokenizer(extractedText, "<>");
        while (tokens.hasMoreTokens()) {
            if (section != null && pageNumber != null) break;
            currentToken = tokens.nextToken();
            if (sections.get(currentToken) == null) continue;
            String font = currentToken;
            currentToken = tokens.nextToken();
            boolean isNumber = false;
            if (isNumber || currentToken.length() <= 2) continue;
            StringBuffer sectionName = new StringBuffer();
            while (tokens.hasMoreTokens() && !currentToken.equals("/font")) {
                if (currentToken.indexOf("SpaceC") != -1) {
                    sectionName.append(' ');
                } else {
                    sectionName.append(currentToken);
                }
                currentToken = tokens.nextToken();
            }
            section = sectionName.toString().trim();
            if (font.equals("font face=\"TimesClassicDisplay\" style=\"font-size:16pt\"")) break;
        }
        return section;
    }

    public ExtractHeadlines(String file_name) {
        File output_path;
        int tagCount = Integer.parseInt(this.config.getValue("xmlCount"));
        this.sectionTokens = new String[tagCount];
        int j = 0;
        while (j < tagCount) {
            this.sectionTokens[j] = this.config.getValue("xmlTag_" + j);
            if (showMessages) {
                System.out.println(this.sectionTokens[j]);
            }
            ++j;
        }
        tagCount = Integer.parseInt(this.config.getValue("locationCount"));
        this.x1 = new int[tagCount];
        this.x2 = new int[tagCount];
        this.y1 = new int[tagCount];
        this.y2 = new int[tagCount];
        String key = "locTag";
        String[] coords = new String[]{"x1", "y1", "x2", "y2"};
        int i = 0;
        while (i < tagCount) {
            int coord = 0;
            while (coord < 4) {
                String currentKey = String.valueOf(key) + '_' + i + '_' + coords[coord];
                String value = this.config.getValue(currentKey);
                int numberValue = Integer.parseInt(value);
                switch (coord) {
                    case 0: {
                        this.x1[i] = numberValue;
                        break;
                    }
                    case 1: {
                        this.y1[i] = numberValue;
                        break;
                    }
                    case 2: {
                        this.x2[i] = numberValue;
                        break;
                    }
                    case 3: {
                        this.y2[i] = numberValue;
                    }
                }
                ++coord;
            }
            ++i;
        }
        if (!this.user_dir.endsWith(this.separator)) {
            this.user_dir = String.valueOf(this.user_dir) + this.separator;
        }
        if (!(output_path = new File(this.outputDir)).exists()) {
            output_path.mkdirs();
        }
        if (file_name.toLowerCase().endsWith(".pdf")) {
            this.decodeFile("", file_name);
        } else {
            String[] files = null;
            File inputFiles = null;
            if (!file_name.endsWith(this.separator)) {
                file_name = String.valueOf(file_name) + this.separator;
            }
            try {
                inputFiles = new File(file_name);
                if (!inputFiles.isDirectory()) {
                    System.err.println(String.valueOf(file_name) + " is not a directory. Exiting program");
                }
                files = inputFiles.list();
            }
            catch (Exception ee) {
                LogWriter.writeLog("Exception trying to access file " + ee.getMessage());
            }
            long fileCount = files.length;
            int i2 = 0;
            while ((long)i2 < fileCount) {
                if (showMessages) {
                    System.out.println(String.valueOf(i2) + "/ " + fileCount + ' ' + files[i2]);
                }
                if (files[i2].toLowerCase().endsWith(".pdf")) {
                    if (showMessages) {
                        System.out.println(String.valueOf(file_name) + files[i2]);
                    }
                    this.decodeFile(file_name, files[i2]);
                }
                ++i2;
            }
        }
    }

    protected void decodeFile(String path, String name) {
        String file_name = String.valueOf(path) + name;
        String paper = name.substring(0, 3);
        String pageNumber = name.substring(3, 5);
        String edition = name.substring(5, 8);
        String date = name.substring(8, 10);
        String outputDir = "TimesSections/";
        File newDir = new File(outputDir);
        newDir.mkdir();
        this.currentOutput.open(String.valueOf(outputDir) + paper + '.' + edition + '.' + date + ".txt");
        String section = null;
        try {
            this.decodePdf = new PdfDecoder(false);
            this.decodePdf.setExtractionMode(1);
            this.decodePdf.init(true);
            this.decodePdf.openPdfFile(file_name);
            if (showMessages) {
                System.out.println("file_name=" + file_name);
            }
        }
        catch (PdfSecurityException se) {
            System.err.println("Security Exception " + se + " in pdf code for text extraction on file " + this.decodePdf.getObjectStore().getCurrentFilename());
        }
        catch (PdfException se) {
            System.err.println("Pdf Exception " + se + " in pdf code for text extraction on file " + this.decodePdf.getObjectStore().getCurrentFilename());
        }
        catch (Exception e) {
            System.err.println("Exception " + e + " in pdf code for text extraction on file " + this.decodePdf.getObjectStore().getCurrentFilename());
            e.printStackTrace();
        }
        if (this.decodePdf.isEncrypted() && !this.decodePdf.isPasswordSupplied() && !this.decodePdf.isExtractionAllowed()) {
            if (showMessages) {
                System.out.println("Encrypted settings");
                System.out.println("Please look at SimpleViewer for code sample to handle such files");
                System.out.println("Or get support/consultancy");
            }
        } else {
            int start = 1;
            int end = this.decodePdf.getPageCount();
            int possSetsCoordinates = this.x2.length;
            section = null;
            try {
                int page = start;
                while (page < end + 1) {
                    this.decodePdf.decodePage(page);
                    int coordSet = 0;
                    while (coordSet < possSetsCoordinates) {
                        int x1 = this.x1[coordSet];
                        int x2 = this.x2[coordSet];
                        int y1 = this.y1[coordSet];
                        int y2 = this.y2[coordSet];
                        if (showMessages) {
                            System.out.println("Using (" + x1 + ',' + y1 + ") (" + x2 + ',' + y2 + ')');
                        }
                        PdfGroupingAlgorithms currentGrouping = this.decodePdf.getGroupingObject();
                        this.text = null;
                        try {
                            this.text = currentGrouping.extractTextInRectangle(x1, y1, x2, y2, page, false, true);
                        }
                        catch (PdfException e) {
                            this.decodePdf.closePdfFile();
                            System.err.println("Exception " + e.getMessage() + " in file " + this.decodePdf.getObjectStore().fullFileName);
                            e.printStackTrace();
                        }
                        if (this.text == null) {
                            if (showMessages) {
                                System.out.println("No text found");
                            }
                        } else {
                            section = this.extractSection(this.text);
                            if (section != null) {
                                coordSet = possSetsCoordinates;
                            }
                        }
                        ++coordSet;
                    }
                    this.decodePdf.flushObjectValues(false);
                    if (section != null) {
                        if (showMessages) {
                            System.out.println("section=" + section);
                        }
                        this.currentOutput.outputSection(section, pageNumber, name);
                    }
                    if (showMessages) {
                        System.out.println("----");
                    }
                    ++page;
                }
            }
            catch (Exception e) {
                this.decodePdf.closePdfFile();
                e.printStackTrace();
                System.out.println(this.decodePdf.getObjectStore().getCurrentFilename());
            }
            this.decodePdf.flushObjectValues(true);
            if (showMessages) {
                System.out.println("Text read");
            }
        }
        this.currentOutput.close();
        this.decodePdf.closePdfFile();
    }
}

