/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.robot.extractor.impl;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.CompositeParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.SecureContentHandler;
import org.codelibs.robot.RobotSystemException;
import org.codelibs.robot.entity.ExtractData;
import org.codelibs.robot.extractor.ExtractException;
import org.codelibs.robot.extractor.Extractor;
import org.codelibs.robot.util.StreamUtil;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.container.annotation.tiger.InitMethod;
import org.seasar.framework.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class TikaExtractor
implements Extractor {
    private static final Logger logger = LoggerFactory.getLogger(TikaExtractor.class);
    public String outputEncoding = "UTF-8";
    public boolean readAsTextIfFailed = true;
    public long maxCompressionRatio = 100L;
    public long maxUncompressionSize = 1000000L;
    public int initialBufferSize = 10000;
    public TikaConfig tikaConfig;
    protected Map<String, String> pdfPasswordMap = new HashMap<String, String>();

    @InitMethod
    public void init() {
        if (this.tikaConfig == null) {
            this.tikaConfig = TikaConfig.getDefaultConfig();
        }
        if (logger.isDebugEnabled()) {
            Parser parser = this.tikaConfig.getParser();
            logger.debug("supportedTypes: {}", (Object)parser.getSupportedTypes(new ParseContext()));
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * Loose catch block
     * Enabled force condition propagation
     * Lifted jumps to return sites
     */
    @Override
    public ExtractData getText(InputStream inputStream, Map<String, String> params) {
        if (inputStream == null) {
            throw new RobotSystemException("The inputstream is null.");
        }
        File tempFile = null;
        try {
            tempFile = File.createTempFile("tikaExtractor-", ".out");
        }
        catch (IOException e) {
            throw new ExtractException("Could not create a temp file.", e);
        }
        try {
            String pdfPassword;
            FileOutputStream out = null;
            try {
                out = new FileOutputStream(tempFile);
                StreamUtil.drain(inputStream, out);
            }
            catch (Throwable throwable) {
                IOUtils.closeQuietly(out);
                throw throwable;
            }
            IOUtils.closeQuietly((OutputStream)out);
            FileInputStream in = new FileInputStream(tempFile);
            PrintStream originalOutStream = System.out;
            ByteArrayOutputStream outStream = new ByteArrayOutputStream();
            System.setOut(new PrintStream(outStream, true));
            PrintStream originalErrStream = System.err;
            ByteArrayOutputStream errStream = new ByteArrayOutputStream();
            System.setErr(new PrintStream(errStream, true));
            String resourceName = params == null ? null : params.get("resourceName");
            String contentType = params == null ? null : params.get("Content-Type");
            String contentEncoding = params == null ? null : params.get("Content-Encoding");
            String string = pdfPassword = params == null ? null : params.get("org.apache.tika.parser.pdf.password");
            if (pdfPassword == null && params != null) {
                pdfPassword = this.getPdfPassword(params.get("url"), resourceName);
            }
            Metadata metadata = this.createMetadata(resourceName, contentType, contentEncoding, pdfPassword);
            DetectParser parser = new DetectParser();
            ParseContext parseContext = new ParseContext();
            parseContext.set(Parser.class, (Object)parser);
            StringWriter writer = new StringWriter(this.initialBufferSize);
            parser.parse(in, (ContentHandler)new BodyContentHandler((Writer)writer), metadata, parseContext);
            String content = this.normalizeContent(writer);
            if (StringUtil.isBlank((String)content)) {
                if (resourceName != null) {
                    IOUtils.closeQuietly((InputStream)in);
                    if (logger.isDebugEnabled()) {
                        logger.debug("retry without a resource name: {}", (Object)resourceName);
                    }
                    in = new FileInputStream(tempFile);
                    Metadata metadata2 = this.createMetadata(null, contentType, contentEncoding, pdfPassword);
                    StringWriter writer2 = new StringWriter(this.initialBufferSize);
                    parser.parse(in, (ContentHandler)new BodyContentHandler((Writer)writer2), metadata2, parseContext);
                    content = this.normalizeContent(writer2);
                }
                if (StringUtil.isBlank((String)content) && contentType != null) {
                    IOUtils.closeQuietly((InputStream)in);
                    if (logger.isDebugEnabled()) {
                        logger.debug("retry without a content type: {}", (Object)contentType);
                    }
                    in = new FileInputStream(tempFile);
                    Metadata metadata3 = this.createMetadata(null, null, contentEncoding, pdfPassword);
                    StringWriter writer3 = new StringWriter(this.initialBufferSize);
                    parser.parse(in, (ContentHandler)new BodyContentHandler((Writer)writer3), metadata3, parseContext);
                    content = this.normalizeContent(writer3);
                }
                if (this.readAsTextIfFailed && StringUtil.isBlank((String)content)) {
                    IOUtils.closeQuietly((InputStream)in);
                    if (logger.isDebugEnabled()) {
                        logger.debug("read the content as a text.");
                    }
                    if (contentEncoding == null) {
                        contentEncoding = "UTF-8";
                    }
                    BufferedReader br = null;
                    try {
                        String line;
                        br = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(tempFile), contentEncoding));
                        StringWriter writer4 = new StringWriter(this.initialBufferSize);
                        while ((line = br.readLine()) != null) {
                            writer4.write(line.replaceAll("\\p{Cntrl}", " ").replaceAll("\\s+", " ").trim());
                            writer4.write(32);
                        }
                        content = writer4.toString().trim();
                    }
                    catch (Exception e) {
                        try {
                            logger.warn("Could not read " + tempFile.getAbsolutePath(), (Throwable)e);
                        }
                        catch (Throwable throwable) {
                            IOUtils.closeQuietly(br);
                            throw throwable;
                        }
                        IOUtils.closeQuietly((Reader)br);
                    }
                    IOUtils.closeQuietly((Reader)br);
                }
            }
            ExtractData extractData = new ExtractData(content);
            Object[] names = metadata.names();
            Arrays.sort(names);
            for (Object name : names) {
                extractData.putValues((String)name, metadata.getValues((String)name));
            }
            if (logger.isDebugEnabled()) {
                logger.debug("Result: metadata: {}", (Object)metadata);
            }
            ExtractData extractData2 = extractData;
            IOUtils.closeQuietly((InputStream)in);
            if (originalOutStream != null) {
                System.setOut(originalOutStream);
            }
            if (originalErrStream != null) {
                System.setErr(originalErrStream);
            }
            try {
                byte[] bs;
                if (logger.isInfoEnabled() && (bs = outStream.toByteArray()).length != 0) {
                    logger.info(new String(bs, this.outputEncoding));
                }
                if (!logger.isWarnEnabled() || (bs = errStream.toByteArray()).length == 0) return extractData2;
                logger.warn(new String(bs, this.outputEncoding));
                return extractData2;
            }
            catch (Exception e) {
                // empty catch block
            }
            return extractData2;
            catch (TikaException e) {
                block52: {
                    Extractor xmlExtractor;
                    if (e.getMessage().indexOf("bomb") >= 0) {
                        throw e;
                    }
                    Throwable cause = e.getCause();
                    if (!(cause instanceof SAXException) || (xmlExtractor = (Extractor)SingletonS2Container.getComponent((String)"xmlExtractor")) == null) break block52;
                    IOUtils.closeQuietly((InputStream)in);
                    in = new FileInputStream(tempFile);
                    ExtractData extractData3 = xmlExtractor.getText(in, params);
                    IOUtils.closeQuietly((InputStream)in);
                    if (originalOutStream != null) {
                        System.setOut(originalOutStream);
                    }
                    if (originalErrStream != null) {
                        System.setErr(originalErrStream);
                    }
                    try {
                        byte[] bs;
                        if (logger.isInfoEnabled() && (bs = outStream.toByteArray()).length != 0) {
                            logger.info(new String(bs, this.outputEncoding));
                        }
                        if (logger.isWarnEnabled() && (bs = errStream.toByteArray()).length != 0) {
                            logger.warn(new String(bs, this.outputEncoding));
                        }
                    }
                    catch (Exception e2) {
                        // empty catch block
                    }
                    if (tempFile == null || tempFile.delete()) return extractData3;
                    logger.warn("Failed to delete " + tempFile.getAbsolutePath());
                    return extractData3;
                }
                try {
                    throw e;
                    {
                        catch (Throwable throwable) {
                            IOUtils.closeQuietly((InputStream)in);
                            if (originalOutStream != null) {
                                System.setOut(originalOutStream);
                            }
                            if (originalErrStream != null) {
                                System.setErr(originalErrStream);
                            }
                            try {
                                byte[] bs;
                                if (logger.isInfoEnabled() && (bs = outStream.toByteArray()).length != 0) {
                                    logger.info(new String(bs, this.outputEncoding));
                                }
                                if (!logger.isWarnEnabled() || (bs = errStream.toByteArray()).length == 0) throw throwable;
                                logger.warn(new String(bs, this.outputEncoding));
                                throw throwable;
                            }
                            catch (Exception e3) {
                                // empty catch block
                            }
                            throw throwable;
                        }
                    }
                }
                catch (Exception e4) {
                    throw new ExtractException("Could not extract a content.", e4);
                }
            }
        }
        finally {
            if (tempFile != null && !tempFile.delete()) {
                logger.warn("Failed to delete " + tempFile.getAbsolutePath());
            }
        }
    }

    private String normalizeContent(StringWriter writer) {
        return writer.toString().replaceAll("\\s+", " ").trim();
    }

    String getPdfPassword(String url, String resourceName) {
        if (this.pdfPasswordMap.isEmpty()) {
            return null;
        }
        String value = null;
        if (StringUtil.isNotEmpty((String)url)) {
            value = url;
        } else if (StringUtil.isNotEmpty((String)resourceName)) {
            value = resourceName;
        }
        if (value != null) {
            for (Map.Entry<String, String> entry : this.pdfPasswordMap.entrySet()) {
                if (!value.matches(entry.getKey())) continue;
                return entry.getValue();
            }
        }
        return null;
    }

    private Metadata createMetadata(String resourceName, String contentType, String contentEncoding, String pdfPassword) {
        Metadata metadata = new Metadata();
        if (StringUtil.isNotEmpty((String)resourceName)) {
            metadata.set("resourceName", resourceName);
        }
        if (StringUtil.isNotBlank((String)contentType)) {
            metadata.set("Content-Type", contentType);
        }
        if (StringUtil.isNotBlank((String)contentEncoding)) {
            metadata.set("Content-Encoding", contentEncoding);
        }
        if (pdfPassword != null) {
            metadata.add("org.apache.tika.parser.pdf.password", pdfPassword);
        }
        if (logger.isDebugEnabled()) {
            logger.debug("metadata: {}", (Object)metadata);
        }
        return metadata;
    }

    public void addPdfPassword(String regex, String password) {
        this.pdfPasswordMap.put(regex, password);
    }

    protected class DetectParser
    extends CompositeParser {
        private final Detector detector;

        public DetectParser() {
            this(tikaExtractor.tikaConfig);
        }

        public DetectParser(TikaConfig config) {
            super(config.getMediaTypeRegistry(), new Parser[]{config.getParser()});
            this.detector = config.getDetector();
        }

        /*
         * WARNING - Removed try catching itself - possible behaviour change.
         */
        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
            TemporaryResources tmp = new TemporaryResources();
            try {
                TikaInputStream tis = TikaInputStream.get((InputStream)stream, (TemporaryResources)tmp);
                MediaType type = this.detector.detect((InputStream)tis, metadata);
                metadata.set("Content-Type", type.toString());
                SecureContentHandler sch = new SecureContentHandler(handler, tis);
                sch.setMaximumCompressionRatio(TikaExtractor.this.maxCompressionRatio);
                sch.setOutputThreshold(TikaExtractor.this.maxUncompressionSize);
                if (logger.isDebugEnabled()) {
                    logger.debug("type: {}, metadata: {}, maxCompressionRatio: {}, maxUncompressionSize: {}", new Object[]{type, metadata, TikaExtractor.this.maxCompressionRatio, TikaExtractor.this.maxUncompressionSize});
                }
                try {
                    super.parse((InputStream)tis, (ContentHandler)sch, metadata, context);
                }
                catch (SAXException e) {
                    sch.throwIfCauseOf(e);
                    throw e;
                }
            }
            finally {
                tmp.dispose();
            }
        }

        public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException, SAXException, TikaException {
            ParseContext context = new ParseContext();
            context.set(Parser.class, (Object)this);
            this.parse(stream, handler, metadata, context);
        }
    }
}

