/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.bi.search.extract;

import com.ibm.bi.search.extract.AbstractExtractData;
import com.ibm.bi.search.extract.ExtractUtils;
import java.io.IOException;
import java.io.InputStream;
import java.lang.invoke.MethodHandles;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Extracts the set of distinct qualifying tokens from the text of a PDF document.
 *
 * <p>The document is read from the {@code fileInput} stream inherited from
 * {@link AbstractExtractData}. Text is extracted one page at a time and split on single
 * whitespace characters; each piece accepted by {@link ExtractUtils#isQualifyingToken} is added
 * to {@link #tokenSet}.
 *
 * <p>Instances hold mutable state ({@link #tokenSet}) without any synchronization.
 */
public class ExtractPDFData
extends AbstractExtractData {
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Page processing stops once the number of distinct tokens reaches this value. */
    private static final int MAX_NUM_TOKEN = 999999;

    /** At most this many pages (counted from the first page) are analyzed. */
    private static final int MAX_NUM_PAGES = 9999;

    /** Token delimiter: a single whitespace character. Compiled once instead of once per page. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /** Distinct qualifying tokens collected by the most recent {@link #analyzeData()} call. */
    Set<String> tokenSet = new HashSet<>();

    /**
     * Creates an extractor; both arguments are passed straight to the superclass constructor.
     *
     * @param fileName   forwarded to {@link AbstractExtractData}
     * @param compressed forwarded to {@link AbstractExtractData}
     * @throws IOException if the superclass constructor fails
     */
    ExtractPDFData(String fileName, boolean compressed) throws IOException {
        super(fileName, compressed);
    }

    /**
     * Loads the PDF from {@code fileInput}, collects its qualifying tokens and returns them.
     *
     * <p>Any previously collected tokens are discarded first. The document is loaded with
     * temp-file-only scratch storage and is always closed before this method returns.
     *
     * @return the string form of the collected token set, or {@code null} if loading, text
     *         extraction or closing the document failed (the failure is logged at WARN level)
     * @throws IOException declared for the overridden contract; failures raised while handling
     *         the PDF are caught and reported via the {@code null} return value instead
     */
    @Override
    public String analyzeData() throws IOException {
        this.tokenSet.clear();
        // try-with-resources guarantees exactly one close() on every path. The cast keeps the
        // InputStream overload of load() selected regardless of the field's declared type.
        try (PDDocument pdDoc = PDDocument.load((InputStream)this.fileInput, MemoryUsageSetting.setupTempFileOnly())) {
            this.analyzePDF(pdDoc);
        }
        catch (Exception e) {
            // Deliberately broad: a failure on one PDF is reported as null rather than propagated.
            LOG.warn("\t extract PDF text failed: {} failed to process file", this.extractFileName, e);
            return null;
        }
        return this.tokenSet.toString();
    }

    /**
     * Returns the number of distinct tokens currently held.
     *
     * @return size of the token set
     */
    public int getTokenCount() {
        return this.tokenSet.size();
    }

    /**
     * Returns the string form of the current token set.
     *
     * @return {@code tokenSet.toString()}
     */
    public String getTokens() {
        return this.tokenSet.toString();
    }

    /**
     * Walks the pages of the document in order, adding qualifying tokens to {@link #tokenSet}.
     *
     * <p>Stops after {@link #MAX_NUM_PAGES} pages, or before starting a page once the set already
     * holds {@link #MAX_NUM_TOKEN} tokens. The caller owns the document and closes it.
     *
     * @param pdDoc an open document
     * @throws IOException if text extraction fails on any page
     */
    private void analyzePDF(PDDocument pdDoc) throws IOException {
        int lastPage = Math.min(pdDoc.getNumberOfPages(), MAX_NUM_PAGES);
        // PDFTextStripper page numbers are 1-based.
        for (int currentPage = 1; currentPage <= lastPage && this.tokenSet.size() < MAX_NUM_TOKEN; ++currentPage) {
            this.analyzePDFPage(pdDoc, currentPage);
        }
    }

    /**
     * Extracts the text of a single page and adds its qualifying tokens to {@link #tokenSet}.
     *
     * @param pdDoc an open document
     * @param page  1-based page number
     * @throws IOException if the text stripper fails
     */
    private void analyzePDFPage(PDDocument pdDoc, int page) throws IOException {
        PDFTextStripper pdfStripper = new PDFTextStripper();
        // Restrict extraction to exactly this page.
        pdfStripper.setStartPage(page);
        pdfStripper.setEndPage(page);
        String pageText = pdfStripper.getText(pdDoc);
        for (String token : WHITESPACE.split(pageText)) {
            if (ExtractUtils.isQualifyingToken(token)) {
                this.tokenSet.add(token);
            }
        }
    }
}

