/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.neo.dataimport.cdf.scrape.dsv;

import com.ibm.bi.platform.commons.messages.IMessageKey;
import com.ibm.icu.text.CharsetDetector;
import com.ibm.icu.text.CharsetMatch;
import com.ibm.neo.dataimport.api.EImportMessageCode;
import com.ibm.neo.dataimport.api.EImportMessageContext;
import com.ibm.neo.dataimport.api.WAImportException;
import com.ibm.neo.dataimport.cdf.CDFService;
import com.ibm.neo.dataimport.cdf.DocumentType;
import com.ibm.neo.dataimport.cdf.scrape.AbstractDocumentScraper;
import com.ibm.neo.dataimport.cdf.scrape.ScrapeOptions;
import com.ibm.neo.dataimport.cdf.scrape.ScrapeResult;
import com.ibm.neo.dataimport.cdf.scrape.dsv.DSVScraperFactory;
import com.ibm.neo.dataimport.cdf.sheet.ICDFCell;
import com.ibm.neo.dataimport.cdf.sheet.ICDFCellFactory;
import com.ibm.neo.dataimport.cdf.sheet.ICDFSheet;
import com.ibm.neo.dataimport.nodel.DocumentModel;
import com.ibm.neo.dataimport.nodel.DocumentSection;
import com.ibm.neo.dataimport.nodel.EDataType;
import com.ibm.neo.exception.WatsonMessageContext;
import com.ibm.neo.io.BOMDetector;
import com.ibm.neo.io.FastBufferedReader;
import com.ibm.neo.io.FastInputStreamReader;
import com.ibm.neo.messages.exceptions.NeoImportError;
import com.ibm.neo.util.FileSampler;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.NullArgumentException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for scrapers of delimiter-separated-value (DSV) documents.
 *
 * <p>The class opens the input, determines the character set (explicit option, byte-order mark,
 * ICU detection, then a UTF-8 / ISO-8859-1 heuristic), guesses the record separator, field
 * delimiter and quote character from a leading sample, and hands the resulting reader and
 * {@link CSVFormat} to {@link #parseToSheet(Reader, CSVFormat, ScrapeOptions)}, which subclasses
 * implement. The detected settings are recorded in the document properties under
 * {@link #PROP_CHARSET}, {@link #PROP_DELIMITER_CHAR}, {@link #PROP_QUOTE_CHAR} and
 * {@link #PROP_RECORD_SEPARATOR}.
 */
public abstract class AbstractDSVScraper
extends AbstractDocumentScraper {
    /** Size, in bytes, of the buffer wrapped around raw input streams. */
    private static final int INPUT_BUFFER_SIZE = 8092;
    /** Maximum number of bytes sampled when choosing between UTF-8 and ISO-8859-1. */
    private static final int INPUT_SAMPLE_BUFFER_SIZE = 517888;
    /** Size, in characters, of the reader buffer and of the sample used for format detection. */
    private static final int READER_BUFFER_SIZE = 8092;
    /** ICU charset matches are only trusted when their confidence is strictly above this value. */
    private static final int MIN_CHARSET_CONFIDENCE = 50;
    private static final Charset ISO_8859_1_CHARSET = Charset.forName("ISO-8859-1");
    private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
    /** Document property holding the name of the charset used to decode the input. */
    public static final String PROP_CHARSET = "charset";
    /** Document property holding the field delimiter character. */
    public static final String PROP_DELIMITER_CHAR = "delimiter-char";
    /** Document property holding the quote character; absent when no quote character is used. */
    public static final String PROP_QUOTE_CHAR = "quote-char";
    /** Document property holding the record separator. */
    public static final String PROP_RECORD_SEPARATOR = "record-separator";
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractDSVScraper.class);

    /**
     * Creates a scraper bound to the given CDF service.
     *
     * @param cdfService service passed through to {@link AbstractDocumentScraper}
     */
    public AbstractDSVScraper(CDFService cdfService) {
        super(cdfService);
    }

    /**
     * @return the DSV document type declared by {@link DSVScraperFactory}
     */
    @Override
    public DocumentType getDocumentType() {
        return DSVScraperFactory.DSV_DOC_TYPE;
    }

    /**
     * Scrapes a DSV file from disk by delegating to
     * {@link #scrape(String, String, InputStream, ScrapeOptions)}.
     *
     * <p>When parsing fails with an {@code ILLEGAL_CSV_FORMAT} error that carries a byte offset,
     * a text sample around the failure is attached to the exception context (best effort)
     * before the exception is rethrown.
     *
     * @param fileName display name of the file; must not be null
     * @param mimeType content type of the file; may be null
     * @param file file to read; must not be null
     * @param options scrape options; must not be null
     * @return the scrape result produced by the stream-based overload
     * @throws WAImportException if the file cannot be opened, read or closed, or if parsing fails
     */
    @Override
    public ScrapeResult scrape(String fileName, String mimeType, File file, ScrapeOptions options) throws WAImportException {
        if (null == fileName) {
            throw new NullArgumentException("fileName");
        }
        if (null == file) {
            throw new NullArgumentException("file");
        }
        if (null == options) {
            throw new NullArgumentException("options");
        }
        // try-with-resources covers opening, use and closing of the file stream, so a missing
        // file is reported as an import error instead of escaping as an undeclared exception.
        try (FileInputStream fin = new FileInputStream(file)) {
            return this.scrape(fileName, mimeType, (InputStream)fin, options);
        }
        catch (IOException ex) {
            throw AbstractDSVScraper.newIoError(ex, fileName, mimeType);
        }
        catch (WAImportException ex) {
            this.attachErrorSample(ex, file, options);
            throw ex;
        }
    }

    /**
     * Scrapes a DSV document from a stream into a single-sheet {@link ScrapeResult}.
     *
     * <p>The stream is wrapped in a {@link BufferedInputStream} that is closed once parsing has
     * finished, which also closes {@code input}.
     *
     * @param fileName display name of the document; must not be null
     * @param mimeType content type of the document; may be null
     * @param input stream to read; must not be null
     * @param options scrape options; must not be null
     * @return a result whose document has one root section referring to the parsed sheet
     * @throws WAImportException if reading fails or the subclass parser rejects the content
     */
    @Override
    public ScrapeResult scrape(String fileName, String mimeType, InputStream input, ScrapeOptions options) throws WAImportException {
        if (null == fileName) {
            throw new NullArgumentException("fileName");
        }
        if (null == input) {
            throw new NullArgumentException("input");
        }
        if (null == options) {
            throw new NullArgumentException("options");
        }
        DocumentModel doc = new DocumentModel(this.getDocumentType().getTypeName(), this.makeTitle(fileName));
        ScrapeResult result = new ScrapeResult(doc);
        doc.getProperties().put("filename", fileName);
        if (null != mimeType) {
            doc.getProperties().put("mime-type", mimeType);
        }
        DocumentSection root = doc.createSection();
        root.setName("DSV");
        root.setType("dsv-table");
        try {
            ICDFSheet sheet;
            try (BufferedInputStream bufferedInput = new BufferedInputStream(input, INPUT_BUFFER_SIZE)) {
                FastBufferedReader bufferedReader = this.openBufferedReader(bufferedInput, doc, options);
                CSVFormat format = AbstractDSVScraper.detectCSVFormat(bufferedReader, doc, options);
                sheet = this.parseToSheet((Reader)bufferedReader, format, options);
            }
            // The sheet is persisted only after the input has been closed.
            sheet.saveMetadata();
            this.getCDFService().getSheetLibrary().store(sheet);
            root.setSheetInfo(sheet.getSheetInfo());
            result.getSheets().put(sheet.getId(), sheet);
        }
        catch (IOException ex) {
            throw AbstractDSVScraper.newIoError(ex, fileName, mimeType);
        }
        return result;
    }

    /**
     * Parses the decoded document into a sheet.
     *
     * @param reader reader positioned at the start of the decoded content
     * @param format format detected (or overridden through the options) for this document
     * @param options scrape options supplied by the caller
     * @return the parsed sheet
     * @throws IOException if reading from {@code reader} fails
     * @throws WAImportException if the content cannot be parsed
     */
    protected abstract ICDFSheet parseToSheet(Reader reader, CSVFormat format, ScrapeOptions options) throws IOException, WAImportException;

    /**
     * Builds the import exception used for every I/O failure in this class.
     * NOTE(review): the message key is named for the XLS scraper; confirm whether a
     * DSV-specific key exists.
     */
    private static WAImportException newIoError(IOException cause, String fileName, String mimeType) {
        return WAImportException.newBuilder().withMessage((IMessageKey)NeoImportError.NEO_IS_XLS_SCRAPER_IO_ERROR).withCause((Throwable)cause).withConditionCode(EImportMessageCode.INTERNAL_ERROR).withContextAttribute(EImportMessageContext.FILE_NAME, (Object)fileName).withContextAttribute(EImportMessageContext.FILE_CONTENT_TYPE, (Object)mimeType).build();
    }

    /**
     * Best-effort enrichment of an {@code ILLEGAL_CSV_FORMAT} error with a text sample taken
     * from the file at the reported byte offset. Any failure while sampling is logged and
     * otherwise ignored so that the original parsing error is never masked.
     */
    private void attachErrorSample(WAImportException ex, File file, ScrapeOptions options) {
        if (!EImportMessageCode.ILLEGAL_CSV_FORMAT.equals(ex.getCode())) {
            return;
        }
        WatsonMessageContext context = ex.getWatsonMessageContext();
        if (context == null) {
            return;
        }
        Long byteOffset = (Long)context.getItem((Enum)EImportMessageContext.CSV_BYTE_OFFSET, Long.class);
        if (byteOffset == null || byteOffset < 0L) {
            return;
        }
        BufferedInputStream sampleInput = null;
        FastBufferedReader reader = null;
        try {
            sampleInput = new BufferedInputStream(FileUtils.openInputStream(file), INPUT_BUFFER_SIZE);
            reader = this.openBufferedReader(sampleInput, new DocumentModel(), options);
            // 200L is presumably the maximum sample length; confirm against FileSampler.
            String sample = FileSampler.getLeadingSample(file, (Reader)reader, (long)byteOffset, 200L);
            context.putItemIfNotNull((Enum)EImportMessageContext.CSV_SAMPLE, (Object)sample);
        }
        catch (Exception e) {
            LOGGER.warn("Could not sample a dsv file in response to a parsing error.", e);
        }
        finally {
            IOUtils.closeQuietly((Reader)reader);
            // Also close the raw stream: the reader is still null if opening it failed.
            IOUtils.closeQuietly((InputStream)sampleInput);
        }
    }

    /**
     * Opens a reader over {@code bufferedInput} and records the chosen charset in {@code doc}.
     *
     * <p>The charset is chosen in this order: the charset given in {@code options}; a detected
     * byte-order mark (which is skipped); the best ICU match when its confidence exceeds
     * {@link #MIN_CHARSET_CONFIDENCE} and the JVM supports it; otherwise UTF-8 or ISO-8859-1,
     * decided by strictly decoding a leading sample with both.
     *
     * @throws IOException if reading or resetting the stream fails
     * @throws WAImportException if the sample can be decoded with neither fallback charset
     */
    private FastBufferedReader openBufferedReader(BufferedInputStream bufferedInput, DocumentModel doc, ScrapeOptions options) throws IOException, WAImportException {
        if (null != options.getCharset()) {
            doc.getProperties().put(PROP_CHARSET, options.getCharset().name());
            return new FastBufferedReader((Reader)new FastInputStreamReader((InputStream)bufferedInput, options.getCharset()), READER_BUFFER_SIZE);
        }
        BOMDetector.BOM bom = BOMDetector.detect((InputStream)bufferedInput);
        if (null != bom) {
            bufferedInput.skip(bom.getByteLength());
            doc.getProperties().put(PROP_CHARSET, bom.getCharsetName());
            return new FastBufferedReader((Reader)new FastInputStreamReader((InputStream)bufferedInput, bom.getCharsetName()), READER_BUFFER_SIZE);
        }
        CharsetDetector detector = new CharsetDetector();
        detector.setText((InputStream)bufferedInput);
        CharsetMatch bestMatch = detector.detect();
        // detect() returns null when ICU finds no candidate charset at all.
        if (bestMatch != null && bestMatch.getConfidence() > MIN_CHARSET_CONFIDENCE) {
            try {
                Charset charset = Charset.forName(bestMatch.getName());
                doc.getProperties().put(PROP_CHARSET, charset.name());
                return new FastBufferedReader((Reader)new FastInputStreamReader((InputStream)bufferedInput, bestMatch.getName()), READER_BUFFER_SIZE);
            }
            catch (UnsupportedCharsetException | IllegalCharsetNameException ignored) {
                // ICU named a charset this JVM cannot provide; fall through to the heuristic.
            }
        }
        byte[] sample = new byte[INPUT_SAMPLE_BUFFER_SIZE];
        int sampleLength = 0;
        int len;
        bufferedInput.mark(INPUT_SAMPLE_BUFFER_SIZE);
        // Each read is capped at the space left, so short reads can neither overflow the
        // sample nor push the stream past its mark limit.
        while (sampleLength < sample.length && (len = bufferedInput.read(sample, sampleLength, sample.length - sampleLength)) >= 0) {
            sampleLength += len;
        }
        bufferedInput.reset();
        if (sampleLength == sample.length) {
            // The sample may have been cut in the middle of a multi-byte UTF-8 character,
            // which the strict decoder would otherwise report as malformed input.
            sampleLength = AbstractDSVScraper.trimIncompleteUtf8Tail(sample, sampleLength);
        }
        ByteBuffer sampleBuf = ByteBuffer.wrap(sample, 0, sampleLength);
        int utf8Len = AbstractDSVScraper.getDecodedLength(sampleBuf, UTF8_CHARSET);
        sampleBuf.rewind();
        int iso88591Len = AbstractDSVScraper.getDecodedLength(sampleBuf, ISO_8859_1_CHARSET);
        Charset cs;
        if (iso88591Len == -1) {
            if (utf8Len == -1) {
                throw WAImportException.newBuilder().withMessage((IMessageKey)NeoImportError.CHARSET_ENCODING_UNKNOWN).withConditionCode(EImportMessageCode.CHARSET_ENCODING_UNKNOWN).build();
            }
            cs = UTF8_CHARSET;
        } else if (utf8Len == -1) {
            cs = ISO_8859_1_CHARSET;
        } else {
            // Equal lengths mean no multi-byte sequences were seen; ISO-8859-1 wins the tie.
            cs = iso88591Len <= utf8Len ? ISO_8859_1_CHARSET : UTF8_CHARSET;
        }
        doc.getProperties().put(PROP_CHARSET, cs.name());
        return new FastBufferedReader((Reader)new FastInputStreamReader((InputStream)bufferedInput, cs), READER_BUFFER_SIZE);
    }

    /**
     * Returns {@code length} reduced so that the first {@code length} bytes of {@code bytes} do
     * not end inside an unfinished multi-byte UTF-8 sequence. Only the trailing sequence is
     * inspected; the rest of the data is not validated.
     */
    private static int trimIncompleteUtf8Tail(byte[] bytes, int length) {
        int index = length - 1;
        int continuationBytes = 0;
        // Walk back over trailing continuation bytes (10xxxxxx); a sequence has at most three.
        while (index >= 0 && continuationBytes < 3 && (bytes[index] & 0xC0) == 0x80) {
            --index;
            ++continuationBytes;
        }
        if (index < 0) {
            return length;
        }
        int lead = bytes[index] & 0xFF;
        int expectedContinuations = lead >= 0xF0 ? 3 : (lead >= 0xE0 ? 2 : (lead >= 0xC0 ? 1 : 0));
        return continuationBytes < expectedContinuations ? index : length;
    }

    /**
     * Strictly decodes the remaining bytes of {@code buffer} with {@code charset}.
     * The buffer's position is advanced by the decode.
     *
     * @return the number of decoded characters, or -1 if the bytes are not valid in the charset
     */
    private static int getDecodedLength(ByteBuffer buffer, Charset charset) {
        try {
            return charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).decode(buffer).remaining();
        }
        catch (CharacterCodingException ex) {
            return -1;
        }
    }

    /**
     * Guesses the CSV format from a leading sample of the reader and records the result in the
     * document properties. The reader is marked before sampling and reset afterwards.
     *
     * <p>A field delimiter or quote character given in {@code options} overrides the guess.
     * A {@code .tsv}/{@code .tab} file name or the {@code text/tab-separated-values} MIME type
     * forces a tab delimiter unless the options override it.
     *
     * @throws IOException if reading from or resetting the reader fails
     */
    private static CSVFormat detectCSVFormat(FastBufferedReader reader, DocumentModel doc, ScrapeOptions options) throws IOException {
        CSVFormat.CSVFormatBuilder builder = CSVFormat.newBuilder(',').withQuoteChar(null).withEscape(null).withIgnoreEmptyLines(false).withIgnoreSurroundingSpaces(true);
        String filename = (String)doc.getProperties().get("filename");
        String mimeType = (String)doc.getProperties().get("mime-type");
        boolean forceTabDelimited = filename != null && (filename.endsWith(".tsv") || filename.endsWith(".tab")) || "text/tab-separated-values".equals(mimeType);

        StringBuilder sample = new StringBuilder(READER_BUFFER_SIZE);
        char[] cbuf = new char[1024];
        int charsRead;
        reader.mark(READER_BUFFER_SIZE);
        while (sample.length() < READER_BUFFER_SIZE && (charsRead = reader.read(cbuf, 0, Math.min(cbuf.length, READER_BUFFER_SIZE - sample.length()))) >= 0) {
            sample.append(cbuf, 0, charsRead);
        }
        reader.reset();
        int sampleSize = sample.length();

        int carriageReturnCount = 0;
        int lineFeedCount = 0;
        int tabCount = 0;
        int pipeCount = 0;
        int semiColonCount = 0;
        for (int i = 0; i < sampleSize; ++i) {
            switch (sample.charAt(i)) {
                case '\n':
                    ++lineFeedCount;
                    break;
                case '\r':
                    ++carriageReturnCount;
                    break;
                case '\t':
                    ++tabCount;
                    break;
                case ';':
                    ++semiColonCount;
                    break;
                case '|':
                    ++pipeCount;
                    break;
                default:
                    break;
            }
        }

        // Line feed wins ties, so CRLF input is split on '\n'.
        char recordSeparator = lineFeedCount >= carriageReturnCount ? '\n' : '\r';
        builder = builder.withRecordSeparator(recordSeparator);

        char delim;
        if (forceTabDelimited) {
            delim = '\t';
        } else {
            int lineCount = Math.max(lineFeedCount, carriageReturnCount);
            // Fewer than two tabs per line on average: the tab count is zeroed and therefore
            // no longer contributes to the quote heuristic below.
            if (lineCount > 0 && tabCount / lineCount < 2) {
                tabCount = 0;
            }
            delim = AbstractDSVScraper.detectDelimiter(sample, recordSeparator);
        }
        if (options != null && options.getFieldDelimiter() != null && options.getFieldDelimiter().length() > 0) {
            delim = options.getFieldDelimiter().charAt(0);
        }
        builder = builder.withDelimiter(delim);

        int potentialSingleQuoteEscapes = AbstractDSVScraper.countSingleQuoteEscapes(sample, delim, recordSeparator);
        int potentialDoubleQuoteEscapes = AbstractDSVScraper.countDoubleQuoteEscapes(sample, delim);
        int quoteEscapes = Math.max(potentialSingleQuoteEscapes, potentialDoubleQuoteEscapes);

        // For tab, pipe and semicolon delimiters quoting is only assumed when quotes are about
        // as frequent as the delimiter; for every other delimiter quoting is always assumed.
        boolean quotesEncapsulate = true;
        if (delim == '\t') {
            quotesEncapsulate = AbstractDSVScraper.quotesKeepPaceWith(quoteEscapes, tabCount);
        } else if (delim == '|') {
            quotesEncapsulate = AbstractDSVScraper.quotesKeepPaceWith(quoteEscapes, pipeCount);
        } else if (delim == ';') {
            quotesEncapsulate = AbstractDSVScraper.quotesKeepPaceWith(quoteEscapes, semiColonCount);
        }
        if (!quotesEncapsulate) {
            builder = builder.withQuoteChar(null);
        } else if (potentialSingleQuoteEscapes > potentialDoubleQuoteEscapes) {
            builder = builder.withQuoteChar('\'');
        } else {
            builder = builder.withQuoteChar('\"');
        }
        if (options != null && options.getQuoteChar() != null) {
            builder = builder.withQuoteChar(options.getQuoteChar().charValue());
        }

        CSVFormat format = builder.build();
        doc.getProperties().put(PROP_DELIMITER_CHAR, String.valueOf(format.getDelimiter()));
        doc.getProperties().put(PROP_RECORD_SEPARATOR, format.getRecordSeparator());
        if (format.getQuoteChar() != null) {
            doc.getProperties().put(PROP_QUOTE_CHAR, String.valueOf(format.getQuoteChar()));
        }
        return format;
    }

    /**
     * Returns false only when half the quote count divided by the delimiter count is below 0.9.
     * The comparison is deliberately written as a negated "less than": with a zero delimiter
     * count the ratio is infinite or NaN, the comparison is false, and quoting stays enabled.
     */
    private static boolean quotesKeepPaceWith(int quoteEscapes, int delimiterCount) {
        return !(0.5 * (double)quoteEscapes / (double)delimiterCount < 0.9);
    }

    /**
     * Counts single quotes that appear to enclose a field: an opening quote directly after a
     * delimiter or record separator, followed by a closing quote before the next delimiter or
     * record separator. Each such pair adds two to the result.
     */
    private static int countSingleQuoteEscapes(StringBuilder sample, char delim, char recordSeparator) {
        int sampleSize = sample.length();
        int count = 0;
        for (int i = 0; i < sampleSize; ++i) {
            if (sample.charAt(i) != '\'' || i == 0) {
                continue;
            }
            char previous = sample.charAt(i - 1);
            if (previous != delim && previous != recordSeparator) {
                continue;
            }
            ++i;
            boolean scanning = i < sampleSize && sample.charAt(i) != delim && sample.charAt(i) != recordSeparator;
            // The bounds check stops the scan when an unterminated quoted field runs off the
            // end of the sample instead of indexing past it.
            while (scanning && i < sampleSize) {
                char current = sample.charAt(i);
                if (current == '\'') {
                    count += 2;
                    scanning = false;
                } else {
                    scanning = current != delim && current != recordSeparator;
                }
                ++i;
            }
            // The outer loop increment then skips the character after the one that ended the
            // scan; this stepping is kept unchanged so detection results stay the same.
        }
        return count;
    }

    /**
     * Counts double quotes that look like field quoting: one point when preceded by a backslash
     * or the delimiter, and one more when directly followed by the delimiter.
     */
    private static int countDoubleQuoteEscapes(StringBuilder sample, char delim) {
        int sampleSize = sample.length();
        int count = 0;
        for (int i = 0; i < sampleSize; ++i) {
            if (sample.charAt(i) != '\"') {
                continue;
            }
            if (i > 0) {
                char previous = sample.charAt(i - 1);
                if (previous == '\\' || previous == delim) {
                    ++count;
                }
            }
            if (i + 1 < sampleSize && sample.charAt(i + 1) == delim) {
                ++count;
            }
        }
        return count;
    }

    /**
     * Picks the field delimiter from the first line of the sample that contains a comma, tab,
     * semicolon or pipe. Within that line the most frequent candidate wins; ties are resolved
     * in the order comma, tab, semicolon, pipe. Empty lines (including a leading one) are
     * skipped and a final line without a trailing record separator is examined as well.
     *
     * @return the detected delimiter, or a comma when no line contains any candidate
     */
    private static char detectDelimiter(StringBuilder sample, char recordSeparator) {
        String separator = String.valueOf(recordSeparator);
        int length = sample.length();
        int start = 0;
        while (start < length) {
            int end = sample.indexOf(separator, start);
            if (end < 0) {
                end = length;
            }
            int commaCount = 0;
            int pipeCount = 0;
            int semiColonCount = 0;
            int tabCount = 0;
            for (int i = start; i < end; ++i) {
                switch (sample.charAt(i)) {
                    case ',':
                        ++commaCount;
                        break;
                    case '\t':
                        ++tabCount;
                        break;
                    case ';':
                        ++semiColonCount;
                        break;
                    case '|':
                        ++pipeCount;
                        break;
                    default:
                        break;
                }
            }
            char delim = ',';
            int separators = commaCount;
            if (tabCount > separators) {
                delim = '\t';
                separators = tabCount;
            }
            if (semiColonCount > separators) {
                delim = ';';
                separators = semiColonCount;
            }
            if (pipeCount > separators) {
                delim = '|';
                separators = pipeCount;
            }
            if (separators > 0) {
                return delim;
            }
            start = end + 1;
        }
        return ',';
    }

    /**
     * Converts the raw field strings of one record into cells.
     *
     * <p>Null and empty fields become null string cells. Other fields become string cells,
     * truncated to the per-column limit from {@code options} when that limit is positive.
     *
     * @param rowIndex index of the record; not used by this implementation
     * @param fields raw field values of the record
     * @param cellFactory factory used to create the cells
     * @param options source of the per-column string length limits
     * @return one cell per entry of {@code fields}, in the same order
     */
    protected static ICDFCell[] parseFields(long rowIndex, String[] fields, ICDFCellFactory cellFactory, ScrapeOptions options) {
        ICDFCell[] cells = new ICDFCell[fields.length];
        for (int i = 0; i < fields.length; ++i) {
            String text = fields[i];
            if (null == text || text.length() == 0) {
                cells[i] = cellFactory.createNull(EDataType.STRING, null);
                continue;
            }
            int lengthLimit = options.getFieldStringLengthLimit(i);
            if (lengthLimit > 0 && text.length() > lengthLimit) {
                text = text.substring(0, lengthLimit);
            }
            cells[i] = cellFactory.createString(text, null);
        }
        return cells;
    }
}

