/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.bi.predict.source.builder.csv;

import com.ibm.bi.predict.dataaccess.csv.CSVBadDataException;
import com.ibm.bi.predict.math.NumberFormatter;
import com.ibm.bi.predict.source.Category;
import com.ibm.bi.predict.source.ColumnIdentifier;
import com.ibm.bi.predict.source.builder.csv.AutoCharset;
import com.ibm.bi.predict.source.builder.csv.AutoDateFormat;
import com.ibm.bi.predict.source.builder.csv.AutoHeaders;
import com.ibm.bi.predict.source.builder.csv.ColumnNaming;
import com.ibm.bi.predict.source.builder.csv.DeducedDateFormat;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.IntStream;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

/**
 * Reads a CSV file into rows of cells and deduces per-column typing information.
 *
 * <p>Typical use is {@code new CSVtoSourceComponents(n, f).usePath(path, handling).process()},
 * after which the accessors expose the typed rows, the column names, the deduced date formats,
 * the decimal places of numeric columns and the lines that were dropped.
 *
 * <p>Instances hold mutable state and the accessors return the internal arrays and collections
 * without copying.
 */
public class CSVtoSourceComponents {
    public static final AutoDateFormat DATE_PARSER = new AutoDateFormat(Locale.US);
    private final int rowsToUseForAnalysis;
    private final double fractionNeededForNumeric;
    private final Map<Integer, Problem> problems;
    private ColumnNaming columnNaming;
    private DeducedDateFormat[] dateFormats;
    private Integer[] numericPlaces;
    private List<Object[]> rows;

    /**
     * @param rowsToUseForAnalysis     number of leading records read before header handling, and
     *                                 the cap on values sampled per column during type deduction
     * @param fractionNeededForNumeric minimum fraction of sampled non-missing values that must
     *                                 parse as a double for a column to be considered numeric
     */
    public CSVtoSourceComponents(int rowsToUseForAnalysis, double fractionNeededForNumeric) {
        this.rowsToUseForAnalysis = rowsToUseForAnalysis;
        this.fractionNeededForNumeric = fractionNeededForNumeric;
        this.problems = new TreeMap<Integer, Problem>();
    }

    /** Returns the internal (live) list of rows; {@code null} until {@link #usePath} has run. */
    public List<Object[]> rows() {
        return this.rows;
    }

    /** Returns the per-column date formats; an entry is {@code null} for non-date columns. */
    public DeducedDateFormat[] dateFormats() {
        return this.dateFormats;
    }

    /** Returns the column identifiers produced by the column naming. */
    public ColumnIdentifier[] columnNames() {
        return this.columnNaming.names();
    }

    /** Returns the per-column decimal places; an entry is {@code null} for non-numeric columns. */
    public Integer[] decimalPlacesForNumericColumn() {
        return this.numericPlaces;
    }

    /** Returns the internal (live) map of dropped lines, keyed by line index, in key order. */
    public Map<Integer, Problem> problems() {
        return this.problems;
    }

    /** Returns the number of rows currently held. */
    public int rowCount() {
        return this.rows.size();
    }

    /**
     * Cleans and types the rows loaded by {@link #usePath}.
     *
     * <p>The order matters: empty cells must become {@code null} before blank lines can be
     * recognised, and date columns must be known before numeric deduction (which skips them).
     *
     * @return this instance
     */
    public CSVtoSourceComponents process() {
        this.replaceEmptyWithMissing();
        this.removeLinesOfWrongLength();
        this.findDateColumns();
        this.findNumericColumns();
        this.replaceDataWithTyped();
        return this;
    }

    /**
     * Parses the file at {@code path} and stores its records as rows of strings.
     *
     * @param path       the CSV file to read
     * @param hasHeaders how the first record is to be treated; must not be {@code null}
     * @return this instance
     * @throws NullPointerException if {@code hasHeaders} is {@code null}
     * @throws IOException          declared by the signature; charset guessing happens outside
     *                              the wrapping try block, so its failures propagate unwrapped
     * @throws CSVBadDataException  wrapping any exception raised while parsing or reading records
     */
    public CSVtoSourceComponents usePath(Path path, HeaderHandling hasHeaders) throws IOException {
        Objects.requireNonNull(hasHeaders, "HeaderHandling must be defined");
        Charset charset = AutoCharset.guess(path);
        CSVFormat csvFormat = CSVFormat.DEFAULT.withIgnoreEmptyLines(false).withIgnoreSurroundingSpaces();
        // try-with-resources: the parser owns the underlying reader and must be closed.
        try (CSVParser parser = CSVParser.parse(path, charset, csvFormat)) {
            Iterator<CSVRecord> iterator = parser.iterator();
            List<CSVRecord> initialRecords = new ArrayList<CSVRecord>();
            while (iterator.hasNext() && initialRecords.size() < this.rowsToUseForAnalysis) {
                initialRecords.add(iterator.next());
            }
            initialRecords = this.handleHeaders(hasHeaders, initialRecords);
            this.rows = this.readRows(initialRecords, iterator);
        }
        catch (Exception e) {
            throw new CSVBadDataException("Error parsing file as CSV: " + path, e);
        }
        return this;
    }

    int columnCount() {
        return this.columnNaming.columnCount();
    }

    /** Parses a string cell as a double, returning {@code null} when it is not a valid number. */
    private Double parseDouble(Object s) {
        try {
            return Double.parseDouble((String)s);
        }
        catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Replaces every non-missing string cell by its typed value: a {@code Double} in numeric
     * columns (or {@code null} when the text does not parse), the result of the column's date
     * format in date columns, and a {@code Category} otherwise.
     */
    private void replaceDataWithTyped() {
        int nColumns = this.columnCount();
        Map<Object, Double> numericCache = new HashMap<Object, Double>();
        Map<Object, Category> stringCache = new HashMap<Object, Category>();
        // Date parsing depends on the column's own format, so each date column needs its own
        // cache; a shared cache would hand one column's parse result to another column.
        List<Map<Object, Category>> dateCaches = new ArrayList<Map<Object, Category>>(nColumns);
        for (int c = 0; c < nColumns; ++c) {
            dateCaches.add(this.dateFormats[c] != null ? new HashMap<Object, Category>() : null);
        }
        for (Object[] row : this.rows) {
            for (int c = 0; c < nColumns; ++c) {
                if (row[c] == null) {
                    continue;
                }
                if (this.numericPlaces[c] != null) {
                    row[c] = numericCache.computeIfAbsent(row[c], this::parseDouble);
                } else if (this.dateFormats[c] != null) {
                    row[c] = dateCaches.get(c).computeIfAbsent(row[c], this.dateFormats[c]::parseDate);
                } else {
                    row[c] = stringCache.computeIfAbsent(row[c], o -> new Category(o, o.toString()));
                }
            }
        }
    }

    /** Turns empty-string cells into {@code null}, the marker used for missing values. */
    private void replaceEmptyWithMissing() {
        for (Object[] data : this.rows) {
            for (int i = 0; i < data.length; ++i) {
                // Constant-first comparison so an already-missing cell cannot raise an NPE.
                if ("".equals(data[i])) {
                    data[i] = null;
                }
            }
        }
    }

    /** Asks {@link #DATE_PARSER} for the best date format of each column, using the leading rows. */
    private void findDateColumns() {
        this.dateFormats = IntStream.range(0, this.columnCount())
                .mapToObj(i -> this.rows.stream().limit(this.rowsToUseForAnalysis).map(row -> (String)row[i]))
                .map(DATE_PARSER::findBest)
                .map(opt -> opt.orElse(null))
                .toArray(DeducedDateFormat[]::new);
    }

    /** Deduces, column by column, the decimal places of numeric columns ({@code null} otherwise). */
    private void findNumericColumns() {
        int nColumns = this.columnCount();
        // Shared across columns so that a text is only ever parsed once.
        Map<String, Double> knownGood = new HashMap<String, Double>();
        Integer[] places = new Integer[nColumns];
        for (int i = 0; i < nColumns; ++i) {
            places[i] = this.placesForNumeric(i, knownGood);
        }
        this.numericPlaces = places;
    }

    /**
     * Decides whether column {@code i} is numeric and, if so, how many decimal places to use.
     *
     * <p>Missing cells are ignored. One distinct non-numeric text is tolerated (for example a
     * placeholder for missing data); a second distinct one rules the column out immediately.
     * Sampling stops once {@code rowsToUseForAnalysis} non-missing values have been seen.
     *
     * @param i         column index
     * @param knownGood cache of texts already parsed successfully, with their values
     * @return the value of {@code NumberFormatter.decimalPlacesForRange} over the finite values
     *         seen, or {@code null} when the column is a date column or not numeric enough
     */
    private Integer placesForNumeric(int i, Map<String, Double> knownGood) {
        if (this.dateFormats[i] != null) {
            return null;
        }
        String exceptionCategory = null;
        int validCount = 0;
        int successCount = 0;
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        for (Object[] row : this.rows) {
            Object cell = row[i];
            if (cell == null) {
                continue;
            }
            String value = (String)cell;
            Double parsed = null;
            // The tolerated non-numeric text is known to fail; do not parse it again.
            if (!value.equals(exceptionCategory)) {
                parsed = knownGood.get(value);
                if (parsed == null) {
                    try {
                        parsed = Double.valueOf(value);
                        knownGood.put(value, parsed);
                    }
                    catch (NumberFormatException e) {
                        if (exceptionCategory != null) {
                            // Second distinct non-numeric text: not a numeric column.
                            return null;
                        }
                        exceptionCategory = value;
                    }
                }
            }
            if (parsed != null) {
                ++successCount;
                // Always fold the value into the range, including values first parsed while
                // analysing another column.
                if (Double.isFinite(parsed)) {
                    min = Math.min(min, parsed);
                    max = Math.max(max, parsed);
                }
            }
            if (++validCount == this.rowsToUseForAnalysis) {
                break;
            }
        }
        if ((double)successCount >= this.fractionNeededForNumeric * (double)validCount) {
            return NumberFormatter.decimalPlacesForRange(min, max);
        }
        return null;
    }

    /**
     * Drops blank rows and rows whose length differs from the column count, recording each one
     * in {@code problems} under its line index. The index counts rows in their original order
     * and is shifted by one when the first line of the file was consumed as the header.
     */
    private void removeLinesOfWrongLength() {
        int expectedLength = this.columnCount();
        // Tracked independently of the iterator: iterator indices shift after each removal.
        int lineIndex = this.columnNaming.usedFirstLine() ? 1 : 0;
        Iterator<Object[]> rowsIt = this.rows.iterator();
        while (rowsIt.hasNext()) {
            Object[] data = rowsIt.next();
            int currentLine = lineIndex++;
            Problem problem = null;
            if (data.length == 0 || (data.length == 1 && data[0] == null)) {
                problem = Problem.blank;
            } else if (data.length != expectedLength) {
                problem = Problem.lengthMismatch;
            }
            if (problem != null) {
                this.problems.put(currentLine, problem);
                rowsIt.remove();
            }
        }
    }

    /**
     * Sets up the column naming from the first record and returns the records that hold data.
     *
     * @param hasHeaders     requested header handling; {@code autoDetect} is resolved here
     * @param initialRecords the leading records of the file
     * @return {@code initialRecords} without the header record when one was used
     * @throws IllegalArgumentException if {@code initialRecords} is empty
     */
    private List<CSVRecord> handleHeaders(HeaderHandling hasHeaders, List<CSVRecord> initialRecords) {
        if (initialRecords.isEmpty()) {
            throw new IllegalArgumentException("No CSV records available to determine the columns");
        }
        if (hasHeaders == HeaderHandling.autoDetect) {
            hasHeaders = AutoHeaders.firstCSVRowIsLikelyHeaders(initialRecords);
        }
        CSVRecord firstRecord = initialRecords.get(0);
        if (hasHeaders == HeaderHandling.firstRow) {
            this.columnNaming = new ColumnNaming(firstRecord.size(), firstRecord.iterator());
            initialRecords = initialRecords.subList(1, initialRecords.size());
        } else {
            this.columnNaming = new ColumnNaming(firstRecord.size());
        }
        return initialRecords;
    }

    /** Converts the already-read records, then every record left in the iterator, into rows. */
    private List<Object[]> readRows(List<CSVRecord> initialRecords, Iterator<CSVRecord> iterator) {
        Map<String, String> cache = new HashMap<String, String>();
        List<Object[]> result = new ArrayList<Object[]>(initialRecords.size());
        for (CSVRecord record : initialRecords) {
            result.add(this.rowFromCSVRecord(record, cache));
        }
        while (iterator.hasNext()) {
            result.add(this.rowFromCSVRecord(iterator.next(), cache));
        }
        return result;
    }

    /** Copies a record into an array, reusing one String instance per distinct text. */
    private Object[] rowFromCSVRecord(CSVRecord record, Map<String, String> cache) {
        Object[] result = new Object[record.size()];
        for (int i = 0; i < result.length; ++i) {
            result[i] = cache.computeIfAbsent(record.get(i), key -> key);
        }
        return result;
    }

    /** Reason a line was dropped during processing. */
    public static enum Problem {
        blank,
        lengthMismatch;

    }

    /** How the first record of the file is to be interpreted. */
    public static enum HeaderHandling {
        firstRow,
        noHeaders,
        autoDetect;

    }
}

