/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.bi.predict.data;

import com.ibm.bi.predict.data.AnalyzeTarget;
import com.ibm.bi.predict.data.Binner;
import com.ibm.bi.predict.data.CategoriesMerger;
import com.ibm.bi.predict.data.Config;
import com.ibm.bi.predict.data.DataContext;
import com.ibm.bi.predict.data.DataPrep;
import com.ibm.bi.predict.data.HandleMissingValuesInPredictors;
import com.ibm.bi.predict.data.HandleOutliersInTarget;
import com.ibm.bi.predict.data.NonDegeneratedColumn;
import com.ibm.bi.predict.data.ParamsInfo;
import com.ibm.bi.predict.dataaccess.DataAccessProvider;
import com.ibm.bi.predict.dataaccess.MetaData;
import com.ibm.bi.predict.dataaccess.types.FieldType;
import com.ibm.bi.predict.result.StatusCode;
import java.util.Optional;
import java.util.function.BiPredicate;
import java.util.function.Consumer;
import java.util.stream.IntStream;

/**
 * Static entry points that assemble a {@link DataPrep} from a data provider and a
 * {@link DataContext}, wiring in the target/driver transformations configured through
 * the context.
 *
 * <p>This class only holds static methods and cannot be instantiated.
 */
public class StandardDataPrep {
    private StandardDataPrep() {
    }

    /**
     * Prepares data using a freshly created {@link DataContext} populated from {@code params}.
     *
     * @param provider source of the data to prepare
     * @param params   supplies the target index and the driver indexes
     * @return the resulting {@link DataPrep}
     */
    public static DataPrep prepareData(DataAccessProvider provider, ParamsInfo params) {
        DataContext context = new DataContext().addParamsInfo(params);
        return StandardDataPrep.prepareData(provider, context, params.getTargetIndex(), params.getDriversIndexes());
    }

    /**
     * Prepares data using a {@link DataContext} derived from {@code config} and then
     * populated from {@code params}.
     *
     * @param provider source of the data to prepare
     * @param params   supplies the target index and the driver indexes
     * @param config   configuration converted via {@code DataContext.configToContext}
     * @return the resulting {@link DataPrep}
     */
    public static DataPrep prepareData(DataAccessProvider provider, ParamsInfo params, Config config) {
        DataContext context = DataContext.configToContext(config).addParamsInfo(params);
        return StandardDataPrep.prepareData(provider, context, params.getTargetIndex(), params.getDriversIndexes());
    }

    /**
     * Prepares data with the full set of target and driver operations and with
     * degenerate-column handling switched on.
     *
     * @param provider       source of the data to prepare
     * @param context        context the operation settings are read from
     * @param targetIndex    optional index of the target field
     * @param driversIndexes indexes of the driver fields
     * @return the resulting {@link DataPrep}
     */
    public static DataPrep prepareData(DataAccessProvider provider, DataContext context, Optional<Integer> targetIndex, int[] driversIndexes) {
        Consumer<DataPrep> targetOperations = prep -> StandardDataPrep.applyTargetOperations(prep, context);
        Consumer<DataPrep> driverOperations = prep -> StandardDataPrep.applyDriverOperations(prep, context);
        return StandardDataPrep.performDataPrep(provider, context, targetIndex, driversIndexes, Optional.of(targetOperations), Optional.of(driverOperations), true);
    }

    /**
     * Prepares time-series data using a {@link DataContext} derived from {@code config}.
     *
     * <p>The context is populated from {@code params} here, and the overload this method
     * delegates to calls {@code addParamsInfo(params)} on it again.
     *
     * @param provider source of the data to prepare
     * @param params   supplies the target index and the driver indexes
     * @param config   configuration converted via {@code DataContext.configToContext}
     * @return the resulting {@link DataPrep}
     */
    public static DataPrep prepareTimeSeriesData(DataAccessProvider provider, ParamsInfo params, Config config) {
        DataContext context = DataContext.configToContext(config).addParamsInfo(params);
        return StandardDataPrep.prepareTimeSeriesData(provider, params, context);
    }

    /**
     * Prepares data for outlier processing: no target, no target/driver operations and
     * no degenerate-column handling. Every field whose identifier starts with neither
     * {@code "rowCount:"} nor {@code "sumSqr:"} is used as a driver.
     *
     * @param provider source of the data and of its metadata
     * @param context  context handed to the preparation
     * @return the resulting {@link DataPrep}
     */
    public static DataPrep prepareOutliers(DataAccessProvider provider, DataContext context) {
        MetaData meta = provider.getMetaData();
        int fieldCount = provider.getMetaData().fieldCount();
        int[] driverIndices = IntStream.range(0, fieldCount)
                .filter(index -> StandardDataPrep.lacksReservedPrefix(meta, index))
                .toArray();
        return StandardDataPrep.performDataPrep(provider, context, Optional.empty(), driverIndices, Optional.empty(), Optional.empty(), false);
    }

    /**
     * Prepares time-series data: degenerate-column handling is on, but no target or
     * driver operations are applied. {@code params} is added to {@code context} before use.
     *
     * @param provider source of the data to prepare
     * @param params   supplies the target index and the driver indexes
     * @param context  context to populate from {@code params} and use for the preparation
     * @return the resulting {@link DataPrep}
     */
    public static DataPrep prepareTimeSeriesData(DataAccessProvider provider, ParamsInfo params, DataContext context) {
        DataContext populated = context.addParamsInfo(params);
        return StandardDataPrep.performDataPrep(provider, populated, params.getTargetIndex(), params.getDriversIndexes(), Optional.empty(), Optional.empty(), true);
    }

    /**
     * Builds the {@link DataPrep}, reads the data through the column and target filters,
     * and hands over to {@link #handleDataPrepByStatus} for the remaining steps.
     *
     * @param provider                source of the data to prepare
     * @param context                 context the settings are read from
     * @param targetIndex             optional index of the target field
     * @param driversIndexes          indexes of the driver fields
     * @param targetOperations        optional operations to run against the target
     * @param driverOperations        optional operations to run against the drivers
     * @param handleDegenerateColumns when {@code false}, every column passes the column filter
     * @return the resulting {@link DataPrep}
     */
    private static DataPrep performDataPrep(DataAccessProvider provider, DataContext context, Optional<Integer> targetIndex, int[] driversIndexes, Optional<Consumer<DataPrep>> targetOperations, Optional<Consumer<DataPrep>> driverOperations, boolean handleDegenerateColumns) {
        BiPredicate<Integer, MetaData> columnFilter;
        if (handleDegenerateColumns) {
            NonDegeneratedColumn detector = new NonDegeneratedColumn(context.getDouble("maxRatioOfUniquesToTotalRows", 0.5), context.getInt("minCategoriesToBeUseful", 2));
            columnFilter = detector::isNonDegenerated;
        } else {
            // Degenerate-column handling is off: accept every column.
            columnFilter = (index, metaData) -> true;
        }
        DataPrep preparation = new DataPrep(provider, context, targetIndex, driversIndexes);
        AnalyzeTarget analyzeTarget = new AnalyzeTarget(provider, context, targetIndex);
        BiPredicate<Double, FieldType> targetFilter = analyzeTarget::filter;
        preparation.readData(columnFilter, targetFilter);
        return StandardDataPrep.handleDataPrepByStatus(targetOperations, driverOperations, preparation, analyzeTarget);
    }

    /**
     * Completes the preparation unless its status is {@link StatusCode#FAILURE}, in which
     * case it is returned untouched. Otherwise the zero-inflation status of the target is
     * set, the target operations and then the driver operations are applied when present,
     * and finally {@code removeDegenerate()} is invoked.
     *
     * @param targetOperations optional operations to run against the target
     * @param driverOperations optional operations to run against the drivers
     * @param preparation      preparation whose data has already been read
     * @param analyzeTarget    analyzer that receives the target column
     * @return {@code preparation}, the same instance that was passed in
     */
    protected static DataPrep handleDataPrepByStatus(Optional<Consumer<DataPrep>> targetOperations, Optional<Consumer<DataPrep>> driverOperations, DataPrep preparation, AnalyzeTarget analyzeTarget) {
        if (preparation.getStatus() != StatusCode.FAILURE) {
            analyzeTarget.setTargetZeroInflationStatus(preparation.targetColumn());
            if (targetOperations.isPresent()) {
                targetOperations.get().accept(preparation);
            }
            if (driverOperations.isPresent()) {
                driverOperations.get().accept(preparation);
            }
            preparation.removeDegenerate();
        }
        return preparation;
    }

    /** Applies outlier handling followed by category merging to the target of {@code prep}. */
    private static void applyTargetOperations(DataPrep prep, DataContext context) {
        HandleOutliersInTarget outlierHandler = new HandleOutliersInTarget(context.getBoolean("handleOutliersInTarget", true), context.getDouble("outlierMultiplierConstant", 1.5), context.getDouble("outlierPercentile", 0.05));
        prep.replaceTarget(outlierHandler::test);
        CategoriesMerger merger = StandardDataPrep.newCategoriesMerger(context);
        prep.replaceTarget(merger::mergeCategories);
    }

    /** Applies missing-value handling, binning and category merging, in that order, to the drivers of {@code prep}. */
    private static void applyDriverOperations(DataPrep prep, DataContext context) {
        HandleMissingValuesInPredictors missingValueHandler = new HandleMissingValuesInPredictors(context.getBoolean("missingValuesInSeparateCategory", true));
        prep.replaceDrivers(missingValueHandler::test);
        Binner binner = new Binner(context.getInt("binCount", 5), context.getBoolean("missingValuesInSeparateCategory", true));
        prep.replaceDrivers(binner::bin);
        CategoriesMerger merger = StandardDataPrep.newCategoriesMerger(context);
        prep.replaceDrivers(merger::mergeCategories);
    }

    /** Creates a {@link CategoriesMerger} from the merge-related settings in {@code context}. */
    private static CategoriesMerger newCategoriesMerger(DataContext context) {
        return new CategoriesMerger(context.getInt("maxCategoriesToProcess", 50), context.getDouble("minRatioOfCategoriesNotInOther", 0.75), context.getInt("minUnmergedCategoryFrequency", 3));
    }

    /** Returns {@code true} when the identifier of field {@code index} starts with neither {@code "rowCount:"} nor {@code "sumSqr:"}. */
    private static boolean lacksReservedPrefix(MetaData meta, int index) {
        String id = meta.getFieldIdentifier(index);
        return !(id.startsWith("rowCount:") || id.startsWith("sumSqr:"));
    }
}

