/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.bi.predict.algorithms.table;

import com.ibm.bi.predict.algorithms.ThreeLevelScale;
import com.ibm.bi.predict.algorithms.summaries.Means;
import com.ibm.bi.predict.algorithms.table.results.InfluentialCategory;
import com.ibm.bi.predict.data.matrix.Matrix;
import com.ibm.bi.predict.math.NumericUtils;
import com.ibm.bi.predict.math.TopNSelector;
import com.ibm.bi.predict.utils.Logger;
import com.ibm.bi.predict.utils.PredictLoggerFactory;
import com.spss.math.statistics.DistributionFunctions;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.mutable.MutableInt;

/**
 * Detects table cells whose mean deviates significantly from the overall mean.
 *
 * <p>For each cell with a positive count, a t-statistic is built from the difference
 * between the cell value and the overall mean, using a standard error derived from the
 * full-model mean squared error. The two-sided p-value is scaled by
 * {@code numberOfCategories} (a Bonferroni-style multiple-comparison adjustment) and
 * cells whose adjusted p-value does not exceed {@link #SIGNIFICANCE_LEVEL} are reported,
 * ordered by descending effect size and truncated by {@code TopNSelector}.
 */
public class UnusualCellDetection {
    private static final Logger log = PredictLoggerFactory.getLogger(UnusualCellDetection.class);

    /** Largest adjusted p-value (inclusive) at which a cell is still reported. */
    private static final double SIGNIFICANCE_LEVEL = 0.05;

    /**
     * Third argument handed to {@code NumericUtils.equals} when testing the full-model
     * error against zero; presumably an absolute tolerance - confirm against NumericUtils.
     */
    private static final double ZERO_ERROR_TOLERANCE = 1.0E-12;

    /** Per-cell values; each entry is used as the cell (category) mean. */
    private final Matrix values;
    /** Per-cell counts, read at the same (row, col) as {@link #values}. */
    private final Matrix counts;
    /** Per-cell sums of squares; their total is used as the full-model error. */
    private final Matrix sumOfSquares;
    private final long totalRecordCount;
    private final int numberOfCategories;

    /**
     * @param values             matrix whose entries are treated as cell means
     * @param counts             matrix of cell counts, indexed like {@code values}
     * @param sumOfSquares       matrix whose sum is taken as the full-model error
     * @param totalRecordCount   total number of records; with {@code numberOfCategories}
     *                           it determines the degrees of freedom
     * @param numberOfCategories number of categories; used for the degrees of freedom and
     *                           as the multiplier of the p-value
     */
    public UnusualCellDetection(Matrix values, Matrix counts, Matrix sumOfSquares, long totalRecordCount, int numberOfCategories) {
        this.values = values;
        this.counts = counts;
        this.sumOfSquares = sumOfSquares;
        this.totalRecordCount = totalRecordCount;
        this.numberOfCategories = numberOfCategories;
    }

    /**
     * Computes the significant cells, sorted by descending effect size and limited to the
     * number chosen by {@code TopNSelector.selectTopN}.
     *
     * @return a new, independent list of the selected cells; empty when no cell is
     *         significant or the full-model mean squared error is unavailable
     */
    public List<InfluentialCategory> compute() {
        log.debug("Computing unusual cells - rows={} cols={}", (Object)this.values.rowDimension(), (Object)this.values.columnDimension());
        log.perfStart();
        List<InfluentialCategory> significantCells = this.getSignificantCells();
        // Descending order: the comparator arguments are swapped on purpose.
        Collections.sort(significantCells, (c1, c2) -> Double.compare(c2.effectSize, c1.effectSize));
        log.perfLog("Finished sorting unusual cells");
        double[] effectSizes = significantCells.stream().mapToDouble(c -> c.effectSize).toArray();
        int topN = TopNSelector.selectTopN(effectSizes, this.totalDataPointCount());
        log.perfLog("Finished finding top-N based on effect size - topN={}", (Object)topN);
        log.perfStop();
        // Defensive clamp: the selector is opaque from here, and subList would throw on an
        // out-of-range bound.
        int limit = Math.max(0, Math.min(topN, significantCells.size()));
        // Copy instead of returning the subList view so the discarded cells are not kept
        // reachable through the view's backing list.
        return new ArrayList<>(significantCells.subList(0, limit));
    }

    /**
     * Walks every cell of {@link #values} and collects those that pass the significance
     * test. Returns an empty list when the full-model mean squared error is reported as
     * missing by {@code NumericUtils.isMissingValue}.
     */
    private List<InfluentialCategory> getSignificantCells() {
        List<InfluentialCategory> influentialCells = new ArrayList<>();
        double fullModelMeanSquaredError = this.fullModelMeanSquaredError();
        log.perfLog("Computed full model MSE");
        if (NumericUtils.isMissingValue(fullModelMeanSquaredError)) {
            return influentialCells;
        }
        double overallMean = this.overallMean();
        // Running index of the visited cell; advanced for every cell, including those
        // that are skipped or found not significant.
        MutableInt cellIndex = new MutableInt();
        this.values.walkAll((row, col, value) -> {
            this.updateInfluentialCells(influentialCells, fullModelMeanSquaredError, overallMean, cellIndex.intValue(), row, col, value);
            cellIndex.increment();
        });
        log.perfLog("Finished finding unusual cells - size={}", (Object)influentialCells.size());
        return influentialCells;
    }

    /**
     * Tests one cell and appends it to {@code influentialCells} when it is significant.
     * Cells with a non-positive count are ignored.
     */
    private void updateInfluentialCells(List<InfluentialCategory> influentialCells, double fullModelMeanSquaredError, double overallMean, int cellIndex, int row, int col, double value) {
        double categoryCount = this.counts.getValue(row, col);
        if (categoryCount <= 0.0) {
            return;
        }
        double categoryMean = value;
        double stdError = UnusualCellDetection.calculateStandardError(fullModelMeanSquaredError, categoryCount);
        double tStatistic = UnusualCellDetection.calculateTStatistic(stdError, overallMean, categoryMean);
        double pValue = this.pValue(tStatistic);
        if (!UnusualCellDetection.isSignificant(pValue)) {
            return;
        }
        double effectSize = UnusualCellDetection.effectSize(tStatistic, categoryCount);
        // A positive t-statistic means the cell mean lies above the overall mean.
        ThreeLevelScale direction = tStatistic > 0.0 ? ThreeLevelScale.HIGH : ThreeLevelScale.LOW;
        influentialCells.add(new InfluentialCategory(cellIndex, row, col, pValue, effectSize, categoryMean, direction));
    }

    /**
     * Full-model error divided by the degrees of freedom, or {@code NaN} when the error
     * compares equal to zero or the degrees of freedom are not positive.
     */
    private double fullModelMeanSquaredError() {
        double fullModelError = this.fullModelError();
        double df = this.fullModelDegreesOfFreedom();
        if (NumericUtils.equals(fullModelError, 0.0, ZERO_ERROR_TOLERANCE) || df <= 0.0) {
            return Double.NaN;
        }
        return fullModelError / df;
    }

    /** Sum of all entries of {@link #sumOfSquares}. */
    private double fullModelError() {
        return this.sumOfSquares.sum();
    }

    /** {@code totalRecordCount - numberOfCategories}, widened to {@code double}. */
    private double fullModelDegreesOfFreedom() {
        return this.totalRecordCount - this.numberOfCategories;
    }

    /** {@code sqrt(fullModelMeanSquaredError / count)}. */
    private static double calculateStandardError(double fullModelMeanSquaredError, double count) {
        return Math.sqrt(fullModelMeanSquaredError / count);
    }

    /** {@code (value - overallMean) / standardError}. */
    private static double calculateTStatistic(double standardError, double overallMean, double value) {
        return (value - overallMean) / standardError;
    }

    /**
     * Two-sided p-value of {@code tStatistic}, multiplied by {@code numberOfCategories}.
     * The result is not capped and may therefore exceed 1.
     */
    private double pValue(double tStatistic) {
        double upperTail = 1.0 - UnusualCellDetection.cdfT(Math.abs(tStatistic), this.fullModelDegreesOfFreedom());
        return 2.0 * (upperTail * (double)this.numberOfCategories);
    }

    /** A cell is significant when its adjusted p-value is at most {@link #SIGNIFICANCE_LEVEL}. */
    private static boolean isSignificant(double pValue) {
        return pValue <= SIGNIFICANCE_LEVEL;
    }

    /** {@code |tStatistic| / sqrt(count)}. */
    private static double effectSize(double tStatistic, double count) {
        return Math.abs(tStatistic) / Math.sqrt(count);
    }

    /** Delegates to {@code Means.weightedMean(values, counts)}. */
    private double overallMean() {
        return Means.weightedMean(this.values, this.counts);
    }

    /**
     * Delegates to {@code DistributionFunctions.cdfT}, except that a {@code NaN} statistic
     * yields 0.0 so the resulting p-value is large and the cell is never reported.
     */
    private static double cdfT(double tStatistic, double df) {
        if (Double.isNaN(tStatistic)) {
            return 0.0;
        }
        return DistributionFunctions.cdfT(tStatistic, df);
    }

    /** Number of cells in {@link #values} (rows times columns). */
    private int totalDataPointCount() {
        return this.values.rowDimension() * this.values.columnDimension();
    }
}

