/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.bi.platform.datasetutils.parquet;

import com.ibm.bi.platform.datasetutils.metadata.Column;
import com.ibm.bi.platform.datasetutils.metadata.ColumnMetadata;
import com.ibm.bi.platform.datasetutils.metadata.RowSchema;
import com.ibm.bi.platform.datasetutils.metadata.TabularRowSchema;
import com.ibm.bi.platform.datasetutils.metadata.types.BaseType;
import com.ibm.bi.platform.datasetutils.metadata.types.TypeFactory;
import com.ibm.bi.platform.datasetutils.parquet.ParquetMetadataGenerator;
import com.ibm.bi.platform.datasetutils.parquet.ParquetOptions;
import com.ibm.bi.platform.datasetutils.parquet.ParquetVersionEnum;
import com.ibm.json.java.JSONObject;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.parquet.format.Util;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.hadoop.metadata.FileMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.io.ParquetDecodingException;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import shaded.org.apache.hadoop.conf.Configuration;
import shaded.org.apache.hadoop.fs.Path;

/**
 * Loads parquet footer metadata and derives from it the parquet schema to request and the
 * matching {@link RowSchema}.
 *
 * <p>Call one of {@link #readFooter(Path, Configuration)},
 * {@link #readFileMetadata(InputStream, Configuration)} or
 * {@link #processFileMetaData(org.apache.parquet.format.FileMetaData, Configuration)} first.
 * Until then the loaded state is {@code null}, so the accessors that dereference the parquet
 * metadata ({@link #getFileMetaData()}, {@link #getKeyValueMetadata()},
 * {@link #getParquetFileSchema()}) throw {@link NullPointerException}.
 */
public class DatasetParquetReadHelper {
    private static final Logger LOGGER = LoggerFactory.getLogger(DatasetParquetReadHelper.class);

    /** Footer key/value entries that are copied verbatim into the options when present. */
    private static final String TIME_ZONE_ID_KEY = "parquet.time.zone-id";
    private static final String TIME_ZONE_OFFSET_ID_KEY = "parquet.time.zone-offset-id";
    private static final String CA_VERSION_KEY = "parquet.ca.version";
    private static final String[] COPIED_METADATA_KEYS = {TIME_ZONE_ID_KEY, TIME_ZONE_OFFSET_ID_KEY, CA_VERSION_KEY};

    /** Option set to "true" for legacy files whose footer carries no explicit version entry. */
    private static final String LEGACY_ZONE_OFFSET_KEY = "parquet.legacy.zone-offset-in-minutes-behind-utc";
    /** Footer key holding the JSON-encoded row schema. */
    private static final String ROW_METADATA_KEY = "com.ibm.bi.platform.dataset.parquet.row.metadata";
    /** Footer key holding the XQE row type string; its presence also marks a file as legacy. */
    private static final String XQE_ROW_TYPE_KEY = "rowType";
    /** Option/configuration keys selecting a projection of the file schema. */
    private static final String PROJECTION_SCHEMA_KEY = "parquet.projection.schema";
    private static final String PROJECTION_FIELDS_KEY = "parquet.projection.fields";

    private final ParquetOptions options;
    private ParquetMetadata parquetMeta;
    private MessageType requestedParquetSchema;
    private RowSchema recordSchema;

    /** Creates a helper backed by a default-constructed {@link ParquetOptions}. */
    public DatasetParquetReadHelper() {
        this.options = new ParquetOptions();
    }

    /**
     * Creates a helper backed by a new {@link ParquetOptions} built from the given one.
     *
     * @param options the options passed to the {@code ParquetOptions(ParquetOptions)} constructor
     */
    public DatasetParquetReadHelper(ParquetOptions options) {
        this.options = new ParquetOptions(options);
    }

    /**
     * Reads the footer of {@code file} using the {@code SKIP_ROW_GROUPS} metadata filter and
     * derives the requested parquet schema, the options and the record schema from it.
     *
     * @param file   the parquet file whose footer is read
     * @param config the configuration used to read the footer and to look up projection settings
     * @throws IOException if reading the footer fails
     */
    public void readFooter(Path file, Configuration config) throws IOException {
        this.parquetMeta = ParquetFileReader.readFooter(config, file, ParquetMetadataConverter.SKIP_ROW_GROUPS);
        this.deriveSchemas(config);
    }

    /**
     * Reads a serialized file metadata structure from {@code input} and processes it with
     * {@link #processFileMetaData(org.apache.parquet.format.FileMetaData, Configuration)}.
     * The stream is not closed by this method.
     *
     * @param input  the stream to read the file metadata from
     * @param config the configuration used to look up projection settings
     * @throws IOException if reading or converting the metadata fails
     */
    public void readFileMetadata(InputStream input, Configuration config) throws IOException {
        this.processFileMetaData(Util.readFileMetaData(input), config);
    }

    /**
     * Converts the given format-level file metadata and derives the requested parquet schema,
     * the options and the record schema from it.
     *
     * @param fileMetaData the format-level metadata to convert
     * @param config       the configuration used to look up projection settings
     * @throws IOException if converting the metadata fails
     */
    public void processFileMetaData(org.apache.parquet.format.FileMetaData fileMetaData, Configuration config) throws IOException {
        ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
        this.parquetMeta = metaConverter.fromParquetMetadata(fileMetaData);
        this.deriveSchemas(config);
    }

    /**
     * Populates the requested schema, the options and the record schema from the metadata that
     * was just loaded. The projection is computed before the footer metadata is merged into the
     * options, and the record schema is computed last because it reads the merged options.
     */
    private void deriveSchemas(Configuration config) {
        Map<String, String> keyValueMetadata = this.getKeyValueMetadata();
        this.requestedParquetSchema = getProjectedMessageType(this.getParquetFileSchema(), this.options, config);
        setOptionsFromMetadata(this.options, keyValueMetadata);
        this.recordSchema = getRecordSchema(this.requestedParquetSchema, keyValueMetadata, this.options);
    }

    /** Returns the loaded parquet metadata, or {@code null} if nothing has been loaded yet. */
    public ParquetMetadata getParquetMetadata() {
        return this.parquetMeta;
    }

    /** Returns the file-level metadata of the loaded footer. */
    public FileMetaData getFileMetaData() {
        return this.parquetMeta.getFileMetaData();
    }

    /** Returns the key/value metadata stored in the loaded footer. */
    public Map<String, String> getKeyValueMetadata() {
        return this.getFileMetaData().getKeyValueMetaData();
    }

    /** Returns the options instance held by this helper (not a copy); loading a footer updates it. */
    public ParquetOptions getParquetOptions() {
        return this.options;
    }

    /** Returns the full schema recorded in the loaded footer. */
    public MessageType getParquetFileSchema() {
        return this.getFileMetaData().getSchema();
    }

    /** Returns the (possibly projected) parquet schema, or {@code null} if nothing has been loaded yet. */
    public MessageType getRequestedParquetSchema() {
        return this.requestedParquetSchema;
    }

    /** Returns the record schema matching the requested parquet schema, or {@code null} if nothing has been loaded yet. */
    public RowSchema getRecordSchema() {
        return this.recordSchema;
    }

    /**
     * Merges footer key/value metadata into {@code options}.
     *
     * <p>The time-zone and version entries are copied when present. If no version ends up in the
     * options, it defaults to {@code LEGACY} when the footer has a {@code rowType} entry and to
     * {@code VERSION_1} otherwise. For a legacy version the legacy compatibility options are
     * applied, and the legacy zone-offset option is set to "true" when the footer itself did
     * not carry a version entry.
     *
     * @param options       the options to update in place
     * @param extraMetadata the footer key/value metadata
     */
    static void setOptionsFromMetadata(ParquetOptions options, Map<String, String> extraMetadata) {
        for (String key : COPIED_METADATA_KEYS) {
            if (extraMetadata.containsKey(key)) {
                options.put(key, extraMetadata.get(key));
            }
        }
        options.computeIfAbsent(CA_VERSION_KEY, k -> extraMetadata.containsKey(XQE_ROW_TYPE_KEY)
                ? ParquetVersionEnum.LEGACY.name()
                : ParquetVersionEnum.VERSION_1.name());
        if (options.getParquetCAVersion() == ParquetVersionEnum.LEGACY) {
            LOGGER.debug("Forcing legacy parquet options");
            options.setLegacyParquetCompatibility();
            if (!extraMetadata.containsKey(CA_VERSION_KEY)) {
                options.put(LEGACY_ZONE_OFFSET_KEY, "true");
            }
        }
    }

    /**
     * Builds the record schema for the requested parquet schema.
     *
     * <p>A schema is decoded from the JSON row-metadata entry if present, else from the
     * {@code rowType} entry if present, and then projected onto the requested field names.
     * When neither entry exists the schema is inferred from the parquet types alone.
     *
     * @param requestedParquetSchema the parquet schema that will be read
     * @param keyValueMetadata       the footer key/value metadata
     * @param options                the options consulted when inferring types
     * @return the record schema with one column per requested field
     * @throws ParquetDecodingException if the stored schema cannot be decoded or a type is unsupported
     */
    static RowSchema getRecordSchema(MessageType requestedParquetSchema, Map<String, String> keyValueMetadata, ParquetOptions options) {
        RowSchema decodedSchema = null;
        if (keyValueMetadata.containsKey(ROW_METADATA_KEY)) {
            decodedSchema = decodeRecordSchemaFromJson(keyValueMetadata.get(ROW_METADATA_KEY));
        } else if (keyValueMetadata.containsKey(XQE_ROW_TYPE_KEY)) {
            decodedSchema = decodeRecordSchemaFromRequestedPQSchema(requestedParquetSchema, keyValueMetadata.get(XQE_ROW_TYPE_KEY));
        }
        if (decodedSchema == null) {
            return inferRecordSchema(requestedParquetSchema, options);
        }
        LOGGER.debug("Decoded record schema from parquet metadata: {}", decodedSchema);
        List<String> requestedNames = requestedParquetSchema.getFields().stream()
                .map(Type::getName)
                .collect(Collectors.toList());
        return projectRecordSchema(decodedSchema, requestedNames);
    }

    /** Infers one column per field of {@code parquetSchema}, in field order. */
    private static RowSchema inferRecordSchema(MessageType parquetSchema, ParquetOptions options) {
        TabularRowSchema recordSchema = new TabularRowSchema();
        for (Type fieldType : parquetSchema.getFields()) {
            recordSchema.appendColumn(inferColumnMetadata(fieldType, options));
        }
        LOGGER.debug("Inferred record schema: {}", recordSchema);
        return recordSchema;
    }

    /**
     * Infers the column for a single parquet field. Primitive fields are mapped through their
     * original type when one is set and through their primitive type name otherwise; group
     * fields are only accepted in the {@code ms}/{@code ns} timestamp layout.
     *
     * @throws ParquetDecodingException if the field's type is not supported
     */
    private static ColumnMetadata inferColumnMetadata(Type parquetType, ParquetOptions options) {
        String name = parquetType.getName();
        boolean nullable = parquetType.isRepetition(Type.Repetition.OPTIONAL);
        BaseType recordType;
        if (parquetType.isPrimitive()) {
            PrimitiveType primitiveType = parquetType.asPrimitiveType();
            recordType = primitiveType.getOriginalType() != null
                    ? typeForOriginalType(primitiveType)
                    : typeForPrimitiveName(primitiveType, options);
        } else {
            recordType = typeForGroup(parquetType.asGroupType());
        }
        return new Column(name, nullable, recordType);
    }

    /** Maps a primitive field that carries an original-type annotation to its record type. */
    private static BaseType typeForOriginalType(PrimitiveType primitiveType) {
        switch (primitiveType.getOriginalType()) {
            case INT_8:
                return TypeFactory.BYTE_TYPE;
            case INT_16:
                return TypeFactory.SHORT_TYPE;
            case INT_32:
                return TypeFactory.INTEGER_TYPE;
            case INT_64:
                return TypeFactory.LONG_TYPE;
            case UTF8:
                return TypeFactory.STRING_TYPE;
            case DECIMAL:
                return TypeFactory.getDecimalType(primitiveType.getDecimalMetadata());
            case DATE:
                return TypeFactory.DATE_TYPE;
            case TIME_MILLIS:
                return TypeFactory.TIME_TYPE;
            case TIMESTAMP_MILLIS:
                return TypeFactory.TIMESTAMP_TYPE;
            default:
                throw new ParquetDecodingException("Unsupported OriginalType: " + primitiveType.getOriginalType().name());
        }
    }

    /** Maps a primitive field without an original-type annotation to its record type. */
    private static BaseType typeForPrimitiveName(PrimitiveType primitiveType, ParquetOptions options) {
        switch (primitiveType.getPrimitiveTypeName()) {
            case BOOLEAN:
                return TypeFactory.BOOLEAN_TYPE;
            case FLOAT:
                return TypeFactory.FLOAT_TYPE;
            case DOUBLE:
                return TypeFactory.DOUBLE_TYPE;
            case INT32:
                return TypeFactory.INTEGER_TYPE;
            case INT64:
                // An un-annotated INT64 is read as a date when the date-as-epoch-millis option is on.
                if (options.isDateAsEpochMillisEnabled()) {
                    return TypeFactory.DATE_TYPE;
                }
                return TypeFactory.LONG_TYPE;
            case INT96:
                return TypeFactory.TIMESTAMP_TYPE;
            case BINARY:
            case FIXED_LEN_BYTE_ARRAY:
                return TypeFactory.BINARY_TYPE;
            default:
                // Fail fast instead of building a column with a null type.
                throw new ParquetDecodingException("Unsupported PrimitiveTypeName: " + primitiveType.getPrimitiveTypeName().name());
        }
    }

    /** Maps a group field to a timestamp type; a {@code tz} member selects the time-zone variant. */
    private static BaseType typeForGroup(GroupType groupType) {
        if (groupType.containsField("ms") && groupType.containsField("ns")) {
            return groupType.containsField("tz") ? TypeFactory.TIMESTAMP_TZ_TYPE : TypeFactory.TIMESTAMP_TYPE;
        }
        LOGGER.error("Failed to infer metadata for unrecognized GroupType: {}", groupType);
        throw new ParquetDecodingException("Unsupported GroupType: " + groupType);
    }

    /**
     * Decodes a row schema from its JSON form.
     *
     * @throws ParquetDecodingException wrapping any failure raised while parsing or decoding
     */
    private static RowSchema decodeRecordSchemaFromJson(String jsonString) {
        try {
            return TabularRowSchema.fromJson(JSONObject.parse(jsonString));
        }
        catch (Exception ex) {
            throw new ParquetDecodingException("Failed to decode record schema from parquet metadata", ex);
        }
    }

    /**
     * Builds a row schema for a legacy file from the requested parquet fields, consulting the
     * XQE row type string where the parquet type alone is not enough. Exactly one column is
     * appended per field, in field order.
     *
     * @throws ParquetDecodingException if a field cannot be decoded
     */
    private static RowSchema decodeRecordSchemaFromRequestedPQSchema(MessageType thePQTypeFields, String theXqeRowTypeString) {
        TabularRowSchema rowSchema = new TabularRowSchema();
        for (Type fieldType : thePQTypeFields.getFields()) {
            ColumnMetadata column = fieldType.isPrimitive()
                    ? decodeLegacyPrimitiveColumn(fieldType, theXqeRowTypeString)
                    : inferColumnFromXQERowType(fieldType, theXqeRowTypeString);
            rowSchema.appendColumn(column);
        }
        return rowSchema;
    }

    /**
     * Resolves the single column for a primitive field of a legacy file. The original-type
     * annotation is tried first; if it is absent or not conclusive, the primitive type name
     * decides.
     */
    private static ColumnMetadata decodeLegacyPrimitiveColumn(Type fieldType, String xqeRowTypeString) {
        String columnName = fieldType.getName();
        boolean nullable = Type.Repetition.OPTIONAL == fieldType.getRepetition();
        PrimitiveType primitiveType = fieldType.asPrimitiveType();
        OriginalType originalType = fieldType.getOriginalType();
        if (originalType != null) {
            switch (originalType) {
                case INT_8:
                    return new Column(columnName, nullable, TypeFactory.BYTE_TYPE);
                case INT_16:
                    return new Column(columnName, nullable, TypeFactory.SHORT_TYPE);
                case INT_32:
                    return new Column(columnName, nullable, TypeFactory.INTEGER_TYPE);
                case INT_64:
                    return new Column(columnName, nullable, TypeFactory.LONG_TYPE);
                case DATE:
                    return new Column(columnName, nullable, TypeFactory.DATE_TYPE);
                case DECIMAL:
                    return new Column(columnName, nullable, TypeFactory.getDecimalType(primitiveType.getDecimalMetadata()));
                case UTF8:
                    // Returning here keeps the BINARY fallback below from adding the column a second time.
                    return inferColumnFromXQERowType(fieldType, xqeRowTypeString);
                default:
                    LOGGER.debug("Data type for column name '{}' cannot be inferred from parquet original field type. Falling to primitive type information provided by the parquet type for inference.", columnName);
                    break;
            }
        }
        switch (primitiveType.getPrimitiveTypeName()) {
            case INT32:
                return new Column(columnName, nullable, TypeFactory.INTEGER_TYPE);
            case INT64:
            case INT96:
                return inferColumnFromXQERowType(fieldType, xqeRowTypeString);
            case BINARY:
            case FIXED_LEN_BYTE_ARRAY:
                return new Column(columnName, nullable, TypeFactory.BINARY_TYPE);
            case FLOAT:
                return new Column(columnName, nullable, TypeFactory.FLOAT_TYPE);
            case DOUBLE:
                return new Column(columnName, nullable, TypeFactory.DOUBLE_TYPE);
            default:
                throw new ParquetDecodingException(String.format("Cannot decode primitive parquet type (%s) for column (%s).", primitiveType.getPrimitiveTypeName().name(), columnName));
        }
    }

    /**
     * Determines a column's type from the XQE row type string. The double-quoted column name is
     * located in the string, and the text between it and the next comma (or, when no comma
     * follows, the next closing parenthesis) is taken as the type name and compared
     * case-insensitively against the known XQE type names.
     *
     * @throws ParquetDecodingException if the column is not found or its type text is not recognized
     */
    private static ColumnMetadata inferColumnFromXQERowType(Type theFieldType, String xqeRowTypeString) {
        String columnName = theFieldType.getName();
        String quotedName = "\"" + columnName + "\"";
        int nameStart = xqeRowTypeString.indexOf(quotedName);
        if (nameStart < 0) {
            throw new ParquetDecodingException(String.format("Column (%s) cannot be found in the XQE rowType [%s].", columnName, xqeRowTypeString));
        }
        // The type text begins right after the closing quote of the column name.
        int typeStart = nameStart + quotedName.length();
        int commaIndex = xqeRowTypeString.indexOf(',', typeStart);
        int closingParenthesisIndex = xqeRowTypeString.indexOf(')', typeStart);
        if (commaIndex < 0 && closingParenthesisIndex < 0) {
            throw new ParquetDecodingException(String.format("Invalid XQE type part (%s) associated with column (%s).", xqeRowTypeString.substring(typeStart), columnName));
        }
        int typeEnd = commaIndex > 0 ? commaIndex : closingParenthesisIndex;
        String refinedTypePart = xqeRowTypeString.substring(typeStart, typeEnd).trim();
        boolean nullable = Type.Repetition.OPTIONAL == theFieldType.getRepetition();

        // Interval types are checked before the UTF8 test so that they are not reported as strings.
        if (TypeFactory.INTERVAL_DAY_TIME_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.INTERVAL_DAY_TIME_TYPE);
        }
        if (TypeFactory.INTERVAL_YEAR_MONTH_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.INTERVAL_YEAR_MONTH_TYPE);
        }
        if (OriginalType.UTF8 == theFieldType.getOriginalType()) {
            return new Column(columnName, nullable, TypeFactory.STRING_TYPE);
        }
        if (TypeFactory.LONG_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.LONG_TYPE);
        }
        if (TypeFactory.DATE_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.DATE_TYPE);
        }
        if (TypeFactory.TIME_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.TIME_TYPE);
        }
        if (TypeFactory.TIME_TZ_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart) || "TIME TIME ZONE".equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.TIME_TZ_TYPE);
        }
        if (TypeFactory.TIMESTAMP_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.TIMESTAMP_TYPE);
        }
        if (TypeFactory.TIMESTAMP_TZ_TYPE.toXQETypeString().equalsIgnoreCase(refinedTypePart)) {
            return new Column(columnName, nullable, TypeFactory.TIMESTAMP_TZ_TYPE);
        }
        throw new ParquetDecodingException(String.format("Invalid XQE type part (%s) associated with column (%s).", refinedTypePart, columnName));
    }

    /** Builds a new schema holding the columns of {@code recordSchema} looked up by name, in the order of {@code fields}. */
    private static RowSchema projectRecordSchema(RowSchema recordSchema, List<String> fields) {
        TabularRowSchema projectedSchema = new TabularRowSchema();
        for (String name : fields) {
            projectedSchema.appendColumn(recordSchema.getColumnMetadata(name));
        }
        return projectedSchema;
    }

    /**
     * Applies the configured projection to the file schema. Each projection setting is read
     * from {@code options} first and from {@code config} as the fallback. A projection schema
     * takes precedence over a projection field list; with neither set the file schema is
     * returned unchanged.
     *
     * @param fileSchema the full schema of the parquet file
     * @param options    the options consulted first for projection settings
     * @param config     the configuration consulted as the fallback
     * @return the schema to request when reading
     * @throws ParquetDecodingException if a selected field is reported missing
     */
    static MessageType getProjectedMessageType(MessageType fileSchema, ParquetOptions options, Configuration config) {
        String projectionSchemaStr = options.getOrDefault(PROJECTION_SCHEMA_KEY, config.get(PROJECTION_SCHEMA_KEY));
        String projectionFieldsStr = options.getOrDefault(PROJECTION_FIELDS_KEY, config.get(PROJECTION_FIELDS_KEY));
        if (StringUtils.isNoneEmpty(projectionSchemaStr)) {
            LOGGER.debug("Parquet metadata contains projection schema: {}", projectionSchemaStr);
            return ReadSupport.getSchemaForRead(fileSchema, projectionSchemaStr);
        }
        if (StringUtils.isNoneEmpty(projectionFieldsStr)) {
            LOGGER.debug("Parquet metadata contains projection fields: {}", projectionFieldsStr);
            List<Type> fields = selectParquetTypes(fileSchema, parseStringList(projectionFieldsStr));
            return ReadSupport.getSchemaForRead(fileSchema, new MessageType("SCHEMA", fields));
        }
        return fileSchema;
    }

    /** Splits a comma-separated list, trimming each element and dropping empty ones. */
    private static List<String> parseStringList(String fieldListStr) {
        String[] elements = fieldListStr.split(",");
        List<String> fieldList = new ArrayList<String>(elements.length);
        for (String element : elements) {
            String name = element.trim();
            if (StringUtils.isNoneEmpty(name)) {
                fieldList.add(name);
            }
        }
        return fieldList;
    }

    /**
     * Looks up the parquet type of each named field in {@code schema}, preserving the order of
     * {@code fields}.
     *
     * @throws ParquetDecodingException if the lookup yields {@code null} for a name
     */
    private static List<Type> selectParquetTypes(MessageType schema, List<String> fields) {
        List<Type> selectedTypes = new ArrayList<Type>(fields.size());
        for (String name : fields) {
            Type fieldType = schema.getType(name);
            if (fieldType == null) {
                throw new ParquetDecodingException("Selected field not found: " + name);
            }
            selectedTypes.add(fieldType);
        }
        return selectedTypes;
    }
}

