/*
 * Decompiled with CFR 0.152.
 */
package com.ibm.dltj.uima_annotator.flow;

import com.ibm.dltj.DLTException;
import com.ibm.dltj.Dictionary;
import com.ibm.dltj.fst.MatchBuffer;
import com.ibm.dltj.gloss.TokenClassGloss;
import com.ibm.dltj.uima_annotator.flow.LrwCasCopier;
import com.ibm.langware.annotator.Copyright;
import com.ibm.langware.annotator.Messages;
import java.io.IOException;
import java.io.InputStream;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.HashSet;
import java.util.Set;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasMultiplier_ImplBase;
import org.apache.uima.cas.AbstractCas;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;

/**
 * CAS multiplier that splits a document whose text is longer than the
 * configured segment size into consecutive segment CASes.
 *
 * <p>Each produced CAS carries a slice of the source document text, copies of the
 * source annotations that overlap the slice (with offsets rebased to the slice),
 * and one annotation of the configured state type whose {@code segmentState}
 * feature tells whether the segment is the first, an intermediate, or the last
 * one. Documents that fit into a single segment produce no output CASes.
 *
 * <p>Configuration parameters read in {@link #initialize(UimaContext)}:
 * {@code StateType}, {@code SegmentSize}, {@code DiscardTypes}; and the external
 * resource {@code BreakRuleDictionary}.
 */
@Copyright(value="\n\n(C) Copyright IBM Corp. 2006, 2013.\n\n")
public class ChunkerAnnotator
extends CasMultiplier_ImplBase {
    private Logger _logger;
    /** Maximum number of characters per segment ({@code SegmentSize} parameter). */
    private int _segmentSize;
    /** Length of the window (ending at the size limit) searched for a break point. */
    private int _range;
    /** Iterator over the current source document, handed to the separator. */
    private CharacterIterator _characterIter;
    /** Break point finder; {@code null} when the break rule dictionary could not be loaded. */
    private TextChunkSeparator separator;
    /** {@code true} while segments of the current source CAS remain to be produced. */
    private boolean _chunkingActive;
    private String _document;
    private int _documentSize;
    /** Absolute offset in the source document at which the next segment starts. */
    private int _offset;
    private CAS _sourceCas;
    /** Names of annotation types that are not copied into segment CASes. */
    private final Set<String> _discardTypes = new HashSet<String>();
    /** Name of the annotation type carrying the {@code segmentState} feature. */
    private String _stateTypeName;
    /** Segment state value 0; never assigned by this class. */
    static final int SEGMENT_IDENTIFIER_STANDALONE = 0;
    /** Segment state for the segment starting at document offset 0. */
    private static final int SEGMENT_IDENTIFIER_START = 1;
    /** Segment state for a segment that is neither first nor last. */
    private static final int SEGMENT_IDENTIFIER_INTERMEDIATE = 2;
    /** Segment state bit set on the segment that reaches the end of the document. */
    static final int SEGMENT_IDENTIFIER_END = 4;
    /** Base name of the integer feature that stores the segment state. */
    static final String SEGMENT_STATE = "segmentState";

    /**
     * Reads the configuration, loads the break rule dictionary and derives the
     * break search range.
     *
     * <p>Failures while loading the dictionary are logged and otherwise ignored;
     * segments are then cut at the plain size limit.
     *
     * @param uimaContext the UIMA context providing parameters and resources
     * @throws ResourceInitializationException if the superclass initialization fails
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this._logger = uimaContext.getLogger();
        this._stateTypeName = (String)uimaContext.getConfigParameterValue("StateType");
        this._segmentSize = (Integer)uimaContext.getConfigParameterValue("SegmentSize");
        this._logger.log(Level.CONFIG, Messages.getFormattedMessage("ChunkerAnnotator.segsize", this._segmentSize));
        String[] discardTypeNames = (String[])uimaContext.getConfigParameterValue("DiscardTypes");
        // The parameter value is null when it is not configured; treat that as "discard nothing".
        if (discardTypeNames != null) {
            for (String typeName : discardTypeNames) {
                this._discardTypes.add(typeName);
            }
        }
        try {
            InputStream inputStream = uimaContext.getResourceAsStream("BreakRuleDictionary");
            if (inputStream != null) {
                try {
                    this.separator = new TextChunkSeparator(inputStream);
                } finally {
                    // Close the stream even when the dictionary cannot be parsed.
                    inputStream.close();
                }
            }
        } catch (ResourceAccessException resourceAccessException) {
            this._logger.log(Level.SEVERE, Messages.getMessage("ChunkerAnnotator.initerror"), (Throwable)resourceAccessException);
        } catch (DLTException dLTException) {
            this._logger.log(Level.SEVERE, Messages.getMessage("ChunkerAnnotator.initerror"), (Throwable)dLTException);
        } catch (IOException iOException) {
            this._logger.log(Level.SEVERE, Messages.getMessage("ChunkerAnnotator.initerror"), (Throwable)iOException);
        }
        // Search window: 1/20 of the segment size, at least 200 characters,
        // but never larger than the segment itself.
        this._range = this._segmentSize / 20;
        if (this._range < 200) {
            this._range = 200;
        }
        if (this._segmentSize < this._range) {
            this._range = this._segmentSize;
        }
    }

    /** Drops all per-document state and switches chunking off. */
    private void reset() {
        this._document = null;
        this._documentSize = 0;
        this._offset = 0;
        this._sourceCas = null;
        this._characterIter = null;
        this._chunkingActive = false;
    }

    /**
     * Accepts a new source CAS and decides whether it has to be split.
     *
     * <p>Chunking is activated only when the document text is longer than the
     * segment size and the state type exists in the CAS type system.
     *
     * @param cAS the source CAS
     */
    public void process(CAS cAS) {
        this._sourceCas = cAS;
        this._chunkingActive = false;
        this._document = this._sourceCas.getDocumentText();
        // No text to split, or a segment size that could never make progress.
        if (this._document == null || this._segmentSize <= 0) {
            this.reset();
            return;
        }
        this._documentSize = this._document.length();
        if (this._documentSize <= this._segmentSize) {
            this.reset();
            return;
        }
        TypeSystem typeSystem = this._sourceCas.getTypeSystem();
        Type stateType = typeSystem.getType(this._stateTypeName);
        if (stateType == null) {
            this.reset();
            return;
        }
        this._offset = 0;
        this._characterIter = new StringCharacterIterator(this._document);
        this._chunkingActive = true;
    }

    /**
     * @return {@code true} while chunking is active and part of the document has
     *         not been emitted yet
     */
    public boolean hasNext() {
        if (!this._chunkingActive) {
            return false;
        }
        return this._offset < this._documentSize;
    }

    /**
     * Produces the CAS for the next segment of the current document.
     *
     * @return a new CAS holding the segment text, the rebased annotations and the
     *         segment state annotation
     */
    public AbstractCas next() {
        CAS segmentCas = this.getEmptyCAS();
        segmentCas.setDocumentLanguage(this._sourceCas.getDocumentLanguage());
        int segmentStart = this._offset;
        int hardLimit = segmentStart + this._segmentSize;
        int state = 0;
        if (segmentStart == 0) {
            state = SEGMENT_IDENTIFIER_START;
        }
        // ">=": a remainder of exactly one segment size is the last segment too.
        if (hardLimit >= this._documentSize) {
            hardLimit = this._documentSize;
            state |= SEGMENT_IDENTIFIER_END;
        }
        if (state == 0) {
            state = SEGMENT_IDENTIFIER_INTERMEDIATE;
        }
        int segmentEnd;
        if (this.separator != null) {
            this._characterIter.setIndex(segmentStart);
            segmentEnd = this.separator.getBestBreakPoint(this._characterIter, hardLimit - this._range, this._range);
        } else {
            // Break rules are unavailable (see initialize): cut at the size limit.
            segmentEnd = hardLimit;
        }
        if (segmentEnd <= segmentStart) {
            // The separator found nothing beyond the segment start; force progress
            // so that hasNext() cannot stay true forever.
            segmentEnd = hardLimit;
        }
        segmentCas.setDocumentText(this._document.substring(segmentStart, segmentEnd));
        this.copyAnnotations(segmentCas, segmentStart, segmentEnd);
        TypeSystem typeSystem = segmentCas.getTypeSystem();
        Type stateType = typeSystem.getType(this._stateTypeName);
        Feature stateFeature = stateType.getFeatureByBaseName(SEGMENT_STATE);
        AnnotationFS stateAnnotation = segmentCas.createAnnotation(stateType, 0, 0);
        stateAnnotation.setIntValue(stateFeature, state);
        segmentCas.getIndexRepository().addFS((FeatureStructure)stateAnnotation);
        this._offset = segmentEnd;
        if (this._offset >= this._documentSize) {
            this.reset();
        }
        return segmentCas;
    }

    /**
     * Copies the source annotations that overlap a segment into the segment CAS,
     * rebasing their offsets to the segment text.
     *
     * <p>Annotations of discarded types and the source document annotation are
     * skipped. Annotations spanning {@code [0, 0]} in the source are copied into
     * every segment at {@code [0, 0]}. Annotations that cross a segment boundary
     * are clipped to the segment.
     *
     * @param targetCas    the segment CAS receiving the copies
     * @param segmentStart absolute start offset of the segment in the source document
     * @param segmentEnd   absolute end offset (exclusive) of the segment in the source document
     */
    private void copyAnnotations(CAS targetCas, int segmentStart, int segmentEnd) {
        LrwCasCopier copier = new LrwCasCopier(this._sourceCas, targetCas);
        HashSet<Object> handled = new HashSet<Object>();
        // Pre-marking the source document annotation keeps it from being copied.
        handled.add(this._sourceCas.getDocumentAnnotation());
        TypeSystem sourceTypes = this._sourceCas.getTypeSystem();
        TypeSystem targetTypes = targetCas.getTypeSystem();
        Feature sourceBegin = sourceTypes.getFeatureByFullName("uima.tcas.Annotation:begin");
        Feature sourceEnd = sourceTypes.getFeatureByFullName("uima.tcas.Annotation:end");
        Feature targetBegin = targetTypes.getFeatureByFullName("uima.tcas.Annotation:begin");
        Feature targetEnd = targetTypes.getFeatureByFullName("uima.tcas.Annotation:end");
        // All range checks below work on segment-relative offsets.
        int segmentLength = segmentEnd - segmentStart;
        for (FeatureStructure source : this._sourceCas.getAnnotationIndex()) {
            if (this._discardTypes.contains(source.getType().getName()) || handled.contains(source)) {
                continue;
            }
            handled.add(source);
            int begin = source.getIntValue(sourceBegin);
            int end = source.getIntValue(sourceEnd);
            int relativeBegin;
            int relativeEnd;
            if (begin == 0 && end == 0) {
                relativeBegin = 0;
                relativeEnd = 0;
            } else {
                relativeBegin = begin - segmentStart;
                relativeEnd = end - segmentStart;
                // Entirely before the segment (including one ending exactly at its start).
                if (relativeEnd < 0 || (relativeEnd == 0 && relativeBegin < 0)) {
                    continue;
                }
                // Entirely after the segment.
                if (relativeBegin >= segmentLength) {
                    continue;
                }
                // Clip annotations that cross a segment boundary.
                if (relativeBegin < 0) {
                    relativeBegin = 0;
                }
                if (relativeEnd > segmentLength) {
                    relativeEnd = segmentLength;
                }
            }
            FeatureStructure copy = copier.copyFS(source);
            copy.setIntValue(targetBegin, relativeBegin);
            copy.setIntValue(targetEnd, relativeEnd);
            targetCas.addFsToIndexes(copy);
        }
    }

    /**
     * Finds a position at which a document can be cut, using a break rule
     * dictionary.
     */
    private static class TextChunkSeparator {
        private final MatchBuffer _matchBuffer = new MatchBuffer();
        private final Dictionary _breakRule;

        /**
         * @param inputStream stream from which the break rule dictionary is read
         * @throws DLTException if the dictionary cannot be created
         */
        TextChunkSeparator(InputStream inputStream) throws DLTException {
            this._breakRule = new Dictionary(inputStream, 0);
        }

        /**
         * Determines the break point inside the window
         * {@code [windowStart, windowStart + windowLength]}.
         *
         * <p>If the window reaches the end of the text, the end index is returned.
         * Otherwise dictionary matches are followed from the window start for as
         * long as they advance and stay inside the window. A match whose gloss has
         * flag 32 ends the search at once; the last match with flag 16 is preferred
         * over the last plain match. Without any match the window start is returned.
         * NOTE(review): the meaning of gloss flags 16 and 32 is not visible here —
         * presumably break classes of increasing strength; confirm against the
         * dictionary definition.
         *
         * @param characterIterator iterator over the full text; its index is modified
         * @param windowStart       first index considered for a break
         * @param windowLength      length of the search window
         * @return the index at which to cut the text
         */
        int getBestBreakPoint(CharacterIterator characterIterator, int windowStart, int windowLength) {
            int endIndex = characterIterator.getEndIndex();
            int best = windowStart > endIndex ? endIndex : windowStart;
            int windowEnd = windowStart + windowLength;
            if (windowEnd >= endIndex) {
                return endIndex;
            }
            if (this._breakRule == null) {
                return best;
            }
            characterIterator.setIndex(best);
            // Stays at windowEnd until a flag-16 match has been seen.
            int preferred = windowEnd;
            this._matchBuffer.clear();
            int position = windowStart;
            while (position < windowEnd) {
                if (!this._breakRule.lookupLongest(characterIterator, this._matchBuffer)) {
                    break;
                }
                TokenClassGloss gloss = (TokenClassGloss)this._matchBuffer.gloss[0].first();
                int matchEnd = gloss.getMatchEnd(position, this._matchBuffer.index[0]);
                // Stop when the match does not advance or leaves the window.
                if (matchEnd <= best || matchEnd > windowEnd) {
                    break;
                }
                best = matchEnd;
                if (gloss.is(32)) {
                    return best;
                }
                if (gloss.is(16)) {
                    preferred = best;
                }
                characterIterator.setIndex(best);
                position = characterIterator.getIndex();
            }
            if (preferred < windowEnd) {
                return preferred;
            }
            return best;
        }
    }
}

