/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.MultiTokenTag;
import edu.stanford.nlp.ling.tokensregex.EnvLookup;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.ChunkAnnotationUtils;
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.ArrayMap;
import edu.stanford.nlp.util.CollectionValuedMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.XMLUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Annotator that strips XML tag tokens out of an already tokenized document.
 *
 * <p>Every token whose word parses as an XML tag is dropped from the token list.
 * Depending on the configured tag patterns, tags additionally drive sentence
 * boundaries, utterance/speaker marking, section marking, and the extraction of
 * document level annotations (doc id, doc date, doc type, ...). All tag patterns
 * are compiled as case-insensitive regular expressions.
 *
 * <p>The surviving tokens are modified in place (word unescaping, added
 * annotations) and returned in a new list.
 */
public class CleanXmlAnnotator
implements Annotator {
    /** Tags whose enclosed text is kept; {@code null} keeps every non-tag token. */
    private final Pattern xmlTagMatcher;
    public static final String DEFAULT_XML_TAGS = ".*";
    /** Tags that force a sentence end on the last kept token before them. */
    private final Pattern sentenceEndingTagMatcher;
    public static final String DEFAULT_SENTENCE_ENDERS = "";
    /** Tags whose content is flagged to be treated as one sentence. */
    private Pattern singleSentenceTagMatcher = null;
    public static final String DEFAULT_SINGLE_SENTENCE_TAGS = null;
    /** Tokens directly inside a matching tag form the document date. */
    private final Pattern dateTagMatcher;
    public static final String DEFAULT_DATE_TAGS = "datetime|date";
    /** Tokens directly inside a matching tag form the document id. */
    private Pattern docIdTagMatcher;
    public static final String DEFAULT_DOCID_TAGS = "docid";
    /** Tokens directly inside a matching tag form the document type. */
    private Pattern docTypeTagMatcher;
    public static final String DEFAULT_DOCTYPE_TAGS = "doctype";
    /** Tags delimiting an utterance turn. */
    private Pattern utteranceTurnTagMatcher = null;
    public static final String DEFAULT_UTTERANCE_TURN_TAGS = "turn";
    /** Tags enclosing the name of the current speaker. */
    private Pattern speakerTagMatcher = null;
    public static final String DEFAULT_SPEAKER_TAGS = "speaker";
    /** Annotation key -> (tag pattern, attribute pattern) pairs applied to the document. */
    private final CollectionValuedMap<Class, Pair<Pattern, Pattern>> docAnnotationPatterns = new CollectionValuedMap<Class, Pair<Pattern, Pattern>>();
    public static final String DEFAULT_DOC_ANNOTATIONS_PATTERNS = "docID=doc[id],doctype=doc[type],docsourcetype=doctype[source]";
    /** Annotation key -> (tag pattern, attribute pattern) pairs applied to tokens. */
    private final CollectionValuedMap<Class, Pair<Pattern, Pattern>> tokenAnnotationPatterns = new CollectionValuedMap<Class, Pair<Pattern, Pattern>>();
    public static final String DEFAULT_TOKEN_ANNOTATIONS_PATTERNS = null;
    /** Tags delimiting a section. */
    private Pattern sectionTagMatcher = null;
    public static final String DEFAULT_SECTION_TAGS = null;
    /** Tokens whose word matches are ignored when tracking section content. */
    private Pattern ssplitDiscardTokensMatcher = null;
    /** Annotation key -> (tag pattern, optional attribute pattern) pairs applied to sections. */
    private final CollectionValuedMap<Class, Pair<Pattern, Pattern>> sectionAnnotationPatterns = new CollectionValuedMap<Class, Pair<Pattern, Pattern>>();
    public static final String DEFAULT_SECTION_ANNOTATIONS_PATTERNS = null;
    /** When true, a close tag may implicitly close unclosed inner tags, and unclosed tags at the end are tolerated. */
    private final boolean allowFlawedXml;
    public static final boolean DEFAULT_ALLOW_FLAWS = true;
    /** Splits a {@code tag[attr]} specification into its tag part and its attribute part. */
    private static final Pattern TAG_ATTR_PATTERN = Pattern.compile("(.*)\\[(.*)\\]");

    /** Creates an annotator using the default tag patterns and tolerating flawed XML. */
    public CleanXmlAnnotator() {
        this(DEFAULT_XML_TAGS, DEFAULT_SENTENCE_ENDERS, DEFAULT_DATE_TAGS, DEFAULT_ALLOW_FLAWS);
    }

    /**
     * Creates an annotator with explicit tag patterns.
     *
     * @param xmlTagsToRemove regex of tags whose contents are kept; when {@code null},
     *        no tag nesting is tracked and the sentence ending tags are ignored as well
     * @param sentenceEndingTags regex of tags that end a sentence; {@code null} or empty disables it
     * @param dateTags regex of tags holding the document date; {@code null} disables it
     * @param allowFlawedXml whether mismatched or unclosed tags are tolerated
     */
    public CleanXmlAnnotator(String xmlTagsToRemove, String sentenceEndingTags, String dateTags, boolean allowFlawedXml) {
        this.allowFlawedXml = allowFlawedXml;
        if (xmlTagsToRemove != null) {
            this.xmlTagMatcher = toCaseInsensitivePattern(xmlTagsToRemove);
            boolean hasSentenceEnders = sentenceEndingTags != null && sentenceEndingTags.length() > 0;
            this.sentenceEndingTagMatcher = hasSentenceEnders ? toCaseInsensitivePattern(sentenceEndingTags) : null;
        } else {
            this.xmlTagMatcher = null;
            this.sentenceEndingTagMatcher = null;
        }
        this.dateTagMatcher = toCaseInsensitivePattern(dateTags);
    }

    /**
     * Compiles {@code tags} as a case-insensitive regular expression.
     *
     * @return the compiled pattern, or {@code null} when {@code tags} is {@code null}
     */
    private static Pattern toCaseInsensitivePattern(String tags) {
        if (tags == null) {
            return null;
        }
        return Pattern.compile(tags, Pattern.CASE_INSENSITIVE);
    }

    /** Sets the regex of token words to ignore while tracking section content; {@code null} disables it. */
    public void setSsplitDiscardTokensMatcher(String tags) {
        this.ssplitDiscardTokensMatcher = toCaseInsensitivePattern(tags);
    }

    /** Sets the regex of tags whose content is flagged as a single sentence; {@code null} disables it. */
    public void setSingleSentenceTagMatcher(String tags) {
        this.singleSentenceTagMatcher = toCaseInsensitivePattern(tags);
    }

    /** Sets the regex of tags holding the document id; {@code null} disables it. */
    public void setDocIdTagMatcher(String docIdTags) {
        this.docIdTagMatcher = toCaseInsensitivePattern(docIdTags);
    }

    /** Sets the regex of tags holding the document type; {@code null} disables it. */
    public void setDocTypeTagMatcher(String docTypeTags) {
        this.docTypeTagMatcher = toCaseInsensitivePattern(docTypeTags);
    }

    /** Sets the regex of tags delimiting sections; {@code null} disables it. */
    public void setSectionTagMatcher(String sectionTags) {
        this.sectionTagMatcher = toCaseInsensitivePattern(sectionTags);
    }

    /** Sets the regexes of the utterance turn tags and of the speaker tags; {@code null} disables either. */
    public void setDiscourseTags(String utteranceTurnTags, String speakerTags) {
        this.utteranceTurnTagMatcher = toCaseInsensitivePattern(utteranceTurnTags);
        this.speakerTagMatcher = toCaseInsensitivePattern(speakerTags);
    }

    /**
     * Replaces the document annotation patterns.
     *
     * @param conf comma separated {@code key=tag[attr]} entries; {@code null} or blank means none
     * @throws IllegalArgumentException if an entry is malformed or its key cannot be resolved
     */
    public void setDocAnnotationPatterns(String conf) {
        this.docAnnotationPatterns.clear();
        addAnnotationPatterns(this.docAnnotationPatterns, conf, true);
    }

    /**
     * Replaces the token annotation patterns.
     *
     * @param conf comma separated {@code key=tag[attr]} entries; {@code null} or blank means none
     * @throws IllegalArgumentException if an entry is malformed or its key cannot be resolved
     */
    public void setTokenAnnotationPatterns(String conf) {
        this.tokenAnnotationPatterns.clear();
        addAnnotationPatterns(this.tokenAnnotationPatterns, conf, true);
    }

    /**
     * Replaces the section annotation patterns.
     *
     * @param conf comma separated {@code key=tag[attr]} or {@code key=tag} entries;
     *        {@code null} or blank means none
     * @throws IllegalArgumentException if an entry is malformed or its key cannot be resolved
     */
    public void setSectionAnnotationPatterns(String conf) {
        this.sectionAnnotationPatterns.clear();
        addAnnotationPatterns(this.sectionAnnotationPatterns, conf, false);
    }

    /**
     * Parses {@code conf} and adds one (tag pattern, attribute pattern) pair per entry.
     *
     * @param annotationPatterns map receiving the parsed pairs
     * @param conf comma separated {@code key=pattern} entries; {@code null} or blank adds nothing
     * @param attrOnly when true every pattern must have the {@code tag[attr]} form; when false a
     *        plain tag pattern is accepted and stored with a {@code null} attribute pattern
     * @throws IllegalArgumentException if an entry has no {@code =}, its key cannot be resolved,
     *         or {@code attrOnly} is set and the pattern lacks the {@code [attr]} part
     */
    private static void addAnnotationPatterns(CollectionValuedMap<Class, Pair<Pattern, Pattern>> annotationPatterns, String conf, boolean attrOnly) {
        // A blank configuration means "no patterns", exactly like null; splitting it would
        // yield one empty entry and be rejected as malformed.
        if (conf == null || conf.trim().isEmpty()) {
            return;
        }
        for (String annoPatternString : conf.trim().split("\\s*,\\s*")) {
            String[] annoPattern = annoPatternString.split("\\s*=\\s*", 2);
            if (annoPattern.length != 2) {
                throw new IllegalArgumentException("Invalid annotation to tag pattern: " + annoPatternString);
            }
            String annoKeyString = annoPattern[0];
            String pattern = annoPattern[1];
            Class annoKey = EnvLookup.lookupAnnotationKeyWithClassname(null, annoKeyString);
            if (annoKey == null) {
                throw new IllegalArgumentException("Cannot resolve annotation key " + annoKeyString);
            }
            Matcher m = TAG_ATTR_PATTERN.matcher(pattern);
            if (m.matches()) {
                Pattern tagPattern = toCaseInsensitivePattern(m.group(1));
                Pattern attrPattern = toCaseInsensitivePattern(m.group(2));
                annotationPatterns.add(annoKey, Pair.makePair(tagPattern, attrPattern));
            } else if (attrOnly) {
                throw new IllegalArgumentException("Invalid tag pattern: " + pattern + " for annotation key " + annoKeyString);
            } else {
                Pattern tagPattern = toCaseInsensitivePattern(pattern);
                annotationPatterns.add(annoKey, Pair.makePair(tagPattern, (Pattern)null));
            }
        }
    }

    /**
     * Replaces the tokens of {@code annotation} with the cleaned token list.
     * Does nothing when the annotation has no tokens annotation.
     */
    @Override
    public void annotate(Annotation annotation) {
        if (!annotation.containsKey(CoreAnnotations.TokensAnnotation.class)) {
            return;
        }
        List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
        List<CoreLabel> newTokens = this.process(annotation, tokens);
        annotation.set(CoreAnnotations.TokensAnnotation.class, newTokens);
    }

    /**
     * Cleans {@code tokens} without an enclosing document; document level
     * annotations are not produced.
     *
     * @see #process(Annotation, List)
     */
    public List<CoreLabel> process(List<CoreLabel> tokens) {
        return this.process(null, tokens);
    }

    /**
     * Renders a run of tokens as text.
     *
     * <p>When the annotation carries the document text, the substring from the first
     * token's begin offset to the last token's end offset is returned; otherwise the
     * token words are joined with single spaces.
     *
     * @param annotation the enclosing document, may be {@code null}
     * @param tokens the tokens to render
     * @return the text, or the empty string when {@code tokens} is empty
     */
    private static String tokensToString(Annotation annotation, List<CoreLabel> tokens) {
        if (tokens.isEmpty()) {
            return "";
        }
        String annotationText = annotation != null ? annotation.get(CoreAnnotations.TextAnnotation.class) : null;
        if (annotationText == null) {
            return StringUtils.joinWords(tokens, " ");
        }
        CoreLabel firstToken = tokens.get(0);
        CoreLabel lastToken = tokens.get(tokens.size() - 1);
        int firstCharOffset = firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int lastCharOffset = lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        return annotationText.substring(firstCharOffset, lastCharOffset);
    }

    /**
     * Applies the annotation patterns to one tag and records the results on {@code cm}.
     *
     * <p>For each key, its patterns are tried in order until one produces a value:
     * <ul>
     * <li>With an attribute pattern, the value of the first tag attribute whose name matches is
     *     set on {@code cm}. If {@code savedTokenAnnotations} is given, values are kept on a
     *     per-key stack so that the matching end tag restores the enclosing value (or removes
     *     the key).</li>
     * <li>Without an attribute pattern and with {@code savedTokens} given, any tag that is not
     *     a plain end tag starts a fresh token list for the key, and a plain end tag sets the
     *     key to the text of the tokens saved so far.</li>
     * </ul>
     *
     * @param annotation the enclosing document, used to render saved tokens; may be {@code null}
     * @param cm the map receiving the annotations
     * @param tag the tag being processed
     * @param annotationPatterns key to (tag pattern, attribute pattern) pairs; {@code null} yields an empty result
     * @param savedTokens per-key tokens collected since the start tag; may be {@code null}
     * @param toAnnotate keys to consider; {@code null} means all keys of {@code annotationPatterns}
     * @param savedTokenAnnotations per-key stacks of (tag name, value); may be {@code null}
     * @return the keys for which a value was set by this call
     */
    private static Set<Class> annotateWithTag(Annotation annotation, CoreMap cm, XMLUtils.XMLTag tag, CollectionValuedMap<Class, Pair<Pattern, Pattern>> annotationPatterns, Map<Class, List<CoreLabel>> savedTokens, Collection<Class> toAnnotate, Map<Class, Stack<Pair<String, String>>> savedTokenAnnotations) {
        Set<Class> foundAnnotations = new HashSet<Class>();
        if (annotationPatterns == null) {
            return foundAnnotations;
        }
        Collection<Class> keys = toAnnotate != null ? toAnnotate : annotationPatterns.keySet();
        for (Class key : keys) {
            for (Pair<Pattern, Pattern> pattern : annotationPatterns.get(key)) {
                Pattern tagPattern = pattern.first;
                Pattern attrPattern = pattern.second;
                if (!tagPattern.matcher(tag.name).matches()) {
                    continue;
                }
                boolean matched = false;
                if (attrPattern != null) {
                    if (tag.attributes != null) {
                        for (Map.Entry<String, String> entry : tag.attributes.entrySet()) {
                            if (!attrPattern.matcher(entry.getKey()).matches()) {
                                continue;
                            }
                            if (savedTokenAnnotations != null) {
                                Stack<Pair<String, String>> stack = savedTokenAnnotations.get(key);
                                if (stack == null) {
                                    stack = new Stack<Pair<String, String>>();
                                    savedTokenAnnotations.put(key, stack);
                                }
                                stack.push(Pair.makePair(tag.name, entry.getValue()));
                            }
                            cm.set(key, entry.getValue());
                            foundAnnotations.add(key);
                            matched = true;
                            break;
                        }
                    }
                    if (savedTokenAnnotations != null && tag.isEndTag) {
                        // The tag ended: fall back to the value of the enclosing tag, if any.
                        Stack<Pair<String, String>> stack = savedTokenAnnotations.get(key);
                        if (stack != null && !stack.isEmpty() && stack.peek().first.equalsIgnoreCase(tag.name)) {
                            stack.pop();
                            if (!stack.isEmpty()) {
                                cm.set(key, stack.peek().second);
                            } else {
                                cm.remove(key);
                            }
                        }
                    }
                } else if (savedTokens != null) {
                    if (tag.isEndTag && !tag.isSingleTag) {
                        List<CoreLabel> saved = savedTokens.remove(key);
                        if (saved != null && saved.size() > 0) {
                            cm.set(key, tokensToString(annotation, saved));
                            foundAnnotations.add(key);
                            matched = true;
                        }
                    } else {
                        savedTokens.put(key, new ArrayList<CoreLabel>());
                    }
                }
                if (matched) {
                    // One value per key: skip the remaining patterns of this key.
                    break;
                }
            }
        }
        return foundAnnotations;
    }

    /** Flags the last kept token, if there is one, as a forced sentence end. */
    private static void forceSentenceEnd(List<CoreLabel> keptTokens) {
        if (keptTokens.size() > 0) {
            keptTokens.get(keptTokens.size() - 1).set(CoreAnnotations.ForcedSentenceEndAnnotation.class, true);
        }
    }

    /** Returns whether {@code matcher} is set and matches the innermost (last) tag of {@code tagContext}. */
    private static boolean innermostTagMatches(Pattern matcher, List<String> tagContext) {
        return matcher != null && tagContext.size() > 0 && matcher.matcher(tagContext.get(tagContext.size() - 1)).matches();
    }

    /**
     * Removes XML tag tokens from {@code tokens} and annotates the remaining ones.
     *
     * <p>Tokens that are not tags get their word XML-unescaped and their XML context set.
     * They are kept when they are inside at least one tag matching the tag pattern, when
     * that pattern also matches the empty string, or when no tag pattern is configured.
     * The before/original/after text of removed tag tokens is accumulated and attached to
     * the neighbouring tokens' before/after annotations where those are present.
     *
     * @param annotation the enclosing document; may be {@code null}, in which case no
     *        document level annotations are set
     * @param tokens the tokens to clean; the tokens themselves are modified in place
     * @return a new list holding the kept tokens
     * @throws IllegalArgumentException if a close tag matches no open tag, or if flawed XML
     *         is not allowed and tags are mismatched or left unclosed
     */
    public List<CoreLabel> process(Annotation annotation, List<CoreLabel> tokens) {
        // Names of the currently open tags; only maintained when xmlTagMatcher is set.
        Stack<String> enclosingTags = new Stack<String>();
        // Immutable snapshot of enclosingTags shared by consecutive tokens; reset on tag changes.
        List<String> currentTagSet = null;
        // Number of currently open tags that match xmlTagMatcher.
        int matchDepth = 0;
        List<CoreLabel> newTokens = new ArrayList<CoreLabel>();
        // Text of removed tags not yet attached to a neighbouring token.
        StringBuilder removedText = new StringBuilder();
        // Document annotation keys that still have to be found.
        Set<Class> toAnnotate = new HashSet<Class>(this.docAnnotationPatterns.keySet());
        int utteranceIndex = 0;
        boolean inUtterance = false;
        boolean inSpeakerTag = false;
        String currentSpeaker = null;
        List<CoreLabel> speakerTokens = new ArrayList<CoreLabel>();
        List<CoreLabel> docDateTokens = new ArrayList<CoreLabel>();
        List<CoreLabel> docTypeTokens = new ArrayList<CoreLabel>();
        List<CoreLabel> docIdTokens = new ArrayList<CoreLabel>();
        CoreMap tokenAnnotations = this.tokenAnnotationPatterns != null && !this.tokenAnnotationPatterns.isEmpty() ? new ArrayCoreMap() : null;
        Map<Class, Stack<Pair<String, String>>> savedTokenAnnotations = new ArrayMap<Class, Stack<Pair<String, String>>>();
        XMLUtils.XMLTag sectionStartTag = null;
        CoreLabel sectionStartToken = null;
        CoreMap sectionAnnotations = null;
        Map<Class, List<CoreLabel>> savedTokensForSection = new HashMap<Class, List<CoreLabel>>();
        boolean markSingleSentence = false;
        // Loop invariant: tokens outside of any matching tag are kept when there is no tag
        // pattern or when the pattern matches the empty string.
        boolean keepTokensOutsideTags = this.xmlTagMatcher == null || this.xmlTagMatcher.matcher("").matches();

        for (CoreLabel token : tokens) {
            String word = token.word().trim();
            XMLUtils.XMLTag tag = XMLUtils.parseTag(word);

            if (tag == null) {
                // ---- Regular (non-tag) token ----
                token.setWord(XMLUtils.unescapeStringForXML(token.word()));
                if (matchDepth > 0 || keepTokensOutsideTags) {
                    newTokens.add(token);
                    if (inUtterance) {
                        token.set(CoreAnnotations.UtteranceAnnotation.class, utteranceIndex);
                        if (currentSpeaker != null) {
                            token.set(CoreAnnotations.SpeakerAnnotation.class, currentSpeaker);
                        }
                    }
                    if (markSingleSentence) {
                        // Only the first kept token after the start tag carries the flag.
                        token.set(CoreAnnotations.ForcedSentenceUntilEndAnnotation.class, true);
                        markSingleSentence = false;
                    }
                    if (tokenAnnotations != null) {
                        ChunkAnnotationUtils.copyUnsetAnnotations(tokenAnnotations, token);
                    }
                }
                if (removedText.length() > 0) {
                    String before = token.get(CoreAnnotations.BeforeAnnotation.class);
                    if (before != null) {
                        token.set(CoreAnnotations.BeforeAnnotation.class, removedText + before);
                        // NOTE(review): index size() - 2 presumes the current token was just
                        // appended above; when it was dropped instead, the token before the
                        // last kept one is updated. Kept as is - confirm whether intended.
                        if (newTokens.size() > 1) {
                            CoreLabel previous = newTokens.get(newTokens.size() - 2);
                            String after = previous.get(CoreAnnotations.AfterAnnotation.class);
                            if (after != null) {
                                previous.set(CoreAnnotations.AfterAnnotation.class, after + removedText);
                            } else {
                                previous.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
                            }
                        }
                    }
                    removedText = new StringBuilder();
                }
                if (currentTagSet == null) {
                    currentTagSet = Collections.unmodifiableList(new ArrayList<String>(enclosingTags));
                }
                token.set(CoreAnnotations.XmlContextAnnotation.class, currentTagSet);
                if (innermostTagMatches(this.dateTagMatcher, currentTagSet)) {
                    docDateTokens.add(token);
                }
                if (innermostTagMatches(this.docIdTagMatcher, currentTagSet)) {
                    docIdTokens.add(token);
                }
                if (innermostTagMatches(this.docTypeTagMatcher, currentTagSet)) {
                    docTypeTokens.add(token);
                }
                if (inSpeakerTag) {
                    speakerTokens.add(token);
                }
                if (sectionStartTag != null) {
                    boolean discarded = this.ssplitDiscardTokensMatcher != null && this.ssplitDiscardTokensMatcher.matcher(token.word()).matches();
                    if (!discarded) {
                        if (sectionStartToken == null) {
                            sectionStartToken = token;
                        }
                        for (List<CoreLabel> saved : savedTokensForSection.values()) {
                            saved.add(token);
                        }
                    }
                }
                continue;
            }

            // ---- Tag token: always removed; remember its text for the neighbours ----
            String currentRemoval = token.get(CoreAnnotations.BeforeAnnotation.class);
            if (currentRemoval != null) {
                removedText.append(currentRemoval);
            }
            currentRemoval = token.get(CoreAnnotations.OriginalTextAnnotation.class);
            if (currentRemoval != null) {
                removedText.append(currentRemoval);
            }
            if (token == tokens.get(tokens.size() - 1)) {
                // The very last token has no successor whose "before" would cover its "after".
                currentRemoval = token.get(CoreAnnotations.AfterAnnotation.class);
                if (currentRemoval != null) {
                    removedText.append(currentRemoval);
                }
            }

            // Document level annotations taken from tag attributes. Skipped without a
            // document, since there is nothing to set them on.
            if (annotation != null && !toAnnotate.isEmpty() && tag.attributes != null) {
                Set<Class> foundAnnotations = annotateWithTag(annotation, annotation, tag, this.docAnnotationPatterns, null, toAnnotate, null);
                toAnnotate.removeAll(foundAnnotations);
            }

            // Sections.
            if (this.sectionTagMatcher != null && this.sectionTagMatcher.matcher(tag.name).matches()) {
                if (tag.isEndTag) {
                    // sectionStartTag and sectionAnnotations are set and cleared together. An
                    // end tag may arrive with no open section (orphan close tag, or nested
                    // section tags whose inner close already reset the state); then only the
                    // sentence boundary is marked.
                    if (sectionStartTag != null) {
                        annotateWithTag(annotation, sectionAnnotations, tag, this.sectionAnnotationPatterns, savedTokensForSection, null, null);
                    }
                    if (sectionStartToken != null) {
                        sectionStartToken.set(CoreAnnotations.SectionStartAnnotation.class, sectionAnnotations);
                    }
                    forceSentenceEnd(newTokens);
                    if (sectionStartTag != null && newTokens.size() > 0) {
                        newTokens.get(newTokens.size() - 1).set(CoreAnnotations.SectionEndAnnotation.class, sectionStartTag.name);
                    }
                    savedTokensForSection.clear();
                    sectionStartTag = null;
                    sectionStartToken = null;
                    sectionAnnotations = null;
                } else if (!tag.isSingleTag) {
                    sectionStartTag = tag;
                    sectionAnnotations = new ArrayCoreMap();
                    sectionAnnotations.set(CoreAnnotations.SectionAnnotation.class, sectionStartTag.name);
                }
            }
            if (sectionStartTag != null) {
                annotateWithTag(annotation, sectionAnnotations, tag, this.sectionAnnotationPatterns, savedTokensForSection, null, null);
            }

            // Token level annotations inherited from enclosing tags.
            if (tokenAnnotations != null) {
                annotateWithTag(annotation, tokenAnnotations, tag, this.tokenAnnotationPatterns, null, null, savedTokenAnnotations);
            }

            // Sentence ending tags.
            if (this.sentenceEndingTagMatcher != null && this.sentenceEndingTagMatcher.matcher(tag.name).matches()) {
                forceSentenceEnd(newTokens);
            }

            // Utterance turns.
            if (this.utteranceTurnTagMatcher != null && this.utteranceTurnTagMatcher.matcher(tag.name).matches()) {
                forceSentenceEnd(newTokens);
                inUtterance = !tag.isEndTag && !tag.isSingleTag;
                if (inUtterance) {
                    ++utteranceIndex;
                } else {
                    currentSpeaker = null;
                }
            }

            // Speakers.
            if (this.speakerTagMatcher != null && this.speakerTagMatcher.matcher(tag.name).matches()) {
                forceSentenceEnd(newTokens);
                inSpeakerTag = !tag.isEndTag && !tag.isSingleTag;
                if (tag.isEndTag) {
                    currentSpeaker = tokensToString(annotation, speakerTokens);
                    MultiTokenTag.Tag mentionTag = new MultiTokenTag.Tag(currentSpeaker, "Speaker", speakerTokens.size());
                    int i = 0;
                    for (CoreLabel t : speakerTokens) {
                        t.set(CoreAnnotations.SpeakerAnnotation.class, currentSpeaker);
                        t.set(CoreAnnotations.MentionTokenAnnotation.class, new MultiTokenTag(mentionTag, i));
                        ++i;
                    }
                } else {
                    currentSpeaker = null;
                }
                speakerTokens.clear();
            }

            // Single sentence tags.
            if (this.singleSentenceTagMatcher != null && this.singleSentenceTagMatcher.matcher(tag.name).matches()) {
                if (tag.isEndTag) {
                    forceSentenceEnd(newTokens);
                    markSingleSentence = false;
                } else if (!tag.isSingleTag) {
                    markSingleSentence = true;
                }
            }

            // Tag nesting is only tracked when a tag pattern is configured; self-closing
            // tags never change the nesting.
            if (this.xmlTagMatcher == null || tag.isSingleTag) {
                continue;
            }
            currentTagSet = null;
            if (tag.isEndTag) {
                // Pop until the matching open tag is found; popping more than one tag is
                // only permitted when flawed XML is allowed.
                while (true) {
                    if (enclosingTags.isEmpty()) {
                        throw new IllegalArgumentException("Got a close tag " + tag.name + " which does not match" + " any open tag");
                    }
                    String lastTag = enclosingTags.pop();
                    if (this.xmlTagMatcher.matcher(lastTag).matches()) {
                        --matchDepth;
                    }
                    if (lastTag.equals(tag.name)) {
                        break;
                    }
                    if (!this.allowFlawedXml) {
                        throw new IllegalArgumentException("Mismatched tags... " + tag.name + " closed a " + lastTag + " tag.");
                    }
                }
                if (matchDepth < 0) {
                    throw new AssertionError("Programming error?  We think there have been more close tags than open tags");
                }
            } else {
                enclosingTags.push(tag.name);
                if (this.xmlTagMatcher.matcher(tag.name).matches()) {
                    ++matchDepth;
                }
            }
        }

        if (enclosingTags.size() > 0 && !this.allowFlawedXml) {
            throw new IllegalArgumentException("Unclosed tags, starting with " + enclosingTags.pop());
        }

        // Text removed after the last kept token goes into that token's "after" text, but
        // only when the token carries its original text.
        if (newTokens.size() > 0 && removedText.length() > 0) {
            CoreLabel lastToken = newTokens.get(newTokens.size() - 1);
            if (lastToken.get(CoreAnnotations.OriginalTextAnnotation.class) != null) {
                lastToken.set(CoreAnnotations.AfterAnnotation.class, removedText.toString());
            }
        }

        // Document level annotations built from the collected tokens.
        if (annotation != null) {
            if (!docIdTokens.isEmpty()) {
                annotation.set(CoreAnnotations.DocIDAnnotation.class, tokensToString(annotation, docIdTokens).trim());
            }
            if (!docDateTokens.isEmpty()) {
                annotation.set(CoreAnnotations.DocDateAnnotation.class, tokensToString(annotation, docDateTokens).trim());
            }
            if (!docTypeTokens.isEmpty()) {
                annotation.set(CoreAnnotations.DocTypeAnnotation.class, tokensToString(annotation, docTypeTokens).trim());
            }
        }
        return newTokens;
    }

    /** Requires the tokens annotation. */
    @Override
    public Set<Class<? extends CoreAnnotation>> requires() {
        return Collections.singleton(CoreAnnotations.TokensAnnotation.class);
    }

    /** Declares no newly satisfied requirements. */
    @Override
    public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
        return Collections.emptySet();
    }
}

