/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.spanish;

import edu.stanford.nlp.io.ReaderInputStream;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasLemma;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.international.spanish.SpanishTreeNormalizer;
import edu.stanford.nlp.trees.international.spanish.SpanishTreebankLanguagePack;
import edu.stanford.nlp.trees.international.spanish.SpanishXMLTreeReaderFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.RuntimeInterruptedException;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.XMLUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

public class SpanishXMLTreeReader
implements TreeReader {
    /** Redwood logging channel for this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(SpanishXMLTreeReader.class);
    /** Byte stream wrapping the reader given to the constructor; set to null once closed. */
    private InputStream stream;
    /** Normalizer applied to terminals, nonterminals and whole trees. */
    private final TreeNormalizer treeNormalizer;
    /** Factory used to create every leaf and internal node. */
    private final TreeFactory treeFactory;
    /** Enables the extra part-of-speech fallbacks in {@code getPOS}. */
    private final boolean simplifiedTagset;
    /** When true, constituent labels get a "-coord" or "-clausetype" suffix. */
    private final boolean detailedAnnotations;
    /** Tag name of the XML elements that each hold one sentence. */
    private static final String NODE_SENT = "sentence";
    /** Attribute holding the surface word of a token. */
    private static final String ATTR_WORD = "wd";
    /** Attribute holding the lemma of a token. */
    private static final String ATTR_LEMMA = "lem";
    /** Attribute name "func"; not referenced elsewhere in this class. */
    private static final String ATTR_FUNC = "func";
    /** Attribute holding the named-entity type of a token. */
    private static final String ATTR_NAMED_ENTITY = "ne";
    /** Attribute holding the part-of-speech tag of a token. */
    private static final String ATTR_POS = "pos";
    /** Attribute holding the part-of-speech subtype (e.g. "relative", "proper"). */
    private static final String ATTR_POSTYPE = "postype";
    /** Attribute whose presence marks an elliptic (empty) node. */
    private static final String ATTR_ELLIPTIC = "elliptic";
    /** Attribute whose presence marks a punctuation token. */
    private static final String ATTR_PUNCT = "punct";
    /** Attribute holding grammatical gender. */
    private static final String ATTR_GENDER = "gen";
    /** Attribute holding grammatical number. */
    private static final String ATTR_NUMBER = "num";
    /** Attribute whose value "yes" marks a coordinating constituent. */
    private static final String ATTR_COORDINATING = "coord";
    /** Attribute holding the clause type of a constituent. */
    private static final String ATTR_CLAUSE_TYPE = "clausetype";
    /** Sentence elements of the parsed document; stays null if parsing failed. */
    private NodeList sentences;
    /** Cursor into {@link #sentences}, advanced by {@code readTree}. */
    private int sentIdx;

    /**
     * Creates a reader over an XML treebank document and parses it eagerly.
     *
     * <p>Parsing is best-effort: if the document cannot be read or parsed, the
     * problem is reported and the reader is left without sentences, so
     * {@code readTree} returns {@code null} immediately.
     *
     * @param filename name of the source, used only in diagnostics
     * @param in reader supplying the XML document
     * @param simplifiedTagset whether to use the simplified tagset; also passed to the normalizer
     * @param aggressiveNormalization passed through to {@link SpanishTreeNormalizer}
     * @param retainNER passed through to {@link SpanishTreeNormalizer}
     * @param detailedAnnotations whether constituent labels keep coordination / clause-type suffixes
     */
    public SpanishXMLTreeReader(String filename, Reader in, boolean simplifiedTagset, boolean aggressiveNormalization, boolean retainNER, boolean detailedAnnotations) {
        SpanishTreebankLanguagePack tlp = new SpanishTreebankLanguagePack();
        this.simplifiedTagset = simplifiedTagset;
        this.detailedAnnotations = detailedAnnotations;
        this.stream = new ReaderInputStream(in, tlp.getEncoding());
        this.treeFactory = new LabeledScoredTreeFactory();
        this.treeNormalizer = new SpanishTreeNormalizer(simplifiedTagset, aggressiveNormalization, retainNER);
        DocumentBuilder parser = XMLUtils.getXmlParser();
        try {
            Document xml = parser.parse(this.stream);
            Element root = xml.getDocumentElement();
            this.sentences = root.getElementsByTagName(NODE_SENT);
            this.sentIdx = 0;
        }
        catch (SAXException e) {
            log.info("Parse exception while reading " + filename);
            e.printStackTrace();
        }
        catch (IOException e) {
            // Name the file here too, so a failure among many inputs can be traced.
            log.info("I/O exception while reading " + filename);
            e.printStackTrace();
        }
    }

    /**
     * Closes the underlying stream if it is still open. Calling this again after
     * a successful close does nothing.
     */
    @Override
    public void close() {
        if (this.stream == null) {
            return;
        }
        try {
            this.stream.close();
            this.stream = null;
        }
        catch (IOException ignored) {
            // Best-effort close: a failure here is deliberately not propagated.
        }
    }

    /**
     * Reads the next non-empty tree from the document.
     *
     * <p>Sentences that produce no tree are skipped. When the root label of the
     * normalized tree is a {@link CoreLabel}, the zero-based index of the
     * sentence within the document is stored on it as
     * {@code SentenceIDAnnotation}.
     *
     * @return the next tree, or {@code null} when no sentences remain or the
     *         document could not be parsed
     */
    @Override
    public Tree readTree() {
        Tree t = null;
        while (t == null && this.sentences != null && this.sentIdx < this.sentences.getLength()) {
            // Capture the index of the sentence being read, then advance the cursor.
            int thisSentenceId = this.sentIdx++;
            Node sentRoot = this.sentences.item(thisSentenceId);
            t = this.getTreeFromXML(sentRoot);
            if (t == null) {
                continue;
            }
            t = this.treeNormalizer.normalizeWholeTree(t, this.treeFactory);
            // If the normalizer hands back null, the loop moves on to the next sentence.
            if (t != null && t.label() instanceof CoreLabel) {
                ((CoreLabel)t.label()).set(CoreAnnotations.SentenceIDAnnotation.class, Integer.toString(thisSentenceId));
            }
        }
        return t;
    }

    /**
     * Tells whether an element is a token: it carries a word attribute and has
     * no child nodes.
     */
    private boolean isWordNode(Element node) {
        if (!node.hasAttribute(ATTR_WORD)) {
            return false;
        }
        return !node.hasChildNodes();
    }

    /** Tells whether an element carries the elliptic attribute. */
    private boolean isEllipticNode(Element node) {
        boolean markedElliptic = node.hasAttribute(ATTR_ELLIPTIC);
        return markedElliptic;
    }

    /**
     * Determines the part-of-speech tag for a token element.
     *
     * <p>A seven-character proper-noun tag ("np....0") has its last character
     * replaced by a code for the named-entity type ('l', 'p', 'o', or '0').
     * Any other non-empty tag is returned unchanged. When the element has no
     * tag, one is guessed from the word, the named-entity type, the element
     * name and the part-of-speech subtype; extra guesses apply when the
     * simplified tagset is enabled.
     *
     * @param node the token element
     * @return the tag, or the empty string when none is present and no guess applies
     */
    private String getPOS(Element node) {
        String pos = node.getAttribute(ATTR_POS);
        String namedAttribute = node.getAttribute(ATTR_NAMED_ENTITY);
        if (pos.startsWith("np") && pos.length() == 7 && pos.charAt(pos.length() - 1) == '0') {
            char annotation = '0';
            if (namedAttribute.equals("location")) {
                annotation = 'l';
            } else if (namedAttribute.equals("person")) {
                annotation = 'p';
            } else if (namedAttribute.equals("organization")) {
                annotation = 'o';
            }
            return pos.substring(0, 6) + annotation;
        }
        if (!pos.equals("")) {
            return pos;
        }

        // No tag in the XML: fall back to heuristics, most specific first.
        String word = this.getWord(node);
        if (word.equals(".")) {
            return "fp";
        }
        if (namedAttribute.equals("date")) {
            return "w";
        }
        if (namedAttribute.equals("number")) {
            return "z0";
        }
        String tagName = node.getTagName();
        if (tagName.equals("i")) {
            return "i";
        }
        if (tagName.equals("r")) {
            return "rg";
        }
        if (tagName.equals("z")) {
            return "z0";
        }
        String posType = node.getAttribute(ATTR_POSTYPE);
        if (tagName.equals("c") && posType.equals("subordinating")) {
            return "cs";
        }
        if (tagName.equals("p") && posType.equals("relative") && word.equalsIgnoreCase("que")) {
            return "pr0cn000";
        }
        if (tagName.equals("s") && (word.equalsIgnoreCase("de") || word.equalsIgnoreCase("del") || word.equalsIgnoreCase("en"))) {
            return "sps00";
        }
        if (word.equals("REGRESA")) {
            return "vmip3s0";
        }
        if (this.simplifiedTagset) {
            String simplified = this.guessSimplifiedTag(node, tagName, posType, word);
            if (simplified != null) {
                return simplified;
            }
        }
        if (node.hasAttribute(ATTR_PUNCT)) {
            return SpanishXMLTreeReader.guessPunctuationTag(word);
        }
        return pos;
    }

    /** Guesses applied only with the simplified tagset; returns null when none matches. */
    private String guessSimplifiedTag(Element node, String tagName, String posType, String word) {
        if (word.equals("verme")) {
            return "vmn0000";
        }
        if (tagName.equals("a")) {
            return "aq0000";
        }
        if (posType.equals("proper")) {
            return "np00000";
        }
        if (posType.equals("common")) {
            return "nc0s000";
        }
        if (tagName.equals("d") && posType.equals("numeral")) {
            return "dn0000";
        }
        if (tagName.equals("d") && (posType.equals("article") || word.equalsIgnoreCase("el") || word.equalsIgnoreCase("la"))) {
            return "da0000";
        }
        if (tagName.equals("p") && posType.equals("relative")) {
            return "pr000000";
        }
        if (tagName.equals("p") && posType.equals("personal")) {
            return "pp000000";
        }
        if (tagName.equals("p") && posType.equals("indefinite")) {
            return "pi000000";
        }
        if (tagName.equals("s") && word.equalsIgnoreCase("como")) {
            return "sp000";
        }
        if (tagName.equals("n")) {
            return SpanishXMLTreeReader.buildNounTag(node.getAttribute(ATTR_GENDER), node.getAttribute(ATTR_NUMBER));
        }
        return null;
    }

    /**
     * Builds the noun tag "n" + gender + "0" + number + "000" as text. The
     * pieces are appended one by one so the character codes are never summed
     * as integers.
     */
    private static String buildNounTag(String gen, String num) {
        return new StringBuilder("n")
            .append(SpanishXMLTreeReader.firstCharOrZero(gen))
            .append('0')
            .append(SpanishXMLTreeReader.firstCharOrZero(num))
            .append("000")
            .toString();
    }

    /** Returns the first character of the value, or '0' when it is null or empty. */
    private static char firstCharOrZero(String value) {
        return value == null || value.isEmpty() ? '0' : value.charAt(0);
    }

    /** Maps a punctuation word to its tag; anything unrecognized gets "fz". */
    private static String guessPunctuationTag(String word) {
        if (word.equals("\"")) {
            return "fe";
        }
        if (word.equals("'")) {
            return "fz";
        }
        if (word.equals("-")) {
            return "fg";
        }
        if (word.equals("(")) {
            return "fpa";
        }
        if (word.equals(")")) {
            return "fpt";
        }
        return "fz";
    }

    /**
     * Returns the trimmed word attribute of an element, or the placeholder
     * "=NONE=" when the attribute is empty.
     */
    private String getWord(Element node) {
        String raw = node.getAttribute(ATTR_WORD);
        return raw.isEmpty() ? "=NONE=" : raw.trim();
    }

    /**
     * Recursively converts an XML element into a tree.
     *
     * <p>Token and elliptic elements become preterminals. Any other element
     * becomes a constituent over the trees of its element children; children
     * that yield no tree are reported on standard error and dropped.
     *
     * @param root the element to convert (must be an {@link Element})
     * @return the tree, or {@code null} when the element yields no subtrees
     */
    private Tree getTreeFromXML(Node root) {
        Element element = (Element)root;
        if (this.isWordNode(element)) {
            return this.buildWordNode(element);
        }
        if (this.isEllipticNode(element)) {
            return this.buildEllipticNode(element);
        }
        List<Tree> subtrees = new ArrayList<Tree>();
        Node child = element.getFirstChild();
        while (child != null) {
            // Only element children can carry tree structure; skip text, comments, etc.
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Tree subtree = this.getTreeFromXML(child);
                if (subtree != null) {
                    subtrees.add(subtree);
                } else {
                    System.err.printf("%s: Discarding empty tree (root: %s)%n", this.getClass().getName(), child.getNodeName());
                }
            }
            child = child.getNextSibling();
        }
        if (subtrees.isEmpty()) {
            return null;
        }
        return this.buildConstituentNode(element, subtrees);
    }

    /**
     * Builds a preterminal for a token element: a node labelled with the
     * normalized part-of-speech tag over a single leaf holding the normalized
     * word. Word, lemma and tag are also set on the labels when the label
     * types support them.
     *
     * @param root the token element (must be an {@link Element})
     * @return the preterminal tree
     */
    private Tree buildWordNode(Node root) {
        Element element = (Element)root;
        String tag = this.treeNormalizer.normalizeNonterminal(this.getPOS(element));
        String lemma = element.getAttribute(ATTR_LEMMA);
        String terminal = this.treeNormalizer.normalizeTerminal(this.getWord(element));

        Tree leaf = this.treeFactory.newLeaf(terminal);
        if (leaf.label() instanceof HasWord) {
            ((HasWord)leaf.label()).setWord(terminal);
        }
        if (leaf.label() instanceof HasLemma && lemma != null) {
            ((HasLemma)leaf.label()).setLemma(lemma);
        }

        List<Tree> children = new ArrayList<Tree>();
        children.add(leaf);
        Tree preterminal = this.treeFactory.newTreeNode(tag, children);
        if (preterminal.label() instanceof HasTag) {
            ((HasTag)preterminal.label()).setTag(tag);
        }
        return preterminal;
    }

    /**
     * Builds the tree for an elliptic element: a node named after the element
     * over a single "=NONE=" placeholder leaf.
     *
     * @param root the elliptic element (must be an {@link Element})
     * @return the resulting tree
     */
    private Tree buildEllipticNode(Node root) {
        String category = ((Element)root).getNodeName();
        List<Tree> children = new ArrayList<Tree>();
        Tree placeholder = this.treeFactory.newLeaf("=NONE=");
        if (placeholder.label() instanceof HasWord) {
            ((HasWord)placeholder.label()).setWord("=NONE=");
        }
        children.add(placeholder);
        return this.treeFactory.newTreeNode(category, children);
    }

    /**
     * Builds an internal node over the given children, labelled with the
     * normalized element name. With detailed annotations enabled, the label
     * gets a "-coord" suffix for coordinating constituents, or otherwise a
     * "-" plus clause-type suffix when that attribute is present.
     *
     * @param root the constituent element (must be an {@link Element})
     * @param children the subtrees to attach
     * @return the constituent tree
     */
    private Tree buildConstituentNode(Node root, List<Tree> children) {
        Element element = (Element)root;
        String category = element.getNodeName().trim();
        if (this.detailedAnnotations) {
            boolean coordinating = element.getAttribute(ATTR_COORDINATING).equals("yes");
            if (coordinating) {
                category += "-coord";
            } else if (element.hasAttribute(ATTR_CLAUSE_TYPE)) {
                category += "-" + element.getAttribute(ATTR_CLAUSE_TYPE);
            }
        }
        String normalized = this.treeNormalizer.normalizeNonterminal(category);
        return this.treeFactory.newTreeNode(normalized, children);
    }

    /**
     * Tells whether a tree contains at least one token satisfying both filters.
     *
     * @param tree the tree to inspect; preterminal and leaf labels must be {@link CoreLabel}s
     * @param pos pattern that must be found in the token's tag, or {@code null} for no tag filter
     * @param word pattern that must be found in the token's word, or {@code null} for no word filter
     * @return {@code true} if some preterminal passes both filters
     */
    public static boolean shouldPrintTree(Tree tree, Pattern pos, Pattern word) {
        for (Tree node : tree) {
            if (!node.isPreTerminal()) {
                continue;
            }
            String tagValue = ((CoreLabel)node.label()).value();
            String wordValue = ((CoreLabel)node.firstChild().label()).value();
            boolean tagMatches = pos == null || pos.matcher(tagValue).find();
            if (!tagMatches) {
                continue;
            }
            boolean wordMatches = word == null || word.matcher(wordValue).find();
            if (wordMatches) {
                return true;
            }
        }
        return false;
    }

    /**
     * Renders a tree for output.
     *
     * @param tree the tree to render; leaf labels must be {@link CoreLabel}s when {@code plainPrint} is set
     * @param plainPrint if {@code true}, emit only the leaf values, each followed by a space;
     *                   otherwise use {@code tree.toString()}
     * @return the rendered text
     */
    private static String toString(Tree tree, boolean plainPrint) {
        if (!plainPrint) {
            return tree.toString();
        }
        StringBuilder sb = new StringBuilder();
        // Typed list: iterating a raw List yields Object, which cannot be bound to a Tree loop variable.
        List<Tree> leaves = tree.getLeaves();
        for (Tree leaf : leaves) {
            sb.append(((CoreLabel)leaf.label()).value()).append(" ");
        }
        return sb.toString();
    }

    /**
     * Reads every tree from a reader and prints those that pass the filters.
     *
     * <p>Each retained tree is written to standard output as
     * {@code <file name without extension>-<sentence id>\t<tree>}. A summary
     * line with the counts is written to standard error.
     *
     * @param file the source file; only its name is used, for labelling output
     * @param tr the reader supplying trees whose root labels are {@link CoreLabel}s
     * @param posPattern tag filter, or {@code null} for none
     * @param wordPattern word filter, or {@code null} for none
     * @param plainPrint whether to print leaf words only instead of the full tree
     * @throws IOException if the reader fails
     */
    public static void process(File file, TreeReader tr, Pattern posPattern, Pattern wordPattern, boolean plainPrint) throws IOException {
        Tree t;
        int numTrees = 0;
        int numTreesRetained = 0;
        String fileName = file.getName();
        int dot = fileName.lastIndexOf('.');
        // A name without a dot is used whole; substring(0, -1) would throw.
        String canonicalFileName = dot < 0 ? fileName : fileName.substring(0, dot);
        while ((t = tr.readTree()) != null) {
            ++numTrees;
            if (!SpanishXMLTreeReader.shouldPrintTree(t, posPattern, wordPattern)) continue;
            ++numTreesRetained;
            String ftbID = (String)((CoreLabel)t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
            String output = SpanishXMLTreeReader.toString(t, plainPrint);
            System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, output);
        }
        System.err.printf("%s: %d trees, %d matched and printed%n", fileName, numTrees, numTreesRetained);
    }

    /** Builds the command-line help text: a usage line followed by one line per option. */
    private static String usage() {
        String nl = System.getProperty("line.separator");
        String[] optionLines = {
            "Options:",
            "   -help: Print this message",
            "   -ner: Add NER-specific information to trees",
            "   -detailedAnnotations: Retain detailed annotations on tree constituents (useful for making treebank for parser, etc.)",
            "   -plain: Output corpus in plaintext rather than as trees",
            "   -searchPos posRegex: Only print sentences which contain a token whose part of speech matches the given regular expression",
            "   -searchWord wordRegex: Only print sentences which contain a token which matches the given regular expression",
        };
        StringBuilder text = new StringBuilder();
        text.append(String.format("Usage: java %s [OPTIONS] file(s)%n%n", SpanishXMLTreeReader.class.getName()));
        for (String line : optionLines) {
            text.append(line).append(nl);
        }
        return text.toString();
    }

    /**
     * Describes the command-line options: each option name maps to the number
     * of arguments it takes.
     */
    private static Map<String, Integer> argOptionDefs() {
        Map<String, Integer> arity = Generics.newHashMap();
        // Flags that take no argument.
        for (String flag : new String[] {"help", "ner", "detailedAnnotations", "plain"}) {
            arity.put(flag, 0);
        }
        // Options followed by one value.
        for (String option : new String[] {"searchPos", "searchWord"}) {
            arity.put(option, 1);
        }
        return arity;
    }

    /**
     * Command-line entry point: reads each given treebank file on a thread
     * pool and prints the trees that pass the optional filters. Prints the
     * usage text when help is requested or no input files are given.
     *
     * @param args options followed by one or more file paths
     */
    public static void main(String[] args) {
        Properties options = StringUtils.argsToProperties(args, SpanishXMLTreeReader.argOptionDefs());
        // Properties.getProperty returns null for a missing key, e.g. when only option flags were given.
        String fileArgs = options.getProperty("");
        if (args.length < 1 || options.containsKey("help") || fileArgs == null || fileArgs.trim().isEmpty()) {
            log.info(SpanishXMLTreeReader.usage());
            return;
        }
        final Pattern posPattern = options.containsKey("searchPos") ? Pattern.compile(options.getProperty("searchPos")) : null;
        final Pattern wordPattern = options.containsKey("searchWord") ? Pattern.compile(options.getProperty("searchWord")) : null;
        final boolean plainPrint = PropertiesUtils.getBool(options, "plain", false);
        boolean ner = PropertiesUtils.getBool(options, "ner", false);
        boolean detailedAnnotations = PropertiesUtils.getBool(options, "detailedAnnotations", false);
        List<File> fileList = new ArrayList<File>();
        for (String remainingArg : fileArgs.split(" ")) {
            // Consecutive spaces produce empty entries; they are not file names.
            if (remainingArg.isEmpty()) {
                continue;
            }
            fileList.add(new File(remainingArg));
        }
        final SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory(true, true, ner, detailedAnnotations);
        ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        for (final File file : fileList) {
            pool.execute(new Runnable(){

                @Override
                public void run() {
                    try {
                        BufferedReader in = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(file), "ISO-8859-1"));
                        try {
                            TreeReader tr = trf.newTreeReader(file.getPath(), in);
                            try {
                                SpanishXMLTreeReader.process(file, tr, posPattern, wordPattern, plainPrint);
                            }
                            finally {
                                // Release the tree reader even when processing fails.
                                tr.close();
                            }
                        }
                        finally {
                            // Covers the case where the tree reader could not be created.
                            in.close();
                        }
                    }
                    catch (IOException e) {
                        // FileNotFoundException is an IOException, so one handler covers both.
                        e.printStackTrace();
                    }
                }
            });
        }
        pool.shutdown();
        try {
            pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        }
        catch (InterruptedException e) {
            throw new RuntimeInterruptedException(e);
        }
    }
}

