/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.trees.international.french;

import edu.stanford.nlp.io.ReaderInputStream;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasCategory;
import edu.stanford.nlp.ling.HasContext;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeNormalizer;
import edu.stanford.nlp.trees.TreeReader;
import edu.stanford.nlp.trees.international.french.FrenchTreeNormalizer;
import edu.stanford.nlp.trees.international.french.FrenchTreebankLanguagePack;
import edu.stanford.nlp.trees.international.french.FrenchXMLTreeReaderFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.XMLUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * A {@link TreeReader} that parses an XML document and produces one
 * {@link Tree} for every {@code SENT} element found under the document root.
 *
 * <p>The whole document is parsed eagerly in the constructor; {@link #readTree()}
 * then walks the list of sentence elements one at a time.
 */
public class FrenchXMLTreeReader implements TreeReader {
    private InputStream stream;
    private final TreeNormalizer treeNormalizer;
    private final TreeFactory treeFactory;

    // XML element names.
    private static final String NODE_SENT = "SENT";
    private static final String NODE_WORD = "w";

    // XML attribute names.
    private static final String ATTR_NUMBER = "nb";
    private static final String ATTR_POS = "cat";
    private static final String ATTR_POS_MWE = "catint";
    private static final String ATTR_LEMMA = "lemma";
    // NOTE(review): ATTR_MORPH is never read in this class; getMorph() reads ATTR_EE instead.
    private static final String ATTR_MORPH = "mph";
    private static final String ATTR_EE = "ee";
    private static final String ATTR_SUBCAT = "subcat";

    /** Prefix prepended to the label of a multi-word-expression node. */
    private static final String MWE_PHRASAL = "MW";

    /** Token used for a word element with no text. */
    public static final String EMPTY_LEAF = "-NONE-";
    /** Phrasal label used when a single word element expands to several tokens. */
    public static final String MISSING_PHRASAL = "DUMMYP";
    /** POS label used when no POS attribute is available. */
    public static final String MISSING_POS = "DUMMY";

    /** Sentence elements of the parsed document; stays {@code null} if parsing failed. */
    private NodeList sentences;
    /** Index of the next sentence element to convert. */
    private int sentIdx;

    /**
     * Creates a reader that uses a {@link LabeledScoredTreeFactory} and a
     * {@link FrenchTreeNormalizer}.
     *
     * @param in       source of the XML document
     * @param ccTagset flag forwarded to the {@link FrenchTreeNormalizer} constructor
     */
    public FrenchXMLTreeReader(Reader in, boolean ccTagset) {
        this(in, new LabeledScoredTreeFactory(), new FrenchTreeNormalizer(ccTagset));
    }

    /**
     * Creates a reader and immediately parses the whole XML document.
     *
     * <p>If parsing fails with a {@link SAXException} or {@link IOException}, the
     * stack trace is printed and the reader is left without sentences, so
     * {@link #readTree()} returns {@code null} right away.
     *
     * @param in source of the XML document
     * @param tf factory used to build leaves and internal nodes
     * @param tn normalizer applied to terminals, nonterminals and whole trees
     */
    public FrenchXMLTreeReader(Reader in, TreeFactory tf, TreeNormalizer tn) {
        FrenchTreebankLanguagePack tlp = new FrenchTreebankLanguagePack();
        this.stream = new ReaderInputStream(in, tlp.getEncoding());
        this.treeFactory = tf;
        this.treeNormalizer = tn;
        DocumentBuilder parser = XMLUtils.getXmlParser();
        try {
            Document xml = parser.parse(this.stream);
            Element root = xml.getDocumentElement();
            this.sentences = root.getElementsByTagName(NODE_SENT);
            this.sentIdx = 0;
        }
        catch (SAXException e) {
            // Best effort: report and leave `sentences` null so readTree() yields nothing.
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Closes the underlying stream. Calling this more than once is harmless
     * because the stream reference is cleared after the first successful close.
     */
    @Override
    public void close() {
        try {
            if (this.stream != null) {
                this.stream.close();
                this.stream = null;
            }
        }
        catch (IOException ignored) {
            // Deliberately ignored: close() is best effort and declares no exception.
        }
    }

    /**
     * Returns the next non-empty tree, or {@code null} when no sentences remain
     * (or the document could not be parsed). Sentences that convert to an empty
     * tree are skipped.
     *
     * <p>When the root label of the normalized tree is a {@link CoreLabel}, the
     * sentence element's {@code nb} attribute is stored on it as
     * {@link CoreAnnotations.SentenceIDAnnotation}.
     *
     * @return the next tree, or {@code null} at end of input
     */
    @Override
    public Tree readTree() {
        Tree t = null;
        while (t == null && this.sentences != null && this.sentIdx < this.sentences.getLength()) {
            Node sentRoot = this.sentences.item(this.sentIdx++);
            t = this.getTreeFromXML(sentRoot);
            if (t == null) {
                continue;
            }
            t = this.treeNormalizer.normalizeWholeTree(t, this.treeFactory);
            // NOTE(review): guard in case a normalizer returns null; such a sentence is
            // skipped instead of triggering a NullPointerException below.
            if (t == null) {
                continue;
            }
            if (t.label() instanceof CoreLabel) {
                String ftbId = ((Element) sentRoot).getAttribute(ATTR_NUMBER);
                ((CoreLabel) t.label()).set(CoreAnnotations.SentenceIDAnnotation.class, ftbId);
            }
        }
        return t;
    }

    /**
     * Determines the POS tag of a word element: the trimmed {@code cat} attribute
     * if non-empty, otherwise the trimmed {@code catint} attribute if non-empty,
     * otherwise {@link #MISSING_POS}.
     */
    private String getPOS(Element node) {
        String attrPOS = node.hasAttribute(ATTR_POS) ? node.getAttribute(ATTR_POS).trim() : "";
        String attrPOSMWE = node.hasAttribute(ATTR_POS_MWE) ? node.getAttribute(ATTR_POS_MWE).trim() : "";
        // Compare by content, not by reference: an attribute that is present but
        // empty must fall through to the next candidate.
        if (!attrPOS.isEmpty()) {
            return attrPOS;
        }
        if (!attrPOSMWE.isEmpty()) {
            return attrPOSMWE;
        }
        return MISSING_POS;
    }

    /**
     * Returns the tokenized {@code lemma} attribute of a word element, or
     * {@code null} when the attribute is missing or empty.
     */
    private List<String> getLemma(Element node) {
        String lemma = node.getAttribute(ATTR_LEMMA);
        if (lemma == null || lemma.isEmpty()) {
            return null;
        }
        return this.getWordString(lemma);
    }

    /**
     * Returns the value of the {@code ee} attribute, or the empty string if the
     * DOM implementation returns {@code null}.
     */
    private String getMorph(Element node) {
        String ee = node.getAttribute(ATTR_EE);
        return ee == null ? "" : ee;
    }

    /**
     * Returns the value of the {@code subcat} attribute, or the empty string if
     * the DOM implementation returns {@code null}.
     */
    private String getSubcat(Element node) {
        String subcat = node.getAttribute(ATTR_SUBCAT);
        return subcat == null ? "" : subcat;
    }

    /**
     * Splits the text of a word (or lemma) into tokens.
     *
     * <ul>
     *   <li>{@code null} or empty text yields the single token {@link #EMPTY_LEAF}.</li>
     *   <li>Parentheses are stripped from text longer than one character.</li>
     *   <li>Text that is all digits or all punctuation once whitespace is removed
     *       is kept as one token (with the whitespace removed).</li>
     *   <li>Anything else is split on runs of whitespace.</li>
     * </ul>
     *
     * @throws RuntimeException if the split produces no tokens at all
     */
    private List<String> getWordString(String text) {
        List<String> toks = new ArrayList<String>();
        if (text == null || text.isEmpty()) {
            toks.add(EMPTY_LEAF);
        } else {
            // A lone "(" or ")" is a legitimate token, so only strip from longer strings.
            if (text.length() > 1) {
                text = text.replaceAll("[\\(\\)]", "");
            }
            String noWhitespaceStr = text.replaceAll("\\s+", "");
            if (noWhitespaceStr.matches("\\d+") || noWhitespaceStr.matches("\\p{Punct}+")) {
                toks.add(noWhitespaceStr);
            } else {
                toks = Arrays.asList(text.split("\\s+"));
            }
        }
        if (toks.isEmpty()) {
            throw new RuntimeException(this.getClass().getName() + ": Zero length token list for: " + text);
        }
        return toks;
    }

    /**
     * Recursively converts an XML element into a tree.
     *
     * <p>A {@code w} element with no nested {@code w} descendants becomes a
     * preterminal subtree. Any other element becomes an internal node over the
     * trees of its element children; an element with no convertible children
     * yields {@code null}.
     *
     * @param root the element to convert (must be an {@link Element})
     * @return the converted tree, or {@code null} if it would be empty
     */
    private Tree getTreeFromXML(Node root) {
        Element eRoot = (Element) root;
        if (eRoot.getNodeName().equals(NODE_WORD) && eRoot.getElementsByTagName(NODE_WORD).getLength() == 0) {
            return this.buildWordSubtree(eRoot);
        }

        List<Tree> kids = new ArrayList<Tree>();
        for (Node childNode = eRoot.getFirstChild(); childNode != null; childNode = childNode.getNextSibling()) {
            if (childNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Tree kid = this.getTreeFromXML(childNode);
            if (kid == null) {
                System.err.printf("%s: Discarding empty tree (root: %s)%n", this.getClass().getName(), childNode.getNodeName());
                continue;
            }
            kids.add(kid);
        }

        // A "w" element that wraps other "w" elements and carries a POS is a
        // multi-word expression; it is labeled with that POS instead of "w".
        String rootLabel = eRoot.getNodeName().trim();
        boolean isMWE = rootLabel.equals(NODE_WORD) && eRoot.hasAttribute(ATTR_POS);
        if (isMWE) {
            rootLabel = eRoot.getAttribute(ATTR_POS).trim();
        }
        if (kids.isEmpty()) {
            return null;
        }
        Tree t = this.treeFactory.newTreeNode(this.treeNormalizer.normalizeNonterminal(rootLabel), kids);
        return isMWE ? this.postProcessMWE(t) : t;
    }

    /**
     * Builds the subtree for a terminal {@code w} element.
     *
     * <p>If the element text is a single token, the result is a preterminal
     * labeled with the element's normalized POS. If the text splits into several
     * tokens, each token gets a {@link #MISSING_POS} preterminal and the result
     * is a {@link #MISSING_PHRASAL} node over them.
     */
    private Tree buildWordSubtree(Element wordElement) {
        String posStr = this.treeNormalizer.normalizeNonterminal(this.getPOS(wordElement));
        List<String> lemmas = this.getLemma(wordElement);
        String morph = this.getMorph(wordElement);
        List<String> leafToks = this.getWordString(wordElement.getTextContent().trim());
        String subcat = this.getSubcat(wordElement);

        // Lemmas are only usable if they align one-to-one with the tokens.
        if (lemmas != null && lemmas.size() != leafToks.size()) {
            System.err.println("Lemmas don't match tokens, ignoring lemmas: lemmas " + lemmas + ", tokens " + leafToks);
            lemmas = null;
        }

        List<Tree> kids = new ArrayList<Tree>();
        if (leafToks.size() > 1) {
            for (int i = 0; i < leafToks.size(); ++i) {
                String lemma = lemmas == null ? null : lemmas.get(i);
                List<Tree> leafList = new ArrayList<Tree>();
                leafList.add(this.newAnnotatedLeaf(leafToks.get(i), lemma, morph, subcat));
                Tree posNode = this.treeFactory.newTreeNode(MISSING_POS, leafList);
                if (posNode.label() instanceof HasTag) {
                    ((HasTag) posNode.label()).setTag(MISSING_POS);
                }
                kids.add(posNode);
            }
            return this.treeFactory.newTreeNode(MISSING_PHRASAL, kids);
        }

        String lemma = lemmas == null ? null : lemmas.get(0);
        kids.add(this.newAnnotatedLeaf(leafToks.get(0), lemma, morph, subcat));
        Tree preterminal = this.treeFactory.newTreeNode(posStr, kids);
        if (preterminal.label() instanceof HasTag) {
            ((HasTag) preterminal.label()).setTag(posStr);
        }
        return preterminal;
    }

    /**
     * Creates a leaf for one token and copies the word-level annotations onto
     * its label, for each annotation interface the label implements.
     *
     * @param rawToken token text before terminal normalization
     * @param lemma    lemma for this token, or {@code null} to leave it unset
     * @param morph    value stored as the label's original text
     * @param subcat   value stored as the label's category
     */
    private Tree newAnnotatedLeaf(String rawToken, String lemma, String morph, String subcat) {
        String word = this.treeNormalizer.normalizeTerminal(rawToken);
        Tree leafNode = this.treeFactory.newLeaf(word);
        Label label = leafNode.label();
        if (label instanceof HasWord) {
            ((HasWord) label).setWord(word);
        }
        if (label instanceof CoreLabel && lemma != null) {
            ((CoreLabel) label).setLemma(lemma);
        }
        if (label instanceof HasContext) {
            ((HasContext) label).setOriginalText(morph);
        }
        if (label instanceof HasCategory) {
            ((HasCategory) label).setCategory(subcat);
        }
        return leafNode;
    }

    /**
     * Post-processes a multi-word-expression node. If its yield (whitespace
     * removed) consists only of digits and punctuation, the node is collapsed to
     * a single leaf holding that string; otherwise its label gets the
     * {@code MW} prefix.
     */
    private Tree postProcessMWE(Tree t) {
        String tYield = Sentence.listToString(t.yield()).replaceAll("\\s+", "");
        if (tYield.matches("[\\d\\p{Punct}]*")) {
            List<Tree> kids = new ArrayList<Tree>();
            kids.add(this.treeFactory.newLeaf(tYield));
            t = this.treeFactory.newTreeNode(t.value(), kids);
        } else {
            t.setValue(MWE_PHRASAL + t.value());
        }
        return t;
    }

    /**
     * Reads every tree from the given files and prints each one to standard
     * output as {@code <file-name-without-extension>-<sentence-id>\t<tree>}.
     * Per-file and total tree counts go to standard error.
     *
     * @param args one or more tree file paths
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.printf("Usage: java %s tree_file(s)%n%n", FrenchXMLTreeReader.class.getName());
            System.exit(-1);
        }
        List<File> fileList = new ArrayList<File>();
        for (String arg : args) {
            fileList.add(new File(arg));
        }
        FrenchXMLTreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
        int totalTrees = 0;
        // Collected while reading; not currently reported anywhere.
        Set<String> morphAnalyses = Generics.newHashSet();
        try {
            for (File file : fileList) {
                TreeReader tr = trf.newTreeReader(new BufferedReader(new InputStreamReader((InputStream) new FileInputStream(file), "UTF-8")));
                // Strip the extension if there is one; a name without '.' is used as is.
                String fileName = file.getName();
                int dotIdx = fileName.lastIndexOf('.');
                String canonicalFileName = dotIdx < 0 ? fileName : fileName.substring(0, dotIdx);
                int numTrees = 0;
                try {
                    Tree t;
                    while ((t = tr.readTree()) != null) {
                        String ftbID = (String) ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
                        System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, t.toString());
                        for (Label label : t.yield()) {
                            if (!(label instanceof CoreLabel)) {
                                continue;
                            }
                            morphAnalyses.add(((CoreLabel) label).originalText());
                        }
                        ++numTrees;
                    }
                } finally {
                    // Release the file even if reading a tree fails.
                    tr.close();
                }
                System.err.printf("%s: %d trees%n", file.getName(), numTrees);
                totalTrees += numTrees;
            }
            System.err.printf("%nRead %d trees%n", totalTrees);
        }
        catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }
}

