/*
 * Decompiled with CFR 0.152.
 */
package com.googlecode.clearnlp.experiment;

import com.carrotsearch.hppc.IntOpenHashSet;
import com.carrotsearch.hppc.cursors.IntCursor;
import com.googlecode.clearnlp.constituent.CTLibEn;
import com.googlecode.clearnlp.constituent.CTNode;
import com.googlecode.clearnlp.constituent.CTTree;
import com.googlecode.clearnlp.morphology.AbstractMPAnalyzer;
import com.googlecode.clearnlp.morphology.EnglishMPAnalyzer;
import com.googlecode.clearnlp.morphology.MPLibEn;
import com.googlecode.clearnlp.propbank.PBArg;
import com.googlecode.clearnlp.propbank.PBInstance;
import com.googlecode.clearnlp.propbank.PBLib;
import com.googlecode.clearnlp.propbank.PBLoc;
import com.googlecode.clearnlp.run.AbstractRun;
import com.googlecode.clearnlp.util.UTOutput;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.kohsuke.args4j.Option;

/**
 * Finds verb tokens in constituent trees that have no corresponding PropBank
 * instance and writes a placeholder instance for each of them.
 *
 * <p>Candidate verbs are sorted into four buckets (see {@link #isVerbPredicate});
 * each bucket is written to its own file named {@code <outFile>.<bucketIndex>}.
 */
public class PBFindMissingVerbs
extends AbstractRun {
    /** Bucket index: verb adjacent to a token tagged {@code HYPH}. */
    private static final int CAT_HYPHENATED = 0;
    /** Bucket index: "going", "used" or a form of "have" directly followed by a {@code TO} token. */
    private static final int CAT_AUXILIARY_LIKE = 1;
    /** Bucket index: forms of be / become / get / have. */
    private static final int CAT_LIGHT_VERB = 2;
    /** Bucket index: every other candidate verb. */
    private static final int CAT_OTHER = 3;
    /** Total number of buckets; must be one more than the largest bucket index. */
    private static final int CATEGORY_COUNT = 4;
    /** Return value of {@link #isVerbPredicate} for tokens that are not candidates. */
    private static final int NOT_A_PREDICATE = -1;

    @Option(name="-t", usage="the tree directory (input; required)", required=true, metaVar="<filepath>")
    String s_treeDir;
    @Option(name="-d", usage="the dictionary file (input; required)", required=true, metaVar="<filename>")
    String s_dictFile;
    @Option(name="-p", usage="the propbank file (input; required)", required=true, metaVar="<filename>")
    String s_propFile;
    @Option(name="-o", usage="the output file (output; required)", required=true, metaVar="<filename>")
    String s_outFile;

    /**
     * Parses the command-line arguments, collects the missing verb predicates of
     * every tree that has at least one PropBank instance, and writes one output
     * file per bucket.
     *
     * @param args command-line arguments (see the {@code @Option} fields)
     */
    public PBFindMissingVerbs(String[] args) {
        this.initArgs(args);
        Map<String, List<PBInstance>> map = PBLib.getPBInstanceMap(this.s_propFile, this.s_treeDir, false);
        AbstractMPAnalyzer morph = new EnglishMPAnalyzer(this.s_dictFile);
        List<List<PBInstance>> lists = new ArrayList<List<PBInstance>>();
        IntOpenHashSet[] sets = new IntOpenHashSet[CATEGORY_COUNT];
        int size = sets.length;
        this.init(sets, lists);
        for (List<PBInstance> list : map.values()) {
            // Without at least one instance there is no tree to inspect.
            if (list == null || list.isEmpty()) {
                continue;
            }
            PBInstance fst = list.get(0);
            // The tree is taken from the first instance only.
            CTTree tree = fst.getTree();
            this.getPredicateIds(tree, sets);
            this.removeExistingPredicates(sets, list);
            for (int i = 0; i < size; ++i) {
                this.addMissingPredicates(tree, morph, fst, sets[i], lists.get(i));
            }
        }
        for (int i = 0; i < size; ++i) {
            this.printPredicates(lists.get(i), this.s_outFile + "." + i);
        }
    }

    /**
     * Fills {@code sets} with empty sets and appends one empty instance list per
     * set to {@code lists}.
     *
     * @param sets  array whose every slot is overwritten with a new empty set
     * @param lists list that receives {@code sets.length} new empty lists
     */
    private void init(IntOpenHashSet[] sets, List<List<PBInstance>> lists) {
        int size = sets.length;
        for (int i = 0; i < size; ++i) {
            sets[i] = new IntOpenHashSet();
            lists.add(new ArrayList<PBInstance>());
        }
    }

    /**
     * Clears all sets, then stores the terminal id of every candidate verb token
     * of {@code tree} in the set of its bucket.
     *
     * @param tree the tree whose tokens are scanned
     * @param sets one set per bucket, indexed by the value of {@link #isVerbPredicate}
     */
    private void getPredicateIds(CTTree tree, IntOpenHashSet[] sets) {
        for (IntOpenHashSet set : sets) {
            set.clear();
        }
        for (CTNode node : tree.getTokens()) {
            int idx = this.isVerbPredicate(tree, node);
            if (idx < 0) {
                continue;
            }
            sets[idx].add(node.getTerminalId());
        }
    }

    /**
     * Classifies a token as a candidate verb predicate.
     *
     * <p>A token is a candidate when {@code CTLibEn.isVerb} accepts it, its parent
     * is tagged {@code VP} and {@code parent.containsTags("VP")} is false. Candidates
     * are rejected when the form is a "do" form, when any ancestor is tagged
     * {@code EDITED}, or when the parent carries the function tag {@code UNF}.
     *
     * @param tree the tree containing {@code node}
     * @param node the token to classify
     * @return {@code -1} if the token is not a candidate (including when it has no
     *         parent); otherwise {@code 0} (next to a {@code HYPH} token),
     *         {@code 1} ("going"/"used"/have followed by {@code TO}),
     *         {@code 2} (be/become/get/have) or {@code 3} (anything else);
     *         the checks are applied in that order
     */
    public int isVerbPredicate(CTTree tree, CTNode node) {
        CTNode parent = node.getParent();
        // A parentless node cannot sit under a VP; bail out instead of throwing.
        if (parent == null) {
            return NOT_A_PREDICATE;
        }
        if (!CTLibEn.isVerb(node) || !parent.isPTag("VP") || parent.containsTags("VP")) {
            return NOT_A_PREDICATE;
        }
        // Locale.ROOT keeps the literal comparisons below stable regardless of the
        // JVM default locale (e.g. Turkish dotless i).
        String lower = node.form.toLowerCase(java.util.Locale.ROOT);
        if (MPLibEn.isDo(lower) || this.hasEditedAncestor(node) || parent.hasFTag("UNF")) {
            return NOT_A_PREDICATE;
        }
        if (this.isHyphenated(tree, node)) {
            return CAT_HYPHENATED;
        }
        if (this.isAuxiliaryLike(tree, node, lower)) {
            return CAT_AUXILIARY_LIKE;
        }
        if (MPLibEn.isBe(lower) || MPLibEn.isBecome(lower) || MPLibEn.isGet(lower) || MPLibEn.isHave(lower)) {
            return CAT_LIGHT_VERB;
        }
        return CAT_OTHER;
    }

    /**
     * @param node the node whose ancestors are inspected
     * @return {@code true} if any ancestor of {@code node} is tagged {@code EDITED}
     */
    private boolean hasEditedAncestor(CTNode node) {
        for (CTNode parent = node.getParent(); parent != null; parent = parent.getParent()) {
            if (parent.isPTag("EDITED")) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param tree the tree containing {@code node}
     * @param node the token to inspect
     * @return {@code true} if the token immediately before or after {@code node}
     *         is tagged {@code HYPH}
     */
    private boolean isHyphenated(CTTree tree, CTNode node) {
        int tokenId = node.getTokenId();
        int size = tree.getTokens().size();
        if (tokenId > 0 && tree.getToken(tokenId - 1).isPTag("HYPH")) {
            return true;
        }
        return tokenId + 1 < size && tree.getToken(tokenId + 1).isPTag("HYPH");
    }

    /**
     * @param tree  the tree containing {@code node}
     * @param node  the token to inspect
     * @param lower the lower-cased form of {@code node}
     * @return {@code true} if {@code lower} is "going", "used" or a form of "have"
     *         and the next token exists and is tagged {@code TO}
     */
    private boolean isAuxiliaryLike(CTTree tree, CTNode node, String lower) {
        boolean candidate = lower.equals("going") || lower.equals("used") || MPLibEn.isHave(lower);
        if (!candidate) {
            return false;
        }
        int nextId = node.getTokenId() + 1;
        if (nextId >= tree.getTokens().size()) {
            return false;
        }
        return tree.getToken(nextId).isPTag("TO");
    }

    /**
     * Removes the predicate id of every instance in {@code list} from every set,
     * leaving only ids that have no existing instance.
     *
     * @param sets the per-bucket id sets to prune
     * @param list the existing instances
     */
    private void removeExistingPredicates(IntOpenHashSet[] sets, List<PBInstance> list) {
        for (PBInstance inst : list) {
            for (IntOpenHashSet set : sets) {
                set.remove(inst.predId);
            }
        }
    }

    /**
     * Creates a placeholder instance for every id in {@code set} and appends it to
     * {@code list}. Each instance copies the tree path and tree id of {@code fst},
     * is annotated by "miss", and carries a single "rel" argument located at the
     * predicate terminal with height 0.
     *
     * @param tree  the tree the ids refer to
     * @param morph the analyzer used to lemmatize the predicate form
     * @param fst   an existing instance of the same tree, used for its tree path and id
     * @param set   terminal ids of the missing predicates
     * @param list  the list that receives the new instances
     */
    private void addMissingPredicates(CTTree tree, AbstractMPAnalyzer morph, PBInstance fst, IntOpenHashSet set, List<PBInstance> list) {
        for (IntCursor cur : set) {
            int predId = cur.value;
            CTNode node = tree.getTerminal(predId);
            String lemma = morph.getLemma(node.form, node.pTag);
            // The clitic "'s" is recorded under the lemma "be".
            if (lemma.equals("'s")) {
                lemma = "be";
            }
            PBArg arg = new PBArg();
            arg.label = "rel";
            arg.addLoc(new PBLoc(predId, 0));
            PBInstance inst = new PBInstance();
            inst.treePath = fst.treePath;
            inst.treeId = fst.treeId;
            inst.predId = predId;
            inst.annotator = "miss";
            inst.type = lemma + "-v";
            inst.roleset = lemma + ".XX";
            inst.aspects = "-----";
            inst.addArg(arg);
            list.add(inst);
        }
    }

    /**
     * Sorts {@code list} in place and writes one instance per line to {@code outFile}.
     *
     * @param list    the instances to write; sorted as a side effect
     * @param outFile the path of the file to create
     */
    private void printPredicates(List<PBInstance> list, String outFile) {
        Collections.sort(list);
        PrintStream fout = UTOutput.createPrintBufferedFileStream(outFile);
        try {
            for (PBInstance inst : list) {
                fout.println(inst.toString());
            }
        } finally {
            // Close even if an instance fails to render, so the handle is not leaked.
            fout.close();
        }
    }

    /**
     * Command-line entry point; all work happens in the constructor.
     *
     * @param args command-line arguments
     */
    public static void main(String[] args) {
        new PBFindMissingVerbs(args);
    }
}

