/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.io.NumberRangesFileFilter;
import edu.stanford.nlp.ling.LabeledWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.Test;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.parser.lexparser.UnknownWordModel;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.ErasureUtils;
import edu.stanford.nlp.util.Numberer;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.FileFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * A lexicon backed by a counter of (word, tag) events.
 *
 * <p>Every training event increments four entries of {@link #seenCounter}: the
 * (word, tag) pair itself, the tag alone (word == {@link #nullWord}), the word
 * alone (tag == {@link #nullTag}) and the grand total (both null). Words that
 * have never been seen are scored by the pluggable {@link UnknownWordModel}.
 *
 * <p>Words and tags are mapped to integers through the numberers returned by
 * {@link #wordNumberer()} and {@link #tagNumberer()}, which default to the
 * global numberers named {@code "words"} and {@code "tags"}.
 */
public class BaseLexicon
implements Lexicon {
    /** Model used to score words that have no seen counts. */
    protected UnknownWordModel uwModel;
    protected static final boolean DEBUG_LEXICON = false;
    protected static final boolean DEBUG_LEXICON_SCORE = false;
    /** Sentinel word index meaning "any word". */
    protected static final int nullWord = -1;
    /** Sentinel tag index meaning "any tag". */
    protected static final short nullTag = -1;
    /** Words seen at most this many times get their tag distribution smoothed with the unseen one. */
    protected int smoothInUnknownsThreshold;
    protected boolean smartMutation;
    /** For each word index, the taggings available for it; built lazily by {@link #initRulesWithWord()}. */
    public transient List<IntTaggedWord>[] rulesWithWord;
    /** Tag-only events (word == nullWord). */
    protected transient Set<IntTaggedWord> tags = new HashSet<IntTaggedWord>();
    /** Word-only events (tag == nullTag). */
    protected transient Set<IntTaggedWord> words = new HashSet<IntTaggedWord>();
    /** Counts of all seen events, including the marginal and total entries. */
    public ClassicCounter<IntTaggedWord> seenCounter = new ClassicCounter<IntTaggedWord>();
    protected transient int lastSignatureIndex = -1;
    protected transient int lastSentencePosition = -1;
    protected transient int lastWordToSignaturize = -1;
    /** smooth[0] is passed to the unknown word model; smooth[1] smooths rarely seen words. */
    double[] smooth = new double[]{1.0, 1.0};
    /** Tag-to-tag co-occurrence mass, filled by {@link #buildPT_T()}. */
    transient double[][] m_TT;
    /** Per-tag normaliser for {@link #m_TT}, filled by {@link #buildPT_T()}. */
    transient double[] m_T;
    private boolean flexiTag;
    private transient int debugLastWord = -1;
    private transient int debugLoc = -1;
    private transient StringBuilder debugProbs;
    private transient StringBuilder debugNoProbs;
    private transient String debugPrefix;
    /** Number of histogram bins printed by {@link #printLexStats()}. */
    private static final int STATS_BINS = 15;
    private static final long serialVersionUID = 40L;
    /** Matches the {@code smooth[i] = value} lines emitted by {@link #writeData(Writer)}. */
    private static final Pattern SMOOTH_LINE = Pattern.compile("^smooth\\[([0-9])\\] = (.*)$");
    /** Lazily built map from tag index to the index of its basic category. */
    int[] tagsToBaseTags = null;
    private transient Numberer tagNumberer;
    private transient Numberer wordNumberer;

    /** Creates a lexicon with default {@link Options.LexOptions}. */
    public BaseLexicon() {
        this(new Options.LexOptions());
    }

    /**
     * Creates a lexicon configured from {@code op}.
     *
     * <p>The unknown word model class named by {@code op.uwModel} is instantiated
     * reflectively through its {@code (Options.LexOptions, Lexicon)} constructor.
     * If {@code op.uwModel} is null it is set (on the caller's object) to the
     * {@link BaseUnknownWordModel} class name. If reflective construction fails,
     * the failure is reported on stderr and a {@link BaseUnknownWordModel} is used.
     *
     * @param op lexicon options; its {@code uwModel} field may be assigned here
     */
    public BaseLexicon(Options.LexOptions op) {
        this.flexiTag = op.flexiTag;
        this.smoothInUnknownsThreshold = op.smoothInUnknownsThreshold;
        this.smartMutation = op.smartMutation;
        if (op.uwModel == null) {
            op.uwModel = "edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel";
        }
        try {
            Class<?> modelClass = Class.forName(op.uwModel);
            Class<?>[] argClasses = new Class<?>[]{Options.LexOptions.class, Lexicon.class};
            Object[] ctorArgs = new Object[]{op, this};
            Constructor<?> ctor = modelClass.getConstructor(argClasses);
            this.uwModel = (UnknownWordModel)ctor.newInstance(ctorArgs);
        }
        catch (ClassNotFoundException e) {
            System.err.println("Class not found: " + op.uwModel);
            e.printStackTrace();
        }
        catch (NoSuchMethodException nsme) {
            System.err.println("Can't construct: " + op.uwModel);
            nsme.printStackTrace();
        }
        catch (InvocationTargetException ite) {
            System.err.println("Can't construct: " + op.uwModel);
            ite.printStackTrace();
        }
        catch (InstantiationException e) {
            System.err.println("Couldn't instantiate: " + op.uwModel);
            e.printStackTrace();
        }
        catch (IllegalAccessException e) {
            System.err.println("Illegal access to " + op.uwModel);
            e.printStackTrace();
        }
        finally {
            // Fall back to the default model whenever reflective construction did not succeed.
            if (this.uwModel == null) {
                this.uwModel = new BaseUnknownWordModel(op, this);
            }
        }
    }

    /**
     * Tells whether the word with the given index has at least one tagging in
     * {@link #rulesWithWord} (building that table first if necessary).
     *
     * @param word word index; negative or out-of-range indices are reported as unknown
     * @return true if the word has at least one recorded tagging
     */
    @Override
    public boolean isKnown(int word) {
        if (this.rulesWithWord == null) {
            this.initRulesWithWord();
        }
        // The lower bound keeps sentinel indices such as nullWord from indexing the array.
        return word >= 0 && word < this.rulesWithWord.length && !this.rulesWithWord[word].isEmpty();
    }

    /**
     * Tells whether the given word string has a positive word-only count in
     * {@link #seenCounter}. The word is looked up with {@code wordNumberer().number(word)}.
     *
     * @param word the word string
     * @return true if the word was seen
     */
    @Override
    public boolean isKnown(String word) {
        IntTaggedWord wordOnly = new IntTaggedWord(this.wordNumberer().number(word), nullTag);
        return this.seenCounter.getCount(wordOnly) > 0.0;
    }

    /**
     * String-keyed convenience overload of {@link #ruleIteratorByWord(int, int)}.
     *
     * @param word the word string
     * @param loc position of the word in the sentence
     * @return iterator over the possible taggings of the word
     */
    public Iterator<IntTaggedWord> ruleIteratorByWord(String word, int loc) {
        return this.ruleIteratorByWord(this.wordNumberer().number(word), loc);
    }

    /**
     * Returns the taggings to consider for a word.
     *
     * <ul>
     * <li>Known word, flexiTag off: the recorded taggings.</li>
     * <li>Known word, flexiTag on: the recorded taggings if the word count exceeds
     *     {@link #smoothInUnknownsThreshold}; otherwise every tag whose
     *     {@link #score(IntTaggedWord, int)} is finite.</li>
     * <li>Unknown word: one tagging per tag recorded for the "UNK" word.</li>
     * </ul>
     *
     * @param word word index
     * @param loc position of the word in the sentence
     * @return iterator over the candidate taggings
     */
    @Override
    public Iterator<IntTaggedWord> ruleIteratorByWord(int word, int loc) {
        List<IntTaggedWord> wordTaggings;
        if (this.isKnown(word)) {
            if (!this.flexiTag) {
                wordTaggings = this.rulesWithWord[word];
            } else {
                IntTaggedWord wordOnly = new IntTaggedWord(word, nullTag);
                if (this.seenCounter.getCount(wordOnly) > (double)this.smoothInUnknownsThreshold) {
                    return this.rulesWithWord[word].iterator();
                }
                wordTaggings = new ArrayList<IntTaggedWord>(40);
                for (IntTaggedWord tagOnly : this.tags) {
                    IntTaggedWord candidate = new IntTaggedWord(word, tagOnly.tag);
                    if (this.score(candidate, loc) > Float.NEGATIVE_INFINITY) {
                        wordTaggings.add(candidate);
                    }
                }
            }
        } else {
            wordTaggings = new ArrayList<IntTaggedWord>(40);
            // Go through the accessor: the transient field may not have been initialised yet.
            int unkWord = this.wordNumberer().number("UNK");
            for (IntTaggedWord unkTagging : this.rulesWithWord[unkWord]) {
                wordTaggings.add(new IntTaggedWord(word, unkTagging.tag));
            }
        }
        return wordTaggings.iterator();
    }

    /**
     * Builds {@link #rulesWithWord} and {@link #tags} from {@link #seenCounter}.
     *
     * <p>The "UNK" word receives every tag whose count in the unknown word model's
     * unseen counter exceeds {@code Train.openClassTypesThreshold}; every other
     * word receives the taggings it was seen with.
     */
    protected void initRulesWithWord() {
        if (Test.verbose) {
            System.err.print("\nInitializing lexicon scores ... ");
        }
        // Number "UNK" before asking for the total so that its index is inside the table.
        int unkWord = this.wordNumberer().number("UNK");
        int numWords = this.wordNumberer().total();
        @SuppressWarnings("unchecked") // generic array creation is not expressible otherwise
        List<IntTaggedWord>[] table = new List[numWords];
        this.rulesWithWord = table;
        for (int w = 0; w < numWords; ++w) {
            this.rulesWithWord[w] = new ArrayList<IntTaggedWord>(1);
        }
        this.tags = new HashSet<IntTaggedWord>();
        for (IntTaggedWord event : this.seenCounter.keySet()) {
            // Tag-only entries: null word, real tag.
            if (event.word() == nullWord && event.tag() != nullTag) {
                this.tags.add(event);
            }
        }
        for (IntTaggedWord tagOnly : this.tags) {
            double types = this.uwModel.unSeenCounter().getCount(tagOnly);
            if (types > (double)Train.openClassTypesThreshold) {
                IntTaggedWord unkTagging = new IntTaggedWord(unkWord, tagOnly.tag);
                this.rulesWithWord[unkTagging.word].add(unkTagging);
            }
        }
        if (Test.verbose) {
            System.err.print("The " + this.rulesWithWord[unkWord].size() + " open class tags are: [");
            for (IntTaggedWord item : this.rulesWithWord[unkWord]) {
                System.err.print(" " + this.tagNumberer().object(item.tag()));
            }
            System.err.println(" ] ");
        }
        for (IntTaggedWord event : this.seenCounter.keySet()) {
            // Full (word, tag) entries only.
            if (event.tag() != nullTag && event.word() != nullWord) {
                this.rulesWithWord[event.word].add(event);
            }
        }
    }

    /**
     * Converts a tree into tagging events.
     *
     * @param tree the tree to read
     * @param keepTagsAsLabels if true the events come from {@code tree.labeledYield()},
     *        otherwise from {@code tree.taggedYield()}
     * @return one event per yielded word
     */
    protected List<IntTaggedWord> treeToEvents(Tree tree, boolean keepTagsAsLabels) {
        if (!keepTagsAsLabels) {
            return this.treeToEvents(tree);
        }
        List<LabeledWord> labeledWords = tree.labeledYield();
        return this.listOfLabeledWordsToEvents(labeledWords);
    }

    /**
     * Converts the tagged yield of a tree into tagging events.
     *
     * @param tree the tree to read
     * @return one event per tagged word
     */
    protected List<IntTaggedWord> treeToEvents(Tree tree) {
        Sentence<TaggedWord> taggedWords = tree.taggedYield();
        return this.listToEvents(taggedWords);
    }

    /**
     * Numbers each tagged word with the word and tag numberers.
     *
     * @param taggedWords the tagged words to convert
     * @return the corresponding events, in the same order
     */
    protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>();
        for (TaggedWord tw : taggedWords) {
            int wordIndex = this.wordNumberer().number(tw.word());
            int tagIndex = this.tagNumberer().number(tw.tag());
            events.add(new IntTaggedWord(wordIndex, tagIndex));
        }
        return events;
    }

    /**
     * Numbers each labeled word with the word and tag numberers.
     *
     * @param taggedWords the labeled words to convert
     * @return the corresponding events, in the same order
     */
    protected List<IntTaggedWord> listOfLabeledWordsToEvents(List<LabeledWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>();
        for (LabeledWord lw : taggedWords) {
            int wordIndex = this.wordNumberer().number(lw.word());
            int tagIndex = this.tagNumberer().number(lw.tag());
            events.add(new IntTaggedWord(wordIndex, tagIndex));
        }
        return events;
    }

    /**
     * Equivalent to {@code addAll(tagWords, 1.0)}.
     *
     * @param tagWords the tagged words
     */
    public void addAll(List<TaggedWord> tagWords) {
        this.addAll(tagWords, 1.0);
    }

    /**
     * Converts the tagged words to events but does not add them to any counter.
     * The conversion is retained because it goes through the word and tag
     * numberers; {@code weight} is currently unused.
     *
     * @param taggedWords the tagged words
     * @param weight unused
     */
    public void addAll(List<TaggedWord> taggedWords, double weight) {
        List<IntTaggedWord> tagWords = this.listToEvents(taggedWords);
    }

    /**
     * Does nothing in this implementation.
     *
     * @param taggedWords ignored
     */
    public void trainWithExpansion(Collection<TaggedWord> taggedWords) {
    }

    /**
     * Trains with weight 1.0 using the tagged yield of each tree.
     *
     * @param trees training trees
     */
    @Override
    public void train(Collection<Tree> trees) {
        this.train(trees, 1.0, false);
    }

    /**
     * Trains with weight 1.0.
     *
     * @param trees training trees
     * @param keepTagsAsLabels see {@link #treeToEvents(Tree, boolean)}
     */
    public void train(Collection<Tree> trees, boolean keepTagsAsLabels) {
        this.train(trees, 1.0, keepTagsAsLabels);
    }

    /**
     * Trains with the given weight using the tagged yield of each tree.
     *
     * @param trees training trees
     * @param weight amount added to each counter entry per event
     */
    public void train(Collection<Tree> trees, double weight) {
        this.train(trees, weight, false);
    }

    /**
     * Trains the unknown word model, then adds every event of every tree to
     * {@link #seenCounter} (pair, tag-only, word-only and total entries) and
     * finally calls {@link #tune(Collection)}.
     *
     * @param trees training trees
     * @param weight amount added to each counter entry per event
     * @param keepTagsAsLabels see {@link #treeToEvents(Tree, boolean)}
     */
    public void train(Collection<Tree> trees, double weight, boolean keepTagsAsLabels) {
        this.getUnknownWordModel().train(trees);
        // The grand-total key is the same for every event, so build it once.
        IntTaggedWord totalKey = new IntTaggedWord(nullWord, nullTag);
        for (Tree tree : trees) {
            for (IntTaggedWord event : this.treeToEvents(tree, keepTagsAsLabels)) {
                IntTaggedWord tagOnly = new IntTaggedWord(nullWord, event.tag);
                IntTaggedWord wordOnly = new IntTaggedWord(event.word, nullTag);
                this.seenCounter.incrementCount(event, weight);
                this.seenCounter.incrementCount(tagOnly, weight);
                this.seenCounter.incrementCount(wordOnly, weight);
                this.seenCounter.incrementCount(totalKey, weight);
                this.tags.add(tagOnly);
                this.words.add(wordOnly);
            }
        }
        this.tune(trees);
    }

    /**
     * Adds a single counter entry, as read from a serialized lexicon.
     *
     * @param seen if true the entry goes to {@link #seenCounter} (and to
     *        {@link #words} or {@link #tags} when it is a marginal entry);
     *        otherwise it is forwarded to the unknown word model
     * @param itw the entry key
     * @param count the amount to add
     */
    protected void addTagging(boolean seen, IntTaggedWord itw, double count) {
        if (!seen) {
            this.uwModel.addTagging(seen, itw, count);
            return;
        }
        this.seenCounter.incrementCount(itw, count);
        if (itw.tag() == nullTag) {
            this.words.add(itw);
        } else if (itw.word() == nullWord) {
            this.tags.add(itw);
        }
    }

    /**
     * Fills {@link #m_TT} and {@link #m_T} from the seen counts.
     *
     * <p>For every word with at least 10 tagged occurrences, the word's relative
     * frequency of tag {@code t} is added to {@code m_T[t]} and to
     * {@code m_TT[t2][t]} once for each tag {@code t2} the word was seen with.
     */
    void buildPT_T() {
        int numTags = this.tagNumberer().total();
        this.m_TT = new double[numTags][numTags];
        this.m_T = new double[numTags];
        double[] tagCounts = new double[numTags];
        for (IntTaggedWord wordOnly : this.words) {
            IntTaggedWord probe = new IntTaggedWord(wordOnly.word, nullTag);
            double tot = 0.0;
            for (int t = 0; t < numTags; ++t) {
                probe.tag = (short)t;
                tagCounts[t] = this.seenCounter.getCount(probe);
                tot += tagCounts[t];
            }
            if (tot < 10.0) {
                continue;
            }
            for (int t = 0; t < numTags; ++t) {
                double c = tagCounts[t] / tot;
                for (int t2 = 0; t2 < numTags; ++t2) {
                    if (tagCounts[t2] > 0.0) {
                        this.m_T[t] += c;
                        this.m_TT[t2][t] += c;
                    }
                }
            }
        }
    }

    /**
     * Scores a tagging as a log probability.
     *
     * <p>For a seen word the result is {@code log(P(T|W) * P(W) / P(T))}, where
     * {@code P(T|W)} is the raw relative frequency if the word count exceeds
     * {@link #smoothInUnknownsThreshold} and is otherwise smoothed with the unseen
     * tag distribution (optionally adjusted via {@link #m_TT} when smartMutation
     * is on). For an unseen word the unknown word model supplies the score; if
     * {@code loc} is negative, its scores for positions 0 and 1 are mixed with
     * weights 1/3 and 2/3.
     *
     * <p>{@code iTW} is modified temporarily while counts are looked up and is
     * restored before the method continues.
     *
     * @param iTW the (word, tag) pair to score
     * @param loc position of the word in the sentence, or negative if unknown
     * @return the log score, or {@code Float.NEGATIVE_INFINITY} if it is not above -100
     */
    @Override
    public float score(IntTaggedWord iTW, int loc) {
        int word = iTW.word;
        short tag = iTW.tag;
        // Reuse iTW as the lookup key for the marginals, restoring it afterwards.
        double c_TW = this.seenCounter.getCount(iTW);
        iTW.tag = nullTag;
        double c_W = this.seenCounter.getCount(iTW);
        iTW.word = nullWord;
        double total = this.seenCounter.getCount(iTW);
        double totalUnseen = this.uwModel.unSeenCounter().getCount(iTW);
        iTW.tag = tag;
        double c_T = this.seenCounter.getCount(iTW);
        double c_Tunseen = this.uwModel.unSeenCounter().getCount(iTW);
        iTW.word = word;

        double pb_W_T;
        boolean seen = c_W > 0.0;
        if (seen) {
            double pb_T_W;
            double p_T_U = c_Tunseen / totalUnseen;
            if (c_W > (double)this.smoothInUnknownsThreshold) {
                pb_T_W = c_TW / c_W;
            } else {
                if (this.smartMutation) {
                    int numTags = this.tagNumberer().total();
                    if (this.m_TT == null || numTags != this.m_T.length) {
                        this.buildPT_T();
                    }
                    p_T_U *= 0.1;
                    for (int t = 0; t < numTags; ++t) {
                        IntTaggedWord iTW2 = new IntTaggedWord(word, t);
                        double p_T_W2 = this.seenCounter.getCount(iTW2) / c_W;
                        if (p_T_W2 > 0.0) {
                            p_T_U += p_T_W2 * this.m_TT[tag][t] / this.m_T[t] * 0.9;
                        }
                    }
                }
                pb_T_W = (c_TW + this.smooth[1] * p_T_U) / (c_W + this.smooth[1]);
            }
            double p_T = c_T / total;
            double p_W = c_W / total;
            pb_W_T = Math.log(pb_T_W * p_W / p_T);
        } else if (loc >= 0) {
            pb_W_T = this.getUnknownWordModel().score(iTW, loc, c_T, total, this.smooth[0]);
        } else {
            double pb_W0_T = this.getUnknownWordModel().score(iTW, 0, c_T, total, this.smooth[0]);
            double pb_W1_T = this.getUnknownWordModel().score(iTW, 1, c_T, total, this.smooth[0]);
            pb_W_T = Math.log((Math.exp(pb_W0_T) + 2.0 * Math.exp(pb_W1_T)) / 3.0);
        }
        // NaN also fails this comparison and is therefore mapped to negative infinity.
        if (pb_W_T > -100.0) {
            return (float)pb_W_T;
        }
        return Float.NEGATIVE_INFINITY;
    }

    /**
     * Selects the smoothing parameters.
     *
     * <p>The grid search visits only the single point (1.0, 0.2) and its score is
     * the constant 0.0, so that point is always selected. It is then overridden
     * by (8.0, 0.1) when smartMutation is on, and {@code smooth[0]} by
     * {@code Test.unseenSmooth} when that is positive.
     *
     * @param trees used only for the verbose message
     */
    public void tune(Collection<Tree> trees) {
        double bestScore = Double.NEGATIVE_INFINITY;
        double[] bestSmooth = new double[]{0.0, 0.0};
        for (this.smooth[0] = 1.0; this.smooth[0] <= 1.0; this.smooth[0] *= 2.0) {
            for (this.smooth[1] = 0.2; this.smooth[1] <= 0.2; this.smooth[1] *= 2.0) {
                double score = 0.0;
                if (Test.verbose) {
                    System.out.println("Tuning lexicon: s0 " + this.smooth[0] + " s1 " + this.smooth[1] + " is " + score + ' ' + trees.size() + " trees.");
                }
                if (score > bestScore) {
                    System.arraycopy(this.smooth, 0, bestSmooth, 0, this.smooth.length);
                    bestScore = score;
                }
            }
        }
        System.arraycopy(bestSmooth, 0, this.smooth, 0, bestSmooth.length);
        if (this.smartMutation) {
            this.smooth[0] = 8.0;
            this.smooth[1] = 0.1;
        }
        if (Test.unseenSmooth > 0.0) {
            this.smooth[0] = Test.unseenSmooth;
        }
        if (Test.verbose) {
            System.out.println("Tuning selected smoothUnseen " + this.smooth[0] + " smoothSeen " + this.smooth[1] + " at " + bestScore);
        }
    }

    /**
     * Reads lexicon data in the format produced by {@link #writeData(Writer)},
     * stopping at the first empty line or at end of input.
     *
     * <p>A line is either {@code smooth[i] = value} or a space-separated entry in
     * which field 0 and field 2 are passed (as second and first argument) to the
     * {@link IntTaggedWord} string constructor, field 3 is {@code SEEN} for seen
     * entries, and field 4 is the count.
     *
     * @param in the reader to consume
     * @throws IOException if reading fails or a line is malformed; for malformed
     *         lines the underlying exception is attached as the cause
     */
    @Override
    public void readData(BufferedReader in) throws IOException {
        final String SEEN = "SEEN";
        int lineNum = 1;
        for (String line = in.readLine(); line != null && line.length() > 0; line = in.readLine()) {
            try {
                Matcher m = SMOOTH_LINE.matcher(line);
                if (m.matches()) {
                    int i = Integer.parseInt(m.group(1));
                    this.smooth[i] = Double.parseDouble(m.group(2));
                } else {
                    String[] fields = StringUtils.splitOnCharWithQuoting(line, ' ', '\"', '\\');
                    boolean seen = fields[3].equals(SEEN);
                    this.addTagging(seen, new IntTaggedWord(fields[2], fields[0]), Double.parseDouble(fields[4]));
                }
            }
            catch (RuntimeException e) {
                // Keep the original failure as the cause so the parse error is diagnosable.
                throw new IOException("Error on line " + lineNum + ": " + line, e);
            }
            ++lineNum;
        }
    }

    /**
     * Writes all seen entries, all unseen entries of the unknown word model, and
     * the smoothing parameters, one per line. The writer is flushed, not closed.
     *
     * @param w destination
     * @throws IOException if the underlying writer reported an error
     */
    @Override
    public void writeData(Writer w) throws IOException {
        PrintWriter out = new PrintWriter(w);
        for (IntTaggedWord itw : this.seenCounter.keySet()) {
            out.println(itw.toLexicalEntry() + " SEEN " + this.seenCounter.getCount(itw));
        }
        for (IntTaggedWord itw : this.getUnknownWordModel().unSeenCounter().keySet()) {
            out.println(itw.toLexicalEntry() + " UNSEEN " + this.getUnknownWordModel().unSeenCounter().getCount(itw));
        }
        for (int i = 0; i < this.smooth.length; ++i) {
            out.println("smooth[" + i + "] = " + this.smooth[i]);
        }
        // PrintWriter traps IOExceptions; checkError() flushes and reveals them.
        if (out.checkError()) {
            throw new IOException("Error writing lexicon data");
        }
    }

    /**
     * Counts all taggings in {@link #rulesWithWord} (building it first if necessary).
     *
     * @return the total number of taggings
     */
    @Override
    public int numRules() {
        if (this.rulesWithWord == null) {
            this.initRulesWithWord();
        }
        int accumulated = 0;
        for (List<IntTaggedWord> taggings : this.rulesWithWord) {
            accumulated += taggings.size();
        }
        return accumulated;
    }

    /**
     * Prints summary statistics to stdout, including a histogram of the number of
     * taggings per word (the last bin collects everything at or above
     * {@code STATS_BINS - 1}). Word lists are printed for every bin when there
     * are at most 20 words, and otherwise only for bins 7 and above.
     */
    public void printLexStats() {
        if (this.rulesWithWord == null) {
            this.initRulesWithWord();
        }
        System.out.println("BaseLexicon statistics");
        System.out.println("unknownLevel is " + this.getUnknownWordModel().getUnknownLevel());
        System.out.println("Sum of rulesWithWord: " + this.numRules());
        System.out.println("Tags size: " + this.tags.size());
        int wsize = this.words.size();
        System.out.println("Words size: " + wsize);
        System.out.println("rulesWithWord length: " + this.rulesWithWord.length + " [should be sum of words + unknown sigs]");
        int[] lengths = new int[STATS_BINS];
        List<List<Object>> wordsPerBin = new ArrayList<List<Object>>(STATS_BINS);
        for (int j = 0; j < STATS_BINS; ++j) {
            wordsPerBin.add(new ArrayList<Object>());
        }
        for (int i = 0; i < this.rulesWithWord.length; ++i) {
            int num = Math.min(this.rulesWithWord[i].size(), STATS_BINS - 1);
            lengths[num]++;
            if (wsize <= 20 || num >= 7) {
                wordsPerBin.get(num).add(this.wordNumberer().object(i));
            }
        }
        System.out.println("Stats on how many taggings for how many words");
        for (int j = 0; j < STATS_BINS; ++j) {
            System.out.print(j + " taggings: " + lengths[j] + " words ");
            if (wsize <= 20 || j >= 7) {
                System.out.print(wordsPerBin.get(j));
            }
            System.out.println();
        }
        NumberFormat nf = NumberFormat.getNumberInstance();
        nf.setMaximumFractionDigits(0);
        System.out.println("Unseen counter: " + Counters.toString(this.uwModel.unSeenCounter(), nf));
    }

    /**
     * Measures how many tagging events of {@code trees} have a zero seen count.
     *
     * @param trees trees to evaluate
     * @param missingWords receives the strings of words absent from {@link #words}
     * @param missingTags receives the strings of tags absent from {@link #tags}
     * @param missingTW receives the events with a zero seen count
     * @return unseen events divided by total events; NaN if there are no events
     */
    public double evaluateCoverage(Collection<Tree> trees, Set<String> missingWords, Set<String> missingTags, Set<IntTaggedWord> missingTW) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>();
        for (Tree t : trees) {
            events.addAll(this.treeToEvents(t));
        }
        int total = 0;
        int unseen = 0;
        for (IntTaggedWord itw : events) {
            ++total;
            // Resolve strings through the same numberers that produced the indices.
            if (!this.words.contains(new IntTaggedWord(itw.word(), nullTag))) {
                missingWords.add((String)this.wordNumberer().object(itw.word()));
            }
            if (!this.tags.contains(new IntTaggedWord(nullWord, itw.tag()))) {
                missingTags.add((String)this.tagNumberer().object(itw.tag()));
            }
            if (this.seenCounter.getCount(itw) == 0.0) {
                ++unseen;
                missingTW.add(itw);
            }
        }
        return (double)unseen / (double)total;
    }

    /**
     * Maps a tag index to the index of its basic category. The mapping table is
     * built on first use from the tags known to the tag numberer at that time.
     *
     * @param tag tag index
     * @param tlp language pack supplying {@code basicCategory}
     * @return index of the basic category of the tag
     */
    public int getBaseTag(int tag, TreebankLanguagePack tlp) {
        if (this.tagsToBaseTags == null) {
            this.populateTagsToBaseTags(tlp);
        }
        return this.tagsToBaseTags[tag];
    }

    /**
     * Fills {@link #tagsToBaseTags} with one entry per tag currently in the tag numberer.
     *
     * @param tlp language pack supplying {@code basicCategory}
     */
    private void populateTagsToBaseTags(TreebankLanguagePack tlp) {
        Numberer tagNumb = this.tagNumberer();
        int total = tagNumb.total();
        this.tagsToBaseTags = new int[total];
        for (int i = 0; i < total; ++i) {
            String tag = (String)tagNumb.object(i);
            String baseTag = tlp.basicCategory(tag);
            this.tagsToBaseTags[i] = tagNumb.number(baseTag);
        }
    }

    /**
     * Trains a lexicon from a treebank and prints the tag scores of the given words.
     *
     * @param args treebankPath fileRange unknownWordModel words*
     */
    public static void main(String[] args) {
        if (args.length < 3) {
            System.err.println("java BaseLexicon treebankPath fileRange unknownWordModel words*");
            return;
        }
        System.out.print("Training BaseLexicon from " + args[0] + ' ' + args[1] + " ... ");
        DiskTreebank tb = new DiskTreebank();
        tb.loadPath(args[0], (FileFilter)new NumberRangesFileFilter(args[1], true));
        BaseLexicon lex = new BaseLexicon();
        lex.getUnknownWordModel().setUnknownLevel(Integer.parseInt(args[2]));
        lex.train(tb);
        System.out.println("done.");
        System.out.println();
        Numberer numb = Numberer.getGlobalNumberer("tags");
        Numberer wNumb = Numberer.getGlobalNumberer("words");
        NumberFormat nf = NumberFormat.getNumberInstance();
        nf.setMaximumFractionDigits(4);
        List<String> impos = new ArrayList<String>();
        for (int i = 3; i < args.length; ++i) {
            int loc = i - 3;
            if (lex.isKnown(args[i])) {
                System.out.println(args[i] + " is a known word.  Log probabilities [log P(w|t)] for its taggings are:");
                Iterator<IntTaggedWord> it = lex.ruleIteratorByWord(wNumb.number(args[i]), loc);
                while (it.hasNext()) {
                    IntTaggedWord iTW = it.next();
                    System.out.println(StringUtils.pad(iTW, 24) + nf.format(lex.score(iTW, loc)));
                }
            } else {
                String sig = lex.getUnknownWordModel().getSignature(args[i], loc);
                System.out.println(args[i] + " is an unknown word.  Signature with uwm " + lex.getUnknownWordModel().getUnknownLevel() + (i == 3 ? " init" : " non-init") + " is: " + sig);
                Set<String> tagSet = ErasureUtils.<Set<String>>uncheckedCast(numb.objects());
                impos.clear();
                List<String> sortedTags = new ArrayList<String>(tagSet);
                Collections.sort(sortedTags);
                for (String tStr : sortedTags) {
                    IntTaggedWord iTW = new IntTaggedWord(args[i], tStr);
                    double score = lex.score(iTW, 1);
                    if (score == Double.NEGATIVE_INFINITY) {
                        impos.add(tStr);
                        continue;
                    }
                    System.out.println(StringUtils.pad(iTW, 24) + nf.format(score));
                }
                if (impos.size() > 0) {
                    System.out.println(args[i] + " impossible tags: " + impos);
                }
            }
            System.out.println();
        }
    }

    /** Returns the tag numberer, defaulting to the global "tags" numberer on first use. */
    private Numberer tagNumberer() {
        if (this.tagNumberer == null) {
            this.tagNumberer = Numberer.getGlobalNumberer("tags");
        }
        return this.tagNumberer;
    }

    /** Returns the word numberer, defaulting to the global "words" numberer on first use. */
    private Numberer wordNumberer() {
        if (this.wordNumberer == null) {
            this.wordNumberer = Numberer.getGlobalNumberer("words");
        }
        return this.wordNumberer;
    }

    /**
     * Sets the word numberer used by this lexicon and by {@link IntTaggedWord}.
     *
     * @param wordNumberer the numberer to use
     */
    public void setWordNumberer(Numberer wordNumberer) {
        this.wordNumberer = wordNumberer;
        IntTaggedWord.setWordNumberer(wordNumberer);
    }

    /**
     * Sets the tag numberer used by this lexicon and by {@link IntTaggedWord}.
     *
     * @param tagNumberer the numberer to use
     */
    public void setTagNumberer(Numberer tagNumberer) {
        this.tagNumberer = tagNumberer;
        IntTaggedWord.setTagNumberer(tagNumberer);
    }

    /** Returns the unknown word model currently in use. */
    @Override
    public UnknownWordModel getUnknownWordModel() {
        return this.uwModel;
    }

    /**
     * Replaces the unknown word model.
     *
     * @param uwm the model to use from now on
     */
    @Override
    public final void setUnknownWordModel(UnknownWordModel uwm) {
        this.uwModel = uwm;
    }
}

