/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.ling.LabeledWord;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.lexparser.BaseUnknownWordModel;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.Train;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Numberer;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Unknown-word model that maps a word to a coarse "signature" string (always
 * starting with {@code "UNK"}) built from surface features such as
 * capitalization, digits, dashes and word endings, and that keeps counts of
 * (signature, tag) events in the inherited {@code unSeenCounter}.
 *
 * <p>Seven signature schemes are available; the one in use is selected by the
 * inherited {@code unknownLevel} field (1-7; any other value yields the bare
 * {@code "UNK"} signature).
 *
 * <p>NOTE(review): {@link #getSignatureIndex(int, int)} keeps a one-entry
 * cache in plain instance fields and {@link #score} temporarily mutates its
 * argument, so an instance does not look safe for concurrent use - confirm
 * against callers.
 */
public class EnglishUnknownWordModel extends BaseUnknownWordModel {
    private static final long serialVersionUID = 4825624957364628770L;

    /** Compile-time debug switches; both disabled and not read in this class. */
    private static final boolean DEBUG_UWM = false;
    private static final boolean DOCUMENT_UNKNOWNS = false;

    /** Inclusive bounds of the accepted values for {@code unknownLevel}. */
    private static final int MIN_UNKNOWN = 0;
    private static final int MAX_UNKNOWN = 7;

    /** Sentinel stored in the signature-cache fields when the cache is empty. */
    private static final int NO_CACHED_VALUE = -1;

    /** Copied from the options in the constructor; not otherwise read in this class. */
    protected boolean smartMutation = false;

    // One-entry cache for getSignatureIndex: last (word, position) -> signature index.
    protected transient int lastSignatureIndex = NO_CACHED_VALUE;
    protected transient int lastSentencePosition = NO_CACHED_VALUE;
    protected transient int lastWordToSignaturize = NO_CACHED_VALUE;

    /** Copied from the options in the constructor; not otherwise read in this class. */
    protected int unknownSuffixSize = 0;
    /** Copied from the options in the constructor; not otherwise read in this class. */
    protected int unknownPrefixSize = 0;

    // Lazily fetched global numberers; transient, so re-fetched after deserialization.
    private transient Numberer tagNumberer;
    private transient Numberer wordNumberer;

    /**
     * Creates the model, taking the signature level and related settings from
     * {@code op}. A level outside {@code [MIN_UNKNOWN, MAX_UNKNOWN]} is reported
     * on {@code System.err} and clamped to the nearest bound.
     *
     * @param op  lexicon options supplying {@code useUnknownWordSignatures},
     *            {@code smartMutation}, {@code unknownSuffixSize} and
     *            {@code unknownPrefixSize}
     * @param lex lexicon handed to the superclass constructor
     */
    public EnglishUnknownWordModel(Options.LexOptions op, Lexicon lex) {
        super(op, lex);
        this.unknownLevel = op.useUnknownWordSignatures;
        if (this.unknownLevel < MIN_UNKNOWN) {
            System.err.println("Invalid value for useUnknownWordSignatures: " + this.unknownLevel);
            this.unknownLevel = MIN_UNKNOWN;
        } else if (this.unknownLevel > MAX_UNKNOWN) {
            System.err.println("Invalid value for useUnknownWordSignatures: " + this.unknownLevel);
            this.unknownLevel = MAX_UNKNOWN;
        }
        this.smartMutation = op.smartMutation;
        this.unknownSuffixSize = op.unknownSuffixSize;
        this.unknownPrefixSize = op.unknownPrefixSize;
    }

    /**
     * Restores the serialized state and then empties the signature cache.
     *
     * <p>Field initializers do not run during deserialization, so the transient
     * cache fields would otherwise all be {@code 0} and a lookup for word index
     * 0 at sentence position 0 would be answered with a bogus cached index 0.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        in.defaultReadObject();
        this.lastSignatureIndex = NO_CACHED_VALUE;
        this.lastSentencePosition = NO_CACHED_VALUE;
        this.lastWordToSignaturize = NO_CACHED_VALUE;
    }

    /** Trains on {@code trees} with weight 1.0, reading tags via the tagged yield. */
    @Override
    public void train(Collection<Tree> trees) {
        this.train(trees, 1.0, false);
    }

    /** Trains on {@code trees} with weight 1.0; see {@link #train(Collection, double, boolean)}. */
    public void train(Collection<Tree> trees, boolean keepTagsAsLabels) {
        this.train(trees, 1.0, keepTagsAsLabels);
    }

    /** Trains on {@code trees} with the given weight, reading tags via the tagged yield. */
    public void train(Collection<Tree> trees, double weight) {
        this.train(trees, weight, false);
    }

    /**
     * Accumulates unknown-word statistics from {@code trees} into the inherited
     * {@code unSeenCounter}.
     *
     * <p>Every token increments a local seen-word counter by {@code weight}.
     * Once more than {@code trees.size() * Train.fractionBeforeUnseenCounting}
     * trees have been visited, a token whose running count is still below 2.0
     * contributes {@code weight} to four events: (signature, tag), (any, tag),
     * (signature, any) and (any, any), where "any" is encoded as -1.
     *
     * <p>If nothing was counted, every tag known to the tag numberer except
     * {@code ".$$."} receives {@code weight}, as does the (any, any) event once
     * per such tag.
     *
     * @param trees            training trees
     * @param weight           amount added per event
     * @param keepTagsAsLabels if true, tokens come from {@code labeledYield()}
     *                         instead of {@code taggedYield()}
     */
    public void train(Collection<Tree> trees, double weight, boolean keepTagsAsLabels) {
        ClassicCounter<IntTaggedWord> seenCounter = new ClassicCounter<IntTaggedWord>();
        int indexToStartUnkCounting = (int)((double)trees.size() * Train.fractionBeforeUnseenCounting);
        int treesVisited = 0;
        for (Tree tree : trees) {
            ++treesVisited;
            List<IntTaggedWord> taggedWords = this.treeToEvents(tree, keepTagsAsLabels);
            int sentenceLength = taggedWords.size();
            for (int position = 0; position < sentenceLength; ++position) {
                IntTaggedWord iTW = taggedWords.get(position);
                IntTaggedWord iW = new IntTaggedWord(iTW.word, -1);
                seenCounter.incrementCount(iW, weight);
                // Written as !(x < 2.0) so that a NaN count is skipped, as before.
                if (treesVisited <= indexToStartUnkCounting || !(seenCounter.getCount(iW) < 2.0)) {
                    continue;
                }
                int signature = this.getSignatureIndex(iTW.word, position);
                this.unSeenCounter.incrementCount(new IntTaggedWord(signature, iTW.tag), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, iTW.tag), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(signature, -1), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, -1), weight);
            }
        }
        if (this.unSeenCounter.isEmpty()) {
            int numTags = this.tagNumberer().total();
            for (int tt = 0; tt < numTags; ++tt) {
                if (".$$.".equals(this.tagNumberer().object(tt))) {
                    continue;
                }
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, tt), weight);
                this.unSeenCounter.incrementCount(new IntTaggedWord(-1, -1), weight);
            }
        }
    }

    /**
     * Converts the leaves of {@code tree} into numbered (word, tag) events,
     * using {@code labeledYield()} when {@code keepTagsAsLabels} is true and
     * {@code taggedYield()} otherwise.
     */
    protected List<IntTaggedWord> treeToEvents(Tree tree, boolean keepTagsAsLabels) {
        if (keepTagsAsLabels) {
            return this.listOfLabeledWordsToEvents(tree.labeledYield());
        }
        return this.treeToEvents(tree);
    }

    /** Converts the tagged yield of {@code tree} into numbered (word, tag) events. */
    protected List<IntTaggedWord> treeToEvents(Tree tree) {
        return this.listToEvents(tree.taggedYield());
    }

    /** Numbers each word and tag through the global numberers, preserving order. */
    protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>(taggedWords.size());
        for (TaggedWord tw : taggedWords) {
            int wordId = this.wordNumberer().number(tw.word());
            int tagId = this.tagNumberer().number(tw.tag());
            events.add(new IntTaggedWord(wordId, tagId));
        }
        return events;
    }

    /** Same as {@link #listToEvents(List)} but for {@code LabeledWord} input. */
    protected List<IntTaggedWord> listOfLabeledWordsToEvents(List<LabeledWord> taggedWords) {
        List<IntTaggedWord> events = new ArrayList<IntTaggedWord>(taggedWords.size());
        for (LabeledWord lw : taggedWords) {
            int wordId = this.wordNumberer().number(lw.word());
            int tagId = this.tagNumberer().number(lw.tag());
            events.add(new IntTaggedWord(wordId, tagId));
        }
        return events;
    }

    /**
     * Computes {@code log(P(tag | signature) * (1 / total) / (c_Tseen / total))}
     * where {@code P(tag | signature)} is the signature/tag count smoothed
     * towards the overall unseen tag distribution with strength {@code smooth}.
     * At {@code unknownLevel == 0} the signature counts are ignored.
     *
     * <p>{@code iTW} is used as a scratch lookup key while counts are read; its
     * {@code word} and {@code tag} fields are restored before returning, even
     * if a lookup throws.
     *
     * @return the log score, or {@code Float.NEGATIVE_INFINITY} when it is not
     *         greater than -100 (this includes a NaN result)
     */
    @Override
    public float score(IntTaggedWord iTW, int loc, double c_Tseen, double total, double smooth) {
        final int word = iTW.word;
        final short tag = iTW.tag;
        double c_TS;
        double c_S;
        double c_U;
        double c_T;
        try {
            iTW.word = this.getSignatureIndex(word, loc);
            c_TS = this.unSeenCounter.getCount(iTW);   // (signature, tag)
            iTW.tag = (short)-1;
            c_S = this.unSeenCounter.getCount(iTW);    // (signature, any)
            iTW.word = -1;
            c_U = this.unSeenCounter.getCount(iTW);    // (any, any)
            iTW.tag = tag;
            c_T = this.unSeenCounter.getCount(iTW);    // (any, tag)
        } finally {
            // Never hand the caller back a half-rewritten key.
            iTW.word = word;
            iTW.tag = tag;
        }
        double p_T_U = c_T / c_U;
        if (this.unknownLevel == 0) {
            c_TS = 0.0;
            c_S = 0.0;
        }
        double pb_T_S = (c_TS + smooth * p_T_U) / (c_S + smooth);
        double p_W = 1.0 / total;
        double p_T = c_Tseen / total;
        double pb_W_T = Math.log(pb_T_S * p_W / p_T);
        if (pb_W_T > -100.0) {
            return (float)pb_W_T;
        }
        return Float.NEGATIVE_INFINITY;
    }

    /** Returns the global "tags" numberer, fetching it on first use. */
    private Numberer tagNumberer() {
        if (this.tagNumberer == null) {
            this.tagNumberer = Numberer.getGlobalNumberer("tags");
        }
        return this.tagNumberer;
    }

    /** Returns the global "words" numberer, fetching it on first use. */
    private Numberer wordNumberer() {
        if (this.wordNumberer == null) {
            this.wordNumberer = Numberer.getGlobalNumberer("words");
        }
        return this.wordNumberer;
    }

    /**
     * Returns the word-numberer index of the signature of the word with index
     * {@code wordIndex} at {@code sentencePosition}. The most recent
     * (word, position) pair and its result are cached in the {@code last*}
     * fields and reused when the same pair is requested again.
     */
    @Override
    public int getSignatureIndex(int wordIndex, int sentencePosition) {
        if (wordIndex == this.lastWordToSignaturize && sentencePosition == this.lastSentencePosition) {
            return this.lastSignatureIndex;
        }
        String word = (String)this.wordNumberer().object(wordIndex);
        int sig = this.wordNumberer().number(this.getSignature(word, sentencePosition));
        this.lastSignatureIndex = sig;
        this.lastSentencePosition = sentencePosition;
        this.lastWordToSignaturize = wordIndex;
        return sig;
    }

    /**
     * Builds the signature string for {@code word} using the scheme selected
     * by {@code unknownLevel}.
     *
     * @param word the surface form; an empty string is accepted
     * @param loc  position of the word in its sentence (0 = sentence-initial)
     * @return {@code "UNK"} followed by the scheme's feature suffixes; just
     *         {@code "UNK"} when {@code unknownLevel} is not in 1-7
     */
    @Override
    public String getSignature(String word, int loc) {
        StringBuilder sb = new StringBuilder("UNK");
        switch (this.unknownLevel) {
            case 1:
                this.getSignature1(word, loc, sb);
                break;
            case 2:
                this.getSignature2(word, loc, sb);
                break;
            case 3:
                this.getSignature3(word, loc, sb);
                break;
            case 4:
                this.getSignature4(word, loc, sb);
                break;
            case 5:
                this.getSignature5(word, loc, sb);
                break;
            case 6:
                this.getSignature6(word, loc, sb);
                break;
            case 7:
                this.getSignature7(word, loc, sb);
                break;
            default:
                break;
        }
        return sb.toString();
    }

    /**
     * Scheme 7: like scheme 2, but a word counts as capitalized when it
     * contains an upper-case letter anywhere (not only as its first character)
     * and the dash flag is collected while scanning non-digit characters.
     */
    private void getSignature7(String word, int loc, StringBuilder sb) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLower = false;
        boolean hasUpper = false;
        boolean hasDash = false;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
            } else {
                hasNonDigit = true;
                if (Character.isLetter(ch)) {
                    if (Character.isLowerCase(ch) || Character.isTitleCase(ch)) {
                        hasLower = true;
                    } else {
                        hasUpper = true;
                    }
                } else if (ch == '-') {
                    hasDash = true;
                }
            }
        }
        appendCaseClass(hasUpper, hasLower, loc, sb);
        if (hasDash) {
            sb.append("-DASH");
        }
        appendDigitClassOrFinalChar(word, hasDigit, hasNonDigit, sb);
    }

    /**
     * Scheme 6: capital count ({@code -CAPS}, {@code -INITC[-KNOWNLC]},
     * {@code -CAP}) or {@code -LC}, then {@code -NUM}, {@code -DASH} and an
     * ending feature.
     */
    private void getSignature6(String word, int loc, StringBuilder sb) {
        CaseProfile profile = CaseProfile.of(word);
        String lowered = word.toLowerCase(Locale.ENGLISH);
        if (profile.numCaps > 1) {
            sb.append("-CAPS");
        } else if (profile.numCaps > 0) {
            if (loc == 0) {
                this.appendInitialCap(lowered, sb);
            } else {
                sb.append("-CAP");
            }
        } else if (profile.hasLower) {
            sb.append("-LC");
        }
        appendNumDashAndEnding(word, lowered, profile, sb);
    }

    /**
     * Scheme 5: like scheme 6, but the capitalization feature is driven by the
     * first character. An empty word has no first character and therefore
     * receives no capitalization feature.
     */
    private void getSignature5(String word, int loc, StringBuilder sb) {
        CaseProfile profile = CaseProfile.of(word);
        String lowered = word.toLowerCase(Locale.ENGLISH);
        boolean nonEmpty = word.length() > 0;
        boolean startsCapitalized = nonEmpty && isUpperOrTitle(word.charAt(0));
        boolean startsWithNonLetter = nonEmpty && !Character.isLetter(word.charAt(0));
        if (startsCapitalized) {
            if (loc == 0 && profile.numCaps == 1) {
                this.appendInitialCap(lowered, sb);
            } else {
                sb.append("-CAPS");
            }
        } else if (startsWithNonLetter && profile.numCaps > 0) {
            sb.append("-CAPS");
        } else if (profile.hasLower) {
            sb.append("-LC");
        }
        appendNumDashAndEnding(word, lowered, profile, sb);
    }

    /**
     * Scheme 4: a case class ({@code -AC}, {@code -SC}, {@code -C}, {@code -L},
     * {@code -U} or {@code -S}), then {@code -N}/{@code -n} for all-digit /
     * some-digit words, {@code -H}, {@code -P}, {@code -C} for dash, period
     * and comma, and finally {@code -x} where x is the lower-cased last
     * character if the word is longer than 3 and ends in a letter. An empty
     * word is classified {@code -S}.
     */
    private void getSignature4(String word, int loc, StringBuilder sb) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLetter = false;
        boolean hasLower = false;
        boolean hasDash = false;
        boolean hasPeriod = false;
        boolean hasComma = false;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char c = word.charAt(i);
            if (Character.isDigit(c)) {
                hasDigit = true;
                continue;
            }
            hasNonDigit = true;
            if (Character.isLetter(c)) {
                hasLetter = true;
                if (Character.isLowerCase(c) || Character.isTitleCase(c)) {
                    hasLower = true;
                }
            } else if (c == '-') {
                hasDash = true;
            } else if (c == '.') {
                hasPeriod = true;
            } else if (c == ',') {
                hasComma = true;
            }
        }
        if (wlen > 0 && isUpperOrTitle(word.charAt(0))) {
            if (!hasLower) {
                sb.append("-AC");
            } else if (loc == 0) {
                sb.append("-SC");
            } else {
                sb.append("-C");
            }
        } else if (hasLower) {
            sb.append("-L");
        } else if (hasLetter) {
            sb.append("-U");
        } else {
            sb.append("-S");
        }
        if (hasDigit) {
            sb.append(hasNonDigit ? "-n" : "-N");
        }
        if (hasDash) {
            sb.append("-H");
        }
        if (hasPeriod) {
            sb.append("-P");
        }
        if (hasComma) {
            sb.append("-C");
        }
        if (wlen > 3) {
            char last = word.charAt(wlen - 1);
            if (Character.isLetter(last)) {
                sb.append('-');
                sb.append(Character.toLowerCase(last));
            }
        }
    }

    /**
     * Scheme 3: a run-length-collapsed character-class string. Each run of
     * same-class characters contributes its class letter, plus a single
     * {@code '+'} if the run is longer than one. Words longer than 3 also get
     * {@code -x} where x is the lower-cased last character.
     */
    private void getSignature3(String word, int loc, StringBuilder sb) {
        sb.append('-');
        char lastClass = '-';   // matches no real class, so the first character always starts a run
        int runLength = 0;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char newClass = charClass3(word.charAt(i), loc);
            if (newClass != lastClass) {
                lastClass = newClass;
                sb.append(lastClass);
                runLength = 1;
            } else {
                if (runLength < 2) {
                    sb.append('+');
                }
                ++runLength;
            }
        }
        if (wlen > 3) {
            sb.append('-');
            sb.append(Character.toLowerCase(word.charAt(wlen - 1)));
        }
    }

    /**
     * Character class used by scheme 3: {@code 'S'}/{@code 'L'} for an upper-
     * or title-case character (sentence-initial word / elsewhere), {@code 'l'}
     * for any other letter, {@code 'd'} digit, {@code 'h'} dash, {@code 'p'}
     * period, {@code 's'} anything else.
     */
    private static char charClass3(char ch, int loc) {
        if (isUpperOrTitle(ch)) {
            return loc == 0 ? 'S' : 'L';
        }
        if (Character.isLetter(ch)) {
            return 'l';
        }
        if (Character.isDigit(ch)) {
            return 'd';
        }
        if (ch == '-') {
            return 'h';
        }
        if (ch == '.') {
            return 'p';
        }
        return 's';
    }

    /**
     * Scheme 2: case class based on the first character ({@code -ALLC},
     * {@code -INIT}, {@code -UC}, or {@code -LC}), {@code -DASH} if the word
     * contains a dash, then {@code -NUM}/{@code -DIG} for all-digit /
     * some-digit words, or otherwise the lower-cased last character for words
     * longer than 3.
     */
    private void getSignature2(String word, int loc, StringBuilder sb) {
        boolean hasDigit = false;
        boolean hasNonDigit = false;
        boolean hasLower = false;
        int wlen = word.length();
        for (int i = 0; i < wlen; ++i) {
            char ch = word.charAt(i);
            if (Character.isDigit(ch)) {
                hasDigit = true;
                continue;
            }
            hasNonDigit = true;
            if (Character.isLetter(ch) && (Character.isLowerCase(ch) || Character.isTitleCase(ch))) {
                hasLower = true;
            }
        }
        boolean startsCapitalized = wlen > 0 && isUpperOrTitle(word.charAt(0));
        appendCaseClass(startsCapitalized, hasLower, loc, sb);
        if (word.indexOf('-') >= 0) {
            sb.append("-DASH");
        }
        appendDigitClassOrFinalChar(word, hasDigit, hasNonDigit, sb);
    }

    /**
     * Scheme 1: {@code -<last two characters>-} followed by {@code LOWER},
     * {@code INIT} (upper-case first character, sentence-initial),
     * {@code UPPER} (upper-case first character elsewhere) or {@code OTHER}.
     * An empty word yields {@code --OTHER}.
     */
    private void getSignature1(String word, int loc, StringBuilder sb) {
        int wlen = word.length();
        sb.append('-');
        sb.append(word.substring(Math.max(wlen - 2, 0), wlen));
        sb.append('-');
        if (wlen > 0 && Character.isLowerCase(word.charAt(0))) {
            sb.append("LOWER");
        } else if (wlen > 0 && Character.isUpperCase(word.charAt(0))) {
            sb.append(loc == 0 ? "INIT" : "UPPER");
        } else {
            sb.append("OTHER");
        }
    }

    /** True if {@code ch} is an upper-case or title-case character. */
    private static boolean isUpperOrTitle(char ch) {
        return Character.isUpperCase(ch) || Character.isTitleCase(ch);
    }

    /** Case feature shared by schemes 2 and 7. */
    private static void appendCaseClass(boolean capitalized, boolean hasLower, int loc, StringBuilder sb) {
        if (capitalized) {
            if (!hasLower) {
                sb.append("-ALLC");
            } else if (loc == 0) {
                sb.append("-INIT");
            } else {
                sb.append("-UC");
            }
        } else if (hasLower) {
            sb.append("-LC");
        }
    }

    /**
     * Tail feature shared by schemes 2 and 7: {@code -NUM} for an all-digit
     * word, {@code -DIG} for a word mixing digits with other characters, and
     * otherwise the lower-cased last character (no separator) when the word
     * is longer than 3.
     */
    private static void appendDigitClassOrFinalChar(String word, boolean hasDigit, boolean hasNonDigit, StringBuilder sb) {
        if (hasDigit) {
            sb.append(hasNonDigit ? "-DIG" : "-NUM");
        } else if (word.length() > 3) {
            sb.append(Character.toLowerCase(word.charAt(word.length() - 1)));
        }
    }

    /**
     * Appends {@code -INITC}, plus {@code -KNOWNLC} when the lexicon reports
     * the lower-cased form as known (schemes 5 and 6).
     */
    private void appendInitialCap(String lowered, StringBuilder sb) {
        sb.append("-INITC");
        if (this.getLexicon().isKnown(lowered)) {
            sb.append("-KNOWNLC");
        }
    }

    /**
     * Trailing features shared by schemes 5 and 6: {@code -NUM}, {@code -DASH},
     * then either {@code -s} (word of length >= 3 ending in "s" whose
     * second-to-last character is not s, i or u) or, for words of length >= 5
     * with no dash and not both a digit and a capital, the first matching
     * ending among ed, ing, ion, er, est, ly, ity, y, al.
     */
    private static void appendNumDashAndEnding(String word, String lowered, CaseProfile profile, StringBuilder sb) {
        if (profile.hasDigit) {
            sb.append("-NUM");
        }
        if (profile.hasDash) {
            sb.append("-DASH");
        }
        int wlen = word.length();
        if (lowered.endsWith("s") && wlen >= 3) {
            // Indexed by the original length, exactly as the scheme has always done.
            char beforeLast = lowered.charAt(wlen - 2);
            if (beforeLast != 's' && beforeLast != 'i' && beforeLast != 'u') {
                sb.append("-s");
            }
            return;
        }
        if (wlen < 5 || profile.hasDash || (profile.hasDigit && profile.numCaps > 0)) {
            return;
        }
        if (lowered.endsWith("ed")) {
            sb.append("-ed");
        } else if (lowered.endsWith("ing")) {
            sb.append("-ing");
        } else if (lowered.endsWith("ion")) {
            sb.append("-ion");
        } else if (lowered.endsWith("er")) {
            sb.append("-er");
        } else if (lowered.endsWith("est")) {
            sb.append("-est");
        } else if (lowered.endsWith("ly")) {
            sb.append("-ly");
        } else if (lowered.endsWith("ity")) {
            sb.append("-ity");
        } else if (lowered.endsWith("y")) {
            sb.append("-y");
        } else if (lowered.endsWith("al")) {
            sb.append("-al");
        }
    }

    /** Character statistics gathered in one pass for schemes 5 and 6. */
    private static final class CaseProfile {
        /** Letters that are not lower-case (title-case letters count here too). */
        int numCaps;
        boolean hasDigit;
        boolean hasDash;
        /** True if any lower-case or title-case letter was seen. */
        boolean hasLower;

        private CaseProfile() {
        }

        /** Scans {@code word} once and returns its statistics. */
        static CaseProfile of(String word) {
            CaseProfile profile = new CaseProfile();
            int wlen = word.length();
            for (int i = 0; i < wlen; ++i) {
                char ch = word.charAt(i);
                if (Character.isDigit(ch)) {
                    profile.hasDigit = true;
                } else if (ch == '-') {
                    profile.hasDash = true;
                } else if (Character.isLetter(ch)) {
                    if (Character.isLowerCase(ch)) {
                        profile.hasLower = true;
                    } else {
                        if (Character.isTitleCase(ch)) {
                            profile.hasLower = true;
                        }
                        ++profile.numCaps;
                    }
                }
            }
            return profile;
        }
    }
}

