/*
 * Decompiled with CFR 0.152.
 */
package opennlp.ccg.ngrams;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import opennlp.ccg.lexicon.DefaultTokenizer;
import opennlp.ccg.lexicon.Word;
import opennlp.ccg.ngrams.AbstractStandardNgramModel;
import opennlp.ccg.ngrams.NgramScorer;
import opennlp.ccg.ngrams.kenlm.MurmurHash;
import opennlp.ccg.ngrams.kenlm.jni.KenLM;

/**
 * N-gram scorer that delegates probability lookups to a {@link KenLM} instance.
 *
 * <p>Tokens are mapped to dense integer ids on first use (see {@link #id(String)});
 * each new id is registered with the underlying {@link KenLM} object. Before scoring,
 * word forms may optionally be lower-cased and/or split on a named-entity delimiter
 * (see {@link #splitAndLowercase(List)}).
 */
public class KenNgramModel extends AbstractStandardNgramModel {
    /** Backing language model; created once in the constructor. */
    private final KenLM kenlm;
    /** Maps the 64-bit hash of a token to the id assigned to it by {@link #id(String)}. */
    private final Map<Long, Integer> hash2ID = new HashMap<Long, Integer>();
    /** Maps the 64-bit hash of a token to the token that first produced it. */
    private final Map<Long, String> hash2String = new HashMap<Long, String>();
    /** Tokens in id order; its size is the next id to hand out. */
    private final List<String> vocabList = new ArrayList<String>();
    /** Whether word forms are lower-cased before scoring. */
    private final boolean lowercaseText;
    /** Whether word forms are split on {@link #neDelim} before scoring. */
    private final boolean splitNEs;
    /** Delimiter on which word forms are split when {@link #splitNEs} is set. */
    private final char neDelim;
    /** When true, {@link #logProb(int, int)} prints each lookup to standard output. */
    public boolean debugScore = false;

    /**
     * Creates a model with every option given explicitly.
     *
     * @param order            n-gram order, passed to the superclass and to {@link KenLM}
     * @param lmFile           language model file handed to {@link KenLM}
     * @param useSemClasses    passed through to the superclass constructor
     * @param lowercaseText    lower-case word forms before scoring
     * @param splitNEs         split word forms on {@code neDelim} before scoring
     * @param neDelim          delimiter character used when {@code splitNEs} is set
     * @param useNgramFeatures value stored in the inherited {@code useNgramFeatures} field
     * @throws IOException if constructing the {@link KenLM} instance fails
     */
    public KenNgramModel(int order, String lmFile, boolean useSemClasses, boolean lowercaseText, boolean splitNEs, char neDelim, boolean useNgramFeatures) throws IOException {
        super(order, useSemClasses);
        this.lowercaseText = lowercaseText;
        this.splitNEs = splitNEs;
        this.neDelim = neDelim;
        this.kenlm = new KenLM(order, lmFile);
        this.useNgramFeatures = useNgramFeatures;
    }

    /**
     * Creates a model without semantic classes, lower-casing, splitting or n-gram features.
     *
     * @param order  n-gram order
     * @param lmFile language model file
     * @throws IOException if constructing the {@link KenLM} instance fails
     */
    public KenNgramModel(int order, String lmFile) throws IOException {
        this(order, lmFile, false);
    }

    /**
     * Creates a model with the given semantic-class setting; lower-casing, splitting and
     * n-gram features are off and the delimiter is {@code '_'}.
     *
     * @param order         n-gram order
     * @param lmFile        language model file
     * @param useSemClasses passed through to the superclass constructor
     * @throws IOException if constructing the {@link KenLM} instance fails
     */
    public KenNgramModel(int order, String lmFile, boolean useSemClasses) throws IOException {
        this(order, lmFile, useSemClasses, false, false, '_', false);
    }

    /**
     * Returns the integer id for a token, assigning and registering a fresh id the first
     * time the token's hash is seen. The whole lookup-or-insert runs while holding this
     * object's monitor.
     *
     * <p>Ids are keyed by the 64-bit hash of the token, so two tokens with the same hash
     * share one id.
     *
     * @param token the token to look up
     * @return the id associated with the token's hash
     * @throws IllegalStateException if the token cannot be hashed
     */
    public int id(String token) {
        synchronized (this) {
            long hash;
            try {
                hash = MurmurHash.hash64(token);
            }
            catch (UnsupportedEncodingException e) {
                // Continuing with a placeholder hash would silently map every failing
                // token onto the same id, so fail loudly and keep the cause instead.
                throw new IllegalStateException("cannot hash token '" + token + "'", e);
            }
            Integer known = this.hash2ID.get(hash);
            if (known != null) {
                return known;
            }
            int id = this.vocabList.size();
            this.kenlm.registerWord(token, id);
            this.vocabList.add(token);
            this.hash2String.put(hash, token);
            this.hash2ID.put(hash, id);
            return id;
        }
    }

    /**
     * Applies the configured lower-casing and named-entity splitting to a word list.
     *
     * <p>If neither option is enabled the input list itself is returned. Otherwise a new
     * list is built: when splitting is enabled each form has the delimiter replaced by a
     * space and is split on whitespace, yielding one word per non-empty piece; when
     * lower-casing is enabled each resulting form is lower-cased. Every rebuilt word keeps
     * the source word's pitch accent, POS, supertag and semantic class, and uses the new
     * form as its stem as well.
     *
     * @param words the words to normalise
     * @return {@code words} itself when no option is enabled, otherwise a new list
     */
    protected List<Word> splitAndLowercase(List<Word> words) {
        if (!this.lowercaseText && !this.splitNEs) {
            return words;
        }
        List<Word> result = new ArrayList<Word>(words.size());
        for (Word w : words) {
            String form = w.getForm();
            if (!this.splitNEs) {
                // Lower-casing only: the form must stay in one piece.
                result.add(this.rebuild(w, form));
                continue;
            }
            for (String part : form.replace(this.neDelim, ' ').split("\\s+")) {
                // split() yields a leading "" when the form starts with the delimiter;
                // an empty form is not a token, so skip it.
                if (part.isEmpty()) {
                    continue;
                }
                result.add(this.rebuild(w, part));
            }
        }
        return result;
    }

    /** Copies {@code source} with a new form (lower-cased if configured), reusing the form as the stem. */
    private Word rebuild(Word source, String form) {
        // Locale.ROOT keeps the mapping independent of the JVM's default locale.
        String newForm = this.lowercaseText ? form.toLowerCase(Locale.ROOT) : form;
        return Word.createWord(newForm, source.getPitchAccent(), null, newForm, source.getPOS(), source.getSupertag(), source.getSemClass());
    }

    /**
     * Resets the pending words and the {@code tagsAdded} flag, then hands the normalised
     * words (see {@link #splitAndLowercase(List)}) to the superclass implementation.
     *
     * @param words    the words to score
     * @param complete passed through to the superclass
     */
    @Override
    protected void setWordsToScore(List<Word> words, boolean complete) {
        this.wordsToScore.clear();
        this.tagsAdded = false;
        super.setWordsToScore(this.splitAndLowercase(words), complete);
    }

    /**
     * Scores the keys in {@code keysList} from {@code pos} (inclusive) to {@code pos + len}
     * (exclusive) by converting them to ids and calling {@code kenlm.prob}.
     *
     * @param pos start index into {@code keysList}
     * @param len number of keys to score
     * @return the value returned by {@code kenlm.prob}, or {@code 0.0f} if an
     *         {@link IndexOutOfBoundsException} is raised while scoring (for example when
     *         the requested range lies outside {@code keysList})
     * @throws IllegalArgumentException if the requested range is empty
     */
    @Override
    public float logProb(int pos, int len) {
        try {
            List<String> range = new ArrayList<String>(this.keysList.size());
            for (Object key : this.keysList.subList(pos, pos + len)) {
                range.add((String)key);
            }
            int rangeSize = range.size();
            if (rangeSize == 0) {
                throw new IllegalArgumentException("empty range specified for log prob");
            }
            int[] wds = new int[rangeSize];
            for (int i = 0; i < rangeSize; ++i) {
                wds[i] = this.id(range.get(i));
            }
            float result = this.kenlm.prob(wds);
            if (this.debugScore) {
                // The last key is the predicted word; everything before it is context.
                String wd = range.get(rangeSize - 1);
                StringBuilder context = new StringBuilder();
                for (String contextWord : range.subList(0, rangeSize - 1)) {
                    context.append(' ').append(contextWord);
                }
                System.out.println("logp(" + wd + " | " + context.toString().trim() + ") = " + result);
            }
            return result;
        }
        catch (IndexOutOfBoundsException e) {
            // Out-of-range requests are scored as 0 rather than propagated.
            return 0.0f;
        }
    }

    /**
     * Command-line entry point: loads a model and prints the score, log probability and
     * perplexity of a token string.
     *
     * <p>Lower-casing is enabled when the token string contains no upper-case character,
     * and named-entity splitting is enabled when it contains {@code '_'}.
     *
     * @param args {@code <order> <lmfile> <tokens>}, or {@code -h} for usage
     * @throws IOException if the model cannot be loaded
     */
    public static void main(String[] args) throws IOException {
        String usage = "Usage: java opennlp.ccg.ngrams.KenNgramModel <order> <lmfile> <tokens>";
        if (args.length > 0 && args[0].equals("-h")) {
            System.out.println(usage);
            System.exit(0);
        }
        if (args.length < 3) {
            // Without this check the argument reads below fail with an index error.
            System.err.println(usage);
            System.exit(1);
        }
        long start = System.currentTimeMillis();
        String order = args[0];
        String lmfile = args[1];
        String tokens = args[2];
        boolean lowercase = true;
        boolean splitNEs = false;
        for (char c : tokens.toCharArray()) {
            if (c == '_') {
                splitNEs = true;
            } else if (Character.isUpperCase(c)) {
                lowercase = false;
            }
            // Both decisions are final once made; no need to scan further.
            if (splitNEs && !lowercase) {
                break;
            }
        }
        System.out.println("Loading n-gram model with order " + order + " from: " + lmfile);
        KenNgramModel lm = new KenNgramModel(Integer.parseInt(order), lmfile, false, lowercase, splitNEs, '_', false);
        lm.debugScore = true;
        // Divide as long first, then narrow, so the cast cannot truncate the millisecond count.
        int secs = (int)((System.currentTimeMillis() - start) / 1000L);
        System.out.println("secs: " + secs);
        System.out.println();
        DefaultTokenizer tokenizer = new DefaultTokenizer();
        List<Word> words = tokenizer.tokenize(tokens);
        System.out.println("scoring: " + tokens);
        System.out.println();
        lm.setWordsToScore(words, true);
        lm.prepareToScoreWords();
        double logprob = lm.logprob();
        double score = KenNgramModel.convertToProb(logprob);
        System.out.println();
        System.out.println("score: " + score);
        System.out.println("logprob: " + logprob);
        int size = lm.splitAndLowercase(words).size();
        // NOTE(review): the divisor is size - 1, so a single-token input divides by zero
        // (a non-finite double); confirm the intended normalisation.
        System.out.println("ppl: " + NgramScorer.convertToPPL(logprob / (double)(size - 1)));
    }
}

