/*
 * Decompiled with CFR 0.152.
 */
package opennlp.ccg.ngrams;

import gnu.trove.THashSet;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.util.List;
import java.util.Set;
import opennlp.ccg.lexicon.DefaultTokenizer;
import opennlp.ccg.lexicon.Word;
import opennlp.ccg.ngrams.NgramFilter;
import opennlp.ccg.ngrams.NgramScorer;
import opennlp.ccg.ngrams.Reversible;
import opennlp.ccg.util.ArrayListWithIdentityEquals;
import opennlp.ccg.util.Interner;
import opennlp.ccg.util.SingletonList;
import opennlp.ccg.util.StructureSharingList;

/**
 * N-gram filter that rejects word sequences in which the indefinite article
 * ("a" / "an") disagrees with the word next to it.
 *
 * <p>By default a bigram is filtered out when it is "a" followed by a word whose
 * first letter is a vowel letter, or "an" followed by a word whose first letter
 * is not. Bigrams registered as exceptions (see {@link #addException}) invert
 * that default verdict.
 *
 * <p>Exception lookups reuse a single scratch list held in an instance field, so
 * concurrent calls to {@code filterOut} on the same instance would interfere
 * with each other.
 */
public class AAnFilter implements NgramFilter, Reversible {
    /** Surface form of the article expected before a non-vowel-initial word. */
    private static final String A_FORM = "a";
    /** Surface form of the article expected before a vowel-initial word. */
    private static final String AN_FORM = "an";
    /** Letters treated as vowels when inspecting the first character of a word. */
    private static final String VOWELS = "aeiouAEIOU";

    /** Registered exception bigrams; stays {@code null} until the first one is added. */
    private Set<List<Word>> exceptions = null;
    /** When true, each bigram is read as (following word, article) instead of (article, following word). */
    protected boolean reverse = false;
    private static final Word A_WORD = Word.createWord("a");
    private static final Word AN_WORD = Word.createWord("an");
    /** Scratch two-element key reused by every exception lookup to avoid allocating per call. */
    private final List<Word> keyList = new ArrayListWithIdentityEquals<Word>(2);
    private static final List<Word> A_SINGLETON = (List)Interner.globalIntern(new SingletonList<Word>(A_WORD));
    private static final List<Word> AN_SINGLETON = (List)Interner.globalIntern(new SingletonList<Word>(AN_WORD));

    /** Creates a filter with no exceptions. */
    public AAnFilter() {
    }

    /**
     * Creates a filter and loads its exceptions from the given file.
     *
     * @param infile path of the exceptions file, read via {@link #loadAAnExceptions}
     * @throws IOException if the file cannot be opened or read
     */
    public AAnFilter(String infile) throws IOException {
        this.loadAAnExceptions(infile);
    }

    /** Returns whether bigrams are interpreted in reverse order. */
    @Override
    public boolean getReverse() {
        return this.reverse;
    }

    /** Sets whether bigrams are interpreted in reverse order. */
    @Override
    public void setReverse(boolean reverse) {
        this.reverse = reverse;
    }

    /**
     * Checks every adjacent pair in the sequence.
     *
     * @param words the word sequence to check
     * @return true as soon as one adjacent pair is filtered out; false otherwise,
     *         including for sequences with fewer than two words
     */
    @Override
    public boolean filterOut(List<Word> words) {
        int lastStart = words.size() - 1;
        for (int i = 0; i < lastStart; ++i) {
            if (this.filterOut(words.get(i), words.get(i + 1))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks a single bigram.
     *
     * @param w1 first word of the pair (the article, unless {@code reverse} is set)
     * @param w2 second word of the pair
     * @return true if the pair should be filtered out; false if the article
     *         position does not hold "a"/"an", if the other word has no form,
     *         or if the pair is acceptable
     */
    public boolean filterOut(Word w1, Word w2) {
        if (this.reverse) {
            Word tmp = w1;
            w1 = w2;
            w2 = tmp;
        }
        String f1 = w1.getForm();
        boolean isA = A_FORM.equals(f1);
        if (!isA && !AN_FORM.equals(f1)) {
            return false;
        }
        String f2 = w2.getForm();
        // Without a first letter there is nothing to judge agreement against.
        if (f2 == null || f2.isEmpty()) {
            return false;
        }
        String f1Alt = isA ? AN_FORM : A_FORM;
        boolean defaultRetval = AAnFilter.filterOutByDefault(f1, f2);
        // An exception listed under either article flips the default verdict.
        if (this.isException(f1, f2) || this.isException(f1Alt, f2)) {
            return !defaultRetval;
        }
        return defaultRetval;
    }

    /**
     * Applies the spelling-based default rule, ignoring exceptions.
     *
     * @param w1 the article form
     * @param w2 the form of the word next to the article
     * @return true for "a" + vowel-initial word or "an" + non-vowel-initial word;
     *         false otherwise, including when {@code w2} is null or empty
     */
    private static boolean filterOutByDefault(String w1, String w2) {
        if (w2 == null || w2.isEmpty()) {
            return false;
        }
        boolean w2StartsWithVowel = AAnFilter.startsWithVowel(w2);
        return A_FORM.equals(w1) && w2StartsWithVowel || AN_FORM.equals(w1) && !w2StartsWithVowel;
    }

    /** Returns whether the first character of a non-empty word is one of a, e, i, o, u in either case. */
    private static boolean startsWithVowel(String word) {
        return VOWELS.indexOf(word.charAt(0)) >= 0;
    }

    /**
     * Looks up the bigram in the registered exceptions.
     *
     * @param w1 the article form; anything other than "a" is looked up as "an"
     * @param w2 the form of the word next to the article
     * @return true if the bigram has been registered; false if not or if no
     *         exceptions have been added
     */
    private boolean isException(String w1, String w2) {
        if (this.exceptions == null) {
            return false;
        }
        this.keyList.clear();
        this.keyList.add(A_FORM.equals(w1) ? A_WORD : AN_WORD);
        this.keyList.add(Word.createWord(w2));
        return this.exceptions.contains(this.keyList);
    }

    /**
     * Registers a bigram whose default verdict should be inverted. Bigrams whose
     * first word is neither "a" nor "an" are ignored with a warning on stderr.
     *
     * @param w1 the article form
     * @param w2 the form of the word next to the article
     */
    public void addException(String w1, String w2) {
        boolean isA = A_FORM.equals(w1);
        if (!isA && !AN_FORM.equals(w1)) {
            System.err.println("Warning: ignoring exception not starting with a/an: " + w1 + " " + w2);
            return;
        }
        if (this.exceptions == null) {
            this.exceptions = new THashSet();
        }
        List<Word> w1Singleton = isA ? A_SINGLETON : AN_SINGLETON;
        List w2Singleton = (List)Interner.globalIntern(new SingletonList<Word>(Word.createWord(w2)));
        List excBigram = (List)Interner.globalIntern(new StructureSharingList<Word>(w1Singleton, w2Singleton));
        this.exceptions.add(excBigram);
    }

    /**
     * Reads bigrams from {@code infile} and writes to {@code outfile}, one per
     * line as "article word", those that start with "a"/"an" and that the
     * default rule would filter out.
     *
     * @param infile  path of the bigrams file
     * @param outfile path of the exceptions file to write
     * @throws IOException if either file cannot be opened, or reading fails
     */
    public static void cullAAnExceptions(String infile, String outfile) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(infile));
        try {
            StreamTokenizer tokenizer = NgramScorer.initTokenizer(in);
            PrintWriter out = new PrintWriter(new FileWriter(outfile));
            try {
                String[] tokens = new String[2];
                while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
                    // NOTE(review): readLine presumably fills tokens from the next input
                    // line and leaves unused slots null - confirm against NgramScorer.
                    NgramScorer.readLine(tokenizer, tokens);
                    String aan = tokens[0];
                    String word = tokens[1];
                    if (word == null) {
                        continue;
                    }
                    if (!A_FORM.equals(aan) && !AN_FORM.equals(aan)) {
                        continue;
                    }
                    if (!AAnFilter.filterOutByDefault(aan, word)) {
                        continue;
                    }
                    out.println(aan + " " + word);
                }
                out.flush();
            } finally {
                // Closed even when reading fails, so the file handle is not leaked.
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Loads exception bigrams from a file, passing the first two tokens of each
     * line that has a second token to {@link #addException}.
     *
     * @param infile path of the exceptions file
     * @throws IOException if the file cannot be opened or read
     */
    public void loadAAnExceptions(String infile) throws IOException {
        BufferedReader in = new BufferedReader(new FileReader(infile));
        try {
            StreamTokenizer tokenizer = NgramScorer.initTokenizer(in);
            String[] tokens = new String[2];
            while (tokenizer.ttype != StreamTokenizer.TT_EOF) {
                NgramScorer.readLine(tokenizer, tokens);
                if (tokens[1] == null) {
                    continue;
                }
                this.addException(tokens[0], tokens[1]);
            }
        } finally {
            // Closed even when reading fails, so the file handle is not leaked.
            in.close();
        }
    }

    /**
     * Command-line entry point. With {@code -c <bigramsfile> <exceptionsfile>}
     * culls exceptions from a bigrams file; otherwise tokenizes the given string
     * (optionally after loading an exceptions file) and prints the filter verdict.
     * Prints the usage message and exits with status 1 when required arguments
     * are missing.
     *
     * @param args command-line arguments
     * @throws IOException if a named file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        String usage = "Usage: java opennlp.ccg.ngrams.AAnFilter (<exceptionsfile>) <tokens> | -c <bigramsfile> <exceptionsfile>";
        if (args.length == 0) {
            System.err.println(usage);
            System.exit(1);
        }
        if (args[0].equals("-h")) {
            System.out.println(usage);
            System.exit(0);
        }
        if (args[0].equals("-c")) {
            if (args.length < 3) {
                System.err.println(usage);
                System.exit(1);
            }
            String infile = args[1];
            String outfile = args[2];
            System.out.println("Culling a/an exceptions from " + infile + " to " + outfile);
            AAnFilter.cullAAnExceptions(infile, outfile);
            System.exit(0);
        }
        AAnFilter aanFilter = new AAnFilter();
        String infile = null;
        String tokens;
        if (args.length >= 2) {
            infile = args[0];
            tokens = args[1];
        } else {
            tokens = args[0];
        }
        if (infile != null) {
            System.out.println("Loading exceptions from: " + infile);
            System.out.println();
            aanFilter.loadAAnExceptions(infile);
        }
        DefaultTokenizer tokenizer = new DefaultTokenizer();
        List<Word> words = tokenizer.tokenize(tokens);
        System.out.println("filtering: " + tokens);
        System.out.println("filter out: " + aanFilter.filterOut(words));
    }
}

