/*
 * Decompiled with CFR 0.152.
 */
package opennlp.ccg.lexicon;

import gnu.trove.THashSet;
import gnu.trove.TObjectHashingStrategy;
import gnu.trove.TObjectIdentityHashingStrategy;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Currency;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.Duration;
import opennlp.ccg.grammar.Grammar;
import opennlp.ccg.lexicon.EnglishExpander;
import opennlp.ccg.lexicon.Tokenizer;
import opennlp.ccg.lexicon.Word;
import opennlp.ccg.util.Pair;

/**
 * Default {@link Tokenizer} implementation.
 *
 * <p>Input strings are split on whitespace and each token is parsed into a {@link Word}.
 * A token may be a plain word form, or a factored token in which colon-separated
 * {@code attr-val} factors carry the form ({@code W}), stem ({@code S}), part of speech
 * ({@code P}), supertag ({@code T}), semantic class ({@code C}), pitch accent ({@code A})
 * or arbitrary further attributes.  Reserved characters inside factors are escaped with
 * {@link #escape(String)} and restored with {@link #unescape(String)}.
 *
 * <p>The tokenizer also recognizes special tokens: dates ({@code yyyy.MM.dd} or
 * {@code *.MM.dd}), times ({@code HH:mm}), numbers, currency amounts and durations.
 * Named-entity recognition is a hook that always answers {@code false} here.
 *
 * <p>Instances hold {@link SimpleDateFormat} objects as fields and perform no
 * synchronization of their own.
 */
public class DefaultTokenizer implements Tokenizer {

    /** Strict parser for full dates, pattern {@code yyyy.MM.dd}. */
    private final DateFormat dateFormat;

    /** Strict parser for dates without a year, pattern {@code *.MM.dd}. */
    private final DateFormat dateFormatNoYear;

    /** Strict parser for times, pattern {@code HH:mm}. */
    private final DateFormat timeFormat;

    /** Factory used to parse duration tokens. */
    private final DatatypeFactory datatypeFactory;

    /** Maps a special token's semantic class to its constant, e.g. {@code "date"} to {@code "[*DATE*]"}. */
    protected Map<String, String> specialTokenMap = null;

    /**
     * Semantic classes whose words are replaced by the class name in language-model output.
     * The set uses an identity hashing strategy, so membership tests only succeed for the
     * very same {@code String} instance that was added (entries are interned on insertion).
     */
    protected Set<String> replacementSemClasses = new THashSet((TObjectHashingStrategy)new TObjectIdentityHashingStrategy());

    /**
     * Sets up the non-lenient date and time parsers, the duration factory and the
     * map of special token constants.
     *
     * @throws RuntimeException if no {@link DatatypeFactory} can be instantiated
     */
    public DefaultTokenizer() {
        this.dateFormat = new SimpleDateFormat("yyyy.MM.dd", Locale.ENGLISH);
        this.dateFormat.setLenient(false);
        this.dateFormatNoYear = new SimpleDateFormat("*.MM.dd", Locale.ENGLISH);
        this.dateFormatNoYear.setLenient(false);
        this.timeFormat = new SimpleDateFormat("HH:mm", Locale.ENGLISH);
        this.timeFormat.setLenient(false);
        try {
            this.datatypeFactory = DatatypeFactory.newInstance();
        }
        catch (DatatypeConfigurationException exc) {
            throw new RuntimeException(exc);
        }
        this.specialTokenMap = new HashMap<String, String>();
        this.specialTokenMap.put("date", "[*DATE*]");
        this.specialTokenMap.put("time", "[*TIME*]");
        this.specialTokenMap.put("num", "[*NUM*]");
        this.specialTokenMap.put("amt", "[*AMT*]");
        this.specialTokenMap.put("dur", "[*DUR*]");
        this.specialTokenMap.put("ne", "[*NE*]");
    }

    /**
     * Registers a semantic class as a replacement class.  The name is interned
     * before it is stored.
     *
     * @param semClass the semantic class name; must not be {@code null}
     */
    @Override
    public void addReplacementSemClass(String semClass) {
        this.replacementSemClasses.add(semClass.intern());
    }

    /**
     * Tells whether the given semantic class has been registered as a replacement class.
     * The lookup is identity based, so the argument has to be the interned instance
     * for a registered class to be found.
     *
     * @param semClass the (interned) semantic class name
     * @return {@code true} if the set of replacement classes contains this instance
     */
    @Override
    public boolean isReplacementSemClass(String semClass) {
        return this.replacementSemClasses.contains(semClass);
    }

    /**
     * Tokenizes a string without strict factor parsing.
     *
     * @param s the whitespace-delimited input
     * @return the parsed words, in input order
     */
    @Override
    public List<Word> tokenize(String s) {
        return this.tokenize(s, false);
    }

    /**
     * Splits the input on whitespace and parses every token with
     * {@link #parseToken(String, boolean)}.
     *
     * @param s the whitespace-delimited input
     * @param strictFactors whether every token is to be parsed as a factored token
     * @return the parsed words, in input order
     */
    @Override
    public List<Word> tokenize(String s, boolean strictFactors) {
        List<Word> words = new ArrayList<Word>();
        StringTokenizer st = new StringTokenizer(s);
        while (st.hasMoreTokens()) {
            words.add(this.parseToken(st.nextToken(), strictFactors));
        }
        return words;
    }

    /**
     * Parses a single token without strict factor parsing.
     *
     * @param token the token to parse
     * @return the resulting word
     */
    @Override
    public Word parseToken(String token) {
        return this.parseToken(token, false);
    }

    /**
     * Parses a single token into a word.
     *
     * <p>The token is treated as factored when {@code strictFactors} is set, or when it
     * contains both a colon and a hyphen past its first character.  In a factored token
     * the text before the first colon is the form if the first hyphen follows that colon;
     * otherwise the whole token is read as a sequence of {@code attr-val} factors.  After
     * factor parsing, a trailing {@code _accent} on the form is split off when
     * {@link Grammar#isPitchAccent} accepts it, the form is unescaped, and a special token
     * class found by {@link #isSpecialToken(String)} overrides the semantic class.
     *
     * @param token the token to parse; must not be {@code null}
     * @param strictFactors whether to force factored parsing
     * @return the word built from the parsed factors
     * @throws StringIndexOutOfBoundsException if a factor being parsed has no hyphen
     *         separating attribute and value, or its colon precedes that hyphen
     */
    @Override
    public Word parseToken(String token, boolean strictFactors) {
        String form = token;
        String pitchAccent = null;
        ArrayList<Pair<String, String>> attrValPairs = null;
        String stem = null;
        String POS = null;
        String supertag = null;
        String semClass = null;

        int colonPos = token.indexOf(':');
        int hyphenPos = token.indexOf('-');
        if (strictFactors || (colonPos > 0 && hyphenPos > 0)) {
            String remaining;
            if (colonPos > 0 && hyphenPos > colonPos) {
                // "form:attr-val..." -- the leading part is the word form
                form = token.substring(0, colonPos);
                remaining = token.substring(colonPos + 1);
            } else if (colonPos < 0 && hyphenPos < 0) {
                // no factors at all
                form = token;
                remaining = null;
            } else {
                // everything is attr-val factors; the form must come from a W factor
                form = null;
                remaining = token;
            }
            while (remaining != null) {
                hyphenPos = remaining.indexOf('-');
                String attr = remaining.substring(0, hyphenPos);
                colonPos = remaining.indexOf(':');
                String val;
                if (colonPos > 0) {
                    val = remaining.substring(hyphenPos + 1, colonPos);
                    remaining = remaining.substring(colonPos + 1);
                } else {
                    // last factor: the value runs to the end of the token
                    val = remaining.substring(hyphenPos + 1);
                    remaining = null;
                }
                attr = unescape(attr);
                val = unescape(val);
                if (attr.equals("W")) {
                    form = val;
                } else if (attr.equals("S")) {
                    stem = val;
                } else if (attr.equals("P")) {
                    POS = val;
                } else if (attr.equals("T")) {
                    supertag = val;
                } else if (attr.equals("C")) {
                    semClass = val;
                } else if (attr.equals("A")) {
                    pitchAccent = val;
                } else {
                    if (attrValPairs == null) {
                        attrValPairs = new ArrayList<Pair<String, String>>(5);
                    }
                    attrValPairs.add(new Pair<String, String>(attr, val));
                }
            }
        }

        // split off a pitch accent attached to the form with an underscore
        int underscorePos = form != null ? form.lastIndexOf("_") : -1;
        if (underscorePos > 0) {
            String accent = form.substring(underscorePos + 1);
            if (Grammar.isPitchAccent(accent)) {
                pitchAccent = accent;
                form = form.substring(0, underscorePos);
            }
        }

        // a literal "null" form is kept as is; unescape would map it to a null reference
        if (!"null".equals(form)) {
            form = unescape(form);
        }

        String specialTokenClass = this.isSpecialToken(form);
        if (specialTokenClass != null) {
            semClass = specialTokenClass;
        }
        return Word.createWord(form, pitchAccent, attrValPairs, stem, POS, supertag, semClass);
    }

    /**
     * Classifies a token as a special token.  The checks run in the order date, time,
     * number, amount, duration, named entity, and the first match wins.
     *
     * @param token the token to classify; may be {@code null}
     * @return {@code "date"}, {@code "time"}, {@code "num"}, {@code "amt"}, {@code "dur"}
     *         or {@code "ne"}, or {@code null} if the token is {@code null} or not special
     */
    @Override
    public String isSpecialToken(String token) {
        if (token == null) {
            return null;
        }
        if (this.isDate(token)) {
            return "date";
        }
        if (this.isTime(token)) {
            return "time";
        }
        if (this.isNum(token)) {
            return "num";
        }
        if (this.isAmt(token)) {
            return "amt";
        }
        if (this.isDur(token)) {
            return "dur";
        }
        if (this.isNamedEntity(token)) {
            return "ne";
        }
        return null;
    }

    /**
     * Looks up the constant that stands for a special token class.
     *
     * @param semClass the semantic class; may be {@code null}
     * @return the mapped constant, or {@code null} if the class is {@code null} or unmapped
     */
    @Override
    public String getSpecialTokenConstant(String semClass) {
        if (semClass == null) {
            return null;
        }
        return this.specialTokenMap.get(semClass);
    }

    /**
     * Tells whether the string is one of the special token constants.
     *
     * @param s the string to test
     * @return {@code true} if some special token class maps to {@code s}
     */
    @Override
    public boolean isSpecialTokenConstant(String s) {
        return this.specialTokenMap.containsValue(s);
    }

    /**
     * Tells whether the whole token parses as a date, either {@code yyyy.MM.dd}
     * or {@code *.MM.dd}.
     *
     * @param token the token to test
     * @return {@code true} if one of the two patterns consumes the entire token
     */
    @Override
    public boolean isDate(String token) {
        ParsePosition pos = new ParsePosition(0);
        Date date = this.dateFormat.parse(token, pos);
        if (date != null && pos.getIndex() == token.length()) {
            return true;
        }
        pos = new ParsePosition(0);
        date = this.dateFormatNoYear.parse(token, pos);
        return date != null && pos.getIndex() == token.length();
    }

    /**
     * Tells whether the whole token parses as a time of the form {@code HH:mm}.
     *
     * @param token the token to test
     * @return {@code true} if the pattern consumes the entire token
     */
    @Override
    public boolean isTime(String token) {
        ParsePosition pos = new ParsePosition(0);
        Date time = this.timeFormat.parse(token, pos);
        return time != null && pos.getIndex() == token.length();
    }

    /**
     * Tells whether the token is a number: anything accepted by
     * {@link Integer#parseInt(String)}, or anything accepted by
     * {@link Double#parseDouble(String)} that contains neither {@code 'e'} nor {@code 'E'}.
     *
     * @param token the token to test
     * @return {@code true} if the token counts as a number
     */
    @Override
    public boolean isNum(String token) {
        try {
            Integer.parseInt(token);
            return true;
        }
        catch (NumberFormatException ignored) {
            // not an int; fall through to the floating-point check
        }
        try {
            Double.parseDouble(token);
        }
        catch (NumberFormatException exc) {
            return false;
        }
        // reject anything containing an 'e' or 'E', such as exponent notation
        return token.indexOf('E') == -1 && token.indexOf('e') == -1;
    }

    /**
     * Tells whether the token is an amount: a number followed by a three-letter
     * currency code known to {@link Currency#getInstance(String)}.  Whitespace
     * around the numeric part is ignored.
     *
     * @param token the token to test
     * @return {@code true} if the token is at least four characters long and splits
     *         into a number and a currency code
     */
    @Override
    public boolean isAmt(String token) {
        if (token.length() < 4) {
            return false;
        }
        int codeStart = token.length() - 3;
        try {
            Currency.getInstance(token.substring(codeStart));
        }
        catch (IllegalArgumentException exc) {
            return false;
        }
        return this.isNum(token.substring(0, codeStart).trim());
    }

    /**
     * Tells whether the token is a duration accepted by
     * {@link DatatypeFactory#newDuration(String)}.
     *
     * @param token the token to test
     * @return {@code true} if the factory parses the token, {@code false} if it throws
     */
    public boolean isDur(String token) {
        try {
            this.datatypeFactory.newDuration(token);
            return true;
        }
        catch (RuntimeException exc) {
            // newDuration declares no checked exceptions; any failure means "not a duration"
            return false;
        }
    }

    /**
     * Hook for named-entity recognition.  This implementation recognizes none.
     *
     * @param token the token to test
     * @return always {@code false}
     */
    @Override
    public boolean isNamedEntity(String token) {
        return false;
    }

    /**
     * Returns the orthography of a word list without semantic class replacement.
     *
     * @param words the words to render
     * @return the space-separated orthography
     */
    @Override
    public String getOrthography(List<Word> words) {
        return this.getOrthography(words, false);
    }

    /**
     * Returns the orthography of the words, separated by single spaces.
     *
     * @param words the words to render
     * @param semClassReplacement whether forms are replaced by their semantic class
     *        where the class is a registered replacement class
     * @return the space-separated orthography
     */
    @Override
    public String getOrthography(List<Word> words, boolean semClassReplacement) {
        StringBuilder sb = new StringBuilder();
        int count = words.size();
        for (int i = 0; i < count; ++i) {
            if (i > 0) {
                sb.append(" ");
            }
            sb.append(this.getOrthography(words.get(i), semClassReplacement));
        }
        return sb.toString();
    }

    /**
     * Returns the orthography of one word: its form (or the upper-cased semantic class when
     * replacement applies), followed by {@code _accent} if there is a pitch accent and
     * {@code _value} for the value of every attribute-value pair.
     *
     * @param w the word to render
     * @param semClassReplacement whether the form is replaced by the semantic class
     *        when that class is a registered replacement class
     * @return the orthography of the word
     */
    @Override
    public String getOrthography(Word w, boolean semClassReplacement) {
        StringBuilder sb = new StringBuilder();
        String semClass = w.getSemClass();
        if (semClassReplacement && semClass != null && this.replacementSemClasses.contains(semClass)) {
            // fixed locale: the result must not vary with the JVM's default locale
            sb.append(semClass.toUpperCase(Locale.ENGLISH));
        } else {
            sb.append(w.getForm());
        }
        if (w.getPitchAccent() != null) {
            sb.append("_").append(w.getPitchAccent());
        }
        Iterator<Pair<String, String>> it = w.getAttrValPairs();
        while (it.hasNext()) {
            Pair<String, String> p = it.next();
            sb.append("_").append((String)p.b);
        }
        return sb.toString();
    }

    /**
     * Formats a word list without semantic class replacement.
     *
     * @param words the words to format
     * @return the formatted sentence
     */
    @Override
    public String format(List<Word> words) {
        return this.format(words, false);
    }

    /**
     * Formats the words as one sentence wrapped in {@code <s>} and {@code </s>}.
     * Words whose form already is {@code <s>} or {@code </s>} are left out, so the
     * delimiters are never doubled.
     *
     * @param words the words to format
     * @param semClassReplacement whether forms are replaced by their semantic class
     *        where the class is a registered replacement class
     * @return the formatted sentence
     */
    @Override
    public String format(List<Word> words, boolean semClassReplacement) {
        StringBuilder sb = new StringBuilder();
        sb.append("<s> ");
        for (Word w : words) {
            String form = w.getForm();
            // compare by value; reference equality only works for interned forms
            if ("<s>".equals(form) || "</s>".equals(form)) {
                continue;
            }
            sb.append(this.format(w, semClassReplacement));
            sb.append(" ");
        }
        sb.append("</s>");
        return sb.toString();
    }

    /**
     * Formats a word without semantic class replacement.
     *
     * @param w the word to format
     * @return the factored token
     */
    @Override
    public String format(Word w) {
        return this.format(w, false);
    }

    /**
     * Formats a word as a factored token: the escaped form followed by colon-separated
     * {@code attr-val} factors, in the order pitch accent ({@code A}), attribute-value
     * pairs, stem ({@code S}), part of speech ({@code P}), supertag ({@code T}) and
     * semantic class ({@code C}).  Factors whose value is {@code null} are omitted.
     *
     * @param w the word to format
     * @param semClassReplacement whether form and stem are replaced by the upper-cased
     *        semantic class when that class is a registered replacement class
     * @return the factored token
     */
    @Override
    public String format(Word w, boolean semClassReplacement) {
        String form = w.getForm();
        String pitchAccent = w.getPitchAccent();
        String stem = w.getStem();
        String POS = w.getPOS();
        String supertag = w.getSupertag();
        String semClass = w.getSemClass();
        if (semClassReplacement && semClass != null && this.replacementSemClasses.contains(semClass)) {
            // NOTE(review): the replacement is escaped here and again below, so a class name
            // containing a reserved character ends up escaped twice -- confirm this is intended.
            form = escape(semClass.toUpperCase(Locale.ENGLISH));
            stem = form;
        }

        StringBuilder sb = new StringBuilder();
        sb.append(escape(form));
        if (pitchAccent != null) {
            sb.append(":").append("A").append("-").append(escape(pitchAccent));
        }
        Iterator<Pair<String, String>> it = w.getAttrValPairs();
        while (it.hasNext()) {
            Pair<String, String> p = it.next();
            String attr = (String)p.a;
            String val = (String)p.b;
            if (val == null) {
                continue;
            }
            sb.append(":").append(escape(attr)).append("-").append(escape(val));
        }
        if (stem != null) {
            sb.append(":").append("S").append("-").append(escape(stem));
        }
        if (POS != null) {
            sb.append(":").append("P").append("-").append(escape(POS));
        }
        if (supertag != null) {
            sb.append(":").append("T").append("-").append(escape(supertag));
        }
        if (semClass != null) {
            sb.append(":").append("C").append("-").append(escape(semClass));
        }
        return sb.toString();
    }

    /**
     * Escapes a string for use inside a factored token.  The characters
     * {@code < > & ' "} become the named entities {@code &lt; &gt; &amp; &apos; &quot;},
     * and the factor delimiters {@code :} and {@code -} become {@code &#58;} and
     * {@code &#45;}.  A string starting with {@code "null"} gets an extra {@code "null"}
     * prefix so that it stays distinguishable from a missing value.
     *
     * @param s the string to escape; may be {@code null}
     * @return the escaped string, {@code s} itself if nothing needed escaping,
     *         or {@code null} if {@code s} is {@code null}
     */
    public static String escape(String s) {
        if (s == null) {
            return null;
        }
        // the buffer is only allocated once something actually has to change
        StringBuilder output = null;
        if (s.startsWith("null")) {
            output = new StringBuilder();
            output.append("null");
        }
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            String replacement;
            switch (c) {
                case '<':
                    replacement = "&lt;";
                    break;
                case '>':
                    replacement = "&gt;";
                    break;
                case '&':
                    replacement = "&amp;";
                    break;
                case '\'':
                    replacement = "&apos;";
                    break;
                case '\"':
                    replacement = "&quot;";
                    break;
                case ':':
                    replacement = "&#58;";
                    break;
                case '-':
                    replacement = "&#45;";
                    break;
                default:
                    replacement = null;
                    break;
            }
            if (replacement != null) {
                if (output == null) {
                    // first reserved character: copy the untouched prefix
                    output = new StringBuilder();
                    output.append(s.substring(0, i));
                }
                output.append(replacement);
            } else if (output != null) {
                output.append(c);
            }
        }
        return output != null ? output.toString() : s;
    }

    /**
     * Reverses {@link #escape(String)}.  The literal {@code "null"} maps to a
     * {@code null} reference, a leading {@code "nullnull"} loses its first four
     * characters, named entities are decoded, and {@code &#N;} is decoded as the
     * character with decimal code {@code N}.  An ampersand with no semicolon after
     * it is copied unchanged.
     *
     * @param s the string to unescape; may be {@code null}
     * @return the unescaped string, {@code s} itself if nothing needed unescaping,
     *         or {@code null} if {@code s} is {@code null} or equals {@code "null"}
     * @throws NumberFormatException if the digits of a {@code &#N;} reference do not
     *         parse as a decimal integer
     * @throws RuntimeException if an {@code &...;} sequence is not a known entity
     */
    public static String unescape(String s) {
        if (s == null || s.equals("null")) {
            return null;
        }
        StringBuilder output = null;
        if (s.startsWith("nullnull")) {
            // drop the extra "null" that escape() prepended
            s = s.substring(4);
            output = new StringBuilder();
        }
        for (int i = 0; i < s.length(); ++i) {
            char c = s.charAt(i);
            int endPos = c == '&' ? s.indexOf(';', i) : -1;
            if (endPos < 0) {
                // ordinary character, or an ampersand that starts no entity
                if (output != null) {
                    output.append(c);
                }
                continue;
            }
            if (output == null) {
                // first entity: copy the untouched prefix
                output = new StringBuilder();
                output.append(s.substring(0, i));
            }
            String escaped = s.substring(i + 1, endPos);
            if (escaped.equals("lt")) {
                output.append('<');
            } else if (escaped.equals("gt")) {
                output.append('>');
            } else if (escaped.equals("amp")) {
                output.append('&');
            } else if (escaped.equals("apos")) {
                output.append('\'');
            } else if (escaped.equals("quot")) {
                output.append('\"');
            } else if (s.charAt(i + 1) == '#') {
                output.append((char)Integer.parseInt(s.substring(i + 2, endPos)));
            } else {
                throw new RuntimeException("Unable to unescape " + s.substring(i, endPos + 1) + " at position " + i + " in: " + s);
            }
            // resume after the terminating semicolon
            i = endPos;
        }
        return output != null ? output.toString() : s;
    }

    /**
     * Expands a word into the list of words it is spelled out as.  When the word's semantic
     * class is one of the special token classes and its form passes the matching check, the
     * corresponding {@code expand...} method is used; otherwise the form is split at
     * underscores.
     *
     * @param word the word to expand
     * @return the expansion of the word
     */
    @Override
    public List<String> expandWord(Word word) {
        String token = word.getForm();
        String sc = word.getSemClass();
        // compare by value; reference equality only works for interned class names
        if ("date".equals(sc) && this.isDate(token)) {
            return this.expandDate(token);
        }
        if ("time".equals(sc) && this.isTime(token)) {
            return this.expandTime(token);
        }
        if ("num".equals(sc) && this.isNum(token)) {
            return this.expandNum(token);
        }
        if ("amt".equals(sc) && this.isAmt(token)) {
            return this.expandAmt(token);
        }
        if ("dur".equals(sc) && this.isDur(token)) {
            return this.expandDur(token);
        }
        if ("ne".equals(sc) && this.isNamedEntity(token)) {
            return this.expandNamedEntity(token);
        }
        return Arrays.asList(token.split("_"));
    }

    /**
     * Expands a date token via {@link EnglishExpander#expandDate}.  A token that fully
     * matches {@code yyyy.MM.dd} is expanded with style argument {@code 1}, anything else
     * is parsed as {@code *.MM.dd} and expanded with style argument {@code 2}.
     *
     * @param date the date token
     * @return the expansion, or a list holding just the token if it cannot be parsed
     */
    @Override
    public List<String> expandDate(String date) {
        ArrayList<String> retval = new ArrayList<String>();
        try {
            ParsePosition pos = new ParsePosition(0);
            Date dateObj = this.dateFormat.parse(date, pos);
            // NOTE(review): 1 and 2 presumably are DateFormat.LONG and DateFormat.MEDIUM --
            // confirm against EnglishExpander.
            if (dateObj != null && pos.getIndex() == date.length()) {
                EnglishExpander.expandDate(dateObj, 1, retval);
            } else {
                dateObj = this.dateFormatNoYear.parse(date);
                EnglishExpander.expandDate(dateObj, 2, retval);
            }
        }
        catch (ParseException exc) {
            // unparseable: pass the token through unexpanded
            retval.add(date);
        }
        return retval;
    }

    /**
     * Expands a time token via {@link EnglishExpander#expandTime}.
     *
     * @param time the time token
     * @return the expansion, or a list holding just the token if it cannot be parsed
     */
    @Override
    public List<String> expandTime(String time) {
        ArrayList<String> retval = new ArrayList<String>();
        try {
            EnglishExpander.expandTime(this.timeFormat.parse(time), retval);
        }
        catch (ParseException exc) {
            // unparseable: pass the token through unexpanded
            retval.add(time);
        }
        return retval;
    }

    /**
     * Expands a number token via {@link EnglishExpander#expandNumber}.
     *
     * @param num the number token
     * @return the expansion
     */
    @Override
    public List<String> expandNum(String num) {
        ArrayList<String> retval = new ArrayList<String>();
        EnglishExpander.expandNumber(num, retval);
        return retval;
    }

    /**
     * Expands an amount token via {@link EnglishExpander#expandAmount}.  The last three
     * characters are taken as the currency code and the trimmed remainder as the number.
     *
     * @param amt the amount token; must be at least three characters long
     * @return the expansion
     */
    @Override
    public List<String> expandAmt(String amt) {
        int codeStart = amt.length() - 3;
        String code = amt.substring(codeStart);
        String num = amt.substring(0, codeStart).trim();
        ArrayList<String> retval = new ArrayList<String>();
        EnglishExpander.expandAmount(num, code, retval);
        return retval;
    }

    /**
     * Expands a duration token via {@link EnglishExpander#expandDuration}.
     *
     * @param dur the duration token
     * @return the expansion
     * @throws RuntimeException wrapping the failure if the token is not a valid duration
     */
    public List<String> expandDur(String dur) {
        Duration duration;
        try {
            duration = this.datatypeFactory.newDuration(dur);
        }
        catch (Exception exc) {
            throw new RuntimeException(exc);
        }
        ArrayList<String> retval = new ArrayList<String>();
        EnglishExpander.expandDuration(duration, retval);
        return retval;
    }

    /**
     * Expands a named entity by splitting it at underscores.
     *
     * @param namedEntity the named entity token
     * @return the parts between the underscores
     */
    @Override
    public List<String> expandNamedEntity(String namedEntity) {
        return Arrays.asList(namedEntity.split("_"));
    }

    /**
     * Command-line check: tokenizes the first argument and prints the words, their
     * expansion and the formatted sentence.  If a second argument is given, it is
     * parsed as a single token with strict factors and printed as well.
     *
     * @param args the string to tokenize, optionally followed by a token to parse strictly
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.err.println("Usage: java opennlp.ccg.lexicon.DefaultTokenizer <string> [<token>]");
            return;
        }
        DefaultTokenizer tk = new DefaultTokenizer();
        List<Word> words = tk.tokenize(args[0]);
        StringBuilder expanded = new StringBuilder();
        System.out.println("words: ");
        for (Word word : words) {
            System.out.print(word + " ");
            for (String orthWord : tk.expandWord(word)) {
                expanded.append(orthWord).append(" ");
            }
        }
        System.out.println();
        System.out.println("expanded: " + expanded);
        System.out.println("formatted: " + tk.format(words));
        if (args.length > 1) {
            System.out.println();
            Word strictlyParsed = tk.parseToken(args[1], true);
            System.out.println("strictly parsed word: " + strictlyParsed);
            System.out.println("formatted: " + tk.format(strictlyParsed));
        }
    }
}

