/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.PTB2TextLexer;
import edu.stanford.nlp.process.PTBLexer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Timing;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringReader;
import java.io.Writer;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class PTBTokenizer<T extends HasWord>
extends AbstractTokenizer<T> {
    private PTBLexer lexer;

    /**
     * Creates a {@link Word}-producing tokenizer over the given reader with
     * newline tokenization switched off.
     *
     * @param r the character source to tokenize
     * @return a new tokenizer yielding {@code Word} tokens
     */
    public static PTBTokenizer<Word> newPTBTokenizer(Reader r) {
        final boolean tokenizeNLs = false;
        return newPTBTokenizer(r, tokenizeNLs);
    }

    /**
     * Creates a {@link Word}-producing tokenizer over the given reader.
     * The tokenizer is not invertible and keeps PTB3 escaping enabled.
     *
     * @param r the character source to tokenize
     * @param tokenizeNLs whether the {@code tokenizeNLs} lexer option is set
     * @return a new tokenizer yielding {@code Word} tokens
     */
    public static PTBTokenizer<Word> newPTBTokenizer(Reader r, boolean tokenizeNLs) {
        WordTokenFactory wordFactory = new WordTokenFactory();
        return new PTBTokenizer<Word>(r, tokenizeNLs, false, false, wordFactory);
    }

    /**
     * Creates a {@link CoreLabel}-producing tokenizer over the given reader,
     * with PTB3 escaping enabled.
     *
     * @param r the character source to tokenize
     * @param tokenizeNLs whether the {@code tokenizeNLs} lexer option is set
     * @param invertible whether the {@code invertible} lexer option is set
     * @return a new tokenizer yielding {@code CoreLabel} tokens
     */
    public static PTBTokenizer<CoreLabel> newPTBTokenizer(Reader r, boolean tokenizeNLs, boolean invertible) {
        CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
        return new PTBTokenizer<CoreLabel>(r, tokenizeNLs, invertible, false, labelFactory);
    }

    /**
     * Builds a tokenizer from individual boolean switches by translating them
     * into the comma-separated option string handed to {@link PTBLexer}.
     *
     * @param r the character source to tokenize
     * @param tokenizeNLs appends {@code ,tokenizeNLs} to the options when true
     * @param invertible appends {@code ,invertible} to the options when true
     * @param suppressEscaping selects {@code ptb3Escaping=false} when true,
     *        {@code ptb3Escaping=true} otherwise
     * @param tokenFactory factory passed through to the lexer
     */
    private PTBTokenizer(Reader r, boolean tokenizeNLs, boolean invertible, boolean suppressEscaping, LexedTokenFactory<T> tokenFactory) {
        // The escaping setting always comes first; the remaining flags are optional suffixes.
        StringBuilder lexerOptions = new StringBuilder(suppressEscaping ? "ptb3Escaping=false" : "ptb3Escaping=true");
        if (tokenizeNLs) {
            lexerOptions.append(",tokenizeNLs");
        }
        if (invertible) {
            lexerOptions.append(",invertible");
        }
        String optionString = lexerOptions.toString();
        this.lexer = new PTBLexer(r, tokenFactory, optionString);
    }

    /**
     * Builds a tokenizer whose lexer is configured directly from an option string.
     *
     * @param r the character source to tokenize
     * @param tokenFactory factory passed through to the lexer
     * @param options option string handed unchanged to {@link PTBLexer}
     */
    public PTBTokenizer(Reader r, LexedTokenFactory<T> tokenFactory, String options) {
        PTBLexer configuredLexer = new PTBLexer(r, tokenFactory, options);
        this.lexer = configuredLexer;
    }

    /**
     * Pulls the next token from the underlying lexer.
     *
     * <p>Any exception raised by the lexer (or by the cast) is deliberately
     * absorbed: the {@code nextToken} field (not declared in this class, so
     * presumably inherited from {@link AbstractTokenizer}) is reset to
     * {@code null} and {@code null} is returned.
     *
     * @return the next token, or {@code null} if the lexer returned
     *         {@code null} or failed
     */
    @Override
    @SuppressWarnings("unchecked")
    protected T getNext() {
        try {
            return (T) this.lexer.next();
        }
        catch (Exception e) {
            // Best-effort: a failing lexer is treated the same as "no more tokens".
            this.nextToken = null;
            return null;
        }
    }

    /**
     * Runs {@link PTB2TextLexer} over a PTB-tokenized string and concatenates
     * every piece the lexer emits.
     *
     * <p>An {@link IOException} from the lexer is printed via
     * {@code printStackTrace()} and whatever was accumulated so far is returned.
     *
     * @param ptbText the PTB-tokenized text; must not be {@code null}
     * @return the concatenation of all pieces produced by the lexer
     */
    public static String ptb2Text(String ptbText) {
        StringBuilder plain = new StringBuilder(ptbText.length());
        PTB2TextLexer lexer = new PTB2TextLexer(new StringReader(ptbText));
        try {
            for (String piece = lexer.next(); piece != null; piece = lexer.next()) {
                plain.append(piece);
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
        return plain.toString();
    }

    /**
     * Converts a single PTB token to text by padding it with one space on each
     * side, running it through {@link #ptb2Text(String)}, and trimming the result.
     *
     * @param ptbText the PTB token
     * @return the converted, trimmed text
     */
    public static String ptbToken2Text(String ptbText) {
        String padded = " " + ptbText + " ";
        String converted = ptb2Text(padded);
        return converted.trim();
    }

    /**
     * Streams PTB-tokenized text from a reader through {@link PTB2TextLexer}
     * and writes each emitted piece to the writer.
     *
     * <p>The writer is neither flushed nor closed here; that is left to the caller.
     *
     * @param ptbText source of PTB-tokenized text
     * @param w destination for the converted text
     * @return the number of pieces the lexer produced
     * @throws IOException if the lexer or the writer fails
     */
    public static int ptb2Text(Reader ptbText, Writer w) throws IOException {
        PTB2TextLexer lexer = new PTB2TextLexer(ptbText);
        int count = 0;
        for (String piece = lexer.next(); piece != null; piece = lexer.next()) {
            count++;
            w.write(piece);
        }
        return count;
    }

    /**
     * Converts PTB-tokenized input back to plain text and reports throughput on stderr.
     *
     * <p>With an empty {@code inputFileList}, standard input is converted to
     * standard output. Otherwise each listed input is converted in turn, to the
     * output file at the same index in {@code outputFileList}, or to standard
     * output when {@code outputFileList} is {@code null}.
     *
     * <p>Standard output is only flushed, never closed, so that later files can
     * still be written to it. File writers and input readers are closed even if
     * conversion fails.
     *
     * @param inputFileList inputs to convert; empty means read standard input
     * @param outputFileList output file names parallel to {@code inputFileList},
     *        or {@code null} to write to standard output
     * @param charset character encoding for reading inputs and writing output files
     * @throws IOException if reading or writing fails
     */
    private static void untok(List<String> inputFileList, List<String> outputFileList, String charset) throws IOException {
        Timing t = new Timing();
        int numTokens = 0;
        int sz = inputFileList.size();
        if (sz == 0) {
            InputStreamReader r = new InputStreamReader(System.in, charset);
            PrintWriter out = new PrintWriter(System.out, true);
            numTokens = PTBTokenizer.ptb2Text(r, out);
            // ptb2Text emits via write(), which does not trigger PrintWriter auto-flush.
            out.flush();
        } else {
            for (int j = 0; j < sz; ++j) {
                Reader r = IOUtils.readReaderFromString(inputFileList.get(j), charset);
                try {
                    if (outputFileList == null) {
                        PrintWriter out = new PrintWriter(System.out, true);
                        numTokens += PTBTokenizer.ptb2Text(r, out);
                        // Flush only: closing this writer would close System.out for all later files.
                        out.flush();
                    } else {
                        PrintWriter out = new PrintWriter((Writer)new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(outputFileList.get(j)), charset)), true);
                        try {
                            numTokens += PTBTokenizer.ptb2Text(r, out);
                        }
                        finally {
                            out.close();
                        }
                    }
                }
                finally {
                    r.close();
                }
            }
        }
        long millis = t.stop();
        double wordspersec = (double)numTokens / ((double)millis / 1000.0);
        DecimalFormat nf = new DecimalFormat("0.00");
        System.err.println("PTBTokenizer untokenized " + numTokens + " tokens at " + nf.format(wordspersec) + " tokens per second.");
    }

    /**
     * Converts a list of PTB tokens to text by joining them with
     * {@code StringUtils.join} and passing the result to {@link #ptb2Text(String)}.
     *
     * @param ptbWords the PTB tokens, in order
     * @return the converted text
     */
    public static String ptb2Text(List<String> ptbWords) {
        // NOTE(review): the separator is whatever StringUtils.join(List) uses by default.
        String joined = StringUtils.join(ptbWords);
        return ptb2Text(joined);
    }

    /**
     * Converts a list of word-bearing objects to text: collects each element's
     * {@code word()} in order and delegates to {@link #ptb2Text(List)}.
     *
     * @param ptbWords the tokens whose words are to be converted
     * @return the converted text
     */
    public static String labelList2Text(List<? extends HasWord> ptbWords) {
        List<String> tokens = new ArrayList<String>();
        Iterator<? extends HasWord> it = ptbWords.iterator();
        while (it.hasNext()) {
            tokens.add(it.next().word());
        }
        return ptb2Text(tokens);
    }

    /**
     * Tokenizes the given inputs and reports throughput on stderr.
     *
     * <p>With an empty {@code inputFileList}, standard input is tokenized to
     * standard output. Otherwise each listed input is tokenized in turn, to the
     * output file at the same index in {@code outputFileList}, or to standard
     * output when {@code outputFileList} is {@code null}.
     *
     * <p>Standard output is flushed after each input but never closed. File
     * writers and input readers are closed even if tokenization fails.
     *
     * @param inputFileList inputs to tokenize; empty means read standard input
     * @param outputFileList output file names parallel to {@code inputFileList},
     *        or {@code null} to write to standard output
     * @param charset character encoding for reading inputs and writing output files
     * @param parseInsideBegin pattern that switches printing on, or {@code null}
     * @param parseInsideEnd pattern that switches printing off, or {@code null}
     * @param options option string for the tokenizer
     * @param preserveLines whether tokens are printed space-separated per input line
     * @param dump whether each token's {@code toString()} is printed instead of its word
     * @throws IOException if reading or writing fails
     */
    private static void tok(List<String> inputFileList, List<String> outputFileList, String charset, Pattern parseInsideBegin, Pattern parseInsideEnd, String options, boolean preserveLines, boolean dump) throws IOException {
        Timing t = new Timing();
        int numTokens = 0;
        int sz = inputFileList.size();
        if (sz == 0) {
            InputStreamReader r = new InputStreamReader(System.in, charset);
            PrintWriter out = new PrintWriter(System.out, true);
            numTokens += PTBTokenizer.tokReader(r, out, parseInsideBegin, parseInsideEnd, options, preserveLines, dump);
            // In preserveLines mode tokens are written with print(), which does not
            // auto-flush; without this a trailing partial line could be lost.
            out.flush();
        } else {
            for (int j = 0; j < sz; ++j) {
                Reader r = IOUtils.readReaderFromString(inputFileList.get(j), charset);
                try {
                    if (outputFileList == null) {
                        PrintWriter out = new PrintWriter(System.out, true);
                        numTokens += PTBTokenizer.tokReader(r, out, parseInsideBegin, parseInsideEnd, options, preserveLines, dump);
                        // Flush only: System.out must stay open for the remaining inputs.
                        out.flush();
                    } else {
                        PrintWriter out = new PrintWriter((Writer)new BufferedWriter(new OutputStreamWriter((OutputStream)new FileOutputStream(outputFileList.get(j)), charset)), true);
                        try {
                            numTokens += PTBTokenizer.tokReader(r, out, parseInsideBegin, parseInsideEnd, options, preserveLines, dump);
                        }
                        finally {
                            out.close();
                        }
                    }
                }
                finally {
                    r.close();
                }
            }
        }
        long millis = t.stop();
        double wordspersec = (double)numTokens / ((double)millis / 1000.0);
        DecimalFormat nf = new DecimalFormat("0.00");
        System.err.println("PTBTokenizer tokenized " + numTokens + " tokens at " + nf.format(wordspersec) + " tokens per second.");
    }

    /**
     * Tokenizes one reader and prints the tokens to {@code out2}.
     *
     * <p>Printing starts enabled only when {@code parseInsideBegin} is
     * {@code null}. A token whose word fully matches {@code parseInsideBegin}
     * turns printing on, and one that fully matches {@code parseInsideEnd}
     * turns it off; such tokens are not themselves printed. Every token read
     * is counted, printed or not.
     *
     * <p>When {@code preserveLines} is set, a token whose word is
     * {@code *NL*} ends the current output line and the other tokens are
     * space-separated on that line. Otherwise each token goes on its own line.
     * The newline check always uses the token's word, so it still works when
     * {@code dump} replaces the printed text with {@code toString()}.
     *
     * @param r the character source to tokenize
     * @param out2 destination for the printed tokens; not flushed or closed here
     * @param parseInsideBegin pattern that switches printing on, or {@code null}
     * @param parseInsideEnd pattern that switches printing off, or {@code null}
     * @param options option string for the tokenizer
     * @param preserveLines whether to keep the input's line structure
     * @param dump whether to print each token's {@code toString()} instead of its word
     * @return the number of tokens read
     */
    private static int tokReader(Reader r, PrintWriter out2, Pattern parseInsideBegin, Pattern parseInsideEnd, String options, boolean preserveLines, boolean dump) {
        int numTokens = 0;
        PTBTokenizer<CoreLabel> tokenizer = new PTBTokenizer<CoreLabel>(r, new CoreLabelTokenFactory(), options);
        boolean printing = parseInsideBegin == null;
        boolean beginLine = true;
        while (tokenizer.hasNext()) {
            CoreLabel obj = (CoreLabel)tokenizer.next();
            String word = obj.word();
            if (parseInsideBegin != null && parseInsideBegin.matcher(word).matches()) {
                printing = true;
            } else if (parseInsideEnd != null && parseInsideEnd.matcher(word).matches()) {
                printing = false;
            } else if (printing) {
                // Only the printed text switches to the dump form; decisions stay on the word.
                String text = dump ? obj.toString() : word;
                if (preserveLines) {
                    if ("*NL*".equals(word)) {
                        beginLine = true;
                        out2.println();
                    } else {
                        if (!beginLine) {
                            out2.print(" ");
                        } else {
                            beginLine = false;
                        }
                        out2.print(text);
                    }
                } else {
                    out2.println(text);
                }
            }
            ++numTokens;
        }
        return numTokens;
    }

    /**
     * Returns the tokenizer factory produced by
     * {@code PTBTokenizerFactory.newTokenizerFactory()}.
     *
     * @return a factory for {@link Word}-producing tokenizers
     */
    public static TokenizerFactory<Word> factory() {
        TokenizerFactory<Word> wordFactory = PTBTokenizerFactory.newTokenizerFactory();
        return wordFactory;
    }

    /**
     * Creates a tokenizer factory that uses the supplied token factory, is not
     * invertible, and keeps PTB3 escaping enabled.
     *
     * @param <T> the token type produced
     * @param tokenizeNLs whether the {@code tokenizeNLs} option is set
     * @param factory creates the token objects
     * @return a new tokenizer factory
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(boolean tokenizeNLs, LexedTokenFactory<T> factory) {
        // Parameterized instantiation instead of a raw type, so the result is type-checked.
        return new PTBTokenizerFactory<T>(tokenizeNLs, false, false, factory);
    }

    /**
     * Creates a factory for {@link CoreLabel}-producing tokenizers by delegating
     * to {@code PTBTokenizerFactory.newPTBTokenizerFactory(boolean, boolean)}.
     *
     * @param tokenizeNLs whether the {@code tokenizeNLs} option is set
     * @param invertible whether the {@code invertible} option is set
     * @return a new tokenizer factory
     */
    public static TokenizerFactory<CoreLabel> factory(boolean tokenizeNLs, boolean invertible) {
        TokenizerFactory<CoreLabel> labelFactory = PTBTokenizerFactory.newPTBTokenizerFactory(tokenizeNLs, invertible);
        return labelFactory;
    }

    /**
     * Creates a tokenizer factory from a token factory and an option string.
     *
     * @param <T> the token type produced
     * @param factory creates the token objects
     * @param options option string stored by the factory and handed to each tokenizer it creates
     * @return a new tokenizer factory
     */
    public static <T extends HasWord> TokenizerFactory<T> factory(LexedTokenFactory<T> factory, String options) {
        // Parameterized instantiation instead of a raw type, so the result is type-checked.
        return new PTBTokenizerFactory<T>(factory, options);
    }

    /**
     * Command-line entry point: tokenizes (or, with {@code -untok},
     * untokenizes) the named files, or standard input when no file is given.
     *
     * <p>Leading arguments that start with {@code -} are treated as flags:
     * <ul>
     *   <li>{@code -options opts}: appends {@code opts} to the tokenizer option string</li>
     *   <li>{@code -preserveLines}: tokenizes newlines and keeps the input's line structure</li>
     *   <li>{@code -dump}: prints each token's {@code toString()}</li>
     *   <li>{@code -ioFileList}: treats the remaining arguments as files that list
     *       input names, each optionally followed by an output name
     *       (default: input name plus {@code .tok})</li>
     *   <li>{@code -charset name}: character encoding, default {@code utf-8}</li>
     *   <li>{@code -parseInside regex}: only prints tokens inside matching XML-style elements</li>
     *   <li>{@code -untok}: converts PTB tokens back to text instead of tokenizing</li>
     *   <li>{@code -h}, {@code -help}, {@code --help}: prints usage and returns</li>
     * </ul>
     * A flag that needs a value but is the last argument is reported as unknown.
     *
     * @param args command-line arguments
     * @throws IOException if reading or writing fails
     */
    public static void main(String[] args) throws IOException {
        String charset = "utf-8";
        Pattern parseInsideBegin = null;
        Pattern parseInsideEnd = null;
        StringBuilder optionsSB = new StringBuilder();
        boolean preserveLines = false;
        boolean inputOutputFileList = false;
        boolean dump = false;
        boolean untok = false;
        int i;
        // startsWith rather than charAt(0): an empty-string argument must not throw.
        for (i = 0; i < args.length && args[i].startsWith("-"); ++i) {
            String flag = args[i];
            boolean hasValue = i < args.length - 1;
            if ("-options".equals(flag) && hasValue) {
                optionsSB.append(',');
                optionsSB.append(args[++i]);
            } else if ("-preserveLines".equals(flag)) {
                // Same key the constructors in this file use ("tokenizeNLs").
                optionsSB.append(",tokenizeNLs");
                preserveLines = true;
            } else if ("-dump".equals(flag)) {
                dump = true;
            } else if ("-ioFileList".equals(flag)) {
                inputOutputFileList = true;
            } else if ("-charset".equals(flag) && hasValue) {
                charset = args[++i];
            } else if ("-parseInside".equals(flag) && hasValue) {
                String element = args[++i];
                try {
                    parseInsideBegin = Pattern.compile("<(?:" + element + ")[^>]*?>");
                    parseInsideEnd = Pattern.compile("</(?:" + element + ")[^>]*?>");
                }
                catch (Exception e) {
                    // Best-effort: fall back to printing everything, but say so.
                    parseInsideBegin = null;
                    parseInsideEnd = null;
                    System.err.println("Ignoring invalid -parseInside pattern: " + element);
                }
            } else if ("-untok".equals(flag)) {
                untok = true;
            } else if ("-h".equals(flag) || "-help".equals(flag) || "--help".equals(flag)) {
                System.err.println("usage: java edu.stanford.nlp.process.PTBTokenizer [options]* filename*");
                System.err.println("  options: -preserveLines|-dump|-ioFileList|-charset|-parseInside elementRegex|-options options|-h");
                return;
            } else {
                System.err.println("Unknown option: " + flag);
            }
        }
        List<String> inputFileList = new ArrayList<String>();
        List<String> outputFileList = null;
        if (inputOutputFileList) {
            outputFileList = new ArrayList<String>();
            for (int j = i; j < args.length; ++j) {
                BufferedReader r = new BufferedReader(new InputStreamReader((InputStream)new FileInputStream(args[j]), charset));
                try {
                    String inLine;
                    while ((inLine = r.readLine()) != null) {
                        // Trim so blank or indented lines do not yield an empty file name.
                        String entry = inLine.trim();
                        if (entry.length() == 0) {
                            continue;
                        }
                        String[] fields = entry.split("\\s+");
                        inputFileList.add(fields[0]);
                        if (fields.length > 1) {
                            outputFileList.add(fields[1]);
                        } else {
                            outputFileList.add(fields[0] + ".tok");
                        }
                    }
                }
                finally {
                    r.close();
                }
            }
        } else {
            inputFileList.addAll(Arrays.asList(args).subList(i, args.length));
        }
        if (untok) {
            PTBTokenizer.untok(inputFileList, outputFileList, charset);
        } else {
            PTBTokenizer.tok(inputFileList, outputFileList, charset, parseInsideBegin, parseInsideEnd, optionsSB.toString(), preserveLines, dump);
        }
    }

    /*
     * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
     */
    /**
     * Factory for {@link PTBTokenizer} instances. It holds a token factory and
     * an option string and hands both to every tokenizer it creates.
     *
     * @param <T> the token type produced by the tokenizers
     */
    public static class PTBTokenizerFactory<T extends HasWord>
    implements TokenizerFactory<T> {
        /** Creates the token objects for tokenizers built by this factory. */
        protected LexedTokenFactory<T> factory;
        /** Option string passed to each tokenizer built by this factory. */
        protected String options;

        /**
         * Creates a {@link Word} tokenizer factory with an empty option string.
         *
         * @return a new factory
         */
        public static TokenizerFactory<Word> newTokenizerFactory() {
            WordTokenFactory words = new WordTokenFactory();
            return newPTBTokenizerFactory(words, "");
        }

        /**
         * Creates a {@link Word} tokenizer factory that is not invertible and
         * keeps PTB3 escaping enabled.
         *
         * @param tokenizeNLs whether the {@code tokenizeNLs} option is set
         * @return a new factory
         */
        public static PTBTokenizerFactory<Word> newPTBTokenizerFactory(boolean tokenizeNLs) {
            WordTokenFactory words = new WordTokenFactory();
            return new PTBTokenizerFactory<Word>(tokenizeNLs, false, false, words);
        }

        /**
         * Creates a {@link Word} tokenizer factory from an option string.
         *
         * @param options option string for the tokenizers
         * @return a new factory
         */
        public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String options) {
            WordTokenFactory words = new WordTokenFactory();
            return new PTBTokenizerFactory<Word>(words, options);
        }

        /**
         * Creates a {@link CoreLabel} tokenizer factory from an option string.
         *
         * @param options option string for the tokenizers
         * @return a new factory
         */
        public static PTBTokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory(String options) {
            CoreLabelTokenFactory labels = new CoreLabelTokenFactory();
            return new PTBTokenizerFactory<CoreLabel>(labels, options);
        }

        /**
         * Creates a tokenizer factory from an arbitrary token factory and option string.
         *
         * @param <T> the token type produced
         * @param tokenFactory creates the token objects
         * @param options option string for the tokenizers
         * @return a new factory
         */
        public static <T extends HasWord> PTBTokenizerFactory<T> newPTBTokenizerFactory(LexedTokenFactory<T> tokenFactory, String options) {
            PTBTokenizerFactory<T> created = new PTBTokenizerFactory<T>(tokenFactory, options);
            return created;
        }

        /**
         * Creates a {@link CoreLabel} tokenizer factory with PTB3 escaping enabled.
         *
         * @param tokenizeNLs whether the {@code tokenizeNLs} option is set
         * @param invertible whether the {@code invertible} option is set
         * @return a new factory
         */
        public static PTBTokenizerFactory<CoreLabel> newPTBTokenizerFactory(boolean tokenizeNLs, boolean invertible) {
            CoreLabelTokenFactory labels = new CoreLabelTokenFactory();
            return new PTBTokenizerFactory<CoreLabel>(tokenizeNLs, invertible, false, labels);
        }

        /**
         * Builds the option string from boolean switches.
         *
         * @param tokenizeNLs appends {@code ,tokenizeNLs} when true
         * @param invertible appends {@code ,invertible} when true
         * @param suppressEscaping selects {@code ptb3Escaping=false} when true,
         *        {@code ptb3Escaping=true} otherwise
         * @param factory creates the token objects
         */
        private PTBTokenizerFactory(boolean tokenizeNLs, boolean invertible, boolean suppressEscaping, LexedTokenFactory<T> factory) {
            this.factory = factory;
            // The escaping setting always comes first; the remaining flags are optional suffixes.
            StringBuilder built = new StringBuilder(suppressEscaping ? "ptb3Escaping=false" : "ptb3Escaping=true");
            if (tokenizeNLs) {
                built.append(",tokenizeNLs");
            }
            if (invertible) {
                built.append(",invertible");
            }
            this.options = built.toString();
        }

        /**
         * Stores the token factory and option string as given.
         *
         * @param tokenFactory creates the token objects
         * @param options option string for the tokenizers
         */
        private PTBTokenizerFactory(LexedTokenFactory<T> tokenFactory, String options) {
            this.options = options;
            this.factory = tokenFactory;
        }

        /**
         * Returns a new tokenizer over the reader, viewed as an iterator.
         *
         * @param r the character source to tokenize
         * @return the tokenizer created by {@link #getTokenizer(Reader)}
         */
        @Override
        public Iterator<T> getIterator(Reader r) {
            Tokenizer<T> tokenizer = getTokenizer(r);
            return tokenizer;
        }

        /**
         * Creates a new {@link PTBTokenizer} over the reader using this
         * factory's token factory and option string.
         *
         * @param r the character source to tokenize
         * @return a new tokenizer
         */
        @Override
        public Tokenizer<T> getTokenizer(Reader r) {
            return new PTBTokenizer<T>(r, factory, options);
        }
    }
}

