/*
 * Decompiled with CFR 0.152.
 */
package org.sift.winnow;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Holds the stop-word, conjunction and article vocabularies used to decide
 * whether a word (or a multi-word phrase) should be filtered out.
 *
 * <p>A fresh instance is seeded from the built-in {@code STOP_WORDS},
 * {@link #CONJUNCTIONS} and {@link #ARTICLES} arrays. Additional stop words can
 * be loaded from text files (one entry per line), either globally via
 * {@link #setStopWordsFiles(List)} or per group ID via
 * {@link #setgroupIDBasedStopWordsFiles(Map)}.
 *
 * <p>The class performs no synchronization of its own.
 */
public class StopWords {
    /** String value is backslash, backslash, n; read as a regex it matches a literal backslash followed by 'n'. */
    public static final String LINE_BOUNDARY = "\\\\n";
    /** Regex separating words: a run of whitespace or a run of non-alphanumeric (ASCII) characters. */
    public static final String WORD_BOUNDARY = "\\s+|[^a-zA-Z0-9]+";
    /** Single-space separator string. */
    public static final String WORD_BOUNDARY_STRING = " ";
    /** Default n-gram size. */
    public static final int DEFAULT_N_GRAM = 1;
    /** {@link #WORD_BOUNDARY} compiled once so it is not re-compiled on every call. */
    private static final Pattern WORD_BOUNDARY_PATTERN = Pattern.compile(WORD_BOUNDARY);
    private static final String[] STOP_WORDS = new String[]{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "/", "\\", "\\n", ",", ".", "<", ">", "!", "&", "*", "%", "$", "#", "@", "-", "+", "//", "\\\\", "(", ")", "an", "and", "are", "as", "at", "be", "but", "by", "eight", "five", "for", "four", "go", "has", "have", "he", "her", "his", "if", "in", "into", "is", "it", "its", "my", "nine", "no", "not", "of", "on", "one", "or", "seven", "six", "so", "such", "ten", "that", "the", "then", "there", "these", "they", "this", "three", "to", "too", "two", "will", "with", "&amp;", "&quot;", "amp", "quot", "&gt", "&mdash", "gt", "mdash"};
    /** Built-in conjunctions copied into every new instance. */
    public static final String[] CONJUNCTIONS = new String[]{"and", "both", "but", "either", "for", "neither", "nor", "or", "so", "whether", "yet"};
    /** Built-in articles copied into every new instance. */
    public static final String[] ARTICLES = new String[]{"a", "an", "the", "some"};
    private List<String> stopWords = new LinkedList<String>();
    private Map<String, List<String>> groupIDBasedStopWords = new HashMap<String, List<String>>();
    private List<String> conjunctionWords = new LinkedList<String>();
    private List<String> articleWords = new LinkedList<String>();
    private List<String> stopWordsFiles = new LinkedList<String>();
    private Map<String, String> groupIDBasedStopWordsFiles = new HashMap<String, String>();

    /**
     * Creates an instance pre-populated with the built-in stop words,
     * conjunctions and articles.
     */
    public StopWords() {
        for (String word : STOP_WORDS) {
            this.stopWords.add(word);
        }
        for (String word : CONJUNCTIONS) {
            this.conjunctionWords.add(word);
        }
        for (String word : ARTICLES) {
            this.articleWords.add(word);
        }
    }

    /**
     * Tells whether {@code word} should be treated as a stop word.
     *
     * <p>The answer is {@code true} when any of the following holds:
     * <ul>
     *   <li>{@code word} is an exact entry of the stop-word list;</li>
     *   <li>{@code word} contains no tokens at all once split on
     *       {@link #WORD_BOUNDARY} (empty or delimiter-only input);</li>
     *   <li>its first or last token is a conjunction or an article.</li>
     * </ul>
     *
     * <p>Empty tokens produced by a leading delimiter are ignored, so
     * {@code " the cat"} is judged by {@code "the"} exactly like
     * {@code "the cat"}.
     *
     * @param word the word or phrase to test; must not be {@code null}
     * @return {@code true} if the input is considered a stop word
     * @throws NullPointerException if {@code word} is {@code null}
     */
    public boolean isStopWord(String word) {
        if (this.stopWords.contains(word)) {
            return true;
        }
        String[] words = StopWords.tokenize(word);
        if (words.length < 1) {
            // Nothing but delimiters (or nothing at all): there is no content to keep.
            return true;
        }
        String first = words[0];
        String last = words[words.length - 1];
        return this.conjunctionWords.contains(first) || this.conjunctionWords.contains(last)
                || this.articleWords.contains(first) || this.articleWords.contains(last);
    }

    /**
     * Tells whether {@code word} is listed in the stop words loaded for
     * {@code groupID}.
     *
     * @param word    the word to look up (exact match against the loaded lines)
     * @param groupID the group whose stop-word list is consulted
     * @return {@code true} if a list exists for {@code groupID} and contains
     *         {@code word}; {@code false} otherwise, including when no list has
     *         been loaded for that group
     */
    public boolean isStopWord(String word, String groupID) {
        List<String> groupWords = this.groupIDBasedStopWords.get(groupID);
        return groupWords != null && groupWords.contains(word);
    }

    /**
     * Splits {@code input} on {@link #WORD_BOUNDARY} and drops empty tokens.
     *
     * @param input the text to split; must not be {@code null}
     * @return the non-empty tokens in order of appearance (possibly empty, never {@code null})
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public String[] split(String input) {
        return StopWords.tokenize(input);
    }

    /** Returns the live stop-word list held by this instance (not a copy). */
    public List<String> getStopWords() {
        return this.stopWords;
    }

    /** Replaces the stop-word list; the given list is stored as-is (not copied). */
    public void setStopWords(List<String> stopWords) {
        this.stopWords = stopWords;
    }

    /** Returns the live conjunction list held by this instance (not a copy). */
    public List<String> getConjunctionWords() {
        return this.conjunctionWords;
    }

    /** Replaces the conjunction list; the given list is stored as-is (not copied). */
    public void setConjunctionWords(List<String> conjunctionWords) {
        this.conjunctionWords = conjunctionWords;
    }

    /** Returns the file names last passed to {@link #setStopWordsFiles(List)}. */
    public List<String> getStopWordsFiles() {
        return this.stopWordsFiles;
    }

    /** Returns the live article list held by this instance (not a copy). */
    public List<String> getArticleWords() {
        return this.articleWords;
    }

    /** Replaces the article list; the given list is stored as-is (not copied). */
    public void setArticleWords(List<String> articleWords) {
        this.articleWords = articleWords;
    }

    /**
     * Records the given stop-word files and appends every line of each file to
     * the current stop-word list.
     *
     * <p>Lines are added verbatim (no trimming, blank lines included). Each
     * file is read completely before its lines are added, so a file that fails
     * part-way contributes nothing; files processed before the failing one
     * remain applied.
     *
     * @param stopWordsFiles paths of the files to load, one stop word per line
     * @throws RuntimeException wrapping the {@link IOException} if a file
     *         cannot be opened or read
     */
    public void setStopWordsFiles(List<String> stopWordsFiles) {
        this.stopWordsFiles = stopWordsFiles;
        for (String fileName : stopWordsFiles) {
            try {
                this.stopWords.addAll(StopWords.readLines(fileName));
            }
            catch (IOException e) {
                throw new RuntimeException("Error while reading from filtering file", e);
            }
        }
    }

    /** Returns the group-ID-to-file-name map last passed to {@link #setgroupIDBasedStopWordsFiles(Map)}. */
    public Map<String, String> getgroupIDBasedStopWordsFiles() {
        return this.groupIDBasedStopWordsFiles;
    }

    /**
     * Records the given group-ID-to-file mapping and loads each file as the
     * stop-word list of its group, replacing any list previously loaded for
     * the same group ID.
     *
     * <p>Lines are stored verbatim (no trimming, blank lines included).
     *
     * @param groupIDBasedStopWordsFiles map from group ID to the path of that
     *        group's stop-word file (one stop word per line)
     * @throws RuntimeException wrapping the {@link IOException} if a file
     *         cannot be opened or read
     */
    public void setgroupIDBasedStopWordsFiles(Map<String, String> groupIDBasedStopWordsFiles) {
        this.groupIDBasedStopWordsFiles = groupIDBasedStopWordsFiles;
        for (Map.Entry<String, String> entry : groupIDBasedStopWordsFiles.entrySet()) {
            try {
                this.groupIDBasedStopWords.put(entry.getKey(), StopWords.readLines(entry.getValue()));
            }
            catch (IOException e) {
                throw new RuntimeException("Error while reading from group ID based filtering file", e);
            }
        }
    }

    /**
     * Splits {@code input} on the word-boundary pattern and keeps only the
     * non-empty pieces.
     */
    private static String[] tokenize(String input) {
        List<String> tokens = new LinkedList<String>();
        for (String piece : WORD_BOUNDARY_PATTERN.split(input)) {
            // Every non-alphanumeric character is a delimiter, so a piece is
            // either empty (leading delimiter) or purely alphanumeric.
            if (piece.length() > 0) {
                tokens.add(piece);
            }
        }
        return tokens.toArray(new String[0]);
    }

    /**
     * Reads all lines of {@code fileName} into a new list, closing the reader
     * even when reading fails.
     */
    private static List<String> readLines(String fileName) throws IOException {
        List<String> lines = new LinkedList<String>();
        BufferedReader reader = new BufferedReader(new FileReader(new File(fileName)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        finally {
            // Release the file handle on both the success and the failure path.
            reader.close();
        }
        return lines;
    }
}

