/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss.expt;

import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.expt.Blocker;
import com.wcohen.ss.expt.MatchData;
import com.wcohen.ss.tokens.SimpleTokenizer;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * A {@link Blocker} that proposes as candidate pairs all instances that share
 * at least one token.
 *
 * <p>An inverted index (token to instance indices) is built over the source
 * with fewer instances. A token whose posting set grows larger than
 * {@code maxFraction * numInstances(smallerSource)} is treated as a stopword
 * and is ignored when pairing.
 *
 * <p>The default {@code maxFraction} is 1.0 and may be overridden with the
 * {@code blockerMaxFraction} system property, read once at class-load time.
 */
public class TokenBlocker
extends Blocker {
    private static double defaultMaxFraction = 1.0;

    /**
     * Sentinel stored in the index in place of a posting set once a token has
     * been classified as a stopword. It is only ever compared by identity and
     * must never be added to.
     */
    private static final Set<Integer> STOPWORD_TOKEN_MARKER = new HashSet<Integer>();

    static {
        String s = System.getProperty("blockerMaxFraction");
        if (s != null) {
            try {
                defaultMaxFraction = Double.parseDouble(s);
            }
            catch (NumberFormatException ignored) {
                // Deliberately best-effort: an unparsable property value
                // leaves the built-in default of 1.0 in place.
            }
        }
    }

    /** Pairs produced by the most recent {@link #block(MatchData)} call; empty before the first call. */
    private List<Blocker.Pair> pairList = new ArrayList<Blocker.Pair>();
    protected Tokenizer tokenizer;
    private double maxFraction;
    private int numCorrectPairs;

    /**
     * Creates a blocker using the given tokenizer and stopword threshold.
     *
     * @param tokenizer   tokenizer applied to each instance's string
     * @param maxFraction a token contained in more than this fraction of the
     *                    smaller source's instances is ignored for pairing
     */
    public TokenBlocker(Tokenizer tokenizer, double maxFraction) {
        this.tokenizer = tokenizer;
        this.maxFraction = maxFraction;
    }

    /**
     * Creates a blocker using {@code SimpleTokenizer.defaultTokenizer()} and
     * the default maximum fraction.
     */
    public TokenBlocker() {
        this(SimpleTokenizer.defaultTokenizer(), defaultMaxFraction);
    }

    /** @return the stopword threshold currently in effect */
    public double getMaxFraction() {
        return this.maxFraction;
    }

    /**
     * Sets the stopword threshold; takes effect on the next call to
     * {@link #block(MatchData)}.
     *
     * @param maxFraction the new threshold
     */
    public void setMaxFraction(double maxFraction) {
        this.maxFraction = maxFraction;
    }

    /**
     * Computes the candidate pairs for {@code data}, replacing any pairs from
     * a previous call.
     *
     * @param data the data to block; must contain exactly one source in
     *             cluster mode and exactly two sources otherwise
     * @throws IllegalArgumentException if the number of sources does not match
     *                                  the current mode
     */
    @Override
    public void block(MatchData data) {
        this.numCorrectPairs = this.countCorrectPairs(data);
        this.pairList = new ArrayList<Blocker.Pair>();
        if (!this.clusterMode && data.numSources() != 2) {
            throw new IllegalArgumentException("need exactly two sources out of clusterMode");
        }
        if (this.clusterMode && data.numSources() != 1) {
            throw new IllegalArgumentException("need exactly one source in clusterMode");
        }

        // Index the source with fewer instances and probe with the other one.
        String smallSource = data.getSource(0);
        String bigSource = this.clusterMode ? data.getSource(0) : data.getSource(1);
        if (data.numInstances(smallSource) > data.numInstances(bigSource)) {
            String tmp = smallSource;
            smallSource = bigSource;
            bigSource = tmp;
        }
        // Compare source names by value: reference equality would silently
        // fail if the two names are equal but distinct String objects.
        boolean sameSource = smallSource.equals(bigSource);

        TreeMap<Token, Set<Integer>> index = this.buildIndex(data, smallSource);

        // Small-source indices already paired with the current big instance,
        // so that sharing several tokens yields only one pair.
        Set<Integer> pairedUpInstances = new TreeSet<Integer>();
        int numBigInstances = data.numInstances(bigSource);
        for (int i = 0; i < numBigInstances; ++i) {
            MatchData.Instance bigInst = data.getInstance(bigSource, i);
            pairedUpInstances.clear();
            Token[] tokens = this.tokenizer.tokenize(bigInst.unwrap());
            for (Token token : tokens) {
                Set<Integer> containers = index.get(token);
                if (containers == null || containers == STOPWORD_TOKEN_MARKER) {
                    continue;
                }
                for (Integer smallIndexInteger : containers) {
                    int smallIndex = smallIndexInteger.intValue();
                    // Within a single source, emit each unordered pair once
                    // and never pair an instance with itself.
                    if (sameSource && smallIndex <= i) {
                        continue;
                    }
                    if (!pairedUpInstances.add(smallIndexInteger)) {
                        continue;
                    }
                    MatchData.Instance smallInst = data.getInstance(smallSource, smallIndex);
                    this.pairList.add(new Blocker.Pair(bigInst, smallInst, smallInst.sameId(bigInst)));
                }
            }
        }
    }

    /**
     * Builds the token-to-instance-indices index over {@code source}, mapping
     * stopword tokens to {@link #STOPWORD_TOKEN_MARKER}.
     */
    private TreeMap<Token, Set<Integer>> buildIndex(MatchData data, String source) {
        int numInstances = data.numInstances(source);
        double maxSetSize = (double)numInstances * this.maxFraction;
        TreeMap<Token, Set<Integer>> index = new TreeMap<Token, Set<Integer>>();
        for (int i = 0; i < numInstances; ++i) {
            Token[] tokens = this.tokenizer.tokenize(data.getInstance(source, i).unwrap());
            for (Token token : tokens) {
                Set<Integer> containers = index.get(token);
                if (containers == STOPWORD_TOKEN_MARKER) {
                    // Already a stopword: skip it, and in particular never
                    // add to the shared sentinel set.
                    continue;
                }
                if (containers == null) {
                    containers = new TreeSet<Integer>();
                    index.put(token, containers);
                }
                containers.add(Integer.valueOf(i));
                if ((double)containers.size() > maxSetSize) {
                    index.put(token, STOPWORD_TOKEN_MARKER);
                }
            }
        }
        return index;
    }

    /** @return the number of pairs produced by the last {@link #block(MatchData)} call, or 0 before the first call */
    @Override
    public int size() {
        return this.pairList.size();
    }

    /**
     * @param i zero-based index into the pairs produced by the last
     *          {@link #block(MatchData)} call
     * @return the {@code i}-th candidate pair
     */
    @Override
    public Blocker.Pair getPair(int i) {
        return this.pairList.get(i);
    }

    /** @return the value of {@code countCorrectPairs(data)} recorded by the last {@link #block(MatchData)} call */
    @Override
    public int numCorrectPairs() {
        return this.numCorrectPairs;
    }

    /** @return a short description containing the cluster mode and the maximum fraction */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        buf.append("[TokenBlocker:clusterMode=").append(this.clusterMode);
        buf.append(",maxFraction=").append(this.maxFraction);
        buf.append("]");
        return buf.toString();
    }
}

