/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.AbstractStatisticalTokenDistance;
import com.wcohen.ss.BagOfTokens;
import com.wcohen.ss.JaroWinkler;
import com.wcohen.ss.PrintfFormat;
import com.wcohen.ss.api.StringDistance;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.expt.Blocker;
import com.wcohen.ss.expt.ClusterNGramBlocker;
import com.wcohen.ss.expt.MatchData;
import com.wcohen.ss.tokens.SimpleTokenizer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Token-based string distance that rewards tokens of {@code s} that also
 * occur (exactly or approximately) in {@code t} and penalizes tokens that
 * have no counterpart.
 *
 * <p>For every token of {@code s}, {@link #score(StringWrapper, StringWrapper)}
 * does one of three things:
 * <ul>
 *   <li>token is contained in {@code t}: adds
 *       {@code -log(df / collectionSize)};</li>
 *   <li>otherwise, some token of {@code t} has an inner-distance score of at
 *       least {@code tokenMatchThreshold}: adds the same weight, but computed
 *       from the document frequency of the token plus that of its recorded
 *       neighbors scoring at least as high as the best match;</li>
 *   <li>otherwise: subtracts {@code mismatchFactor} times the token's
 *       weight.</li>
 * </ul>
 *
 * <p>The neighbor table is built lazily on the first call to {@code score}
 * from the tokens present in the inherited document-frequency table.
 */
public class SoftTokenFelligiSunter extends AbstractStatisticalTokenDistance {
    private static final StringDistance DEFAULT_TOKEN_DISTANCE = new JaroWinkler();

    /** Multiplier applied to the weight of a token of {@code s} with no match in {@code t}. */
    private double mismatchFactor;
    /** Inner distance used to compare individual token values. */
    private final StringDistance tokenDistance;
    /** Minimum inner-distance score for two tokens to count as a soft match. */
    private double tokenMatchThreshold;
    /** True once {@link #neighborMap} has been built for the current threshold. */
    private boolean tokenDistancesComputed = false;
    /** Token value -> its neighbors, ordered by decreasing similarity score. */
    private Map<String, Set<TokenNeighbor>> neighborMap;

    /**
     * @param tokenizer           tokenizer handed to the superclass
     * @param tokenDistance       distance used to compare token values
     * @param tokenMatchThreshold minimum score for a soft token match
     * @param mismatchFactor      penalty multiplier for unmatched tokens
     */
    public SoftTokenFelligiSunter(Tokenizer tokenizer, StringDistance tokenDistance, double tokenMatchThreshold, double mismatchFactor) {
        super(tokenizer);
        this.tokenDistance = tokenDistance;
        this.tokenMatchThreshold = tokenMatchThreshold;
        this.mismatchFactor = mismatchFactor;
    }

    /**
     * Uses the default simple tokenizer, a {@link JaroWinkler} token distance,
     * a match threshold of 0.9 and a mismatch factor of 0.5.
     */
    public SoftTokenFelligiSunter() {
        this(SimpleTokenizer.defaultTokenizer(), DEFAULT_TOKEN_DISTANCE, 0.9, 0.5);
    }

    /** Sets the penalty multiplier for tokens of {@code s} with no match in {@code t}. */
    public void setMismatchFactor(double d) {
        this.mismatchFactor = d;
    }

    /**
     * Boxed overload of {@link #setMismatchFactor(double)}.
     *
     * @throws NullPointerException if {@code d} is null
     */
    public void setMismatchFactor(Double d) {
        this.setMismatchFactor(d.doubleValue());
    }

    /**
     * Sets the minimum inner-distance score for a soft token match.
     *
     * <p>The neighbor table is filtered by this threshold when it is built,
     * so it is discarded here and rebuilt on the next call to {@code score}.
     */
    public void setTokenMatchThreshold(double d) {
        this.tokenMatchThreshold = d;
        this.tokenDistancesComputed = false;
    }

    /**
     * Boxed overload of {@link #setTokenMatchThreshold(double)}.
     *
     * @throws NullPointerException if {@code d} is null
     */
    public void setTokenMatchThreshold(Double d) {
        this.setTokenMatchThreshold(d.doubleValue());
    }

    /**
     * Scores {@code s} against {@code t}; see the class description for the
     * three per-token cases. Only tokens of {@code s} are iterated, so the
     * result is not symmetric in general.
     *
     * @param s a {@link BagOfTokens}, as returned by {@link #prepare(String)}
     * @param t a {@link BagOfTokens}, as returned by {@link #prepare(String)}
     * @return the accumulated similarity
     * @throws ClassCastException if either argument is not a {@code BagOfTokens}
     */
    @Override
    public double score(StringWrapper s, StringWrapper t) {
        this.computeTokenDistances();
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        double sim = 0.0;
        Iterator<Token> i = sBag.tokenIterator();
        while (i.hasNext()) {
            Token tok = i.next();
            // NOTE(review): a document frequency of 0 makes the weight infinite;
            // confirm whether callers guarantee every scored token was seen.
            double df = this.getDocumentFrequency(tok);
            if (tBag.contains(tok)) {
                sim += this.weightForFrequency(df);
                continue;
            }
            // Find the best soft match in t; the threshold is the initial bar.
            boolean matched = false;
            double matchScore = this.tokenMatchThreshold;
            Iterator<Token> j = tBag.tokenIterator();
            while (j.hasNext()) {
                Token tokJ = j.next();
                double distItoJ = this.tokenDistance.score(tok.getValue(), tokJ.getValue());
                if (distItoJ >= matchScore) {
                    matched = true;
                    matchScore = distItoJ;
                }
            }
            if (matched) {
                sim += this.weightForFrequency(this.neighborhoodDocumentFrequency(tok, matchScore));
            } else {
                sim -= this.weightForFrequency(df) * this.mismatchFactor;
            }
        }
        return sim;
    }

    /** Returns {@code -log(df / collectionSize)}. */
    private double weightForFrequency(double df) {
        return -Math.log(df / (double)this.collectionSize);
    }

    /** Tokenizes {@code s} and wraps the result in a {@link BagOfTokens}. */
    @Override
    public StringWrapper prepare(String s) {
        return new BagOfTokens(s, this.tokenizer.tokenize(s));
    }

    /**
     * Lists the tokens of {@code s} that {@code t} contains, each with the
     * weight {@code t} reports for it, followed by the overall score.
     */
    @Override
    public String explainScore(StringWrapper s, StringWrapper t) {
        BagOfTokens sBag = (BagOfTokens)s;
        BagOfTokens tBag = (BagOfTokens)t;
        StringBuilder buf = new StringBuilder();
        PrintfFormat fmt = new PrintfFormat("%.3f");
        buf.append("Common tokens: ");
        Iterator<Token> i = sBag.tokenIterator();
        while (i.hasNext()) {
            Token tok = i.next();
            if (!tBag.contains(tok)) {
                continue;
            }
            buf.append(" ").append(tok.getValue()).append(": ");
            buf.append(fmt.sprintf(tBag.getWeight(tok)));
        }
        buf.append("\nscore = ").append(this.score(s, t));
        return buf.toString();
    }

    @Override
    public String toString() {
        return "[SoftTokenFelligiSunter]";
    }

    /**
     * Builds {@link #neighborMap} once: every token value in the
     * document-frequency table is passed to a {@link ClusterNGramBlocker},
     * and each candidate pair whose inner-distance score reaches the match
     * threshold is recorded as a neighbor of the pair's first element.
     */
    private void computeTokenDistances() {
        if (this.tokenDistancesComputed) {
            return;
        }
        this.neighborMap = new HashMap<String, Set<TokenNeighbor>>();
        MatchData tokenData = new MatchData();
        for (Token tok : this.documentFrequency.keySet()) {
            tokenData.addInstance("tokens", tok.getValue(), tok.getValue());
        }
        Blocker tokenBlocker = new ClusterNGramBlocker();
        tokenBlocker.block(tokenData);
        for (int i = 0; i < tokenBlocker.size(); ++i) {
            Blocker.Pair pair = tokenBlocker.getPair(i);
            String s = pair.getA().unwrap();
            String t = pair.getB().unwrap();
            double d = this.tokenDistance.score(s, t);
            if (d >= this.tokenMatchThreshold) {
                // Only the A -> B direction is recorded, as in the original.
                this.addNeighbor(s, t, d);
            }
        }
        this.tokenDistancesComputed = true;
    }

    /** Records {@code t} as a neighbor of {@code s} with similarity {@code d}. */
    private void addNeighbor(String s, String t, double d) {
        Set<TokenNeighbor> set = this.neighborMap.get(s);
        if (set == null) {
            set = new TreeSet<TokenNeighbor>();
            this.neighborMap.put(s, set);
        }
        set.add(new TokenNeighbor(t, d));
    }

    /**
     * Returns the document frequency of {@code tok} plus the frequencies of
     * all its recorded neighbors whose similarity is at least {@code d}.
     */
    private int neighborhoodDocumentFrequency(Token tok, double d) {
        int df = this.getDocumentFrequency(tok);
        Set<TokenNeighbor> neighbors = this.neighborMap.get(tok.getValue());
        if (neighbors == null) {
            return df;
        }
        // Neighbors are sorted by decreasing score, so stop at the first one below d.
        for (TokenNeighbor neighbor : neighbors) {
            if (neighbor.score < d) {
                break;
            }
            df += neighbor.freq;
        }
        return df;
    }

    public static void main(String[] argv) {
        SoftTokenFelligiSunter.doMain(new SoftTokenFelligiSunter(), argv);
    }

    /**
     * A token value that is similar to some other token, together with its
     * similarity score and its document frequency at construction time.
     *
     * <p>Ordered by decreasing score, with ties broken by token value so that
     * distinct tokens with equal scores can coexist in a {@link TreeSet}.
     */
    private class TokenNeighbor
    implements Comparable<TokenNeighbor> {
        public final String tokVal;
        public final int freq;
        public final double score;

        public TokenNeighbor(String tokVal, double score) {
            this.tokVal = tokVal;
            this.score = score;
            this.freq = SoftTokenFelligiSunter.this.getDocumentFrequency(SoftTokenFelligiSunter.this.tokenizer.intern(tokVal));
        }

        @Override
        public int compareTo(TokenNeighbor other) {
            if (other.score > this.score) {
                return 1;
            }
            if (other.score < this.score) {
                return -1;
            }
            // Tie-break on the token value: a TreeSet treats compareTo == 0 as a
            // duplicate, so comparing on score alone would drop tied neighbors.
            return this.tokVal.compareTo(other.tokVal);
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof TokenNeighbor)) {
                return false;
            }
            return this.compareTo((TokenNeighbor)obj) == 0;
        }

        @Override
        public int hashCode() {
            return this.tokVal.hashCode();
        }
    }
}

