/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.AbstractStatisticalTokenDistance;
import com.wcohen.ss.BagOfTokens;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.tokens.SimpleTokenizer;
import java.util.Iterator;

/**
 * Token-based string distance built on {@link AbstractStatisticalTokenDistance}.
 *
 * <p>{@link #prepare(String)} turns a string into a {@link BagOfTokens} whose token
 * weights are relative document frequencies. {@link #explainScore(StringWrapper, StringWrapper)}
 * produces a textual trace of an iterative estimate of how many tokens two bags share.
 *
 * <p>NOTE(review): {@link #score(StringWrapper, StringWrapper)} always returns {@code 0.0};
 * only {@code explainScore} performs any computation. Confirm whether {@code score} is an
 * intentional stub.
 */
public class SLIMToken
extends AbstractStatisticalTokenDistance {
    /** Weight assigned to every token while {@code collectionSize} is not positive. */
    private static final double UNTRAINED_TOKEN_WEIGHT = 0.1;

    /** Document frequency assumed for a token that has no entry in {@code documentFrequency}. */
    private static final double UNSEEN_DOCUMENT_FREQUENCY = 1.0;

    /** Number of re-estimation passes performed by {@code explainScore}. */
    private static final int REFINEMENT_PASSES = 3;

    /** Amount subtracted from the raw shared-token count to seed the first pass. */
    private static final double INITIAL_COUNT_OFFSET = 0.5;

    private final Tokenizer tokenizer = SimpleTokenizer.DEFAULT_TOKENIZER;

    /**
     * Returns the fixed display name of this distance.
     *
     * @return the literal string {@code "[SLIMToken]"}
     */
    public String toString() {
        return "[SLIMToken]";
    }

    /**
     * Tokenizes {@code s} and wraps the tokens in a weighted {@link BagOfTokens}.
     *
     * <p>When {@code collectionSize} is positive each token is weighted by
     * {@code df / collectionSize}, where {@code df} is the token's entry in
     * {@code documentFrequency} (or 1 when the token has no entry). Otherwise every
     * token receives the fixed weight 0.1.
     *
     * @param s the string to prepare
     * @return the weighted bag of tokens for {@code s}
     */
    public StringWrapper prepare(String s) {
        BagOfTokens bag = new BagOfTokens(s, this.tokenizer.tokenize(s));
        for (Iterator<?> it = bag.tokenIterator(); it.hasNext(); ) {
            Token tok = (Token)it.next();
            if (this.collectionSize > 0) {
                Integer dfInteger = (Integer)this.documentFrequency.get(tok);
                double df = dfInteger == null ? UNSEEN_DOCUMENT_FREQUENCY : (double)dfInteger.intValue();
                bag.setWeight(tok, df / (double)this.collectionSize);
            } else {
                bag.setWeight(tok, UNTRAINED_TOKEN_WEIGHT);
            }
        }
        return bag;
    }

    /**
     * Returns the score for a pair of prepared strings.
     *
     * <p>This implementation ignores both arguments and always returns {@code 0.0}.
     * NOTE(review): {@code explainScore} reports a ratio labelled "score" that this
     * method does not compute - confirm the intended behaviour.
     *
     * @param s first prepared string (unused)
     * @param t second prepared string (unused)
     * @return always {@code 0.0}
     */
    public double score(StringWrapper s, StringWrapper t) {
        return 0.0;
    }

    /**
     * Builds a human-readable trace of the shared-token estimate for two prepared strings.
     *
     * <p>The estimate starts at the number of tokens of {@code s} that also occur in
     * {@code t}, minus 0.5. It is then replaced three times by the sum, over the shared
     * tokens, of a per-token value ({@code pZijkGivenMijk}) derived from the token's
     * weight in {@code t}, the two bag sizes and the current estimate. Judging by the
     * variable names this looks like a posterior match probability - TODO confirm.
     * Each per-token computation and each updated estimate is written on its own line,
     * followed by a final line showing the estimate and
     * {@code size(s) + size(t) - estimate}.
     *
     * @param s first prepared string; must be a {@link BagOfTokens}
     * @param t second prepared string; must be a {@link BagOfTokens}
     * @return the multi-line explanation text
     * @throws ClassCastException if either argument is not a {@link BagOfTokens}
     */
    public String explainScore(StringWrapper s, StringWrapper t) {
        StringBuilder buf = new StringBuilder();
        BagOfTokens a = (BagOfTokens)s;
        BagOfTokens b = (BagOfTokens)t;
        int sizeA = a.size();
        int sizeB = b.size();

        // Seed: raw count of tokens of a that are also present in b.
        int numCommon = 0;
        for (Iterator<?> it = a.tokenIterator(); it.hasNext(); ) {
            Token tok = (Token)it.next();
            if (b.contains(tok)) {
                ++numCommon;
            }
        }

        // Size reciprocals never change; compute them once.
        double invSizeA = 1.0 / (double)sizeA;
        double invSizeB = 1.0 / (double)sizeB;

        double numCommonTokens = (double)numCommon - INITIAL_COUNT_OFFSET;
        for (int pass = 0; pass < REFINEMENT_PASSES; ++pass) {
            // m1 and n1 depend only on the estimate from the previous pass,
            // so they are constant for every token within this pass.
            double m1 = (double)sizeA - numCommonTokens;
            double n1 = (double)sizeB - numCommonTokens;
            double newNumCommonTokens = 0.0;
            for (Iterator<?> it = a.tokenIterator(); it.hasNext(); ) {
                Token tok = (Token)it.next();
                if (!b.contains(tok)) {
                    continue;
                }
                double pk = b.getWeight(tok);
                double priorZijk = pk * invSizeA * invSizeB;
                double pMijkGivenZijk = 1.0;
                // 1 - (1 - pk^2)^((m1 + 1) * (n1 + 1)), evaluated through exp/log.
                double pMijkGivenNotZijk = 1.0 - Math.exp((m1 + 1.0) * (n1 + 1.0) * Math.log(1.0 - pk * pk));
                double pMijk = pMijkGivenZijk * priorZijk + pMijkGivenNotZijk * (1.0 - priorZijk);
                double pZijkGivenMijk = pMijkGivenZijk * priorZijk / pMijk;
                buf.append(" tok='").append(tok).append("' pk=").append(pk)
                   .append(" m1:").append(m1).append(" n1:").append(n1);
                buf.append(" pMijk=").append(pMijk);
                buf.append(" pMijkGivenNotZijk=").append(pMijkGivenNotZijk)
                   .append(" pZijkGivenMijk=").append(pZijkGivenMijk).append("\n");
                newNumCommonTokens += pZijkGivenMijk;
            }
            numCommonTokens = newNumCommonTokens;
            // Terminate the line so the next pass (or the final score line) starts cleanly.
            buf.append("numCommonTokens -> ").append(numCommonTokens).append("\n");
        }
        buf.append("score = ").append(numCommonTokens)
           .append(" / ").append((double)(sizeA + sizeB) - numCommonTokens);
        return buf.toString();
    }

    /**
     * Command-line entry point; hands a new {@code SLIMToken} and the arguments to
     * the inherited {@code doMain} driver.
     *
     * @param argv command-line arguments, passed through unchanged
     */
    public static void main(String[] argv) {
        SLIMToken.doMain(new SLIMToken(), argv);
    }
}

