/*
 * Decompiled with CFR 0.152.
 */
package com.wcohen.ss;

import com.wcohen.ss.BagOfTokens;
import com.wcohen.ss.BasicStringWrapper;
import com.wcohen.ss.BasicStringWrapperIterator;
import com.wcohen.ss.JaroWinkler;
import com.wcohen.ss.PrintfFormat;
import com.wcohen.ss.TFIDF;
import com.wcohen.ss.api.StringDistance;
import com.wcohen.ss.api.StringWrapper;
import com.wcohen.ss.api.Token;
import com.wcohen.ss.api.Tokenizer;
import com.wcohen.ss.tokens.SimpleTokenizer;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * TFIDF-style similarity in which tokens do not have to match exactly.
 *
 * <p>For every token of the first string, the score uses the identical token of
 * the second string when there is one. Otherwise it falls back to the token of
 * the second string that the configured token-level {@link StringDistance}
 * rates highest, provided that rating is at least the token match threshold.
 * Soft matches contribute the product of both token weights and the
 * token-level similarity.
 */
public class SoftTFIDF
extends TFIDF {
    /** Threshold used by {@link #SoftTFIDF(StringDistance)}. */
    private static final double DEFAULT_TOKEN_MATCH_THRESHOLD = 0.9;

    /** Distance used to compare individual tokens that are not identical. */
    private final StringDistance tokenDistance;
    /** Minimum token-level similarity for two different tokens to count as a match. */
    private double tokenMatchThreshold;
    private static final StringDistance DEFAULT_TOKEN_DISTANCE = new JaroWinkler();

    /**
     * Creates a distance with an explicit tokenizer.
     *
     * @param tokenizer tokenizer handed to the {@link TFIDF} superclass constructor
     * @param tokenDistance distance used to compare non-identical tokens
     * @param tokenMatchThreshold minimum token-level similarity for a soft match
     */
    public SoftTFIDF(Tokenizer tokenizer, StringDistance tokenDistance, double tokenMatchThreshold) {
        super(tokenizer);
        this.tokenDistance = tokenDistance;
        this.tokenMatchThreshold = tokenMatchThreshold;
    }

    /**
     * Creates a distance using the superclass's no-argument constructor.
     *
     * @param tokenDistance distance used to compare non-identical tokens
     * @param tokenMatchThreshold minimum token-level similarity for a soft match
     */
    public SoftTFIDF(StringDistance tokenDistance, double tokenMatchThreshold) {
        this.tokenDistance = tokenDistance;
        this.tokenMatchThreshold = tokenMatchThreshold;
    }

    /**
     * Creates a distance with a token match threshold of 0.9.
     *
     * @param tokenDistance distance used to compare non-identical tokens
     */
    public SoftTFIDF(StringDistance tokenDistance) {
        this(tokenDistance, DEFAULT_TOKEN_MATCH_THRESHOLD);
    }

    /**
     * Sets the minimum token-level similarity for a soft match.
     *
     * @param d new threshold
     */
    public void setTokenMatchThreshold(double d) {
        this.tokenMatchThreshold = d;
    }

    /**
     * Boxed variant of {@link #setTokenMatchThreshold(double)}.
     *
     * @param d new threshold; must not be {@code null}
     * @throws NullPointerException if {@code d} is {@code null}
     */
    public void setTokenMatchThreshold(Double d) {
        // Fail with a descriptive message instead of an anonymous unboxing NPE.
        if (d == null) {
            throw new NullPointerException("tokenMatchThreshold must not be null");
        }
        this.tokenMatchThreshold = d.doubleValue();
    }

    /**
     * Returns the minimum token-level similarity for a soft match.
     *
     * @return the current threshold
     */
    public double getTokenMatchThreshold() {
        return this.tokenMatchThreshold;
    }

    /**
     * Computes the soft TFIDF similarity of {@code s} and {@code t}.
     *
     * <p>Each token of {@code s} contributes at most once: with the identical
     * token of {@code t} if present, otherwise with its best soft match (see
     * the class description). Tokens without any match contribute nothing.
     *
     * @param s first string
     * @param t second string
     * @return the sum of all per-token contributions
     */
    public double score(StringWrapper s, StringWrapper t) {
        this.checkTrainingHasHappened(s, t);
        TFIDF.UnitVector sBag = this.asUnitVector(s);
        TFIDF.UnitVector tBag = this.asUnitVector(t);
        double sim = 0.0;
        Iterator<?> i = sBag.tokenIterator();
        while (i.hasNext()) {
            Token tok = (Token) i.next();
            if (tBag.contains(tok)) {
                sim += sBag.getWeight(tok) * tBag.getWeight(tok);
                continue;
            }
            TokenMatch match = this.findBestMatch(tok, tBag);
            if (match != null) {
                sim += sBag.getWeight(tok) * tBag.getWeight(match.token) * match.similarity;
            }
        }
        return sim;
    }

    /**
     * Builds a human-readable breakdown of {@link #score(StringWrapper, StringWrapper)}.
     *
     * <p>Lists every exact token match as {@code weight*weight} and every soft
     * match as {@code weight*weight*similarity}, followed by the total score.
     * The arguments are converted with {@code asUnitVector}, the same call
     * {@code score} uses, so any input accepted by {@code score} is accepted
     * here and the listed weights are those of the vectors being scored.
     *
     * @param s first string
     * @param t second string
     * @return the explanation text
     */
    public String explainScore(StringWrapper s, StringWrapper t) {
        // Score first so that the training check performed by score() has run
        // before the vectors used for the explanation are built.
        double total = this.score(s, t);
        TFIDF.UnitVector sBag = this.asUnitVector(s);
        TFIDF.UnitVector tBag = this.asUnitVector(t);
        StringBuilder buf = new StringBuilder();
        PrintfFormat fmt = new PrintfFormat("%.3f");
        buf.append("Common tokens: ");
        Iterator<?> i = sBag.tokenIterator();
        while (i.hasNext()) {
            Token tok = (Token) i.next();
            if (tBag.contains(tok)) {
                buf.append(" ").append(tok.getValue()).append(": ");
                buf.append(fmt.sprintf(sBag.getWeight(tok)));
                buf.append("*");
                buf.append(fmt.sprintf(tBag.getWeight(tok)));
                continue;
            }
            TokenMatch match = this.findBestMatch(tok, tBag);
            if (match == null) {
                continue;
            }
            buf.append(" '").append(tok.getValue()).append("'~='")
                    .append(match.token.getValue()).append("': ");
            buf.append(fmt.sprintf(sBag.getWeight(tok)));
            buf.append("*");
            buf.append(fmt.sprintf(tBag.getWeight(match.token)));
            buf.append("*");
            buf.append(fmt.sprintf(match.similarity));
        }
        buf.append("\nscore = ").append(total);
        return buf.toString();
    }

    /**
     * Finds the token of {@code candidates} most similar to {@code tok}
     * according to the token distance.
     *
     * @param tok token to find a partner for
     * @param candidates vector whose tokens are considered
     * @return the best match whose similarity is at least the token match
     *         threshold, or {@code null} if no candidate reaches it
     */
    private TokenMatch findBestMatch(Token tok, TFIDF.UnitVector candidates) {
        double bestSimilarity = this.tokenMatchThreshold;
        Token bestToken = null;
        Iterator<?> j = candidates.tokenIterator();
        while (j.hasNext()) {
            Token candidate = (Token) j.next();
            double similarity = this.tokenDistance.score(tok.getValue(), candidate.getValue());
            // ">=" lets a later candidate with an equal similarity replace an earlier one.
            if (similarity >= bestSimilarity) {
                bestToken = candidate;
                bestSimilarity = similarity;
            }
        }
        return bestToken == null ? null : new TokenMatch(bestToken, bestSimilarity);
    }

    public String toString() {
        return "[SoftTFIDF thresh=" + this.tokenMatchThreshold + ";" + this.tokenDistance + "]";
    }

    /**
     * Small demonstration: trains on three strings and prints the explanation
     * for the first two.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        SoftTFIDF softTFIDF = new SoftTFIDF(SimpleTokenizer.DEFAULT_TOKENIZER, new JaroWinkler(), 0.9);
        String str1 = "service hotel city locator";
        String str2 = "service country capital";
        String str3 = "country hospital finder";
        ArrayList<BasicStringWrapper> corpus = new ArrayList<BasicStringWrapper>();
        String[] words = new String[]{str1, str2, str3};
        for (String word : words) {
            corpus.add(new BasicStringWrapper(word));
        }
        BasicStringWrapperIterator iterator = new BasicStringWrapperIterator(corpus.iterator());
        softTFIDF.train(iterator);
        System.out.println("### Preparing string ###");
        StringWrapper w1 = softTFIDF.prepare(str1);
        System.out.println("### Preparing string ###");
        StringWrapper w2 = softTFIDF.prepare(str2);
        // Return value is not used here; explainScore below prints the score.
        softTFIDF.score(w1, w2);
        System.out.println(softTFIDF.explainScore(w1, w2));
    }

    /** A candidate token together with its token-level similarity to the query token. */
    private static final class TokenMatch {
        final Token token;
        final double similarity;

        TokenMatch(Token token, double similarity) {
            this.token = token;
            this.similarity = similarity;
        }
    }
}

