/*
 * Decompiled with CFR 0.152.
 */
package de.dfki.s2m2de.textsim;

import de.dfki.s2m2de.expression.ExpressionEvaluationException;
import de.dfki.s2m2de.textsim.Index;
import de.dfki.s2m2de.textsim.IndexException;
import de.dfki.s2m2de.textsim.TextSimilarityMeasure;
import de.dfki.s2m2de.textsim.lucene.LuceneMemoryIndex;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Cosine similarity between two texts, using tf-idf term weights derived from
 * a backing {@link Index}.
 *
 * <p>Both texts are tokenized by the index. Each distinct term is weighted by
 * its term frequency (normalized by the most frequent term of the same text)
 * multiplied by {@code log(N / df)}, where {@code N} is the number of documents
 * reported by the index and {@code df} is the document frequency the index
 * reports for the term.
 */
public class Cosine
extends TextSimilarityMeasure {
    /** Index used for tokenization and for document/term statistics. */
    protected Index index;

    /**
     * Returns the index this measure reads its statistics from.
     *
     * @return the index passed to the constructor
     */
    public Index getIndex() {
        return this.index;
    }

    /**
     * Creates a cosine measure backed by the given index.
     *
     * @param index index used to tokenize texts and to look up document frequencies
     */
    public Cosine(Index index) {
        this.index = index;
    }

    /**
     * Computes the tf-idf cosine similarity of two texts.
     *
     * <p>Special cases:
     * <ul>
     *   <li>both texts tokenize to nothing: {@code 1.0};</li>
     *   <li>exactly one text tokenizes to nothing: {@code 0.0};</li>
     *   <li>either weight vector has zero length (for example every term has
     *       an idf of zero): {@code 0.0} instead of {@code NaN}.</li>
     * </ul>
     *
     * @param entity1 first text
     * @param entity2 second text
     * @return the cosine of the angle between the two tf-idf vectors
     * @throws ExpressionEvaluationException if the index reports an {@link IndexException}
     */
    @Override
    public Double evaluate(String entity1, String entity2) throws ExpressionEvaluationException {
        try {
            Vector<String> terms1 = this.index.tokenize(entity1);
            Vector<String> terms2 = this.index.tokenize(entity2);
            if (terms1.isEmpty() && terms2.isEmpty()) {
                return 1.0;
            }
            if (terms1.isEmpty() || terms2.isEmpty()) {
                return 0.0;
            }

            double documentCount = this.index.getNumberOfDocuments();
            Map<String, Double> weights1 = this.tfIdfWeights(Cosine.countTerms(terms1), documentCount);
            Map<String, Double> weights2 = this.tfIdfWeights(Cosine.countTerms(terms2), documentCount);

            // Only terms present in both texts contribute to the dot product.
            double dot = 0.0;
            for (Map.Entry<String, Double> entry : weights1.entrySet()) {
                Double other = weights2.get(entry.getKey());
                if (other != null) {
                    dot += entry.getValue() * other;
                }
            }

            double denominator = Cosine.magnitude(weights1) * Cosine.magnitude(weights2);
            if (denominator == 0.0) {
                // A zero-length vector has no direction; report "no similarity"
                // rather than letting 0/0 produce NaN.
                return 0.0;
            }
            double similarity = dot / denominator;
            return Double.isNaN(similarity) ? 0.0 : similarity;
        }
        catch (IndexException e) {
            throw new ExpressionEvaluationException("Unable to compute cosine similarity.", e);
        }
    }

    /** Counts how often each distinct term occurs in the given token sequence. */
    private static Map<String, Integer> countTerms(Iterable<String> terms) {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        for (String term : terms) {
            Integer seen = counts.get(term);
            counts.put(term, seen == null ? 1 : seen + 1);
        }
        return counts;
    }

    /**
     * Turns raw term counts into tf-idf weights; tf is the count divided by
     * the largest count in the same text. Expects a non-empty count map.
     */
    private Map<String, Double> tfIdfWeights(Map<String, Integer> counts, double documentCount) throws IndexException {
        double maxCount = Collections.max(counts.values()).intValue();
        Map<String, Double> weights = new HashMap<String, Double>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            double tf = (double)entry.getValue().intValue() / maxCount;
            weights.put(entry.getKey(), tf * this.inverseDocumentFrequency(entry.getKey(), documentCount));
        }
        return weights;
    }

    /**
     * Computes {@code log(N / df)} for a term, keeping the result finite.
     *
     * <p>A document frequency below one (a term the index reports no documents
     * for) is clamped to one so the division cannot produce infinity. If the
     * ratio is not positive (for example an empty index) the idf is zero.
     */
    private double inverseDocumentFrequency(String term, double documentCount) throws IndexException {
        double documentFrequency = Math.max(1.0, (double)this.index.getDocumentFrequency(term));
        double ratio = documentCount / documentFrequency;
        return ratio > 0.0 ? Math.log(ratio) : 0.0;
    }

    /** Euclidean length of a weight vector. */
    private static double magnitude(Map<String, Double> weights) {
        double sumOfSquares = 0.0;
        for (double weight : weights.values()) {
            sumOfSquares += weight * weight;
        }
        return Math.sqrt(sumOfSquares);
    }

    /**
     * Small demo: indexes three sample documents in a {@link LuceneMemoryIndex}
     * and prints the similarity of the query "test" to each of them.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] documents = new String[]{
            "A test is a way of checking something to see if it is true, or false, or if it is edible or not. If something can be tested, or finishes the tests correctly, it is testable.",
            "The Turing test is a test to see if a computer can trick a person into believing that the computer is a person too. Alan Turing thought that if a human couldn't tell the difference between another human and the computer, then that computer must be as intelligent as a human.",
            "Computer science is the science of how to treat information. There are many different areas in computer science. Some of the areas consider problems in a more abstract way. Some areas need special machines, called computers. A computer programmer will often need math, science, and logic in order to make and use computers."
        };
        try {
            LuceneMemoryIndex index = new LuceneMemoryIndex();
            // Document ids are "1", "2", "3", matching the printed labels below.
            for (int i = 0; i < documents.length; ++i) {
                index.addDocument(String.valueOf(i + 1), documents[i]);
            }
            Cosine cos = new Cosine(index);
            System.out.println("Search term \"test\"");
            for (int i = 0; i < documents.length; ++i) {
                System.out.println("sim " + (i + 1) + ": " + cos.evaluate("test", documents[i]));
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Serializes this measure by appending an empty {@code <cosine>} element
     * to the given parent element.
     *
     * @param targetDoc document used to create the new element
     * @param parentElem element that receives the {@code <cosine>} child
     */
    @Override
    public void decompile(Document targetDoc, Element parentElem) {
        Element elem = targetDoc.createElement("cosine");
        parentElem.appendChild(elem);
    }
}

