/**
 * SimMetrics - SimMetrics is a java library of Similarity or Distance
 * Metrics, e.g. Levenshtein Distance, that provide float based similarity
 * measures between String Data. All metrics return consistent measures
 * rather than unbounded similarity scores.
 *
 * Copyright (C) 2005 Sam Chapman - Open Source Release v1.1
 *
 * Please Feel free to contact me about this library, I would appreciate
 * knowing quickly what you wish to use it for and any criticisms/comments
 * upon the SimMetric library.
 *
 * email:       s.chapman@dcs.shef.ac.uk
 * www:         http://www.dcs.shef.ac.uk/~sam/
 * www:         http://www.dcs.shef.ac.uk/~sam/stringmetrics.html
 *
 * address:     Sam Chapman,
 *              Department of Computer Science,
 *              University of Sheffield,
 *              Sheffield,
 *              S. Yorks,
 *              S1 4DP
 *              United Kingdom,
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package simmetrics.similaritymetrics.costfunctions;


import java.util.HashSet;
import java.util.Set;
import java.io.Serializable;

import simmetrics.api.AbstractSubstitutionCost;

/**
 * Package: costfunctions
 * Description: SubCost5_3_Minus3 implements a character substitution cost function as used in
 * Monge Elkan. A pair of characters scores +5 for an exact match, +3 for an approximate match
 * and -3 for a mismatch. Two characters match approximately when, after lower-casing, both
 * belong to the same one of the groups {dt} {gj} {lr} {mn} {bpv} {aeiou} {,.}
 *
 * Date: 30-Mar-2004
 * Time: 09:45:19
 * @author Sam Chapman
 * @version 1.1
 */
public final class SubCost5_3_Minus3 extends AbstractSubstitutionCost implements Serializable {

    /**
     * score returned when the two characters are identical.
     */
    private static final int CHAR_EXACT_MATCH_SCORE = +5;

    /**
     * score returned when the two characters differ but fall in the same approximate group.
     */
    private static final int CHAR_APPROX_MATCH_SCORE = +3;

    /**
     * score returned when the two characters neither match exactly nor approximately,
     * or when either index lies outside its string.
     */
    private static final int CHAR_MISMATCH_MATCH_SCORE = -3;

    /**
     * approximate character groups, one string per group, all in lower case.
     * approximate match = +3,
     * for pairings in {dt} {gj} {lr} {mn} {bpv} {aeiou} {,.}.
     * Held as plain strings so that lookups work on primitive chars and need no boxing.
     * The array is private and only ever read.
     */
    private static final String[] APPROX_GROUPS = {
        "dt",
        "gj",
        "lr",
        "mn",
        "bpv",
        "aeiou",
        ",."
    };

    /**
     * returns the name of the cost function.
     *
     * @return the name of the cost function
     */
    public final String getShortDescriptionString() {
        return "SubCost5_3_Minus3";
    }

    /**
     * get cost between characters where
     * d(i,j) = CHAR_EXACT_MATCH_SCORE if i equals j,
     * CHAR_APPROX_MATCH_SCORE if i approximately equals j or
     * CHAR_MISMATCH_MATCH_SCORE if i does not equal j.
     *
     * The exact comparison is case sensitive, whereas the approximate comparison lower-cases
     * both characters first. Consequently 'A' against 'a' scores as an approximate match
     * (both are in the vowel group), while 'X' against 'x' scores as a mismatch because
     * 'x' belongs to no group.
     *
     * An index outside its string (negative, or not less than the string length) scores as
     * a mismatch rather than raising an exception.
     *
     * @param str1         - the string1 to evaluate the cost
     * @param string1Index - the index within the string1 to test
     * @param str2         - the string2 to evaluate the cost
     * @param string2Index - the index within the string2 to test
     * @return the cost of a given substitution d(i,j) as defined above
     */
    public final float getCost(final String str1, final int string1Index, final String str2, final int string2Index) {
        //check within range
        if (str1.length() <= string1Index || string1Index < 0) {
            return CHAR_MISMATCH_MATCH_SCORE;
        }
        if (str2.length() <= string2Index || string2Index < 0) {
            return CHAR_MISMATCH_MATCH_SCORE;
        }

        final char first = str1.charAt(string1Index);
        final char second = str2.charAt(string2Index);
        if (first == second) {
            return CHAR_EXACT_MATCH_SCORE;
        }

        //only the approximate test ignores case
        if (isApproximateMatch(Character.toLowerCase(first), Character.toLowerCase(second))) {
            return CHAR_APPROX_MATCH_SCORE;
        }
        return CHAR_MISMATCH_MATCH_SCORE;
    }

    /**
     * tests whether two (already lower-cased) characters share an approximate group.
     *
     * @param first  - the first lower-cased character
     * @param second - the second lower-cased character
     * @return true if a single group contains both characters, false otherwise
     */
    private static boolean isApproximateMatch(final char first, final char second) {
        for (int i = 0; i < APPROX_GROUPS.length; i++) {
            final String group = APPROX_GROUPS[i];
            if (group.indexOf(first) >= 0 && group.indexOf(second) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * returns the maximum possible cost.
     *
     * @return the maximum possible cost
     */
    public final float getMaxCost() {
        return CHAR_EXACT_MATCH_SCORE;
    }

    /**
     * returns the minimum possible cost.
     *
     * @return the minimum possible cost
     */
    public final float getMinCost() {
        return CHAR_MISMATCH_MATCH_SCORE;
    }
}


