'''
This module provides the basic rank metrics that are calculated on a segment
level (i.e. one ranking list at a time)

Created on Nov 25, 2012

@author: Eleftherios Avramidis
'''

from math import log
import logging
from collections import namedtuple
from sentence.ranking import Ranking

15 """""""""
16 Kendall tau
17 """""""""
18
20 """
21 Calculation of Kendall tau hypothesis testing based on scipy calculation
22 @param tau: already calculated tau coefficient
23 @type tau: float
24 @param pairs: count of pairs
25 @type pairs: int
26 @return: the probability for the null hypothesis of X and Y being independent
27 @rtype: float
28 """
29 import numpy as np
30 from scipy import special
31 try:
32 svar = (4.0 * pairs + 10.0) / (9.0 * pairs * (pairs - 1))
33 except:
34 svar = 1
35 try:
36 z = tau / np.sqrt(svar)
37 except:
38 z = 1
39 return special.erfc(np.abs(z) / 1.4142136)
40
41
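# Worked example (illustration only, not part of the module API): for tau = 0.5
# estimated over pairs = 10 comparisons, the normal approximation above gives
# svar = (4*10 + 10) / (9*10*9) ~= 0.0617, z = 0.5 / sqrt(0.0617) ~= 2.01 and
# erfc(|z| / sqrt(2)) ~= 0.044, i.e. the independence hypothesis would be
# rejected at the 5% significance level.

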
def kendall_tau(predicted_rank_vector, original_rank_vector, **kwargs):
    """
    Refined calculation of the segment-level Kendall tau of predicted vs. human ranking, according to WMT12 (Birch et al., 2012)
    @param predicted_rank_vector: the predicted ranks
    @type predicted_rank_vector: L{Ranking}
    @param original_rank_vector: the original (human) ranks
    @type original_rank_vector: L{Ranking}
    @kwarg ties: way of handling ties, passed to the L{sentence.ranking.Ranking} object
    @type ties: string
    @return: the Kendall tau score,
        the probability for the null hypothesis of X and Y being independent,
        the count of concordant pairs,
        the count of discordant pairs,
        the count of pairs used for calculating tau (excluding "invalid" pairs),
        the count of original ties,
        the count of predicted ties,
        the count of all pairs
    @rtype: namedtuple(float, float, int, int, int, int, int, int)
    """
    import itertools
    ties_handling = kwargs.setdefault('ties', 'ceiling')

    predicted_rank_vector = predicted_rank_vector.normalize(ties=ties_handling)
    original_rank_vector = original_rank_vector.normalize(ties=ties_handling)

    logging.debug("\n* Segment tau *")
    logging.debug("predicted vector: {}".format(predicted_rank_vector))
    logging.debug("original vector : {}".format(original_rank_vector))

    exclude_ties = kwargs.setdefault("exclude_ties", True)
    penalize_predicted_ties = kwargs.setdefault("penalize_predicted_ties", True)
    logging.debug("exclude_ties: {}".format(exclude_ties))

    if kwargs.setdefault("invert_ranks", False):
        inv = -1.00
    else:
        inv = 1.00

    predicted_pairs = [(float(i), float(j)) for i, j in itertools.combinations(predicted_rank_vector, 2)]
    original_pairs = [(inv * float(i), inv * float(j)) for i, j in itertools.combinations(original_rank_vector, 2)]

    concordant_count = 0
    discordant_count = 0

    original_ties = 0
    predicted_ties = 0
    pairs = 0

    for original_pair, predicted_pair in zip(original_pairs, predicted_pairs):
        original_i, original_j = original_pair
        predicted_i, predicted_j = predicted_pair

        logging.debug("%s\t%s", original_pair, predicted_pair)

        pairs += 1
        if original_i == original_j:
            original_ties += 1
        if predicted_i == predicted_j:
            predicted_ties += 1

        if original_i == -1 or original_j == -1:
            pass

        elif original_i == original_j and exclude_ties:
            pass

        elif (original_i > original_j and predicted_i > predicted_j) \
                or (original_i < original_j and predicted_i < predicted_j) \
                or (original_i == original_j and predicted_i == predicted_j):
            concordant_count += 1
            logging.debug("\t\tCON")

        elif predicted_i == predicted_j and not penalize_predicted_ties:
            pass

        else:
            discordant_count += 1
            logging.debug("\t\tDIS")

    all_pairs_count = concordant_count + discordant_count

    logging.debug("original_ties = %d, predicted_ties = %d", original_ties, predicted_ties)
    logging.debug("conc = %d, disc = %d", concordant_count, discordant_count)

    try:
        tau = 1.00 * (concordant_count - discordant_count) / all_pairs_count
        logging.debug("tau = ({0} - {1}) / ({0} + {1})".format(concordant_count, discordant_count))
        logging.debug("tau = {0} / {1}".format(concordant_count - discordant_count, all_pairs_count))
    except ZeroDivisionError:
        tau = None
        prob = None
    else:
        prob = kendall_tau_prob(tau, all_pairs_count)

    logging.debug("tau = {}, prob = {}\n".format(tau, prob))

    Result = namedtuple('Result', ['tau', 'prob', 'concordant_count', 'discordant_count', 'all_pairs_count', 'original_ties', 'predicted_ties', 'pairs'])
    result = Result(tau, prob, concordant_count, discordant_count, all_pairs_count, original_ties, predicted_ties, pairs)

    return result

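# Usage sketch (illustration only; it assumes that a Ranking object can be
# built directly from a list of integer ranks, which is defined in
# sentence.ranking rather than in this module):
#
#     result = kendall_tau(Ranking([1, 3, 2, 4]), Ranking([1, 2, 3, 4]))
#     print(result.tau, result.prob, result.concordant_count, result.discordant_count)
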

"""""""""
DCG
"""""""""

def _calculate_gains(predicted_rank_vector, original_rank_vector, verbose=True):
    """
    Calculate the gain for each one of the predicted ranks
    @param predicted_rank_vector: list of integers representing the predicted ranks
    @type predicted_rank_vector: [int, ...]
    @param original_rank_vector: list of integers containing the original ranks
    @type original_rank_vector: [int, ...]
    @return: a list of gains, relevant to the DCG calculation
    @rtype: [float, ...]
    """

    r = predicted_rank_vector
    n = len(original_rank_vector)

    l = [n - i + 1 for i in original_rank_vector]

    expn = 2 ** n
    gains = [0] * n

    for j in range(n):
        gains[r[j] - 1] = (2 ** l[j] - 1.0) / expn

        logging.debug("j={}\nr[j]={}\nl[j]={}\n".format(j, r[j], l[j]))
        logging.debug("gains[{}] = ".format(r[j] - 1))
        logging.debug("\t(2**l[j]-1.0) / 2**n =")
        logging.debug("\t(2**{}-1.0) / 2**{} =".format(l[j], n))
        logging.debug("\t{} / {} =".format((2 ** l[j] - 1.0), expn))
        logging.debug("{}".format((2 ** l[j] - 1.0) / expn))
        logging.debug("gains = {}".format(gains))

    assert min(gains) >= 0, 'Not all ranks present'
    return gains

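# Example of the gain computation (plain integer lists, so no Ranking object is
# needed): for a three-item segment ranked identically by system and human,
# _calculate_gains([1, 2, 3], [1, 2, 3]) maps the original ranks to relevance
# levels l = [3, 2, 1] and returns gains = [(2**3-1)/8, (2**2-1)/8, (2**1-1)/8]
# = [0.875, 0.375, 0.125], indexed by the predicted rank positions.
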

def idcg(gains, k):
    """
    Calculate the Ideal Discounted Cumulative Gain for the given vector of ranking gains
    @param gains: the list of gains, one per rank
    @type gains: [float, ...]
    @param k: the DCG cut-off
    @type k: int
    @return: the calculated Ideal Discounted Cumulative Gain
    @rtype: float
    """
    gains = sorted(gains, reverse=True)
    ideal_dcg = sum([g / log(j + 2) for (j, g) in enumerate(gains[:k])])
    return ideal_dcg

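# Continuing the example above, idcg([0.875, 0.375, 0.125], 3) sorts the gains
# in descending order and discounts them with the natural logarithm:
# 0.875/log(2) + 0.375/log(3) + 0.125/log(4) ~= 1.69. The choice of logarithm
# base cancels out in the nDCG ratio computed by ndgc_err() below.
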
def ndgc_err(predicted_rank_vector, original_rank_vector, k=None):
    """
    Calculate the normalized Discounted Cumulative Gain (nDCG) and the Expected Reciprocal Rank (ERR) on a sentence level.
    This follows the definition of U{DCG<http://en.wikipedia.org/wiki/Discounted_cumulative_gain#Cumulative_Gain>}
    and U{ERR<http://research.yahoo.com/files/err.pdf>}, and the implementation of the
    U{Yahoo Learning to Rank challenge<http://learningtorankchallenge.yahoo.com/evaluate.py.txt>}
    @param predicted_rank_vector: the predicted ranks
    @type predicted_rank_vector: L{Ranking}
    @param original_rank_vector: the original (human) ranks
    @type original_rank_vector: L{Ranking}
    @param k: the cut-off for the calculation of the gains. If not specified, the length of the ranking is used
    @type k: int
    @return: a tuple containing the values of the two metrics (nDCG, ERR)
    @rtype: tuple(float, float)
    """

    r = predicted_rank_vector.normalize(ties='ceiling').integers()
    l = original_rank_vector.normalize(ties='ceiling').integers()
    n = len(l)

    if not k:
        k = n

    assert len(r) == n, 'Expected {} ranks, but got {}.'.format(n, len(r))
    gains = _calculate_gains(r, l)

    p = 1.0
    err = 0.0
    for j in range(n):
        g = gains[j]
        err += p * g / (j + 1.0)
        p *= 1 - g

    dcg = sum([g / log(j + 2) for (j, g) in enumerate(gains[:k])])

    ideal_dcg = idcg(gains, k)

    if ideal_dcg:
        ndcg = dcg / ideal_dcg
    else:
        ndcg = 1.0

    return ndcg, err

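# For a perfect prediction the two rankings coincide, so dcg equals ideal_dcg
# and the nDCG is 1.0; with the three-item gains from the example above, the
# ERR accumulates 0.875/1 + 0.125*0.375/2 + 0.078125*0.125/3 ~= 0.90.
# Usage sketch (hedged: assumes Ranking wraps a plain list of integer ranks):
#
#     ndcg, err = ndgc_err(Ranking([1, 2, 3]), Ranking([1, 2, 3]))
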

"""
Reciprocal rank
"""

def reciprocal_rank(predicted_rank_vector, original_rank_vector, **kwargs):
    """
    Calculates the First Answer Reciprocal Rank, according to Radev et al. (2002)
    @param predicted_rank_vector: the predicted ranks
    @type predicted_rank_vector: L{Ranking}
    @param original_rank_vector: the original (human) ranks
    @type original_rank_vector: L{Ranking}
    @kwarg ties: way of handling ties, passed to the L{sentence.ranking.Ranking} object
    @type ties: string
    @return: the reciprocal rank value
    @rtype: float
    """
    ties_handling = kwargs.setdefault('ties', 'ceiling')

    predicted_rank_vector = predicted_rank_vector.normalize(ties=ties_handling)
    original_rank_vector = original_rank_vector.normalize(ties=ties_handling)

    best_original_rank = min(original_rank_vector)
    best_original_rank_indexes = original_rank_vector.indexes(best_original_rank)

    predicted_values_for_best_rank = [predicted_rank_vector[index] for index in best_original_rank_indexes]
    best_predicted_value_for_best_rank = min(predicted_values_for_best_rank)

    reciprocal_rank = 1.00 / best_predicted_value_for_best_rank
    return reciprocal_rank

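# Illustration: if the best original rank (1) is assigned to the third item and
# the predicted ranking places that item at rank 3, the reciprocal rank is
# 1/3 ~= 0.33; ties on the best original rank are resolved optimistically, by
# taking the lowest (i.e. best) predicted value among the tied items.
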
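
# A small, optional smoke test (a sketch added for illustration; it only calls
# the helpers that accept plain lists and floats, so it does not depend on how
# Ranking objects are constructed elsewhere in the package).
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # significance of a tau of 0.5 estimated over 10 pairs
    print("p-value: {}".format(kendall_tau_prob(0.5, 10)))
    # gains and ideal DCG for a three-item segment predicted perfectly
    demo_gains = _calculate_gains([1, 2, 3], [1, 2, 3])
    print("gains: {}".format(demo_gains))
    print("ideal DCG: {}".format(idcg(demo_gains, 3)))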