'''
This module allows for the calculation of the basic rank metrics that evaluate
on the set level, i.e. aggregating over many ranking lists at a time (the
corresponding segment-level calculations live in the L{segment} module)

Created on 18 Dec 2012

@author: Eleftherios Avramidis
'''

import segment
from numpy import average
import numpy as np

def kendall_tau_set_no_ties(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    Convenience wrapper around L{kendall_tau_set} that disables the penalization
    of predicted ties and marks the result keys accordingly ("tau" -> "tau-nt").
    Note: the original def line was lost; the function name is reconstructed.
    """
    kwargs["penalize_predicted_ties"] = False
    result = kendall_tau_set(predicted_rank_vectors, original_rank_vectors, **kwargs)
    newresult = {}
    for key, value in result.items():
        newkey = key.replace("tau", "tau-nt")
        newresult[newkey] = value
    return newresult

def kendall_tau_set(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    Refined calculation of the set-level Kendall tau of predicted vs. human
    ranking, following WMT12 (Birch et al., 2012). It returns both the
    set-level Kendall tau and the average of the segment-level Kendall tau scores.
    @param predicted_rank_vectors: a list of ranking vectors, each containing
    the predicted ranks for one segment
    @type predicted_rank_vectors: [Ranking, ..]
    @param original_rank_vectors: a list of ranking vectors, each containing
    the human ranks for one segment
    @type original_rank_vectors: [Ranking, ..]
    @return: overall Kendall tau score,
    - average segment-level Kendall tau score,
    - the probability of the null hypothesis that X and Y are independent,
    - the count of concordant pairs,
    - the count of discordant pairs,
    - the count of pairs used for calculating tau (excluding "invalid" pairs),
    - the count of original ties,
    - the count of predicted ties,
    - the count of all pairs
    @rtype: {string: float or int}
    """
    segtaus = []
    segprobs = []

    concordant = 0
    discordant = 0
    valid_pairs = 0
    original_ties_overall = 0
    predicted_ties_overall = 0
    pairs_overall = 0
    sentences_with_ties = 0

    for predicted_rank_vector, original_rank_vector in zip(predicted_rank_vectors, original_rank_vectors):
        # Segment-level Kendall tau and pair statistics for this ranking list
        segtau, segprob, concordant_count, discordant_count, all_pairs_count, original_ties, predicted_ties, pairs = segment.kendall_tau(predicted_rank_vector, original_rank_vector, **kwargs)

        if segtau and segprob:
            segtaus.append(segtau)
            segprobs.append(segprob)

        concordant += concordant_count
        discordant += discordant_count
        valid_pairs += all_pairs_count

        original_ties_overall += original_ties
        predicted_ties_overall += predicted_ties
        if predicted_ties > 0:
            sentences_with_ties += 1
        pairs_overall += pairs

    # Pool the pair counts over all segments for the set-level tau
    tau = 1.00 * (concordant - discordant) / (concordant + discordant)
    prob = segment.kendall_tau_prob(tau, valid_pairs)

    avg_seg_tau = np.average(segtaus)
    avg_seg_prob = np.prod(segprobs)

    predicted_ties_avg = 100.00 * predicted_ties_overall / pairs_overall
    sentence_ties_avg = 100.00 * sentences_with_ties / len(predicted_rank_vectors)

    stats = {'tau': tau,
             'tau_prob': prob,
             'tau_avg_seg': avg_seg_tau,
             'tau_avg_seg_prob': avg_seg_prob,
             'tau_concordant': concordant,
             'tau_discordant': discordant,
             'tau_valid_pairs': valid_pairs,
             'tau_all_pairs': pairs_overall,
             'tau_original_ties': original_ties_overall,
             'tau_predicted_ties': predicted_ties_overall,
             'tau_predicted_ties_per': predicted_ties_avg,
             'tau_sentence_ties': sentences_with_ties,
             'tau_sentence_ties_per': sentence_ties_avg
             }

    return stats
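
# A minimal, self-contained illustration (not part of the original module) of
# the pooled tau computed above: tau = (concordant - discordant) / (concordant
# + discordant) over plain integer rank lists, where a lower rank means a
# better item. Tied pairs are simply skipped here; the real computation
# delegates tie handling to segment.kendall_tau.
def _example_kendall_tau(predicted, original):
    concordant = 0
    discordant = 0
    for i in range(len(predicted)):
        for j in range(i + 1, len(predicted)):
            direction = (predicted[i] - predicted[j]) * (original[i] - original[j])
            if direction > 0:
                # both rankings order this pair of items the same way
                concordant += 1
            elif direction < 0:
                discordant += 1
    return 1.00 * (concordant - discordant) / (concordant + discordant)

# e.g. _example_kendall_tau([1, 2, 3], [2, 1, 3]) gives (2 - 1) / 3.0 = 0.33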


def mrr(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    Calculation of the mean reciprocal rank, based on Radev et al. (2002)
    @param predicted_rank_vectors: a list of ranking vectors, each containing
    the predicted ranks for one segment
    @type predicted_rank_vectors: [Ranking, ..]
    @param original_rank_vectors: a list of ranking vectors, each containing
    the human ranks for one segment
    @type original_rank_vectors: [Ranking, ..]
    @return: mean reciprocal rank
    @rtype: {string: float}
    """
    reciprocal_ranks = []

    for predicted_rank_vector, original_rank_vector in zip(predicted_rank_vectors, original_rank_vectors):
        reciprocal_rank = segment.reciprocal_rank(predicted_rank_vector, original_rank_vector)
        reciprocal_ranks.append(reciprocal_rank)

    return {'mrr': average(reciprocal_ranks)}
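
# Sketch of the underlying idea (an assumption about segment.reciprocal_rank,
# whose actual implementation lives in the segment module): the reciprocal
# rank of a segment is 1/r, where r is the human rank of the item the system
# predicted as best, and the MRR is the average of these values.
def _example_reciprocal_rank(predicted, original):
    best_index = predicted.index(min(predicted))  # system's top-ranked item
    return 1.00 / original[best_index]            # inverse of its human rank

# e.g. reciprocal ranks [1/1.0, 1/2.0, 1/3.0] average to an MRR of about 0.61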


def best_predicted_vs_human(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    For each sentence, the item selected as best by our system may have been
    ranked lower by the humans. This statistic counts how many times the item
    predicted as best fell into each of the human ranks, which is useful for
    plotting. (The original def line was lost; the function name is
    reconstructed from the "bph_" prefix of the result keys.)
    @param predicted_rank_vectors: a list of ranking vectors, each containing
    the predicted ranks for one segment
    @type predicted_rank_vectors: [Ranking, ..]
    @param original_rank_vectors: a list of ranking vectors, each containing
    the human ranks for one segment
    @type original_rank_vectors: [Ranking, ..]
    @return: a dictionary with percentages for each human rank
    @rtype: {string: float}
    """
    actual_values_of_best_predicted = {}
    for predicted_rank_vector, original_rank_vector in zip(predicted_rank_vectors, original_rank_vectors):

        predicted_rank_vector = predicted_rank_vector.normalize()
        original_rank_vector = original_rank_vector.normalize()
        if not predicted_rank_vector:
            continue
        best_predicted_rank = min(predicted_rank_vector)

        # Gather the human ranks of all items that the system ranked first
        original_ranks = []
        for original_rank, predicted_rank in zip(original_rank_vector, predicted_rank_vector):
            if predicted_rank == best_predicted_rank:
                original_ranks.append(original_rank)

        # In case of predicted ties, pessimistically keep the worst human rank
        selected_original_rank = max(original_ranks)
        count = actual_values_of_best_predicted.setdefault(selected_original_rank, 0)
        actual_values_of_best_predicted[selected_original_rank] = count + 1

    n = len(predicted_rank_vectors)
    percentages = {}

    for rank, counts in actual_values_of_best_predicted.items():
        percentages["bph_" + str(rank)] = round(100.00 * counts / n, 2)
    return percentages
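
# Shape of the result (hypothetical numbers): with four segments whose
# predicted-best items received human ranks 1, 1, 2 and 3, the function would
# return {'bph_1': 50.0, 'bph_2': 25.0, 'bph_3': 25.0}.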

def avg_predicted_ranked(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    Provides the average human rank of the item chosen by the system as best.
    (The original def line was lost; the function name is reconstructed from
    the key of the returned dictionary.)
    @param predicted_rank_vectors: a list of ranking vectors, each containing
    the predicted ranks for one segment
    @type predicted_rank_vectors: [Ranking, ..]
    @param original_rank_vectors: a list of ranking vectors, each containing
    the human ranks for one segment
    @type original_rank_vectors: [Ranking, ..]
    @return: a dictionary with the name of the metric and its value
    @rtype: {string: float}
    """

    original_ranks = []

    for predicted_rank_vector, original_rank_vector in zip(predicted_rank_vectors, original_rank_vectors):

        predicted_rank_vector = predicted_rank_vector.normalize(ties='ceiling')
        original_rank_vector = original_rank_vector.normalize(ties='ceiling')

        best_predicted_rank = min(predicted_rank_vector)
        mapped_original_ranks = []

        for original_rank, predicted_rank in zip(original_rank_vector, predicted_rank_vector):
            if predicted_rank == best_predicted_rank:
                mapped_original_ranks.append(original_rank)

        # In case of predicted ties, pessimistically keep the worst human rank
        original_ranks.append(max(mapped_original_ranks))

    return {'avg_predicted_ranked': average(original_ranks)}
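
# E.g. if the predicted-best items received human ranks 1, 3 and 2 on three
# segments, the result would be {'avg_predicted_ranked': 2.0}.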


def avg_ndgc_err(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    Returns the normalized Discounted Cumulative Gain (nDCG) and the Expected
    Reciprocal Rank (ERR), both averaged over the number of sentences
    @param predicted_rank_vectors: a list of ranking vectors, each containing
    the predicted ranks for one segment
    @type predicted_rank_vectors: [Ranking, ..]
    @param original_rank_vectors: a list of ranking vectors, each containing
    the human ranks for one segment
    @type original_rank_vectors: [Ranking, ..]
    @keyword k: cut-off passed to the segment-level L{ndgc_err} function
    @type k: int
    @return: a dictionary with the name of each metric and the respective result
    @rtype: {string: float}
    """
    ndgc_list = []
    err_list = []
    for predicted_rank_vector, original_rank_vector in zip(predicted_rank_vectors, original_rank_vectors):
        # Default the cut-off to the length of the first ranking; setdefault
        # pins this value for all subsequent segments
        k = kwargs.setdefault('k', len(predicted_rank_vector))
        ndgc, err = segment.ndgc_err(predicted_rank_vector, original_rank_vector, k)
        ndgc_list.append(ndgc)
        err_list.append(err)
    avg_ndgc = average(ndgc_list)
    avg_err = average(err_list)
    return {'ndgc': avg_ndgc, 'err': avg_err}
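
# For orientation, a standard nDCG sketch (an assumption about what
# segment.ndgc_err computes; relevance scores would be derived from the human
# ranks, e.g. higher relevance for better-ranked items):
def _example_dcg(relevances, k):
    from math import log
    # DCG@k = sum over the top-k positions of (2^rel - 1) / log2(position + 1)
    return sum((2 ** rel - 1) / log(pos + 2, 2)
               for pos, rel in enumerate(relevances[:k]))

def _example_ndcg(relevances, k):
    # Normalize by the DCG of the ideal (best-first) ordering
    ideal = _example_dcg(sorted(relevances, reverse=True), k)
    return _example_dcg(relevances, k) / ideal if ideal else 0.0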


def allmetrics(predicted_rank_vectors, original_rank_vectors, **kwargs):
    """
    Convenience function that runs all set-level metrics of this module on the
    given rankings and gathers their results in a single dictionary. (The
    original body was lost; this reconstruction aggregates the functions
    defined above.)
    """
    stats = {}
    stats.update(kendall_tau_set(predicted_rank_vectors, original_rank_vectors, **kwargs))
    stats.update(mrr(predicted_rank_vectors, original_rank_vectors, **kwargs))
    stats.update(best_predicted_vs_human(predicted_rank_vectors, original_rank_vectors, **kwargs))
    stats.update(avg_predicted_ranked(predicted_rank_vectors, original_rank_vectors, **kwargs))
    stats.update(avg_ndgc_err(predicted_rank_vectors, original_rank_vectors, **kwargs))
    return stats