Package sentence :: Module ranking
[hide private]
[frames] | no frames]

Source Code for Module sentence.ranking

  1  ''' 
  2  Utility functions and classes for ranking.  
  3  Can be used either through the plain module-level functions or in an object-oriented way through the Ranking class 
  4   
  5  Created on 20 Mar 2013 
  6  @author: Eleftherios Avramidis 
  7  ''' 
  8   
def indexes(ranking_list, neededrank):
    '''
    Locate every position at which a given rank value occurs in a list of ranks
    @param ranking_list: the list of ranks that will be searched
    @type ranking_list: list
    @param neededrank: the rank value to look for
    @type neededrank: float
    @return: the 0-based positions where the given rank appears, in ascending order (empty if it never appears)
    @rtype: [int, ...]
    '''
    positions = []
    for position, rank in enumerate(ranking_list):
        if neededrank == rank:
            positions.append(position)
    return positions
21
22 -def _handle_tie(ranking_list, original_rank, modified_rank, ties_handling):
23 ''' Modifies the values of the tied items as specified by the parameters 24 @param ranking_list: the list of ranks 25 @type ranking_list: list 26 @param original_rank: the original rank value 27 @type original_rank: float 28 @param modified_rank: the new normalized rank value that would have been assigned if there was no tie 29 @type modified_rank: float 30 @param ties_handling: A string defining the mode of handling ties. For the description see function normalized() 31 @type: string 32 @return: the new value of the given rank after considering its ties and the value of the rank that the normalization iteration should continue with 33 @rtype: tuple(float, float) 34 ''' 35 count = ranking_list.count(original_rank) 36 if count <= 1: 37 return modified_rank, modified_rank 38 if ties_handling == 'minimize': 39 return modified_rank, modified_rank 40 if ties_handling == 'floor': 41 return modified_rank, modified_rank+count-1 42 if ties_handling == 'ceiling': 43 return modified_rank+count-1, modified_rank+count-1 44 if ties_handling == 'middle': 45 return modified_rank-1 + (count+1.00)/2, modified_rank+count-1 46 return modified_rank, modified_rank
47
def normalize(ranking_list, **kwargs):
    '''
    Convert a messy ranking like [1,3,5,4] to consecutive rank positions, i.e. [1,2,4,3]
    @param ranking_list: the list of ranks that will be normalized
    @type ranking_list: list
    @keyword ties: Select how to handle ties (default: 'minimize'). Accepted values are:
     - 'minimize', which reserves only one rank position for all tied items of the same rank
     - 'floor', which reserves all rank positions for all tied items of the same rank, but sets their value to the minimum tied rank position
     - 'ceiling', which reserves all rank positions for all tied items of the same rank, but sets their value to the maximum tied rank position
     - 'middle', which reserves all rank positions for all tied items of the same rank, but sets their value to the middle of the tied rank positions
    @type ties: string
    @return: a new normalized list of ranks; the given list is not modified
    @rtype: [float, ...]
    '''
    ties_handling = kwargs.get('ties', 'minimize')

    # zeros are placeholders for positions that have not received a rank yet
    result = [0] * len(ranking_list)
    position = 0
    # walk through the distinct rank values, from the lowest to the highest
    for rank_value in sorted(set(ranking_list)):
        # position + 1 is the rank this value would get without ties; the helper returns
        # the value to assign and the counter to continue from, according to the tie mode
        assigned, position = _handle_tie(ranking_list, rank_value, position + 1, ties_handling)
        for slot in indexes(ranking_list, rank_value):
            result[slot] = assigned
    # make sure that all ranks have been processed
    assert result.count(0) == 0
    return result
85
def invert(ranking_list, **kwargs):
    '''
    Reverse the order expressed by a ranking list, so that the best item becomes the worst and vice versa
    @param ranking_list: the list whose ranks are to be inverted
    @type ranking_list: [float, ...]
    @keyword ties: passed on unchanged to normalize(), see there for the accepted values
    @type ties: string
    @return: the inverted rank list, as produced by normalize()
    @rtype: [float, ...]
    '''
    # negating every rank reverses their order; normalize() then maps the
    # negated values back onto positive rank positions
    negated = []
    for item in ranking_list:
        negated.append(-1.0 * item)
    return normalize(negated, **kwargs)
96
class Ranking(list):
    '''
    Class that wraps the functionality of a ranking list. It behaves as a normal list but also allows
    additional functions to be performed, that are relevant to ranking.
    The rank values themselves are the items of the list.
    @ivar normalization: describes what kind of normalization has been performed to the internal list
    ('unknown' if not known). It is reset to 'unknown' by item assignment and item deletion;
    other inherited list mutators (e.g. append) are not overridden and leave it untouched.
    @type normalization: string
    '''

    def __init__(self, ranking, **kwargs):
        '''
        @param ranking: a list of values representing a ranking
        @type ranking: list of floats, integers or strings
        @keyword integers: if True, every value is rounded to the nearest integer and stored as int;
        otherwise (default) values are stored as float, in order to support intermediate positions
        @type integers: boolean
        @keyword normalization: the kind of normalization the given values already comply with (default: 'unknown')
        @type normalization: string
        '''
        super(Ranking, self).__init__()
        # the storage type is decided once, not per item
        if kwargs.get('integers', False):
            self.extend(int(round(float(i), 0)) for i in ranking)
        else:
            self.extend(float(i) for i in ranking)
        self.normalization = kwargs.get('normalization', 'unknown')

    def __setitem__(self, key, value):
        '''
        Assign one rank (or, for a slice key, several ranks), converting to float.
        Any assignment invalidates the recorded normalization.
        @param key: the index or slice to be assigned
        @type key: int or slice
        @param value: the new rank, or an iterable of new ranks when key is a slice
        '''
        self.normalization = 'unknown'
        if isinstance(key, slice):
            # a slice receives an iterable, so each of its items has to be converted separately
            value = [float(item) for item in value]
        else:
            value = float(value)
        super(Ranking, self).__setitem__(key, value)

    def __delitem__(self, key):
        '''
        Remove the rank(s) at the given index or slice. Invalidates the recorded normalization.
        '''
        self.normalization = 'unknown'
        super(Ranking, self).__delitem__(key)

    def normalize(self, **kwargs):
        '''
        Create a new normalized ranking out of a messy ranking like [1,3,5,4] to [1,2,4,3]
        @keyword ties: Select how to handle ties (default: 'minimize'). Accepted values are:
         - 'minimize', which reserves only one rank position for all tied items of the same rank
         - 'floor', which reserves all rank positions for all tied items of the same rank, but sets their value to the minimum tied rank position
         - 'ceiling', which reserves all rank positions for all tied items of the same rank, but sets their value to the maximum tied rank position
         - 'middle', which reserves all rank positions for all tied items of the same rank, but sets their value to the middle of the tied rank positions
        @type ties: string
        @return: a new normalized ranking, whose normalization attribute is set to the ties mode used
        @rtype: Ranking
        '''
        ties_handling = kwargs.get('ties', 'minimize')
        return Ranking(normalize(self, ties=ties_handling), normalization=ties_handling)

    def indexes(self, neededrank):
        '''
        Returns the indexes of the particular rank in this ranking
        @param neededrank: a rank value
        @type neededrank: float
        @return: the indexes where the given rank appears
        @rtype: [int, ...]
        '''
        return indexes(self, neededrank)

    def inverse(self, **kwargs):
        '''
        Create an inverted ranking, so that the best item becomes the worst and vice versa
        @keyword ties: Select how to handle ties (default: 'minimize'). Accepted values are:
         - 'minimize', which reserves only one rank position for all tied items of the same rank
         - 'floor', which reserves all rank positions for all tied items of the same rank, but sets their value to the minimum tied rank position
         - 'ceiling', which reserves all rank positions for all tied items of the same rank, but sets their value to the maximum tied rank position
         - 'middle', which reserves all rank positions for all tied items of the same rank, but sets their value to the middle of the tied rank positions
        @type ties: string
        @return: the inverted ranking, whose normalization attribute is set to the ties mode used
        @rtype: Ranking
        '''
        ties_handling = kwargs.get('ties', 'minimize')
        return Ranking(invert(self, ties=ties_handling), normalization=ties_handling)

    def integers(self):
        '''
        Return a version of the ranking, only with integers (each value rounded to the nearest integer).
        It would be nice if the Ranking is normalized.
        The normalization attribute of the result is 'unknown'.
        @return: a new ranking with integers
        @rtype: Ranking
        '''
        return Ranking(self, integers=True)
181 182 183 184 # 185 #if __name__ == '__main__': 186 # r = Ranking([1,2,3,2.2,4]) 187 # r[0] = '0' 188 ## 189 # print r.normalize(ties='ceiling').integers() 190 ## 191 # 192 # 193