1 '''
2 Created on Aug 4, 2011
3
4 @author: Eleftherios Avramidis
5 '''
6
7
8 from sentence.dataset import DataSet
9
10
12 '''
13 Abstract base class to describe the basic functionality of a reader, i.e. a mechanism that can
14 import data from external entities (e.g. files) for use with the framework.
15 '''
16
17
def __init__(self, input_xml_filename, load=True):
    """
    Constructor. Creates a memory object that handles file data.

    The file name is stored in self.input_filename and the value of
    ``load`` is stored in self.loaded. When ``load`` is true, self.load()
    is called before the constructor returns.

    @param input_xml_filename: the name of the file to read
    @type input_xml_filename: string
    @param load: by turning this option to false, the instance will be
        initialized without loading everything into memory. This can be
        done later by calling the .load() function
    @type load: boolean
    """
    self.input_filename = input_xml_filename
    # The flag mirrors the argument and is assigned before load() runs,
    # so load() already sees self.loaded == True when it is invoked here.
    self.loaded = load
    if load:
        self.load()
32
33
34
36 raise NotImplementedError( "Should have implemented this" )
37
38
40 raise NotImplementedError( "Should have implemented this" )
41
42
44 raise NotImplementedError( "Should have implemented this" )
45
47 """
48 Returns the contents of the parsed file into an object structure, which is represented by the DataSet object
49 Note that this will cause all the data of the file to be loaded into system memory at once.
50 For big data sets this may not be optimal, so consider sentence-by-sentence reading with SAX, or CElementTree (e.g. saxjcml.py)
51 @return the formed data set
52 @rtype DataSet
53 """
54 return DataSet(self.get_parallelsentences())
55
57 """
58 Returns the contents of the parsed file into a list with
59 ParallelSentence objects. Note that this will cause all the data of the file to be loaded into system memory at once.
60 For big data sets this may not be optimal, so consider sentence-by-sentence reading with SAX or CElementTree (e.g. saxjcml.py)
61 @return the list of parallel sentences
62 @rtype [ParallelSentence, ...]
63 """
64 raise NotImplementedError( "Should have implemented this" )
65