|
|
|
data_fetch(external_file,
output_file)
Fetch the training file and place it in the working directory.
File names are expected to consist of the set name, followed by the extension
.jcml |
source code
|
|
|
|
|
|
|
|
|
|
|
|
|
features_langtool(input_file,
output_file,
language) |
source code
|
|
|
|
|
|
|
|
|
features_berkeley(input_file,
output_file,
language)
Parsing |
source code
|
|
|
|
|
|
|
|
|
|
|
|
|
truecase(input_file,
output_file,
language,
model) |
source code
|
|
|
|
|
|
|
|
|
|
|
features_lm(input_file,
output_file,
language,
lm_name) |
source code
|
|
|
features_lm_batch(input_file,
output_file,
language,
lm_name) |
source code
|
|
|
features_lm_single(input_file,
output_file,
language,
lm_url,
lm_tokenize,
lm_lowercase) |
source code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cfg = bootstrap.get_cfg()
|
|
gateway = cfg.java_init()
|
|
cores = int(cfg.get("general", "cores"))
|
|
parallel_feature_functions = []
|
|
path = cfg.get_path()
|
|
source_language = cfg.get("general", "source_language")
|
|
target_language = cfg.get("general", "target_language")
|
|
training_sets = cfg.get("training", "filenames").split(",")
|
|
testing_set = cfg.get("testing", "filename")
|
|
all_sets = cfg.get("training", "filenames").split(",")
|
|
params = []
|
|
annotated_filenames = []
|