diff --git a/bertserini/experiments/eval/evaluate_v1_cmrc.py b/bertserini/experiments/eval/evaluate_v1_cmrc.py index 0f3ff32..4556d15 100755 --- a/bertserini/experiments/eval/evaluate_v1_cmrc.py +++ b/bertserini/experiments/eval/evaluate_v1_cmrc.py @@ -17,6 +17,9 @@ #from utils import init_logger #logger = init_logger("evaluation") +#install punkt corpus +nltk.download('punkt') + # split Chinese with English def mixed_segmentation(in_str, rm_punc=False): in_str = str(in_str).lower().strip() diff --git a/docs/experiments-cmrc.md b/docs/experiments-cmrc.md index ab63f13..c551260 100644 --- a/docs/experiments-cmrc.md +++ b/docs/experiments-cmrc.md @@ -58,7 +58,7 @@ python -m bertserini.experiments.inference --dataset_path data/cmrc2018_dev_squa ``` mkdir temp -pyhton -m bertserini.experiments.evaluate --eval_data data/cmrc2018_dev.json \ +python -m bertserini.experiments.evaluate --eval_data data/cmrc2018_dev.json \ --search_file prediction/cmrc2018_pred.json \ --output_path temp \ --dataset cmrc @@ -70,4 +70,4 @@ Expected results: ``` ## rsvp-ai/bertserini-bert-base-cmrc, this is bert-base-chinese finetuned on the chinese reading comprehension dataset(CMRC) (0.5, {'f1_score': 68.0033167812909, 'exact_match': 51.164958061509786, 'total_count': 3219, 'skip_count': 1}) -``` \ No newline at end of file +``` diff --git a/docs/experiments-squad.md b/docs/experiments-squad.md index 105b67d..65a275b 100644 --- a/docs/experiments-squad.md +++ b/docs/experiments-squad.md @@ -63,7 +63,7 @@ python -m bertserini.experiments.inference --dataset_path data/dev-v1.1.json \ ``` mkdir temp -pyhton -m bertserini.experiments.evaluate --eval_data data/dev-v1.1.json \ +python -m bertserini.experiments.evaluate --eval_data data/dev-v1.1.json \ --search_file prediction/squad_bert_large_pred.json \ --output_path temp \ --dataset squad @@ -76,4 +76,4 @@ Expected results: ## rsvp-ai/bertserini-bert-base-squad, this is finetuned based on bert-base-uncased (0.5, {'exact_match': 40.179754020813625, 'f1': 47.828056659017584, 'recall': 49.517951036176, 'precision': 48.3495034100538, 'cover': 45.50614947965941, 'overlap': 56.20624408703879}) -``` \ No newline at end of file +```