My ROUGE score is not similar to what you show :(

import argparse
import os

import rouge

def _read_documents(root):
    """Return ``(paths, texts)`` for every file found under *root*.

    Paths are sorted so that two directory trees with the same layout are
    enumerated in the same order; ``os.walk`` alone gives no such guarantee.
    Each text is the file content with surrounding whitespace stripped and
    newlines replaced by single spaces.
    """
    paths = sorted(
        os.path.join(dirpath, name)
        for dirpath, _dirnames, filenames in os.walk(root)
        for name in filenames
    )
    texts = []
    for path in paths:
        # Explicit encoding keeps results independent of the platform locale.
        with open(path, 'r', encoding='utf-8') as handle:
            texts.append(' '.join(handle.read().strip().split('\n')))
    return paths, texts


def compute_rouge(args):
    """Print ROUGE-1/2/L F-scores of generated summaries against references.

    Args:
        args: Object with two attributes: ``truth`` (directory holding the
            reference summaries) and ``summary`` (directory holding the
            generated summaries). Both trees are walked recursively.

    Raises:
        ValueError: If either directory contains no files, or if the two
            directories contain a different number of files.

    Reference and generated files are paired by their position in the sorted
    path lists, so both directories are expected to use matching file names.
    """
    truth_files, label = _read_documents(args.truth)
    summary_files, pred = _read_documents(args.summary)

    if not truth_files or not summary_files:
        raise ValueError(
            'No files found: {} reference file(s) in {!r}, '
            '{} summary file(s) in {!r}'.format(
                len(truth_files), args.truth,
                len(summary_files), args.summary))
    if len(truth_files) != len(summary_files):
        # A count mismatch means at least one pair would be misaligned.
        raise ValueError(
            'Number of reference files ({}) differs from number of '
            'summary files ({})'.format(len(truth_files), len(summary_files)))

    # Show the first pair so a misalignment is easy to spot by eye.
    print(truth_files[0], summary_files[0])
    print(label[0])
    print(pred[0])

    for aggregator in ['Avg', 'Best']:
        print('Evaluation with {}'.format(aggregator))
        apply_avg = aggregator == 'Avg'
        apply_best = aggregator == 'Best'
        evaluator = rouge.Rouge(metrics=['rouge-n', 'rouge-l', 'rouge-w'],
                                max_n=4,
                                limit_length=True,
                                length_limit=400,
                                length_limit_type='words',
                                apply_avg=apply_avg,
                                apply_best=apply_best,
                                alpha=0.5,  # Default F1_score
                                weight_factor=1.2,
                                stemming=True)

        scores = evaluator.get_scores(pred, label)

        print('rouge-1 : {} , rouge-2 : {} , rouge-l : {}'.format(
            scores['rouge-1']['f'],
            scores['rouge-2']['f'],
            scores['rouge-l']['f']))

lipiji / TranSummar

My ROUGE score is not similar to what you show :( #14