Formatting

This commit is contained in:
reshinthadithyan 2021-07-04 14:02:29 +05:30
parent 4b5cd33de5
commit 1cff3f903e
2 changed files with 102 additions and 86 deletions

View File

import collections
import math

def _get_ngrams(segment, max_order):
    """Extracts all n-grams up to a given maximum order from an input segment.
    Args:
        segment: text segment from which n-grams will be extracted.
        max_order: maximum length in tokens of the n-grams returned by this
            method.
    Returns:
        The Counter containing all n-grams up to max_order in segment
        with a count of how many times each n-gram occurred.
    """
    ngram_counts = collections.Counter()
    for order in range(1, max_order + 1):
        for i in range(0, len(segment) - order + 1):
            ngram = tuple(segment[i : i + order])
            ngram_counts[ngram] += 1
    return ngram_counts
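
For orientation, a minimal sanity check of the counting logic above (illustrative, not part of the commit):

from collections import Counter

# ["a", "b", "a"] with max_order=2 yields two unigram types and two bigrams;
# the unigram ("a",) appears twice.
assert _get_ngrams(["a", "b", "a"], 2) == Counter(
    {("a",): 2, ("b",): 1, ("a", "b"): 1, ("b", "a"): 1}
)
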
def compute_bleu(reference_corpus, translation_corpus, max_order=4, smooth=True):
    """Computes BLEU score of translated segments against one or more references.
    Args:
        reference_corpus: list of lists of references for each translation. Each
            reference should be tokenized into a list of tokens.
        translation_corpus: list of translations to score. Each translation
            should be tokenized into a list of tokens.
        max_order: Maximum n-gram order to use when computing BLEU score.
        smooth: Whether or not to apply Lin et al. 2004 smoothing.
    Returns:
        Dictionary with the BLEU score, n-gram precisions, brevity penalty,
        length ratio, and the translation and reference lengths.
    """
    matches_by_order = [0] * max_order
    possible_matches_by_order = [0] * max_order
    reference_length = 0
    translation_length = 0
    for (references, translation) in zip(reference_corpus, translation_corpus):
        reference_length += min(len(r) for r in references)
        translation_length += len(translation)

        merged_ref_ngram_counts = collections.Counter()
        for reference in references:
            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
        translation_ngram_counts = _get_ngrams(translation, max_order)
        overlap = translation_ngram_counts & merged_ref_ngram_counts
        for ngram in overlap:
            matches_by_order[len(ngram) - 1] += overlap[ngram]
        for order in range(1, max_order + 1):
            possible_matches = len(translation) - order + 1
            if possible_matches > 0:
                possible_matches_by_order[order - 1] += possible_matches

    precisions = [0] * max_order
    for i in range(0, max_order):
        if smooth:
            precisions[i] = (matches_by_order[i] + 1.0) / (
                possible_matches_by_order[i] + 1.0
            )
        else:
            if possible_matches_by_order[i] > 0:
                precisions[i] = (
                    float(matches_by_order[i]) / possible_matches_by_order[i]
                )
            else:
                precisions[i] = 0.0

    if min(precisions) > 0:
        p_log_sum = sum((1.0 / max_order) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
        geo_mean = 0

    ratio = float(translation_length) / reference_length
    if ratio > 1.0:
        bp = 1.0
    else:
        bp = math.exp(1 - 1.0 / ratio)

    bleu = geo_mean * bp
    bleu_score_dict = {
        "bleu": bleu,
        "precision": precisions,
        "bp": bp,
        "ratio": ratio,
        "trans_len": translation_length,
        "ref_len": reference_length,
    }
    return bleu_score_dict
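
Two details worth seeing in isolation: the Lin et al. (2004) add-one smoothing keeps a zero-match order from collapsing the geometric mean, and the brevity penalty only applies to translations shorter than the reference. A standalone sketch (numbers invented for illustration):

import math

# Unsmoothed, an order with 0 matches gives precision 0 and hence BLEU 0;
# smoothed, it contributes (0 + 1) / (possible + 1) instead.
matches, possible = 0, 3
smoothed_p = (matches + 1.0) / (possible + 1.0)  # 0.25 rather than 0.0

# Brevity penalty for a 6-token translation against a 9-token reference:
ratio = 6 / 9
bp = 1.0 if ratio > 1.0 else math.exp(1 - 1.0 / ratio)  # exp(-0.5) ~= 0.6065
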
def bleu_test_case():
    """A simple functionality test case to evaluate BLEU."""
    references = [[["a", "=", "b", "\n", "y", "=", "a", "+", "1"]]]
    generated = [["a", "=", "b", "\n", "print", "a"]]
    score_dict = compute_bleu(references, generated, smooth=False)
    return score_dict


if __name__ == "__main__":
    score_dict = bleu_test_case()
    print(score_dict)
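
Working the test case through by hand (my own arithmetic, not output recorded in the commit): the generated side is 6 tokens against a 9-token reference, giving unsmoothed n-gram precisions 5/6, 3/5, 2/4, and 1/3.

import math

# Geometric mean of the four precisions: (5/6 * 3/5 * 1/2 * 1/3) ** 0.25
geo_mean = (1.0 / 12.0) ** 0.25  # ~= 0.5373
bp = math.exp(1 - 9.0 / 6.0)     # ~= 0.6065
expected_bleu = geo_mean * bp    # ~= 0.326
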

View File

from metrics.bleu import compute_bleu

def compute_exact_match(references, generated) -> float:
    """
    Computes Exact Match Accuracy.
    args:
        references: list of lists of reference outputs; references[0] is
            compared element-wise against the generated outputs.
        generated: list of generated outputs to score.
    returns:
        exact_match_accuracy : Float
    """
    assert len(references[0]) == len(generated), (
        "Number of samples should be equal in references and generated outputs."
    )
    exact_match_count = 0.0
    for gen, ref in zip(generated, references[0]):
        if gen == ref:
            exact_match_count += 1
    exact_match_acc = exact_match_count / len(generated)
    return exact_match_acc
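
A quick illustrative call (strings invented; any comparable outputs work):

refs = [["a = b", "print(a)", "x = 1"]]  # one reference set
gens = ["a = b", "print(b)", "x = 1"]    # two of three match exactly
acc = compute_exact_match(refs, gens)    # 2 / 3 ~= 0.667
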
def compute_metrics(references, generated) -> dict:
    """
    Calculates various metrics and returns a dictionary of the computed metrics.
    args:
        references: list of lists of reference outputs, as expected by
            compute_bleu and compute_exact_match.
        generated: list of generated outputs to score.
    returns:
        A dictionary with the different metrics.
    """
    metrics_dict = {
        "smoothed_bleu_4": None,
        "bleu_4": None,
        "exact_match_acc": None,
    }  # Update here as new metrics are added.
    metrics_dict["smoothed_bleu_4"] = compute_bleu(references, generated, smooth=True)
    metrics_dict["bleu_4"] = compute_bleu(references, generated, smooth=False)
    metrics_dict["exact_match_acc"] = compute_exact_match(references, generated)
    return metrics_dict
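
Tying the two files together, a hypothetical end-to-end call (token lists invented for illustration):

references = [[["x", "=", "1"]]]  # one sample, one tokenized reference
generated = [["x", "=", "1"]]     # one tokenized generated output

scores = compute_metrics(references, generated)
# scores["bleu_4"] and scores["smoothed_bleu_4"] are the dicts returned by
# compute_bleu; scores["exact_match_acc"] is 1.0 here.
print(scores["exact_match_acc"])
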