mirror of
https://github.com/CodedotAl/gpt-code-clippy.git
synced 2024-10-26 09:17:45 +03:00
Formatting
This commit is contained in:
parent
4b5cd33de5
commit
1cff3f903e
160
metrics/bleu.py
160
metrics/bleu.py
@ -28,98 +28,106 @@ import math
|
|||||||
|
|
||||||
|
|
||||||
def _get_ngrams(segment, max_order):
|
def _get_ngrams(segment, max_order):
|
||||||
"""Extracts all n-grams upto a given maximum order from an input segment.
|
"""Extracts all n-grams upto a given maximum order from an input segment.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
segment: text segment from which n-grams will be extracted.
|
segment: text segment from which n-grams will be extracted.
|
||||||
max_order: maximum length in tokens of the n-grams returned by this
|
max_order: maximum length in tokens of the n-grams returned by this
|
||||||
methods.
|
methods.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The Counter containing all n-grams upto max_order in segment
|
The Counter containing all n-grams upto max_order in segment
|
||||||
with a count of how many times each n-gram occurred.
|
with a count of how many times each n-gram occurred.
|
||||||
"""
|
"""
|
||||||
ngram_counts = collections.Counter()
|
ngram_counts = collections.Counter()
|
||||||
for order in range(1, max_order + 1):
|
for order in range(1, max_order + 1):
|
||||||
for i in range(0, len(segment) - order + 1):
|
for i in range(0, len(segment) - order + 1):
|
||||||
ngram = tuple(segment[i:i+order])
|
ngram = tuple(segment[i : i + order])
|
||||||
ngram_counts[ngram] += 1
|
ngram_counts[ngram] += 1
|
||||||
return ngram_counts
|
return ngram_counts
|
||||||
|
|
||||||
|
|
||||||
def compute_bleu(reference_corpus, translation_corpus, max_order=4, smooth=True):
    """Computes BLEU score of translated segments against one or more references.

    Args:
      reference_corpus: list of lists of references for each translation. Each
        reference should be tokenized into a list of tokens.
      translation_corpus: list of translations to score. Each translation
        should be tokenized into a list of tokens.
      max_order: Maximum n-gram order to use when computing BLEU score.
      smooth: Whether or not to apply Lin et al. 2004 smoothing.

    Returns:
      A dict with the following keys:
        "bleu": geometric mean of the n-gram precisions times the brevity
          penalty.
        "precision": list of max_order n-gram precisions.
        "bp": brevity penalty.
        "ratio": total translation length divided by total reference length.
        "trans_len": total number of translation tokens.
        "ref_len": sum over segments of the shortest reference length.
    """
    matches_by_order = [0] * max_order
    possible_matches_by_order = [0] * max_order
    reference_length = 0
    translation_length = 0
    for references, translation in zip(reference_corpus, translation_corpus):
        reference_length += min(len(r) for r in references)
        translation_length += len(translation)

        # Union of counters keeps, per n-gram, the max count over all
        # references, which is the clipping bound for the translation.
        merged_ref_ngram_counts = collections.Counter()
        for reference in references:
            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
        translation_ngram_counts = _get_ngrams(translation, max_order)
        overlap = translation_ngram_counts & merged_ref_ngram_counts
        for ngram in overlap:
            matches_by_order[len(ngram) - 1] += overlap[ngram]
        for order in range(1, max_order + 1):
            possible_matches = len(translation) - order + 1
            if possible_matches > 0:
                possible_matches_by_order[order - 1] += possible_matches

    precisions = [0] * max_order
    for i in range(0, max_order):
        if smooth:
            precisions[i] = (matches_by_order[i] + 1.0) / (
                possible_matches_by_order[i] + 1.0
            )
        else:
            if possible_matches_by_order[i] > 0:
                precisions[i] = (
                    float(matches_by_order[i]) / possible_matches_by_order[i]
                )
            else:
                precisions[i] = 0.0

    if min(precisions) > 0:
        p_log_sum = sum((1.0 / max_order) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
        geo_mean = 0

    # Brevity penalty. The two zero-length cases are handled explicitly
    # because the plain formula would divide by zero in both.
    if translation_length == 0:
        # Limit of exp(1 - 1 / ratio) as ratio -> 0+ is 0.
        ratio = 0.0
        bp = 0.0
    elif reference_length == 0:
        # Translation is longer than the (empty) references: no penalty.
        ratio = float("inf")
        bp = 1.0
    else:
        ratio = float(translation_length) / reference_length
        if ratio > 1.0:
            bp = 1.0
        else:
            bp = math.exp(1 - 1.0 / ratio)

    bleu = geo_mean * bp
    bleu_score_dict = {
        "bleu": bleu,
        "precision": precisions,
        "bp": bp,
        "ratio": ratio,
        "trans_len": translation_length,
        "ref_len": reference_length,
    }
    return bleu_score_dict
|
|
||||||
|
|
||||||
def bleu_test_case():
    """Runs compute_bleu, unsmoothed, on one small hard-coded pair of snippets.

    NOTE(review): the list named `generated` is passed as the first
    positional argument of compute_bleu (its reference corpus) and
    `reference` as the second (its translation corpus); confirm this
    ordering is intended.
    """
    generated = [[["a", "=", "b", "\n", "y", "=", "a", "+", "1"]]]
    reference = [["a", "=", "b", "\n", "print", "a"]]
    return compute_bleu(generated, reference, smooth=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Run the built-in smoke test and show the resulting score dict.
    print(bleu_test_case())
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from metrics.bleu import compute_bleu
|
from metrics.bleu import compute_bleu
|
||||||
|
|
||||||
|
|
||||||
def compute_exact_match(references, generated) -> float:
    """
    Computes Exact Match Accuracy.
    args:
        references : sequence whose first element holds one reference per
                     sample; only references[0] is compared.
        generated : sequence of generated outputs, one per sample.
    returns:
        exact_match_accuracy : Float, the fraction of samples whose
                               generated output equals its reference
                               (0.0 when there are no samples).
    raises:
        ValueError : if references[0] and generated differ in length.
    """
    # The previous `assert (cond, msg)` asserted a non-empty tuple, which is
    # always true, so mismatched inputs were silently truncated by zip().
    if len(references[0]) != len(generated):
        raise ValueError(
            "Number of Samples should be equal in References and Synthesized Outputs.."
        )
    if not generated:
        # No samples: avoid dividing by zero.
        return 0.0
    exact_match_count = sum(
        1.0 for gen, ref in zip(generated, references[0]) if gen == ref
    )
    return exact_match_count / len(generated)
|
||||||
|
|
||||||
def compute_metrics(references, generated) -> dict:
    """
    Calculates various metrics and returns the calculated dict of these metrics.
    args:
        references : reference data, passed unchanged to compute_bleu and
                     compute_exact_match.
        generated : generated outputs, passed unchanged to compute_bleu and
                    compute_exact_match.
    returns:
        A dictionary with the keys "smoothed_bleu_4", "bleu_4" and
        "exact_match_acc".

    NOTE(review): compute_exact_match reads only references[0]; confirm that
    the same `references` layout is what compute_bleu expects.
    """
    # Add new metrics here. The keys are written once, in the literal, so a
    # misspelled placeholder (previously "blue_4": None) cannot leak into
    # the result.
    metrics_dict = {
        "smoothed_bleu_4": compute_bleu(references, generated, smooth=True),
        "bleu_4": compute_bleu(references, generated, smooth=False),
        "exact_match_acc": compute_exact_match(references, generated),
    }
    return metrics_dict
|
||||||
|
Loading…
Reference in New Issue
Block a user