Formatting

2024-10-26 09:17:45 +03:00 · 2021-07-04 14:02:29 +05:30 · 2021-07-04 14:02:29 +05:30 · 1cff3f903e
commit 1cff3f903e
parent 4b5cd33de5
2 changed files with 102 additions and 86 deletions
--- a/metrics/bleu.py
+++ b/metrics/bleu.py
@ -28,98 +28,106 @@ import math
 def _get_ngrams(segment, max_order):
-  """Extracts all n-grams upto a given maximum order from an input segment.
+    """Extracts all n-grams upto a given maximum order from an input segment.
-  Args:
+    Args:
-    segment: text segment from which n-grams will be extracted.
+      segment: text segment from which n-grams will be extracted.
-    max_order: maximum length in tokens of the n-grams returned by this
+      max_order: maximum length in tokens of the n-grams returned by this
-        methods.
+          methods.
-  Returns:
+    Returns:
-    The Counter containing all n-grams upto max_order in segment
+      The Counter containing all n-grams upto max_order in segment
-    with a count of how many times each n-gram occurred.
+      with a count of how many times each n-gram occurred.
-  """
+    """
-  ngram_counts = collections.Counter()
+    ngram_counts = collections.Counter()
-  for order in range(1, max_order + 1):
+    for order in range(1, max_order + 1):
-    for i in range(0, len(segment) - order + 1):
+        for i in range(0, len(segment) - order + 1):
-      ngram = tuple(segment[i:i+order])
+            ngram = tuple(segment[i : i + order])
-      ngram_counts[ngram] += 1
+            ngram_counts[ngram] += 1
-  return ngram_counts
+    return ngram_counts
-def compute_bleu(reference_corpus, translation_corpus, max_order=4,
+def compute_bleu(reference_corpus, translation_corpus, max_order=4, smooth=True):
-                 smooth=True):
+    """Computes BLEU score of translated segments against one or more references.
  """Computes BLEU score of translated segments against one or more references.
-  Args:
+    Args:
-    reference_corpus: list of lists of references for each translation. Each
+      reference_corpus: list of lists of references for each translation. Each
-        reference should be tokenized into a list of tokens.
+          reference should be tokenized into a list of tokens.
-    translation_corpus: list of translations to score. Each translation
+      translation_corpus: list of translations to score. Each translation
-        should be tokenized into a list of tokens.
+          should be tokenized into a list of tokens.
-    max_order: Maximum n-gram order to use when computing BLEU score.
+      max_order: Maximum n-gram order to use when computing BLEU score.
-    smooth: Whether or not to apply Lin et al. 2004 smoothing.
+      smooth: Whether or not to apply Lin et al. 2004 smoothing.
-  Returns:
+    Returns:
-    3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram
+      3-Tuple with the BLEU score, n-gram precisions, geometric mean of n-gram
-    precisions and brevity penalty.
+      precisions and brevity penalty.
-  """
+    """
-  matches_by_order = [0] * max_order
+    matches_by_order = [0] * max_order
-  possible_matches_by_order = [0] * max_order
+    possible_matches_by_order = [0] * max_order
-  reference_length = 0
+    reference_length = 0
-  translation_length = 0
+    translation_length = 0
-  for (references, translation) in zip(reference_corpus,
+    for (references, translation) in zip(reference_corpus, translation_corpus):
-                                       translation_corpus):
+        reference_length += min(len(r) for r in references)
-    reference_length += min(len(r) for r in references)
+        translation_length += len(translation)
    translation_length += len(translation)
-    merged_ref_ngram_counts = collections.Counter()
+        merged_ref_ngram_counts = collections.Counter()
-    for reference in references:
+        for reference in references:
-      merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
+            merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
-    translation_ngram_counts = _get_ngrams(translation, max_order)
+        translation_ngram_counts = _get_ngrams(translation, max_order)
-    overlap = translation_ngram_counts & merged_ref_ngram_counts
+        overlap = translation_ngram_counts & merged_ref_ngram_counts
-    for ngram in overlap:
+        for ngram in overlap:
-      matches_by_order[len(ngram)-1] += overlap[ngram]
+            matches_by_order[len(ngram) - 1] += overlap[ngram]
-    for order in range(1, max_order+1):
+        for order in range(1, max_order + 1):
-      possible_matches = len(translation) - order + 1
+            possible_matches = len(translation) - order + 1
-      if possible_matches > 0:
+            if possible_matches > 0:
-        possible_matches_by_order[order-1] += possible_matches
+                possible_matches_by_order[order - 1] += possible_matches
-  precisions = [0] * max_order
+    precisions = [0] * max_order
-  for i in range(0, max_order):
+    for i in range(0, max_order):
-    if smooth:
+        if smooth:
-      precisions[i] = ((matches_by_order[i] + 1.) /
+            precisions[i] = (matches_by_order[i] + 1.0) / (
-                       (possible_matches_by_order[i] + 1.))
+                possible_matches_by_order[i] + 1.0
            )
        else:
            if possible_matches_by_order[i] > 0:
                precisions[i] = (
                    float(matches_by_order[i]) / possible_matches_by_order[i]
                )
            else:
                precisions[i] = 0.0
    if min(precisions) > 0:
        p_log_sum = sum((1.0 / max_order) * math.log(p) for p in precisions)
        geo_mean = math.exp(p_log_sum)
    else:
-      if possible_matches_by_order[i] > 0:
+        geo_mean = 0
        precisions[i] = (float(matches_by_order[i]) /
                         possible_matches_by_order[i])
      else:
        precisions[i] = 0.0
-  if min(precisions) > 0:
+    ratio = float(translation_length) / reference_length
    p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)
    geo_mean = math.exp(p_log_sum)
  else:
    geo_mean = 0
-  ratio = float(translation_length) / reference_length
+    if ratio > 1.0:
        bp = 1.0
    else:
        bp = math.exp(1 - 1.0 / ratio)
    bleu = geo_mean * bp
    bleu_score_dict = {
        "bleu": bleu,
        "precision": precisions,
        "bp": bp,
        "ratio": ratio,
        "trans_len": translation_length,
        "ref_len": reference_length,
    }
    return bleu_score_dict  # (bleu, precisions, bp, ratio, translation_length, reference_length)
  if ratio > 1.0:
    bp = 1.
  else:
    bp = math.exp(1 - 1. / ratio)
  bleu = geo_mean * bp
  print(geo_mean)
  bleu_score_dict = {"bleu":bleu,"precision":precisions,"bp":bp,"ratio":ratio,"trans_len":translation_length,"ref_len":reference_length}
  return bleu_score_dict#(bleu, precisions, bp, ratio, translation_length, reference_length)
 def bleu_test_case():
    """A simple functionality test case to evaluate BLEU"""
-    generated = [[["a","=","b","\n","y","=","a","+","1"]]]
+    generated = [[["a", "=", "b", "\n", "y", "=", "a", "+", "1"]]]
-    reference = [["a","=","b","\n","print","a"]]
+    reference = [["a", "=", "b", "\n", "print", "a"]]
-    score_dict = compute_bleu(generated,reference,smooth=False)
+    score_dict = compute_bleu(generated, reference, smooth=False)
    return score_dict
 if __name__ == "__main__":
    score_dict = bleu_test_case()
-    print(score_dict)
+    print(score_dict)
--- a/metrics/extrinsic_eval.py
+++ b/metrics/extrinsic_eval.py
@ -1,7 +1,7 @@
 from metrics.bleu import compute_bleu
-def compute_exact_match(references,generated)->float:
+def compute_exact_match(references, generated) -> float:
    """
    Computes Exact Match Accuracy.
    args:
@ -12,15 +12,19 @@ def compute_exact_match(references,generated)->float:
    returns:
        exact_match_accuracy : Float
    """
-    assert(len(references[0])==len(generated),"Number of Samples should be equal in References and Synthesized Outputs..")
+    assert (
        len(references[0]) == len(generated),
        "Number of Samples should be equal in References and Synthesized Outputs..",
    )
    exact_match_count = 0.0
-    for gen,ref in zip(generated, references[0]):
+    for gen, ref in zip(generated, references[0]):
        if gen == ref:
            exact_match_count += 1
-    exact_match_acc = exact_match_count/len(generated)
+    exact_match_acc = exact_match_count / len(generated)
    return exact_match_acc
-def compute_metrics(references,generated) -> dict:
+
 def compute_metrics(references, generated) -> dict:
    """
    Calculates various metrics and returns the calculated dict of these metrics.
    args:
@ -31,8 +35,12 @@ def compute_metrics(references,generated) -> dict:
    returns:
        A dictionary containing the different metrics.
    """
-    metrics_dict = {} #Update as in new metrics are added over here.
+    metrics_dict = {
-    metrics_dict["smoothed_bleu_4"] = compute_bleu(references,generated,smooth=True)
+        "smoothed_bleu_4": None,
-    metrics_dict["bleu_4"] = compute_bleu(references,generated,smooth=False)
+        "blue_4": None,
-    metrics_dict["exact_match_acc"] = compute_exact_match(references,generated)
+        "exact_match_acc": None,
-    return metrics_dict
+    }  # Update as new metrics are computed.
    metrics_dict["smoothed_bleu_4"] = compute_bleu(references, generated, smooth=True)
    metrics_dict["bleu_4"] = compute_bleu(references, generated, smooth=False)
    metrics_dict["exact_match_acc"] = compute_exact_match(references, generated)
    return metrics_dict