mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2025-01-01 00:12:27 +03:00
45 lines
1.0 KiB
Python
Executable File
45 lines
1.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
#
|
|
#
|
|
#
|
|
|
|
import heapq
|
|
import math
|
|
import random
|
|
import sys
|
|
|
|
from bleu import BleuScorer
|
|
|
|
|
|
class Sample:
    """A pair of hypotheses, and their score difference.

    Instances are ordered by ``diff`` so that they can be stored in a
    ``heapq`` heap or sorted: the sample with the smallest score
    difference compares lowest.

    Attributes:
        hyp1: First hypothesis; must expose a numeric ``score`` attribute.
        hyp2: Second hypothesis; must expose a numeric ``score`` attribute.
        diff: Absolute difference between the two hypothesis scores.
    """

    def __init__(self, hyp1, hyp2):
        self.hyp1 = hyp1
        self.hyp2 = hyp2
        self.diff = abs(hyp1.score - hyp2.score)

    def __lt__(self, other):
        """Return True when this sample's score difference is smaller.

        ``heapq`` and ``sorted`` only need ``<``; Python 3 ignores
        ``__cmp__``, so this method is what makes samples orderable there.
        """
        return self.diff < other.diff

    def __cmp__(self, other):
        """Three-way comparison on ``diff`` (-1, 0 or 1).

        Kept for Python 2 callers. Written without the ``cmp()`` builtin,
        which no longer exists in Python 3.
        """
        return (self.diff > other.diff) - (self.diff < other.diff)
|
|
|
|
class HopkinsMaySampler:
    """Implements Hopkins & May sampling.

    Random pairs of hypotheses are drawn from an n-best list; pairs whose
    score difference is below ``min_diff`` are discarded and, of the rest,
    the ``nsamples`` pairs with the largest score difference are kept.

    Args:
        ncandidates: Number of random pairs to draw (Gamma in Hopkins and
            May).
        nsamples: Maximum number of pairs to keep (Xi in Hopkins and May).
        min_diff: Minimum scoring difference for a pair to be considered.
    """

    def __init__(self, ncandidates=5000, nsamples=50, min_diff=0.05):
        self.ncandidates = ncandidates  # Gamma in Hopkins and May
        self.nsamples = nsamples  # Xi in Hopkins and May
        self.min_diff = min_diff  # Minimum scoring difference

    def sample(self, nbest):
        """Draw hypothesis pairs from ``nbest`` and keep the most distinct.

        Args:
            nbest: Object exposing a ``hyps`` sequence; each element must
                have a ``score`` attribute (read by ``Sample``).

        Returns:
            A list of at most ``nsamples`` ``Sample`` objects. The list is
            in heap order (smallest ``diff`` at index 0), not fully sorted.
            It is empty when ``nbest.hyps`` is empty.
        """
        hyps = nbest.hyps
        # random.choice raises IndexError on an empty sequence; there is
        # nothing to pair up, so report "no samples" instead of crashing.
        if len(hyps) == 0:
            return []

        samples = []
        # range() rather than xrange(): works on both Python 2 and 3.
        for _ in range(self.ncandidates):
            hyp1 = random.choice(hyps)
            hyp2 = random.choice(hyps)
            sample = Sample(hyp1, hyp2)
            if sample.diff < self.min_diff:
                continue
            # Maintain the nsamples biggest samples: the min-heap root is
            # the pair with the smallest diff, so popping evicts the least
            # distinct pair whenever the heap grows too large.
            heapq.heappush(samples, sample)
            while len(samples) > self.nsamples:
                heapq.heappop(samples)
        return samples
|
|
|