mosesdecoder/contrib/promix/sampler.py
2013-02-21 17:59:53 +00:00

45 lines
1.0 KiB
Python
Executable File

#!/usr/bin/env python
#
#
#
import heapq
import math
import random
import sys
from bleu import BleuScorer
class Sample:
    """A pair of hypotheses and the absolute difference of their scores.

    Each hypothesis must expose a numeric ``score`` attribute.  Samples are
    ordered by ``diff`` so that they can be stored in a ``heapq`` min-heap,
    where the sample with the smallest score difference sits at the root.
    """

    def __init__(self, hyp1, hyp2):
        """Store both hypotheses and cache ``abs(hyp1.score - hyp2.score)``."""
        self.hyp1 = hyp1
        self.hyp2 = hyp2
        self.diff = abs(hyp1.score - hyp2.score)

    def __lt__(self, other):
        """Return True when this sample's score difference is smaller.

        ``heapq`` and ``sort`` only need ``<``.  Python 3 never consults
        ``__cmp__``, so without this method heap operations raise TypeError.
        """
        return self.diff < other.diff

    def __cmp__(self, other):
        """Three-way comparison on ``diff`` (-1, 0 or 1), for Python 2.

        Written without the ``cmp`` builtin, which no longer exists in
        Python 3, so an explicit call keeps working on either version.
        """
        return (self.diff > other.diff) - (self.diff < other.diff)
class HopkinsMaySampler:
    """Implements Hopkins & May sampling.

    Random pairs of hypotheses are drawn from an n-best list; pairs whose
    score difference is below a threshold are discarded, and of the
    remaining pairs only those with the largest differences are kept.
    """

    def __init__(self, ncandidates=5000, nsamples=50, min_diff=0.05):
        """Configure the sampler.

        ncandidates -- number of random pairs drawn per call to sample()
        nsamples    -- maximum number of pairs returned by sample()
        min_diff    -- pairs with a smaller score difference are rejected
        """
        self.ncandidates = ncandidates  # Gamma in Hopkins and May
        self.nsamples = nsamples  # Xi in Hopkins and May
        self.min_diff = min_diff  # Minimum scoring difference

    def sample(self, nbest):
        """Return up to ``nsamples`` Sample pairs drawn from ``nbest.hyps``.

        ``nbest.hyps`` must be a sequence of hypotheses, each with a
        ``score`` attribute.  The result is a list in heap order (smallest
        score difference first element), not a fully sorted list.  An empty
        hypothesis list yields an empty result.
        """
        samples = []
        hyps = nbest.hyps
        # random.choice raises IndexError on an empty sequence; with no
        # hypotheses there is simply nothing to sample.
        if not hyps:
            return samples
        for _ in range(self.ncandidates):
            hyp1 = random.choice(hyps)
            hyp2 = random.choice(hyps)
            sample = Sample(hyp1, hyp2)
            if sample.diff < self.min_diff:
                continue
            # Maintain the nsamples biggest samples: the min-heap root is
            # the smallest difference, so popping evicts the weakest pair.
            heapq.heappush(samples, sample)
            while len(samples) > self.nsamples:
                heapq.heappop(samples)
        return samples