mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
add DIMwid /Robin Kurtz
This commit is contained in:
parent
72981abf20
commit
233062a468
290
contrib/DIMwid/DIMputs.py
Normal file
290
contrib/DIMwid/DIMputs.py
Normal file
@ -0,0 +1,290 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class DataInput(object):
    """Reader for the Moses decoder outputs that DIMwid displays.

    The constructor opens ``file_name``; exactly one of the ``read_*``
    methods is then called to fill ``self.sentences`` with :class:`Single`
    or :class:`Multiple` objects.  The file is consumed by that call and is
    closed once its last line has been read.

    Attributes:
        file: The open input file object.
        sentences: ``None`` until a ``read_*`` method ran, afterwards the
            list of parsed sentences.
    """

    # "|start-end|" marker that follows each phrase in the phrase trace.
    _PHRASE_SPAN = re.compile(r"\|[0-9]+-[0-9]+\|")
    # "[start..end]" chart span found in syntax search-graph lines.
    _CHART_SPAN = re.compile(r"\[([0-9]+)\.\.([0-9]+)\]")
    # "covered=start-end" coverage found in phrase search-graph lines.
    _COVERED_SPAN = re.compile(r"covered=([0-9]+)\-([0-9]+)")
    # "[words; start-end]" header line that opens a span in verbose output.
    _STACK_HEADER = re.compile(r"\[[A-Z,a-z,\ ]+;\ [0-9]+-[0-9]+\]")
    # "(n)" markers that terminate the components of an MBOT field.
    _ALIGN_MARK = re.compile(r"\([0-9]+\)")

    def __init__(self, file_name):
        self.file = open(file_name, "r")
        self.sentences = None

    def close(self):
        """Close the underlying input file (safe to call repeatedly)."""
        self.file.close()

    # ------------------------------------------------------------------
    # shared helpers
    # ------------------------------------------------------------------
    def _lines(self):
        """Yield the lines of the input file and close it afterwards.

        The file is closed when the iterator is exhausted or discarded.  A
        file that is already closed yields nothing, which matches what a
        second read of an exhausted file used to produce.
        """
        if self.file.closed:
            return
        try:
            for line in self.file:
                yield line
        finally:
            self.file.close()

    def _store(self, sentence):
        """Finalize ``sentence`` (if there is one) and append it."""
        if sentence is not None:
            sentence.set_length()
            self.sentences.append(sentence)

    @staticmethod
    def _dotted_span(token):
        """Turn a token such as ``"[0..3]:"`` into the tuple ``(0, 3)``."""
        return tuple([int(i) for i in token.strip(":[]").split("..")])

    @staticmethod
    def _find_span(pattern, line):
        """Return the two integer groups of ``pattern`` found in ``line``.

        Raises:
            ValueError: If the line does not contain the pattern, so that a
                wrongly chosen format is reported like every other parse
                failure instead of surfacing as an ``AttributeError``.
        """
        match = pattern.search(line)
        if match is None:
            raise ValueError("no span found in line: %r" % (line,))
        return int(match.group(1)), int(match.group(2))

    @staticmethod
    def _field(line):
        """Return the stripped text after the first colon of ``line``."""
        return line.split(":", 1)[1].strip()

    # ------------------------------------------------------------------
    # readers
    # ------------------------------------------------------------------
    def read_phrase(self):
        """Read a phrase-based trace: one sentence per line.

        Each line is a sequence of words interleaved with ``|start-end|``
        markers; the words preceding a marker become the content of that
        span.  Sentences are numbered from 1 in file order.
        """
        self.sentences = []
        for line in self._lines():
            sentence = Single()
            # Reset per line so trailing words of one line cannot leak into
            # the first span of the next sentence.
            previous = ""
            for word in line.split():
                if self._PHRASE_SPAN.match(word):
                    span = tuple([int(i) for i in word.strip("|").split("-")])
                    sentence.spans[span] = previous.strip()
                    previous = ""
                else:
                    previous += word + " "
            sentence.set_length()
            self.sentences.append(sentence)
            sentence.number = len(self.sentences)

    def read_syntax(self):
        """Read a syntax-based trace.

        The third whitespace-separated field of a line is the sentence
        number and the fourth its ``[start..end]`` span; the whole stripped
        line is stored as the content of that span.
        """
        self.sentences = []
        sentence = None
        number = -1
        for line in self._lines():
            fields = line.split()
            line_number = int(fields[2])
            if line_number != number:
                self._store(sentence)
                sentence = Single()
                sentence.number = number = line_number
            sentence.spans[self._dotted_span(fields[3])] = line.strip()
        self._store(sentence)

    def read_syntax_cubes(self, cell_limit):
        """Read the ``-Tall`` trace, keeping up to ``cell_limit`` lines per span.

        Only the first ``Trans Opt`` line that follows a ``---------``
        separator is used; ``Chart Cell`` lines are ignored.
        """
        self.sentences = []
        sentence = None
        number = -1
        new_item = False
        for line in self._lines():
            if line.startswith("Chart Cell"):
                continue  # these lines carry nothing we display
            if line.startswith("---------"):
                new_item = True
            elif line.startswith("Trans Opt") and new_item:
                new_item = False
                fields = line.split()
                line_number = int(fields[2])
                if line_number != number:
                    self._store(sentence)
                    sentence = Multiple()
                    sentence.number = number = line_number
                span = self._dotted_span(fields[3])
                if len(sentence.spans[span]) < cell_limit:
                    sentence.spans[span].append(line.strip())
        self._store(sentence)

    def read_phrase_stack_flag(self, cell_limit):
        """Read a phrase-based search graph.

        Lines with fewer than six fields are skipped.  The first field is
        the sentence number and ``covered=start-end`` gives the span.
        """
        self._read_search_graph(self._COVERED_SPAN, cell_limit)

    def read_syntax_cube_flag(self, cell_limit):
        """Read a syntax-based search graph.

        Lines with fewer than six fields are skipped.  The first field is
        the sentence number and ``[start..end]`` gives the span.
        """
        self._read_search_graph(self._CHART_SPAN, cell_limit)

    def _read_search_graph(self, span_pattern, cell_limit):
        """Shared body of the two search-graph readers."""
        self.sentences = []
        sentence = None
        number = -1
        for line in self._lines():
            fields = line.split()
            if len(fields) < 6:
                continue
            line_number = int(fields[0])
            if line_number != number:
                self._store(sentence)
                sentence = Multiple()
                sentence.number = number = line_number
            span = self._find_span(span_pattern, line)
            if len(sentence.spans[span]) < cell_limit:
                sentence.spans[span].append(line.strip())
        self._store(sentence)

    def read_phrase_stack_verbose(self, cell_limit):
        """Read the phrase-based verbose output.

        A line starting with ``Translating: `` opens a new sentence
        (numbered from 0).  A ``[words; start-end]`` header opens a span and
        is always stored; the following lines, up to the next blank line,
        are stored while the span holds fewer than ``cell_limit`` entries.
        """
        self.sentences = []
        sentence = None
        number = -1
        span_input = False
        for line in self._lines():
            if line.startswith("Translating: "):
                self._store(sentence)
                number += 1
                sentence = Multiple()
                sentence.number = number
            elif self._STACK_HEADER.match(line):
                bounds = line.split(";")[1].strip().strip("]").split("-")
                span = tuple([int(i) for i in bounds])
                sentence.spans[span].append(line.strip())
                span_input = True
            elif span_input:
                if line.strip() == "":
                    span_input = False
                elif len(sentence.spans[span]) < cell_limit:
                    sentence.spans[span].append(line.strip())
        self._store(sentence)

    def read_mbot(self, cell_limit):
        """Read MBOT output and store one rule string per hypothesis.

        A ``Translating:`` line opens a new sentence (numbered from 0).  The
        line after ``POPPING`` carries the ``[start..end]`` span in its
        second field.  The subsequent ``Target Phrases``, ``Alignment
        Info``, ``Source Phrase`` and ``Source Left-hand-side`` lines are
        collected, and the ``Target Left-hand-side`` line triggers assembly
        of the rule, which is stored while the span holds fewer than
        ``cell_limit`` entries.
        """
        self.sentences = []
        sentence = None
        number = -1
        hypo = False
        popping = False
        target = source = source_parent = alignment = ""
        for line in self._lines():
            if line.startswith("Translating:"):
                self._store(sentence)
                sentence = Multiple()
                number += 1
                sentence.number = number
            elif line.startswith("POPPING"):
                popping = True
            elif popping:
                popping = False
                token = line.split()[1].strip("[").split("]")[0]
                span = tuple([int(i) for i in token.split("..")])
                hypo = True
            elif hypo:
                if line.startswith("Target Phrases"):
                    target = self._field(line)
                elif line.startswith("Alignment Info"):
                    # An empty alignment still needs its terminating marker.
                    alignment = self._field(line) or "(1)"
                elif line.startswith("Source Phrase"):
                    source = self._field(line)
                elif line.startswith("Source Left-hand-side"):
                    source_parent = self._field(line)
                elif line.startswith("Target Left-hand-side"):
                    target_parent = self._field(line)
                    rule = self._mbot_rule(source, target, alignment,
                                           source_parent, target_parent)
                    # Store each rule once, and only below the cell limit.
                    if len(sentence.spans[span]) < cell_limit:
                        sentence.spans[span].append(rule)
        self._store(sentence)

    @classmethod
    def _mbot_rule(cls, source, target, alignment, source_parent,
                   target_parent):
        """Assemble the displayed rule string from the raw MBOT fields.

        ``target`` and ``alignment`` consist of components that each end in
        a ``(n)`` marker.  Upper-case source tokens are bracketed and get
        the aligned target token appended; aligned target tokens get the
        source label prepended; parent labels are attached last.
        """
        def pairs(block):
            return [tuple([int(y) for y in x.split("-")])
                    for x in block.split()]

        alignment = cls._ALIGN_MARK.sub("||", alignment)
        align_blocks = alignment.split("||")[:-1]
        target = cls._ALIGN_MARK.sub("||", target)
        target = [x.split() for x in target.split("||")][:-1]
        source = source.split()

        for i in range(len(source)):
            if source[i].isupper():
                source[i] = "[" + source[i] + "]"
                for k in range(len(align_blocks)):
                    for pair in pairs(align_blocks[k]):
                        if pair[0] == i:
                            source[i] += "[" + target[k][pair[1]] + "]"

        for i in range(len(target)):
            for j in range(len(target[i])):
                for pair in pairs(align_blocks[i]):
                    if pair[1] == j:
                        label = source[pair[0]].split("]")[0]
                        target[i][j] = label + "][" + target[i][j] + "]"

        target = " || ".join([" ".join(x) for x in target]) + " ||"
        source = " ".join(source) + " [" + source_parent + "]"

        # "!!" is a temporary stand-in so that each parent label replaces a
        # different "||" separator.
        for label in cls._ALIGN_MARK.sub("", target_parent).split():
            target = target.replace("||", " [" + label + "] !!", 1)
        target = target.replace("!!", "||")

        return ("||| " + source + " ||| " + target + "| --- ||| "
                + alignment + "|")


class _Sentence(object):
    """Span table shared by :class:`Single` and :class:`Multiple`.

    Attributes:
        number: Sentence identifier assigned by the reader.
        spans: Mapping from ``(start, end)`` tuples to the cell content.
        length: Largest span end, filled in by :meth:`set_length`.
    """

    def __init__(self, spans):
        self.number = None
        self.spans = spans
        self.length = None

    def set_length(self):
        """Set ``self.length`` to the largest span end position.

        Raises:
            ValueError: If no span has been recorded.
        """
        self.length = max([key[1] for key in self.spans])

    def __str__(self):
        spans = "\n" + "".join(
            [str(key) + " - " + str(self.spans[key]) + "\n"
             for key in self.spans])
        return str((str(self.number), str(self.length), spans))


class Single(_Sentence):
    """Sentence whose spans each hold a single string."""

    def __init__(self):
        _Sentence.__init__(self, {})


class Multiple(_Sentence):
    """Sentence whose spans each hold a list of strings."""

    def __init__(self):
        _Sentence.__init__(self, collections.defaultdict(list))
|
||||||
|
|
||||||
|
|
||||||
|
|
381
contrib/DIMwid/DIMterface.py
Normal file
381
contrib/DIMwid/DIMterface.py
Normal file
@ -0,0 +1,381 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from PyQt4 import QtCore, QtGui
|
||||||
|
|
||||||
|
import DIMputs as my_DI
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class MainWindow(QtGui.QWidget):
    """Main DIMwid window.

    The top row holds the format menu, the cell-limit spinbox and the file
    chooser; the middle is the span table; the bottom row holds the sentence
    navigation buttons.  Written for Python 2 / PyQt4 (it relies on
    ``unicode``, ``basestring`` and ``str.decode``).
    """

    updateSignal = QtCore.pyqtSignal()

    # format key -> (name of the DataInput reader, reader takes cell_limit)
    _READERS = {
        "syntax": ("read_syntax", False),
        "phrase": ("read_phrase", False),
        "syntaxCube": ("read_syntax_cubes", True),
        "phraseStackFlag": ("read_phrase_stack_flag", True),
        "phraseStackVerbose": ("read_phrase_stack_verbose", True),
        "syntaxCubeFlag": ("read_syntax_cube_flag", True),
        "mbot": ("read_mbot", True),
    }

    def __init__(self, parent=None):
        self.path = ""
        self.cur_rein_num = 0
        self.data = None
        self.format = ""
        self.cell_limit = float("inf")

        super(MainWindow, self).__init__(parent)

        # upper buttons
        pathLabel = QtGui.QLabel("Path:")
        self.pathLabel = QtGui.QLabel(self.path)
        self.pathLabel.setFrameStyle(QtGui.QFrame.StyledPanel |
                                     QtGui.QFrame.Sunken)
        self.pathLabel.setToolTip("Current File")
        self.pathButton = QtGui.QPushButton("P&ath...")
        self.pathButton.setToolTip("Set the item you want to inspect")
        self.pathButton.clicked.connect(self.setPath)

        # cell limit label and text field
        cell_limit_label = QtGui.QLabel("Cell Limit:")
        self.cell_limit_chooser = QtGui.QSpinBox()
        self.cell_limit_chooser.setMaximum(99999)
        cell_limit_label.setToolTip("Limits the number of elements per cell")
        self.cell_limit_chooser.setToolTip("Set to zero to show all elements")

        # format drop down menu
        self.format_drop = QtGui.QToolButton(self)
        self.format_drop.setPopupMode(QtGui.QToolButton.MenuButtonPopup)
        self.format_drop.setMenu(QtGui.QMenu(self.format_drop))
        self.format_drop.setText("Format")

        self.format_syntax = self._add_format_choice(
            "Syntax", self.set_format_syntax)
        self.format_phrase = self._add_format_choice(
            "Phrase", self.set_format_phrase)
        self.format_syntaxCube = self._add_format_choice(
            "Syntax Cube (-Tall flag)", self.set_format_syntaxCube)
        self.format_phraseStackFlag = self._add_format_choice(
            "Phrase Stack (search-graph)", self.set_format_phraseStackFlag)
        self.format_phraseStackVerbose = self._add_format_choice(
            "Phrase Stack (verbose)", self.set_format_phraseStackVerbose)
        self.format_syntaxCubeFlag = self._add_format_choice(
            "Syntax Cube (search-graph)", self.set_format_syntaxCubeFlag)
        self.format_mbot = self._add_format_choice(
            "MBOT", self.set_format_mbot)

        # table
        self.table_widget = HoverTable(self)
        # Popup windows are kept here so they are not garbage collected
        # while they are shown.
        self.w = []

        # lower buttons
        self.buttonBox = QtGui.QDialogButtonBox()
        self.sentence_spinbox = QtGui.QSpinBox(parent=self.buttonBox)
        self.sentence_spinbox.setMaximum(999999)

        self.goto_button = self.buttonBox.addButton(
            "&GoTo", QtGui.QDialogButtonBox.ActionRole)
        self.next_button = self.buttonBox.addButton(
            "&Next", QtGui.QDialogButtonBox.ActionRole)
        self.prev_button = self.buttonBox.addButton(
            "&Prev", QtGui.QDialogButtonBox.ActionRole)
        self.next_button.clicked.connect(self.next_parse)
        self.prev_button.clicked.connect(self.prev_parse)
        self.goto_button.clicked.connect(self.cur_parse)
        self.quit_button = self.buttonBox.addButton(
            "&Quit", QtGui.QDialogButtonBox.ActionRole)
        self.quit_button.clicked.connect(
            QtCore.QCoreApplication.instance().quit)

        # Navigation stays disabled until a file was loaded successfully
        # (see setPath).
        self._set_navigation_enabled(False)

        # Layouting
        layout = QtGui.QVBoxLayout()

        topLayout = QtGui.QHBoxLayout()
        topLayout.addWidget(self.format_drop)
        topLayout.addWidget(cell_limit_label)
        topLayout.addWidget(self.cell_limit_chooser)
        self.cell_limit_chooser.valueChanged.connect(self.setCellLimit)
        topLayout.addWidget(pathLabel)
        topLayout.addWidget(self.pathLabel, 1)
        topLayout.addWidget(self.pathButton)

        bottomLayout = QtGui.QHBoxLayout()
        bottomLayout.addWidget(self.buttonBox)

        layout.addLayout(topLayout)
        layout.addWidget(self.table_widget)
        layout.addLayout(bottomLayout)

        self.sentence_spinbox.valueChanged.connect(self.set_cur_rein_num)

        self.setLayout(layout)
        self.updateSignal.connect(self.update_table)
        self.table_widget.cellDoubleClicked.connect(self.popup)

    # ------------------------------------------------------------------
    # construction helpers
    # ------------------------------------------------------------------
    def _add_format_choice(self, label, slot):
        """Add a push button labelled ``label`` to the format menu.

        The button is wrapped in a ``QWidgetAction`` owned by the format
        tool button, its ``clicked`` signal is connected to ``slot`` and the
        button is returned.
        """
        button = QtGui.QPushButton(label)
        action = QtGui.QWidgetAction(self.format_drop)
        action.setDefaultWidget(button)
        self.format_drop.menu().addAction(action)
        button.clicked.connect(slot)
        return button

    def _set_navigation_enabled(self, enabled):
        """Enable or disable the GoTo/Next/Prev buttons together."""
        for button in (self.goto_button, self.next_button, self.prev_button):
            button.setDisabled(not enabled)

    # ------------------------------------------------------------------
    # Qt events and simple slots
    # ------------------------------------------------------------------
    def closeEvent(self, *args, **kwargs):
        # Closing the main window quits the whole application, which also
        # takes down any popup windows that are still open.
        QtCore.QCoreApplication.quit()
        return QtGui.QWidget.closeEvent(self, *args, **kwargs)

    def setCellLimit(self, value):
        """Store the cell limit; a value of 0 means "no limit"."""
        if value == 0:
            value = float("inf")
        self.cell_limit = value

    def set_cur_rein_num(self, value):
        """Remember the sentence index chosen in the spinbox."""
        self.cur_rein_num = value

    # ------------------------------------------------------------------
    # loading
    # ------------------------------------------------------------------
    def setPath(self):
        """Ask for a file, parse it with the selected format and show it.

        On success the first sentence is displayed and the navigation
        buttons are enabled.  If no format was selected or the file cannot
        be opened or parsed, an error dialog is shown and navigation is
        disabled.
        """
        path = QtGui.QFileDialog.getOpenFileName(
            self, "Select File", self.pathLabel.text())
        if not path:
            return
        self.pathLabel.setText(QtCore.QDir.toNativeSeparators(path))
        self.path = unicode(path)

        loaded = self._load(self.path)
        self._set_navigation_enabled(loaded)
        if not loaded:
            self.data = None
            self._show_format_error()

    def _load(self, path):
        """Parse ``path`` with the reader for ``self.format``.

        Returns ``True`` when the file was read and its first sentence is
        shown, ``False`` when no known format is selected or opening,
        reading or displaying the file failed.
        """
        reader = self._READERS.get(self.format)
        if reader is None:
            return False
        method_name, takes_limit = reader
        try:
            self.data = my_DI.DataInput(path)
            read = getattr(self.data, method_name)
            if takes_limit:
                read(self.cell_limit)
            else:
                read()
            self.populate(0)
            self.sentence_spinbox.setValue(0)
        except (ValueError, IndexError, IOError):
            return False
        return True

    def _show_format_error(self):
        """Show the modal "wrong format/file" dialog."""
        self.error_dialog = QtGui.QDialog()
        self.error_dialog.setModal(True)
        layout = QtGui.QVBoxLayout()
        text = QtGui.QLabel(
            """Something went wrong when choosing your input format/file
\n""")
        button = QtGui.QPushButton("Ok")
        button.clicked.connect(self.error_dialog.close)
        layout.addWidget(text)
        layout.addWidget(button)
        self.error_dialog.setLayout(layout)
        self.error_dialog.show()

    # ------------------------------------------------------------------
    # navigation
    # ------------------------------------------------------------------
    def _show_sentence(self, index):
        """Display sentence ``index``, wrapping around at both ends.

        A negative index counts from the end; an index past the last
        sentence goes back to the first one.  Nothing happens while no
        sentences are loaded.
        """
        if self.data is None or not self.data.sentences:
            return
        count = len(self.data.sentences)
        if index < 0:
            index += count
        if index >= count:
            index = 0
        self.cur_rein_num = index
        self.sentence_spinbox.setValue(index)
        self.populate(index)

    def next_parse(self):
        """Show the sentence after the current one."""
        self._show_sentence(self.cur_rein_num + 1)

    def prev_parse(self):
        """Show the sentence before the current one."""
        self._show_sentence(self.cur_rein_num - 1)

    def cur_parse(self):
        """Show the sentence currently selected in the spinbox."""
        self._show_sentence(self.cur_rein_num)

    # ------------------------------------------------------------------
    # table
    # ------------------------------------------------------------------
    def populate(self, cur_rein_num):
        """Fill the table with the spans of sentence ``cur_rein_num``.

        The table is square with ``length + 1`` rows and columns, labelled
        from 0 like the spans; cell ``(i, j)`` shows span ``(i, j)``.

        Raises:
            IndexError: If there is no sentence with that index.
        """
        cur_sent = self.data.sentences[cur_rein_num]
        size = cur_sent.length + 1
        self.table_widget.setSortingEnabled(False)
        self.table_widget.setRowCount(size)
        self.table_widget.setColumnCount(size)
        labels = [str(x) for x in range(size)]
        self.table_widget.setHorizontalHeaderLabels(labels)
        self.table_widget.setVerticalHeaderLabels(labels)
        for i in range(size):
            for j in range(size):
                self.table_widget.setItem(
                    i, j, self._make_item(cur_sent.spans, i, j))
                self.table_widget.setColumnWidth(j, 40)

        self.updateSignal.emit()
        self.table_widget.setSortingEnabled(True)

    @staticmethod
    def _make_item(spans, i, j):
        """Build the table item for span ``(i, j)``.

        Missing or empty spans are shown as ``"-"`` on or above the
        diagonal and as an empty cell below it.  ``spans.get`` is used so
        that looking at a cell never inserts an entry into a defaultdict.
        """
        placeholder = "" if j < i else "-"
        cell = spans.get((i, j))
        if cell is None:
            return QtGui.QTableWidgetItem(placeholder)
        if cell == []:
            return TableItem(placeholder.decode("utf-8"))
        text = str(i) + ".." + str(j) + " \n"
        if isinstance(cell, basestring):
            text += cell + "\n"
        else:
            for rule in cell:
                text += str(rule) + "\n"
        return TableItem(text.decode("utf-8"))

    def update_table(self):
        """Sort the table items by the first column, descending."""
        self.table_widget.sortItems(0, QtCore.Qt.DescendingOrder)

    # ------------------------------------------------------------------
    # format selection
    # ------------------------------------------------------------------
    def _set_format(self, key, label):
        """Select format ``key``, show ``label`` on the menu button and
        hide the menu."""
        self.format = key
        self.format_drop.setText(label)
        self.format_drop.menu().hide()

    def set_format_syntax(self):
        self._set_format("syntax", "Syntax")

    def set_format_phrase(self):
        self._set_format("phrase", "Phrase")

    def set_format_syntaxCube(self):
        self._set_format("syntaxCube", "Syntax Cube (-Tall flag)")

    def set_format_phraseStackFlag(self):
        self._set_format("phraseStackFlag", "Phrase Stack (search-graph)")

    def set_format_phraseStackVerbose(self):
        self._set_format("phraseStackVerbose", "Phrase Stack (verbose)")

    def set_format_syntaxCubeFlag(self):
        self._set_format("syntaxCubeFlag", "Syntax Cube (search-graph)")

    def set_format_mbot(self):
        self._set_format("mbot", "MBOT")

    # ------------------------------------------------------------------
    # popups
    # ------------------------------------------------------------------
    def popup(self, r, c):
        """Open a window with the full text of the cell at row ``r``,
        column ``c``; cells without an item are ignored."""
        item = self.table_widget.item(r, c)
        if item is None:
            return
        self.w.append(PopUpCell(item.text()))
        self.w[-1].show()
|
||||||
|
|
||||||
|
|
||||||
|
class HoverTable(QtGui.QTableWidget):
    """Table widget with mouse tracking switched on and a horizontal
    header that does not react to clicks."""

    def __init__(self, parent=None):
        QtGui.QTableWidget.__init__(self, parent)
        self.setMouseTracking(True)
        header = self.horizontalHeader()
        header.setClickable(False)
|
||||||
|
# self.verticalHeader().setDefaultSectionSize(self.verticalHeader.fontMetrics().height()+2);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class PopUpCell(QtGui.QWidget):
    """Stand-alone window showing the complete text of one table cell.

    Args:
        cell_text: The text to display.  Its first line is used as the
            window title.
    """

    def __init__(self, cell_text):
        QtGui.QWidget.__init__(self)
        # Index the split result directly: the former identity ``map`` was
        # a no-op and is not subscriptable on Python 3.
        title = cell_text.split("\n")[0]

        wind_cont = QtGui.QTextEdit()
        wind_cont.setReadOnly(True)
        wind_cont.setWindowTitle(title)
        wind_cont.setPlainText(cell_text)

        layout = QtGui.QHBoxLayout()
        layout.addWidget(wind_cont)
        self.setWindowTitle(title)
        self.setLayout(layout)
        self.resize(960, 320)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class TableItem(QtGui.QTableWidgetItem):
    """Table item whose tooltip shows the beginning of its text.

    Args:
        cell_text: Text shown in the cell; also kept in ``self.cell_text``.
        type: Accepted for interface compatibility; it is not passed on to
            ``QTableWidgetItem``.
    """

    # Maximum number of lines shown in the tooltip.
    TOOLTIP_LINES = 20

    def __init__(self, cell_text, type=1000):
        super(TableItem, self).__init__(cell_text)
        lines = cell_text.split("\n")
        if len(lines) > self.TOOLTIP_LINES:
            # Show the first 20 lines, as the README documents (the slice
            # used to stop one line short).
            self.setToolTip("\n".join(lines[:self.TOOLTIP_LINES]))
        else:
            self.setToolTip(cell_text)
        self.cell_text = cell_text
|
||||||
|
|
||||||
|
|
16
contrib/DIMwid/DIMwid.py
Normal file
16
contrib/DIMwid/DIMwid.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import sys
|
||||||
|
from PyQt4 import QtCore, QtGui
|
||||||
|
|
||||||
|
import DIMterface as my_gui
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Build the Qt application and the DIMwid main window, then hand
    # control to the Qt event loop and exit with its return code.
    app = QtGui.QApplication(sys.argv)
    wnd = my_gui.MainWindow()
    wnd.setWindowTitle("DIMwid")
    wnd.resize(640, 480)
    wnd.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
|
20
contrib/DIMwid/LICENSE
Normal file
20
contrib/DIMwid/LICENSE
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2013 RobinQrtz
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
67
contrib/DIMwid/README.md
Normal file
67
contrib/DIMwid/README.md
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
DIMwid
|
||||||
|
======
|
||||||
|
|
||||||
|
DIMwid (Decoder Inspection for Moses using widgets) is a tool
|
||||||
|
presenting Moses' different chart/stack outputs in a readable tabular
|
||||||
|
view.
|
||||||
|
|
||||||
|
|
||||||
|
Installation
|
||||||
|
============
|
||||||
|
|
||||||
|
In order to run DIMwid you need to install PyQt, Qt 4.8 and Python
|
||||||
|
2.7. Other versions have not yet been tested. Linux/Unix users simply
|
||||||
|
install these packages using their package-manager or build them from
|
||||||
|
source. Windows users can skip the installation of Qt since PyQt itself
|
||||||
|
does cover everything, except Python.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
=====
|
||||||
|
|
||||||
|
Users are recommended to read the accompanying paper "DIMwid --
|
||||||
|
Decoder Inspection for Moses (using Widgets)" appearing in PBML XY.
|
||||||
|
|
||||||
|
DIMwid is able to read multiple decoder outputs of the Moses
|
||||||
|
translation system. These include the standard trace outputs for both
|
||||||
|
phrase- and syntax-based decoding, the search-graphs for both, the
|
||||||
|
"level 3 verbose" output for phrase-based and a special trace output
|
||||||
|
(available as a Moses fork at :
|
||||||
|
https://github.com/RobinQrtz/mosesdecoder) for all possible
|
||||||
|
translations for syntax-based decoding.
|
||||||
|
|
||||||
|
After producing the outputs from Moses, start DIMwid by running
|
||||||
|
DIMwid.py and first select your format and after that your file. If
|
||||||
|
you have chosen the wrong file or format an error message will
|
||||||
|
appear. Otherwise you will see the first sentence. Cells can be
|
||||||
|
inspected by either double-clicking, opening a new window with the
|
||||||
|
full content, or hovering over the cell, showing a tooltip with the
|
||||||
|
first 20 lines of the cell's content.
|
||||||
|
|
||||||
|
If needed, the user can restrict the number of rules per cell, using
|
||||||
|
the "Cell Limit" spinbox.
|
||||||
|
|
||||||
|
Navigating through the sentences of the input file can be done by
|
||||||
|
either using the "Next" and "Prev" buttons, or choosing a certain
|
||||||
|
sentence number using the lower left spinbox and clicking the "GoTo"
|
||||||
|
button.
|
||||||
|
|
||||||
|
Moses
|
||||||
|
=====
|
||||||
|
|
||||||
|
Information about Moses can be found here: http://statmt.org/moses/
|
||||||
|
|
||||||
|
The Moses flags used to produce these outputs are:
|
||||||
|
* -t for phrase-based trace
|
||||||
|
* -T for syntax-based trace
|
||||||
|
* -v 3 for phrase-based verbose level 3
|
||||||
|
* -output-search-graph for both search graphs
|
||||||
|
* -Tall for the Moses fork's new feature
|
||||||
|
|
||||||
|
|
||||||
|
Trouble
|
||||||
|
=======
|
||||||
|
|
||||||
|
If you are running into trouble using DIMwid or have suggestions for
|
||||||
|
improvements or new features email me at
|
||||||
|
|
||||||
|
robin DOT qrtz AT gmail DOT com
|
@ -447,11 +447,15 @@ public:
|
|||||||
stack.back().src=newSrc;
|
stack.back().src=newSrc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::cerr << "newSrc=" << newSrc << std::endl;
|
||||||
|
|
||||||
std::vector<StringTgtCand> tcands;
|
std::vector<StringTgtCand> tcands;
|
||||||
// now, look up the target candidates (aprx. TargetPhraseCollection) for
|
// now, look up the target candidates (aprx. TargetPhraseCollection) for
|
||||||
// the current path through the CN
|
// the current path through the CN
|
||||||
m_dict->GetTargetCandidates(nextP,tcands);
|
m_dict->GetTargetCandidates(nextP,tcands);
|
||||||
|
|
||||||
|
std::cerr << "tcands=" << tcands.size() << std::endl;
|
||||||
|
|
||||||
if(newRange.second>=exploredPaths.size()+newRange.first)
|
if(newRange.second>=exploredPaths.size()+newRange.first)
|
||||||
exploredPaths.resize(newRange.second-newRange.first+1,0);
|
exploredPaths.resize(newRange.second-newRange.first+1,0);
|
||||||
++exploredPaths[newRange.second-newRange.first];
|
++exploredPaths[newRange.second-newRange.first];
|
||||||
|
Loading…
Reference in New Issue
Block a user