mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-19 07:07:24 +03:00
commit
f04da9bcd5
110
scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
Normal file
110
scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lt
Normal file
@ -0,0 +1,110 @@
|
||||
# Anything in this file, followed by a period (and an upper-case word),
|
||||
# does NOT indicate an end-of-sentence marker.
|
||||
# Special cases are included for prefixes that ONLY appear before 0-9 numbers.
|
||||
|
||||
# Any single upper case letter followed by a period is not a sentence ender
|
||||
# (excluding I occasionally, but we leave it in)
|
||||
# usually upper case letters are initials in a name
|
||||
A
|
||||
Ā
|
||||
B
|
||||
C
|
||||
Č
|
||||
D
|
||||
E
|
||||
Ē
|
||||
F
|
||||
G
|
||||
Ģ
|
||||
H
|
||||
I
|
||||
Ī
|
||||
J
|
||||
K
|
||||
Ķ
|
||||
L
|
||||
Ļ
|
||||
M
|
||||
N
|
||||
Ņ
|
||||
O
|
||||
P
|
||||
Q
|
||||
R
|
||||
S
|
||||
Š
|
||||
T
|
||||
U
|
||||
Ū
|
||||
V
|
||||
W
|
||||
X
|
||||
Y
|
||||
Z
|
||||
Ž
|
||||
|
||||
# Abbreviations: m. = mėnesis (month), d. = diena (day), g. = gimęs (born)
|
||||
m
|
||||
d
|
||||
g
|
||||
|
||||
# Day and month abbreviations
|
||||
# Pr. = Pirmadienis (Monday), Pn. = Penktadienis (Friday)
|
||||
Pr
|
||||
Pn
|
||||
Pirm
|
||||
Antr
|
||||
Treč
|
||||
Ketv
|
||||
Penkt
|
||||
Šešt
|
||||
Sekm
|
||||
Saus
|
||||
Vas
|
||||
Kov
|
||||
Bal
|
||||
Geg
|
||||
Birž
|
||||
Liep
|
||||
Rugpj
|
||||
Rugs
|
||||
Spal
|
||||
Lapkr
|
||||
Gruod
|
||||
|
||||
# List of titles. These are often followed by upper-case names, but do
|
||||
# not indicate sentence breaks
|
||||
#
|
||||
# Gerbiamasis
|
||||
Gerb
|
||||
|
||||
# XXX TODO: the entries below are not quite correct for Lithuanian; they were copied from the Latvian list
|
||||
dr
|
||||
Dr
|
||||
med
|
||||
prof
|
||||
Prof
|
||||
inž
|
||||
Inž
|
||||
ist.loc
|
||||
Ist.loc
|
||||
kor.loc
|
||||
Kor.loc
|
||||
v.i
|
||||
vietn
|
||||
Vietn
|
||||
|
||||
# misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT
|
||||
# fall into this category - it sometimes ends a sentence)
|
||||
# angl. = angliškai (in English)
|
||||
# dab. = dabartinė (current, present-day)
|
||||
angl
|
||||
dab
|
||||
|
||||
|
||||
#Numbers only. These act as non-breaking prefixes only when followed by a numeric sequence
|
||||
# append the NUMERIC_ONLY marker (written between '#' characters, as in the entries below) after the word to enable this behaviour
|
||||
#This case is mostly for the English "No.", which can either be a sentence of its own, or
|
||||
#if followed by a number, a non-breaking prefix
|
||||
No #NUMERIC_ONLY#
|
||||
Nr #NUMERIC_ONLY#
|
Loading…
Reference in New Issue
Block a user