tests: unify test-encoding

This commit is contained in:
Matt Mackall 2010-09-26 13:41:32 -05:00
parent 0d2bcd1b2a
commit 048deb9ab0
3 changed files with 245 additions and 236 deletions

View File

@ -1,62 +0,0 @@
#!/bin/sh
hg init t
cd t
# we need a repo with some legacy latin-1 changesets
hg unbundle $TESTDIR/legacy-encoding.hg
hg co
python << EOF
f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
EOF
echo % should fail with encoding error
echo "plain old ascii" > a
hg st
HGENCODING=ascii hg ci -l latin-1
echo % these should work
echo "latin-1" > a
HGENCODING=latin-1 hg ci -l latin-1
echo "utf-8" > a
HGENCODING=utf-8 hg ci -l utf-8
HGENCODING=latin-1 hg tag `cat latin-1-tag`
HGENCODING=latin-1 hg branch `cat latin-1-tag`
HGENCODING=latin-1 hg ci -m 'latin1 branch'
rm .hg/branch
echo "% hg log (ascii)"
hg --encoding ascii log
echo "% hg log (latin-1)"
hg --encoding latin-1 log
echo "% hg log (utf-8)"
hg --encoding utf-8 log
echo "% hg tags (ascii)"
HGENCODING=ascii hg tags
echo "% hg tags (latin-1)"
HGENCODING=latin-1 hg tags
echo "% hg tags (utf-8)"
HGENCODING=utf-8 hg tags
echo "% hg branches (ascii)"
HGENCODING=ascii hg branches
echo "% hg branches (latin-1)"
HGENCODING=latin-1 hg branches
echo "% hg branches (utf-8)"
HGENCODING=utf-8 hg branches
echo '[ui]' >> .hg/hgrc
echo 'fallbackencoding = koi8-r' >> .hg/hgrc
echo "% hg log (utf-8)"
HGENCODING=utf-8 hg log
echo "% hg log (dolphin)"
HGENCODING=dolphin hg log
HGENCODING=ascii hg branch `cat latin-1-tag`
cp latin-1-tag .hg/branch
HGENCODING=latin-1 hg ci -m 'should fail'
exit 0

View File

@ -1,174 +0,0 @@
adding changesets
adding manifests
adding file changes
added 2 changesets with 2 changes to 1 files
(run 'hg update' to get a working copy)
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
% should fail with encoding error
M a
? latin-1
? latin-1-tag
? utf-8
transaction abort!
rollback completed
abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)!
% these should work
marked working directory as branch é
% hg log (ascii)
changeset: 5:093c6077d1c8
branch: ?
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag ? for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: ?
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: ?
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: ?
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': ? = u'\xe9'
% hg log (latin-1)
changeset: 5:093c6077d1c8
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag é for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ÒÔÕÔØ = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': é = u'\xe9'
% hg log (utf-8)
changeset: 5:093c6077d1c8
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag é for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ÒÔÕÔØ = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': é = u'\xe9'
% hg tags (ascii)
tip 5:093c6077d1c8
? 3:ca661e7520de
% hg tags (latin-1)
tip 5:093c6077d1c8
é 3:ca661e7520de
% hg tags (utf-8)
tip 5:093c6077d1c8
é 3:ca661e7520de
% hg branches (ascii)
? 5:093c6077d1c8
default 4:94db611b4196 (inactive)
% hg branches (latin-1)
é 5:093c6077d1c8
default 4:94db611b4196 (inactive)
% hg branches (utf-8)
é 5:093c6077d1c8
default 4:94db611b4196 (inactive)
% hg log (utf-8)
changeset: 5:093c6077d1c8
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag é for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ртуть = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': И = u'\xe9'
% hg log (dolphin)
abort: unknown encoding: dolphin, please check your locale settings
abort: decoding near 'é': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)!
abort: branch name not in UTF-8!

245
tests/test-encoding.t Normal file
View File

@ -0,0 +1,245 @@
Test character encoding
$ hg init t
$ cd t
we need a repo with some legacy latin-1 changesets
$ hg unbundle $TESTDIR/legacy-encoding.hg
adding changesets
adding manifests
adding file changes
added 2 changesets with 2 changes to 1 files
(run 'hg update' to get a working copy)
$ hg co
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
$ python << EOF
> f = file('latin-1', 'w'); f.write("latin-1 e' encoded: \xe9"); f.close()
> f = file('utf-8', 'w'); f.write("utf-8 e' encoded: \xc3\xa9"); f.close()
> f = file('latin-1-tag', 'w'); f.write("\xe9"); f.close()
> EOF
should fail with encoding error
$ echo "plain old ascii" > a
$ hg st
M a
? latin-1
? latin-1-tag
? utf-8
$ HGENCODING=ascii hg ci -l latin-1
transaction abort!
rollback completed
abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)!
[255]
these should work
$ echo "latin-1" > a
$ HGENCODING=latin-1 hg ci -l latin-1
$ echo "utf-8" > a
$ HGENCODING=utf-8 hg ci -l utf-8
$ HGENCODING=latin-1 hg tag `cat latin-1-tag`
$ HGENCODING=latin-1 hg branch `cat latin-1-tag`
marked working directory as branch é
$ HGENCODING=latin-1 hg ci -m 'latin1 branch'
$ rm .hg/branch
hg log (ascii)
$ hg --encoding ascii log
changeset: 5:093c6077d1c8
branch: ?
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag ? for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: ?
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: ?
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: ?
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ????? = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': ? = u'\xe9'
hg log (latin-1)
$ hg --encoding latin-1 log
changeset: 5:093c6077d1c8
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag é for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ÒÔÕÔØ = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': é = u'\xe9'
hg log (utf-8)
$ hg --encoding utf-8 log
changeset: 5:093c6077d1c8
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag é for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ÃÃÃÃØ = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': é = u'\xe9'
hg tags (ascii)
$ HGENCODING=ascii hg tags
tip 5:093c6077d1c8
? 3:ca661e7520de
hg tags (latin-1)
$ HGENCODING=latin-1 hg tags
tip 5:093c6077d1c8
é 3:ca661e7520de
hg tags (utf-8)
$ HGENCODING=utf-8 hg tags
tip 5:093c6077d1c8
é 3:ca661e7520de
hg branches (ascii)
$ HGENCODING=ascii hg branches
? 5:093c6077d1c8
default 4:94db611b4196 (inactive)
hg branches (latin-1)
$ HGENCODING=latin-1 hg branches
é 5:093c6077d1c8
default 4:94db611b4196 (inactive)
hg branches (utf-8)
$ HGENCODING=utf-8 hg branches
é 5:093c6077d1c8
default 4:94db611b4196 (inactive)
$ echo '[ui]' >> .hg/hgrc
$ echo 'fallbackencoding = koi8-r' >> .hg/hgrc
hg log (utf-8)
$ HGENCODING=utf-8 hg log
changeset: 5:093c6077d1c8
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
changeset: 4:94db611b4196
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: Added tag é for changeset ca661e7520de
changeset: 3:ca661e7520de
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
changeset: 2:650c6f3d55dd
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
changeset: 1:0e5b7e3f9c4a
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: koi8-r: ÑÑуÑÑŒ = u'\u0440\u0442\u0443\u0442\u044c'
changeset: 0:1e78a93102a3
user: test
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': И = u'\xe9'
hg log (dolphin)
$ HGENCODING=dolphin hg log
abort: unknown encoding: dolphin, please check your locale settings
[255]
$ HGENCODING=ascii hg branch `cat latin-1-tag`
abort: decoding near 'é': 'ascii' codec can't decode byte 0xe9 in position 0: ordinal not in range(128)!
[255]
$ cp latin-1-tag .hg/branch
$ HGENCODING=latin-1 hg ci -m 'should fail'
abort: branch name not in UTF-8!
[255]