open-source-search-engine/Iso8859.cpp
2013-08-02 13:12:24 -07:00

439 lines
24 KiB
C++

#include "Iso8859.h"
// Fallback folding table for charsets that are highly "non-latin".
// Printable ASCII and TAB/LF/CR map to themselves; every other byte
// (remaining control codes, DEL, and the whole upper half) maps to a space.
// Each row below covers 16 consecutive byte values starting at the offset
// shown in the leading comment.
const unsigned char map_8859_default[256] =
{
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','\t','\n', ' ', ' ','\r', ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[','\\', ']', '^', '_',
/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', ' ',
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xC0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xD0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xE0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xF0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '
};
// Latin-1
// Adapted from Wikipedia:
// Albanian, Basque, Catalan, Danish, Dutch (missing some letters),
// English, Estonian (missing some letters), Faroese,
// French (missing some letters), Finnish (missing some letters),
// Galician, German, Icelandic, Irish (new orthography), Italian,
// Latin, Norwegian, Portuguese, Rhaeto-Romanic, Scottish, Spanish,
// Swedish, Afrikaans, Swahili
//
// Folds every ISO 8859-1 byte to a single ASCII byte: printable ASCII and
// TAB/LF/CR are kept, accented letters become their base letter, and
// everything else becomes a space. Each row covers 16 byte values starting
// at the offset in the leading comment.
//
// 0xF0 (small eth) folds to 'd' so that it stays the lowercase partner of
// 0xD0 (capital eth -> 'D'); it used to fold to 'o', which broke
// case-insensitive matching of that letter.
const unsigned char map_8859_1[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: C1 controls
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'D', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'd', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'y'
};
// Latin-2
// Adapted from Wikipedia:
// Bosnian, Croatian, Czech, Hungarian, Polish, Romanian, Serbian,
// Serbocroatian, Slovak, Slovenian, Upper Sorbian and Lower Sorbian
//
// One output byte per input byte: ASCII and TAB/LF/CR are preserved,
// accented letters collapse to their unaccented ASCII letter, and all
// remaining bytes turn into a space. Rows hold 16 entries each; the
// leading comment gives the first byte value of the row.
const unsigned char map_8859_2[256] =
{
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','\t','\n', ' ', ' ','\r', ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[','\\', ']', '^', '_',
/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', ' ',
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xA0 */ ' ', 'A', ' ', 'L', ' ', 'L', 'S', ' ', ' ', 'S', 'S', 'T', 'Z', ' ', 'Z', 'Z',
/* 0xB0 */ ' ', 'a', ' ', 'l', ' ', 'l', 's', ' ', ' ', 's', 's', 't', 'z', ' ', 'z', 'z',
/* 0xC0 */ 'R', 'A', 'A', 'A', 'A', 'L', 'C', 'C', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'D',
/* 0xD0 */ 'D', 'N', 'N', 'O', 'O', 'O', 'O', ' ', 'R', 'U', 'U', 'U', 'U', 'Y', 'T', ' ',
/* 0xE0 */ 'r', 'a', 'a', 'a', 'a', 'l', 'c', 'c', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'd',
/* 0xF0 */ 'd', 'n', 'n', 'o', 'o', 'o', 'o', ' ', 'r', 'u', 'u', 'u', 'u', 'y', 't', ' '
};
// Latin-3 (South European)
// Adapted from Wikipedia:
// Turkish (superseded by 8859-9), Maltese, Esperanto
//
// Folds every ISO 8859-3 byte to a single ASCII byte: printable ASCII and
// TAB/LF/CR are kept, accented letters become their base letter, and
// everything else becomes a space. Each row covers 16 byte values starting
// at the offset in the leading comment.
//
// 0xA9 (capital I with dot above) and 0xB9 (dotless small i) fold to
// 'I' / 'i', the same way map_8859_9 folds those two Turkish letters at
// 0xDD / 0xFD. They were previously folded to 'L' / 'l'.
const unsigned char map_8859_3[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: C1 controls
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols and unassigned positions dropped, letters folded
/* 0xA0 */ ' ', 'H', ' ', ' ', ' ', ' ', 'H', ' ', ' ', 'I', 'S', 'G', 'J', ' ', ' ', 'Z',
/* 0xB0 */ ' ', 'h', ' ', ' ', ' ', ' ', 'h', ' ', ' ', 'i', 's', 'g', 'j', ' ', ' ', 'z',
/* 0xC0 */ 'A', 'A', 'A', ' ', 'A', 'C', 'C', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ ' ', 'N', 'O', 'O', 'O', 'G', 'O', ' ', 'G', 'U', 'U', 'U', 'U', 'U', 'S', ' ',
/* 0xE0 */ 'a', 'a', 'a', ' ', 'a', 'c', 'c', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ ' ', 'n', 'o', 'o', 'o', 'g', 'o', ' ', 'g', 'u', 'u', 'u', 'u', 'u', 's', ' '
};
// Latin-4 (North European)
// Adapted from Wikipedia:
// Estonian, Latvian, Lithuanian, Greenlandic, and Sami
//
// One output byte per input byte: ASCII and TAB/LF/CR are preserved,
// accented letters collapse to their unaccented ASCII letter, and all
// remaining bytes turn into a space. Rows hold 16 entries each; the
// leading comment gives the first byte value of the row.
const unsigned char map_8859_4[256] =
{
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','\t','\n', ' ', ' ','\r', ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[','\\', ']', '^', '_',
/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', ' ',
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xA0 */ ' ', 'A', 'k', 'R', ' ', 'I', 'L', ' ', ' ', 'S', 'E', 'G', 'T', ' ', 'Z', ' ',
/* 0xB0 */ ' ', 'a', ' ', 'r', ' ', 'i', 'l', ' ', ' ', 's', 'e', 'g', 't', 'N', 'z', 'n',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'I', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I',
/* 0xD0 */ 'D', 'N', 'O', 'K', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'U', 'U', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'i', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i',
/* 0xF0 */ 'd', 'n', 'o', 'k', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'u', 'u', ' '
};
// Latin-5 (Turkish)
// Adapted from Wikipedia:
// Turkish
//
// One output byte per input byte: ASCII and TAB/LF/CR are preserved,
// accented letters collapse to their unaccented ASCII letter, and all
// remaining bytes turn into a space. Rows hold 16 entries each; the
// leading comment gives the first byte value of the row.
const unsigned char map_8859_9[256] =
{
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','\t','\n', ' ', ' ','\r', ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[','\\', ']', '^', '_',
/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', ' ',
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'G', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'I', 'S', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'g', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'i', 's', 'y'
};
// Latin-6 (Nordic)
// Adapted from Wikipedia
//
// Folds every ISO 8859-10 byte to a single ASCII byte: printable ASCII and
// TAB/LF/CR are kept, accented letters become their base letter, and
// everything else becomes a space. Each row covers 16 byte values starting
// at the offset in the leading comment.
//
// 0xF0 (small eth) folds to 'd' so that it stays the lowercase partner of
// 0xD0 (capital eth -> 'D'); it used to fold to 'o', which broke
// case-insensitive matching of that letter.
const unsigned char map_8859_10[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: C1 controls
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', 'A', 'E', 'G', 'I', 'I', 'K', ' ', 'L', 'D', 'S', 'T', 'Z', ' ', 'U', 'N',
/* 0xB0 */ ' ', 'a', 'e', 'g', 'i', 'i', 'k', ' ', 'l', 'd', 's', 't', 'z', ' ', 'u', 'n',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'I', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I',
/* 0xD0 */ 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'U', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'i', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i',
/* 0xF0 */ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 'u', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'k'
};
// Latin-7 (Baltic Rim)
// Adapted from Wikipedia
//
// Folds every ISO 8859-13 byte to a single ASCII byte: printable ASCII and
// TAB/LF/CR are kept, accented letters become their base letter, and
// everything else becomes a space. Each row covers 16 byte values starting
// at the offset in the leading comment.
//
// 0xBA (small r with cedilla) and 0xBF (small ae) fold to 'r' / 'a' so they
// pair with their capitals at 0xAA ('R') and 0xAF ('A'). They were
// previously dropped to a space while the capitals were kept.
const unsigned char map_8859_13[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: C1 controls
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'R', ' ', ' ', ' ', ' ', 'A',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'r', ' ', ' ', ' ', ' ', 'a',
/* 0xC0 */ 'A', 'I', 'A', 'C', 'A', 'A', 'E', 'E', 'C', 'E', 'Z', 'E', 'G', 'K', 'I', 'L',
/* 0xD0 */ 'S', 'N', 'N', 'O', 'O', 'O', 'O', ' ', 'U', 'L', 'S', 'U', 'U', 'Z', 'Z', ' ',
/* 0xE0 */ 'a', 'i', 'a', 'c', 'a', 'a', 'e', 'e', 'c', 'e', 'z', 'e', 'g', 'k', 'i', 'l',
/* 0xF0 */ 's', 'n', 'n', 'o', 'o', 'o', 'o', ' ', 'u', 'l', 's', 'u', 'u', 'z', 'z', ' '
};
// Latin-8 (Celtic)
// Adapted from Wikipedia:
// Gaelic, Welsh, Breton
//
// Folds every ISO 8859-14 byte to a single ASCII byte: printable ASCII and
// TAB/LF/CR are kept, accented letters become their base letter, and
// everything else becomes a space. Each row covers 16 byte values starting
// at the offset in the leading comment.
//
// 0xDE (capital Y with circumflex) folds to uppercase 'Y'; its lowercase
// partner at 0xFE folds to 'y'. 0xDE previously folded to lowercase 'y',
// the only uppercase letter in this table that lost its case.
const unsigned char map_8859_14[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: C1 controls
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', 'B', 'b', ' ', 'C', 'c', 'D', ' ', 'W', ' ', 'W', 'd', 'Y', ' ', ' ', 'Y',
/* 0xB0 */ 'F', 'f', 'G', 'g', 'M', 'm', ' ', 'P', 'w', 'p', 'w', 'S', 'y', 'W', 'w', 's',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'W', 'N', 'O', 'O', 'O', 'O', 'O', 'T', ' ', 'U', 'U', 'U', 'U', 'Y', 'Y', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'w', 'n', 'o', 'o', 'o', 'o', 'o', 't', ' ', 'u', 'u', 'u', 'u', 'y', 'y', 'y'
};
// Latin-9
// Adapted from Wikipedia:
// Update of 8859-1
// English, French, German, Spanish and Portuguese
//
// Folds every ISO 8859-15 byte to a single ASCII byte: printable ASCII and
// TAB/LF/CR are kept, accented letters become their base letter, and
// everything else becomes a space. Each row covers 16 byte values starting
// at the offset in the leading comment.
//
// 0xF0 (small eth) folds to 'd' so that it stays the lowercase partner of
// 0xD0 (capital eth -> 'D'); it used to fold to 'o', which broke
// case-insensitive matching of that letter.
const unsigned char map_8859_15[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: C1 controls
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', 'S', ' ', 's', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', 'Z', ' ', ' ', ' ', 'z', ' ', ' ', ' ', 'O', 'o', 'Y', ' ',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'D', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'd', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'y'
};
// Latin-10 "South-Eastern European"
// Adapted from Wikipedia:
// Albanian, Croatian, Hungarian, Polish, Romanian and Slovenian, French,
// Italian and Irish Gaelic (new orthography).
//
// One output byte per input byte: ASCII and TAB/LF/CR are preserved,
// accented letters collapse to their unaccented ASCII letter, and all
// remaining bytes turn into a space. Rows hold 16 entries each; the
// leading comment gives the first byte value of the row.
const unsigned char map_8859_16[256] =
{
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','\t','\n', ' ', ' ','\r', ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[','\\', ']', '^', '_',
/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', ' ',
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xA0 */ ' ', 'A', 'a', 'L', ' ', ' ', 'S', ' ', 's', ' ', 'S', ' ', 'Z', ' ', 'z', 'Z',
/* 0xB0 */ ' ', ' ', 'C', 'l', 'Z', ' ', ' ', ' ', 'z', 'c', 's', ' ', 'O', 'o', 'Y', 'z',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'C', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'S', 'U', 'U', 'U', 'U', 'U', 'E', 'T', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'c', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'd', 'n', 'o', 'o', 'o', 'o', 'o', 's', 'u', 'u', 'u', 'u', 'u', 'e', 't', 'y'
};
// Array of all 8859 charset tables. Slot N holds the folding table for
// ISO 8859-N; slot 0 and the parts that have no dedicated table in this
// file (5-8, 11, 12) point at map_8859_default.
// ISO_8859_NUM_CHARSETS is not defined in this file (presumably it comes
// from Iso8859.h); the 17 initializers below fill slots 0 through 16.
const unsigned char* map_8859[ISO_8859_NUM_CHARSETS + 1] =
{
/*  0 */ map_8859_default,
/*  1 */ map_8859_1,
/*  2 */ map_8859_2,
/*  3 */ map_8859_3,
/*  4 */ map_8859_4,
/*  5 */ map_8859_default,
/*  6 */ map_8859_default,
/*  7 */ map_8859_default,
/*  8 */ map_8859_default,
/*  9 */ map_8859_9,
/* 10 */ map_8859_10,
/* 11 */ map_8859_default,
/* 12 */ map_8859_default,
/* 13 */ map_8859_13,
/* 14 */ map_8859_14,
/* 15 */ map_8859_15,
/* 16 */ map_8859_16
};
// Windows-1251 (Cyrillic)
//
// Folds every byte to a single ASCII byte: printable ASCII and TAB/LF/CR
// are kept, Cyrillic letters with a Latin look-alike become that Latin
// letter, and everything else becomes a space (the soft hyphen at 0xAD
// becomes '-'). Each row covers 16 byte values starting at the offset in
// the leading comment.
//
// Row 0xB0 was shifted by one position: the plus-minus sign at 0xB1 was
// folded to 'I', capital Byelorussian-Ukrainian I at 0xB2 to lowercase 'i',
// and its lowercase form at 0xB3 was dropped. The entries are now
// 0xB1 -> ' ', 0xB2 -> 'I', 0xB3 -> 'i'.
const unsigned char map_win_1251[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: printable in Windows code pages (not C1 controls)
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'K', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'k', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded by resemblance
/* 0xA0 */ ' ', 'Y', 'y', 'J', ' ', ' ', ' ', ' ', 'E', ' ', ' ', ' ', ' ', '-', ' ', 'I',
/* 0xB0 */ ' ', ' ', 'I', 'i', ' ', ' ', ' ', ' ', 'e', ' ', ' ', ' ', 'j', 'S', 's', 'i',
/* 0xC0 */ 'A', 'b', 'B', ' ', ' ', 'E', ' ', ' ', 'N', 'N', 'K', ' ', 'M', 'H', 'O', ' ',
/* 0xD0 */ 'P', 'C', 'T', 'Y', ' ', 'X', ' ', ' ', 'W', 'W', 'b', ' ', 'b', ' ', ' ', 'R',
/* 0xE0 */ 'a', 'o', 'b', ' ', ' ', 'e', ' ', ' ', 'n', 'n', 'k', ' ', 'm', 'h', 'o', ' ',
/* 0xF0 */ 'p', 'c', 't', 'y', ' ', 'x', ' ', ' ', 'w', 'w', 'b', ' ', 'b', ' ', ' ', 'r'
};
// Windows-1252 (Western European)
//
// Folds every byte to a single ASCII byte: printable ASCII and TAB/LF/CR
// are kept, accented letters become their base letter, and everything else
// becomes a space (the soft hyphen at 0xAD becomes '-'). Each row covers
// 16 byte values starting at the offset in the leading comment.
//
// Two entries were corrected for consistency:
//  - 0x9F (capital Y with diaeresis) folds to 'Y', as it does in
//    map_win_1254 (0x9F) and map_8859_15 (0xBE); it used to be dropped.
//  - 0xF0 (small eth) folds to 'd' to pair with 0xD0 (capital eth -> 'D');
//    it used to fold to 'o'.
const unsigned char map_win_1252[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: printable in Windows code pages (not C1 controls)
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'S', ' ', 'O', ' ', 'Z', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 's', ' ', 'o', ' ', 'z', 'Y',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '-', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'D', 'N', 'O', 'O', 'O', 'O', 'O', ' ', ' ', 'U', 'U', 'U', 'U', 'Y', ' ', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'd', 'n', 'o', 'o', 'o', 'o', 'o', ' ', ' ', 'u', 'u', 'u', 'u', 'y', ' ', 'y'
};
// Windows-1253 (Greek)
//
// Folds every byte to a single ASCII byte: printable ASCII and TAB/LF/CR
// are kept, Greek letters with a Latin look-alike become that Latin
// letter, and everything else becomes a space. Each row covers 16 byte
// values starting at the offset in the leading comment.
//
// Two entries were corrected for consistency within this table:
//  - 0xE9 (small iota) folds to 'i', matching the accented small iotas at
//    0xDF and 0xFA; it used to fold to 'l'.
//  - 0xF7 (small chi) folds to lowercase 'x'; it used to fold to uppercase
//    'X', the same value as capital chi at 0xD7.
const unsigned char map_win_1253[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: printable in Windows code pages (not C1 controls)
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0xA0-0xFF: symbols dropped, letters folded by resemblance
/* 0xA0 */ ' ', ' ', 'A', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'E', 'H', 'I', ' ', 'O', ' ', 'Y', ' ',
/* 0xC0 */ ' ', 'A', 'B', ' ', ' ', 'E', 'Z', 'H', ' ', 'I', 'K', ' ', 'M', 'N', ' ', 'O',
/* 0xD0 */ ' ', 'P', ' ', ' ', 'T', 'Y', ' ', 'X', ' ', ' ', 'I', 'Y', 'a', 'e', 'n', 'i',
/* 0xE0 */ 'v', 'a', 'b', ' ', ' ', 'e', ' ', 'n', ' ', 'i', 'k', ' ', ' ', 'v', ' ', 'o',
/* 0xF0 */ ' ', 'p', ' ', 'o', 't', 'v', ' ', 'x', ' ', 'w', 'i', 'v', 'o', 'v', 'w', ' '
};
// Windows-1254 (Turkish)
//
// Folds every byte to a single ASCII byte: printable ASCII and TAB/LF/CR
// are kept, accented letters become their base letter, and everything else
// becomes a space. Each row covers 16 byte values starting at the offset
// in the leading comment.
//
// Three entries were corrected for consistency:
//  - 0xD7 (multiplication sign) folds to ' ' like the division sign at
//    0xF7 and like 0xD7 in every other table in this file; it used to
//    fold to the letter 'X'.
//  - 0xD8 (capital O with stroke) folds to the letter 'O' to pair with
//    0xF8 ('o'); it used to fold to the digit '0'.
//  - 0xFF (small y with diaeresis) folds to 'y' to pair with 0x9F ('Y');
//    it used to be dropped.
const unsigned char map_win_1254[256] =
{
// 0x00-0x1F: C0 controls (only TAB, LF and CR survive)
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',0x09,0x0A, ' ', ' ',0x0D, ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
// 0x20-0x7F: ASCII passes through, DEL becomes a space
/* 0x20 */ 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
/* 0x30 */ 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
/* 0x40 */ 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
/* 0x50 */ 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
/* 0x60 */ 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
/* 0x70 */ 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E, ' ',
// 0x80-0x9F: printable in Windows code pages (not C1 controls)
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'S', ' ', 'O', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 's', ' ', 'o', ' ', ' ', 'Y',
// 0xA0-0xFF: symbols dropped, letters folded
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xC0 */ 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
/* 0xD0 */ 'G', 'N', 'O', 'O', 'O', 'O', 'O', ' ', 'O', 'U', 'U', 'U', 'U', 'I', 'S', ' ',
/* 0xE0 */ 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
/* 0xF0 */ 'g', 'n', 'o', 'o', 'o', 'o', 'o', ' ', 'o', 'u', 'u', 'u', 'u', 'i', 's', 'y'
};
// Folding table for Windows-1255. Printable ASCII and TAB/LF/CR map to
// themselves; every other byte (remaining control codes, DEL, and the
// whole upper half) maps to a space. Rows hold 16 entries each; the
// leading comment gives the first byte value of the row.
const unsigned char map_win_1255[256] =
{
/* 0x00 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ','\t','\n', ' ', ' ','\r', ' ', ' ',
/* 0x10 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x20 */ ' ', '!', '"', '#', '$', '%', '&','\'', '(', ')', '*', '+', ',', '-', '.', '/',
/* 0x30 */ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
/* 0x40 */ '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
/* 0x50 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[','\\', ']', '^', '_',
/* 0x60 */ '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
/* 0x70 */ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', ' ',
/* 0x80 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0x90 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xA0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xB0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xC0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xD0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xE0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
/* 0xF0 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '
};
// Array of all windows charset tables. Slot N holds the folding table for
// Windows-125N. Slot 0 is a dummy that will never be used; it points at
// map_win_1252 so that the slot is not NULL.
// WIN_125X_NUM_CHARSETS is not defined in this file (presumably it comes
// from Iso8859.h); the six initializers below fill slots 0 through 5.
const unsigned char* map_win_125x[WIN_125X_NUM_CHARSETS + 1] =
{
/* 0 */ map_win_1252,
/* 1 */ map_win_1251,
/* 2 */ map_win_1252,
/* 3 */ map_win_1253,
/* 4 */ map_win_1254,
/* 5 */ map_win_1255
};