Convert Converter to class

This commit is contained in:
lionel-rowe 2024-05-15 11:09:06 +08:00
parent 1dc965d9a8
commit d75473b2e8
No known key found for this signature in database
2 changed files with 40 additions and 47 deletions

View File

@@ -3,7 +3,7 @@ import { type ConverterId, SKIP_ASCII_HTML, SKIP_ASCII_JS, converters } from './
describe('text-to-unicode (legacy tests)', () => {
const convertTextToUnicode = (text: string) => converters.decimalEntities.escape(text, false);
const convertUnicodeToText = converters.decimalEntities.unescape;
const convertUnicodeToText = (escaped: string) => converters.decimalEntities.unescape(escaped);
describe('convertTextToUnicode', () => {
it('a text string is converted to unicode representation', () => {

View File

@@ -11,70 +11,63 @@ function codePoints(text: string): number[] {
return [...text].map(char => char.codePointAt(0));
}
export interface Converter {
interface ConverterConfig {
name: string
escape(text: string, skipAscii: boolean): string
unescape(text: string): string
};
escape: {
charValues?(text: string): number[]
mapper(charValue: number): string
/** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
asciiSkipper: RegExp
}
unescape: {
regex: RegExp
radix: number
}
}
class Converter {
constructor(public config: ConverterConfig) {}
interface EscapeConfig {
charValues?(text: string): number[]
mapper(charValue: number): string
/** @prop regular expression for default content to skip. Must have exactly 1 capture group. */
asciiSkipper: RegExp
};
function escaper({ charValues: getCharValues, mapper, asciiSkipper: skipper }: EscapeConfig) {
return (text: string, skip: boolean): string => {
getCharValues ??= codePoints;
escape(text: string, skipAscii: boolean): string {
const { asciiSkipper, charValues, mapper } = this.config.escape;
const getCharValues = charValues ?? codePoints;
return text
.split(skip ? skipper : SKIP_NOTHING_RE)
.flatMap((x, i) => {
if (i % 2) {
return x;
}
return getCharValues(x).map(mapper);
})
.split(skipAscii ? asciiSkipper : SKIP_NOTHING_RE)
.flatMap((x, i) => i % 2 ? x : getCharValues(x).map(mapper))
.join('');
};
}
}
interface UnescapeConfig {
regex: RegExp
radix: number
};
unescape(escaped: string): string {
const { regex, radix } = this.config.unescape;
function unescaper({ regex, radix }: UnescapeConfig) {
return (escaped: string): string => {
return escaped.replace(regex, (match) => {
return String.fromCodePoint(Number.parseInt(match.replace(/\P{AHex}/gu, ''), radix));
});
};
}
}
export type ConverterId = keyof typeof converters;
const converters = {
fullUnicode: {
fullUnicode: new Converter({
name: 'Full Unicode',
escape: escaper({ mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
unescape: unescaper({ regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 }),
},
utf16: {
escape: { mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
unescape: { regex: /\\u\p{AHex}{4}|\\u\{\p{AHex}{1,6}\}/gu, radix: 16 },
}),
utf16: new Converter({
name: 'UTF-16 Code Units',
escape: escaper({ charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS }),
unescape: unescaper({ regex: /\\u\p{AHex}{4}/gu, radix: 16 }),
},
hexEntities: {
escape: { charValues: codeUnits, mapper: convertCodePointToUnicode, asciiSkipper: SKIP_ASCII_JS },
unescape: { regex: /\\u\p{AHex}{4}/gu, radix: 16 },
}),
hexEntities: new Converter({
name: 'HTML Entities (Hex)',
escape: escaper({ mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML }),
unescape: unescaper({ regex: /&#x\p{AHex}{1,6};/gu, radix: 16 }),
},
decimalEntities: {
escape: { mapper: toHexEntities, asciiSkipper: SKIP_ASCII_HTML },
unescape: { regex: /&#x\p{AHex}{1,6};/gu, radix: 16 },
}),
decimalEntities: new Converter({
name: 'HTML Entities (Decimal)',
escape: escaper({ mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML }),
unescape: unescaper({ regex: /&#\d+;/gu, radix: 10 }),
},
escape: { mapper: toDecimalEntities, asciiSkipper: SKIP_ASCII_HTML },
unescape: { regex: /&#\d+;/gu, radix: 10 },
}),
} satisfies Record<string, Converter>;
function convertCodePointToUnicode(codePoint: number): string {