Skip to content

Commit a782598

Browse files
ChALkeR authored and aduh95 committed
lib: unify ICU and no-ICU TextDecoder
PR-URL: #61409
Reviewed-By: Aviv Keller <[email protected]>
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Gürgün Dayıoğlu <[email protected]>
Reviewed-By: Richard Lau <[email protected]>
1 parent 0ceb8ca commit a782598

File tree

2 files changed

+111
-162
lines changed

2 files changed

+111
-162
lines changed

lib/internal/encoding.js

Lines changed: 84 additions & 138 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ const {
2323
ERR_INVALID_THIS,
2424
ERR_NO_ICU,
2525
} = require('internal/errors').codes;
26-
const kMethod = Symbol('method');
26+
const kSingleByte = Symbol('single-byte');
2727
const kHandle = Symbol('handle');
2828
const kFlags = Symbol('flags');
2929
const kEncoding = Symbol('encoding');
@@ -53,6 +53,8 @@ const {
5353
validateObject,
5454
kValidateObjectAllowObjectsAndNull,
5555
} = require('internal/validators');
56+
57+
const { hasIntl } = internalBinding('config');
5658
const binding = internalBinding('encoding_binding');
5759
const {
5860
encodeInto,
@@ -406,166 +408,110 @@ function parseInput(input) {
406408
}
407409
}
408410

409-
const TextDecoder =
410-
internalBinding('config').hasIntl ?
411-
makeTextDecoderICU() :
412-
makeTextDecoderJS();
413-
414-
function makeTextDecoderICU() {
415-
const {
416-
decode: _decode,
417-
getConverter,
418-
} = internalBinding('icu');
419-
420-
class TextDecoder {
421-
constructor(encoding = 'utf-8', options = kEmptyObject) {
422-
encoding = `${encoding}`;
423-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
424-
425-
const enc = getEncodingFromLabel(encoding);
426-
if (enc === undefined)
427-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
428-
429-
let flags = 0;
430-
if (options !== null) {
431-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
432-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
433-
}
434-
435-
this[kDecoder] = true;
436-
this[kFlags] = flags;
437-
this[kEncoding] = enc;
438-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
439-
this[kFatal] = Boolean(options?.fatal);
440-
// Only support fast path for UTF-8.
441-
this[kUTF8FastPath] = enc === 'utf-8';
442-
this[kHandle] = undefined;
443-
this[kMethod] = undefined;
444-
445-
if (isSinglebyteEncoding(this.encoding)) {
446-
this[kMethod] = createSinglebyteDecoder(this.encoding, this[kFatal]);
447-
} else if (!this[kUTF8FastPath]) {
448-
this.#prepareConverter();
449-
}
450-
}
451-
452-
#prepareConverter() {
453-
if (this[kHandle] !== undefined) return;
454-
let icuEncoding = this[kEncoding];
455-
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
456-
const handle = getConverter(icuEncoding, this[kFlags]);
457-
if (handle === undefined)
458-
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
459-
this[kHandle] = handle;
460-
}
461-
462-
decode(input = empty, options = kEmptyObject) {
463-
validateDecoder(this);
464-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
411+
let icuDecode, icuGetConverter;
412+
if (hasIntl) {
413+
;({
414+
decode: icuDecode,
415+
getConverter: icuGetConverter,
416+
} = internalBinding('icu'));
417+
}
465418

466-
if (this[kMethod]) return this[kMethod](parseInput(input));
419+
const kBOMSeen = Symbol('BOM seen');
467420

468-
this[kUTF8FastPath] &&= !(options?.stream);
421+
let StringDecoder;
422+
function lazyStringDecoder() {
423+
if (StringDecoder === undefined)
424+
({ StringDecoder } = require('string_decoder'));
425+
return StringDecoder;
426+
}
469427

470-
if (this[kUTF8FastPath]) {
471-
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
472-
}
428+
class TextDecoder {
429+
constructor(encoding = 'utf-8', options = kEmptyObject) {
430+
encoding = `${encoding}`;
431+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
473432

474-
this.#prepareConverter();
433+
const enc = getEncodingFromLabel(encoding);
434+
if (enc === undefined)
435+
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
475436

476-
let flags = 0;
477-
if (options !== null)
478-
flags |= options.stream ? 0 : CONVERTER_FLAGS_FLUSH;
437+
let flags = 0;
438+
if (options !== null) {
439+
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
440+
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
441+
}
479442

480-
return _decode(this[kHandle], input, flags, this.encoding);
443+
this[kDecoder] = true;
444+
this[kFlags] = flags;
445+
this[kEncoding] = enc;
446+
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
447+
this[kFatal] = Boolean(options?.fatal);
448+
this[kUTF8FastPath] = false;
449+
this[kHandle] = undefined;
450+
this[kSingleByte] = undefined; // Does not care about streaming or BOM
451+
452+
if (enc === 'utf-8') {
453+
this[kUTF8FastPath] = true;
454+
} else if (isSinglebyteEncoding(enc)) {
455+
this[kSingleByte] = createSinglebyteDecoder(enc, this[kFatal]);
456+
} else {
457+
this.#prepareConverter(); // Need to throw early if we don't support the encoding
481458
}
482459
}
483460

484-
return TextDecoder;
485-
}
486-
487-
function makeTextDecoderJS() {
488-
let StringDecoder;
489-
function lazyStringDecoder() {
490-
if (StringDecoder === undefined)
491-
({ StringDecoder } = require('string_decoder'));
492-
return StringDecoder;
461+
#prepareConverter() {
462+
if (this[kHandle] !== undefined) return;
463+
if (hasIntl) {
464+
let icuEncoding = this[kEncoding];
465+
if (icuEncoding === 'gbk') icuEncoding = 'gb18030'; // 10.1.1. GBK's decoder is gb18030's decoder
466+
const handle = icuGetConverter(icuEncoding, this[kFlags]);
467+
if (handle === undefined)
468+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
469+
this[kHandle] = handle;
470+
} else if (this[kEncoding] === 'utf-8' || this[kEncoding] === 'utf-16le') {
471+
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
472+
this[kHandle] = new (lazyStringDecoder())(this[kEncoding]);
473+
this[kBOMSeen] = false;
474+
} else {
475+
throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
476+
}
493477
}
494478

495-
const kBOMSeen = Symbol('BOM seen');
479+
decode(input = empty, options = kEmptyObject) {
480+
validateDecoder(this);
481+
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
496482

497-
function hasConverter(encoding) {
498-
return encoding === 'utf-8' || encoding === 'utf-16le';
499-
}
483+
if (this[kSingleByte]) return this[kSingleByte](parseInput(input));
500484

501-
class TextDecoder {
502-
constructor(encoding = 'utf-8', options = kEmptyObject) {
503-
encoding = `${encoding}`;
504-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
485+
const stream = options?.stream;
486+
if (this[kUTF8FastPath]) {
487+
if (!stream) return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
488+
this[kUTF8FastPath] = false;
489+
}
505490

506-
const enc = getEncodingFromLabel(encoding);
507-
if (enc === undefined)
508-
throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
491+
this.#prepareConverter();
509492

510-
let flags = 0;
511-
if (options !== null) {
512-
flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
513-
flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
514-
}
515-
516-
this[kDecoder] = true;
517-
this[kFlags] = flags;
518-
this[kEncoding] = enc;
519-
this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
520-
this[kFatal] = Boolean(options?.fatal);
521-
this[kBOMSeen] = false;
522-
this[kMethod] = undefined;
523-
524-
if (isSinglebyteEncoding(enc)) {
525-
this[kMethod] = createSinglebyteDecoder(enc, this[kFatal]);
526-
} else {
527-
if (!hasConverter(enc)) throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
528-
if (this[kFatal]) throw new ERR_NO_ICU('"fatal" option');
529-
// StringDecoder will normalize WHATWG encoding to Node.js encoding.
530-
this[kHandle] = new (lazyStringDecoder())(enc);
531-
}
493+
if (hasIntl) {
494+
const flags = stream ? 0 : CONVERTER_FLAGS_FLUSH;
495+
return icuDecode(this[kHandle], input, flags, this[kEncoding]);
532496
}
533497

534-
decode(input = empty, options = kEmptyObject) {
535-
validateDecoder(this);
536-
input = parseInput(input);
537-
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
498+
input = parseInput(input);
538499

539-
if (this[kMethod]) return this[kMethod](input);
540-
541-
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
542-
this[kBOMSeen] = false;
543-
}
500+
let result = stream ? this[kHandle].write(input) : this[kHandle].end(input);
544501

545-
if (options !== null && options.stream) {
546-
this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
547-
} else {
548-
this[kFlags] |= CONVERTER_FLAGS_FLUSH;
502+
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
503+
// If the very first result in the stream is a BOM, and we are not
504+
// explicitly told to ignore it, then we discard it.
505+
if (result[0] === '\ufeff') {
506+
result = StringPrototypeSlice(result, 1);
549507
}
508+
this[kBOMSeen] = true;
509+
}
550510

551-
let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
552-
this[kHandle].end(input) :
553-
this[kHandle].write(input);
554-
555-
if (result.length > 0 && !this[kBOMSeen] && !this[kIgnoreBOM]) {
556-
// If the very first result in the stream is a BOM, and we are not
557-
// explicitly told to ignore it, then we discard it.
558-
if (result[0] === '\ufeff') {
559-
result = StringPrototypeSlice(result, 1);
560-
}
561-
this[kBOMSeen] = true;
562-
}
511+
if (!stream) this[kBOMSeen] = false;
563512

564-
return result;
565-
}
513+
return result;
566514
}
567-
568-
return TextDecoder;
569515
}
570516

571517
// Mix in some shared properties.

test/parallel/test-whatwg-encoding-custom-textdecoder.js

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -67,32 +67,34 @@ assert(TextDecoder);
6767
}
6868

6969
// Test TextDecoder, UTF-8, fatal: true, ignoreBOM: false
70-
if (common.hasIntl) {
71-
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
72-
const dec = new TextDecoder(i, { fatal: true });
73-
assert.throws(() => dec.decode(buf.slice(0, 8)),
74-
{
75-
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
76-
name: 'TypeError',
77-
message: 'The encoded data was not valid ' +
78-
'for encoding utf-8'
79-
});
80-
});
81-
82-
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
83-
const dec = new TextDecoder(i, { fatal: true });
70+
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
71+
const dec = new TextDecoder(i, { fatal: true });
72+
assert.throws(() => dec.decode(buf.slice(0, 8)),
73+
{
74+
code: 'ERR_ENCODING_INVALID_ENCODED_DATA',
75+
name: 'TypeError',
76+
message: 'The encoded data was not valid ' +
77+
'for encoding utf-8'
78+
});
79+
});
80+
81+
['unicode-1-1-utf-8', 'utf8', 'utf-8'].forEach((i) => {
82+
const dec = new TextDecoder(i, { fatal: true });
83+
if (common.hasIntl) {
8484
dec.decode(buf.slice(0, 8), { stream: true });
8585
dec.decode(buf.slice(8));
86-
});
87-
} else {
88-
assert.throws(
89-
() => new TextDecoder('utf-8', { fatal: true }),
90-
{
91-
code: 'ERR_NO_ICU',
92-
name: 'TypeError',
93-
message: '"fatal" option is not supported on Node.js compiled without ICU'
94-
});
95-
}
86+
} else {
87+
assert.throws(
88+
() => {
89+
dec.decode(buf.slice(0, 8), { stream: true });
90+
},
91+
{
92+
code: 'ERR_NO_ICU',
93+
name: 'TypeError',
94+
message: '"fatal" option is not supported on Node.js compiled without ICU'
95+
});
96+
}
97+
});
9698

9799
// Test TextDecoder, label undefined, options null
98100
{
@@ -132,6 +134,7 @@ if (common.hasIntl) {
132134
'}'
133135
);
134136
} else {
137+
dec.decode(Uint8Array.of(0), { stream: true });
135138
assert.strictEqual(
136139
util.inspect(dec, { showHidden: true }),
137140
'TextDecoder {\n' +

0 commit comments

Comments
 (0)