1- import { NumberUtils } from "./utils/number_utils" ;
2-
3- const FIRST_BIT = 0x80 ;
4- const FIRST_TWO_BITS = 0xc0 ;
5- const FIRST_THREE_BITS = 0xe0 ;
6- const FIRST_FOUR_BITS = 0xf0 ;
7- const FIRST_FIVE_BITS = 0xf8 ;
8-
9- const TWO_BIT_CHAR = 0xc0 ;
10- const THREE_BIT_CHAR = 0xe0 ;
11- const FOUR_BIT_CHAR = 0xf0 ;
12- const CONTINUING_CHAR = 0x80 ;
13-
14- // max utf8 values representable in given number of bytes
15- const ONE_BYTE_MAX = 0x7f ;
16- const TWO_BYTE_MAX = 0x7ff ;
17- const THREE_BYTE_MAX = 0xf7ff ;
18-
1+ import { BSONError } from './error' ;
2+
/** Minimal shape of the WHATWG `TextDecoder` instance this module relies on. */
interface TextDecoder {
  readonly encoding: string;
  readonly fatal: boolean;
  readonly ignoreBOM: boolean;
  decode(input?: Uint8Array): string;
}

/** Constructor for {@link TextDecoder}; only the 'utf8' label is accepted by this typing. */
interface TextDecoderConstructor {
  new (label: 'utf8', options: { fatal: boolean; ignoreBOM?: boolean }): TextDecoder;
}

/** Minimal shape of the WHATWG `TextEncoder` instance this module relies on. */
interface TextEncoder {
  readonly encoding: string;
  encode(input?: string): Uint8Array;
}

/** Constructor for {@link TextEncoder}; it takes no arguments. */
interface TextEncoderConstructor {
  new (): TextEncoder;
}

// Node byte utils global
declare const TextDecoder: TextDecoderConstructor;
declare const TextEncoder: TextEncoderConstructor;
1924
/**
 * Decodes the bytes of `buffer` in the range `[start, end)` as UTF-8 and returns the string.
 *
 * Despite the name, this function does not return a validity flag: validation happens as a
 * side effect of decoding when `fatal` is true.
 *
 * @param buffer - The bytes to decode
 * @param start - The index of the first byte to decode (inclusive)
 * @param end - The index at which to stop decoding (exclusive)
 * @param fatal - When true, a decoding failure is rethrown as a BSONError. When false, the
 *   decoder is constructed non-fatal; per the Encoding standard, malformed sequences are then
 *   replaced with U+FFFD instead of throwing.
 * @returns The decoded string
 * @throws BSONError when `fatal` is true and decoding fails; the underlying error is attached
 *   as `cause`
 */
export function validateUtf8(
  buffer: Uint8Array,
  start: number,
  end: number,
  fatal: boolean
): string {
  // subarray is a view over the same memory (same index clamping as slice), so the bytes
  // are not copied just to be decoded once.
  const bytes = buffer.subarray(start, end);
  const decoder = new TextDecoder('utf8', { fatal });

  if (!fatal) {
    return decoder.decode(bytes);
  }

  try {
    return decoder.decode(bytes);
  } catch (cause) {
    throw new BSONError('Invalid UTF-8 string in BSON document', { cause });
  }
}
59-
60- function parseUtf8Bytes ( arr : number [ ] ) : number {
61- arr [ 0 ] >>= ( arr . length - 1 ) ;
62- for ( let i = 1 ; i < arr . length ; i ++ ) {
63- arr [ i ] >>= 2 ;
64- arr [ i ] <<= i * 8 ;
65- arr [ 0 ] = arr [ 0 ] | arr [ i ]
66- }
67- return arr [ 0 ] ;
68- }
0 commit comments