2121
/**
 * Creates a decoder that converts a sequence of Buffers into strings while
 * holding back the bytes of a multi-byte character that was split across
 * write() calls.
 *
 * @param {string} [encoding='utf8'] Encoding name. It is lower-cased and the
 *     first '-' or '_' is removed, so 'UTF-8' and 'utf_8' both become 'utf8'.
 */
var StringDecoder = exports.StringDecoder = function(encoding) {
  this.encoding = (encoding || 'utf8').toLowerCase().replace(/[-_]/, '');
  switch (this.encoding) {
    case 'utf8':
      // CESU-8 encodes each half of a surrogate pair in 3 bytes.
      this.surrogateSize = 3;
      break;
    case 'ucs2':
    case 'utf16le':
      // UTF-16 encodes each half of a surrogate pair in 2 bytes.
      this.surrogateSize = 2;
      this.detectIncompleteChar = utf16DetectIncompleteChar;
      break;
    default:
      // Any other encoding is decoded by write() in a single step, so none of
      // the buffering state below is set up for it.
      this.write = passThroughWrite;
      return;
  }

  // Holds the bytes of a character that was split across writes.
  this.charBuffer = new Buffer(6);
  // Number of bytes of the pending character received so far.
  this.charReceived = 0;
  // Total number of bytes the pending character needs; 0 when none is pending.
  this.charLength = 0;
};
3044
3145
3246StringDecoder . prototype . write = function ( buffer ) {
33- // If not utf8...
34- if ( this . encoding !== 'utf8' ) {
35- return buffer . toString ( this . encoding ) ;
36- }
37-
3847 var charStr = '' ;
3948 var offset = 0 ;
49+
4050 // if our last write ended with an incomplete multibyte character
4151 while ( this . charLength ) {
4252 // determine how many remaining bytes this buffer has to offer for this char
@@ -55,16 +65,14 @@ StringDecoder.prototype.write = function(buffer) {
5565 }
5666
5767 // get the character that was split
58- charStr = this . charBuffer . slice ( 0 , this . charLength ) . toString ( ) ;
68+ charStr = this . charBuffer . slice ( 0 , this . charLength ) . toString ( this . encoding ) ;
5969
6070 // lead surrogate (D800-DBFF) is also the incomplete character
61- if ( this . charLength === 3 ) {
62- var charCode = charStr . charCodeAt ( 0 ) ;
63- if ( charCode >= 0xD800 && charCode <= 0xDBFF ) {
64- charStr = '' ;
65- this . charLength += 3 ; // size of trail surrogate (DC00-DFFF)
66- continue ;
67- }
71+ var charCode = charStr . charCodeAt ( charStr . length - 1 ) ;
72+ if ( charCode >= 0xD800 && charCode <= 0xDBFF ) {
73+ this . charLength += this . surrogateSize ;
74+ charStr = '' ;
75+ continue ;
6876 }
6977 this . charReceived = this . charLength = 0 ;
7078
@@ -76,7 +84,35 @@ StringDecoder.prototype.write = function(buffer) {
7684 break ;
7785 }
7886
87+ var lenIncomplete = this . detectIncompleteChar ( buffer ) ;
88+
89+ var end = buffer . length ;
90+ if ( this . charLength ) {
91+ // buffer the incomplete character bytes we got
92+ buffer . copy ( this . charBuffer , 0 , buffer . length - lenIncomplete , end ) ;
93+ this . charReceived = lenIncomplete ;
94+ end -= lenIncomplete ;
95+ }
96+
97+ charStr += buffer . toString ( this . encoding , 0 , end ) ;
98+
99+ var end = charStr . length - 1 ;
100+ var charCode = charStr . charCodeAt ( end ) ;
101+ // lead surrogate (D800-DBFF) is also the incomplete character
102+ if ( charCode >= 0xD800 && charCode <= 0xDBFF ) {
103+ var size = this . surrogateSize ;
104+ this . charLength += size ;
105+ this . charReceived += size ;
106+ this . charBuffer . copy ( this . charBuffer , size , 0 , size ) ;
107+ this . charBuffer . write ( charStr . charAt ( charStr . length - 1 ) , this . encoding ) ;
108+ return charStr . substring ( 0 , end ) ;
109+ }
110+
111+ // or just emit the charStr
112+ return charStr ;
113+ } ;
79114
115+ StringDecoder . prototype . detectIncompleteChar = function ( buffer ) {
80116 // determine how many bytes we have to check at the end of this buffer
81117 var i = ( buffer . length >= 3 ) ? 3 : buffer . length ;
82118
@@ -106,28 +142,15 @@ StringDecoder.prototype.write = function(buffer) {
106142 }
107143 }
108144
109- var end = buffer . length ;
110- if ( this . charLength ) {
111- // buffer the incomplete character bytes we got
112- buffer . copy ( this . charBuffer , 0 , buffer . length - i , buffer . length ) ;
113- this . charReceived = i ;
114- end -= i ;
115- }
116-
117- charStr += buffer . toString ( 'utf8' , 0 , end ) ;
145+ return i ;
146+ } ;
118147
119- // lead surrogate (D800-DBFF) is also the incomplete character
120- end = charStr . length - 1 ;
121- var charCode = charStr . charCodeAt ( end ) ;
122- if ( charCode >= 0xD800 && charCode <= 0xDBFF ) {
123- // CESU-8 represents each of Surrogate Pair by 3-bytes
124- this . charLength += 3
125- this . charReceived += 3
126- this . charBuffer . copy ( this . charBuffer , 3 , 0 , 3 ) ;
127- this . charBuffer . write ( charStr . charAt ( end ) ) ;
128- return charStr . substring ( 0 , end ) ;
129- }
/**
 * Replacement for StringDecoder.prototype.write, installed on decoders whose
 * encoding needs no buffering between writes.
 *
 * Must be invoked with `this` bound to an object that carries an `encoding`
 * property.
 *
 * @param {Buffer} buffer Bytes to decode.
 * @return {string} The whole buffer decoded with this.encoding.
 */
function passThroughWrite(buffer) {
  return buffer.toString(this.encoding);
}
130151
131- // or just emit the charStr
132- return charStr ;
133- } ;
/**
 * UTF-16 variant of detectIncompleteChar: a trailing odd byte is half of a
 * 2-byte code unit that has not fully arrived yet.
 *
 * Besides returning the result, it records it on `this`: charReceived is set
 * to the number of leftover bytes and charLength to 2 when a code unit is
 * pending, or 0 when the buffer ends on a code-unit boundary.
 *
 * @param {Buffer} buffer Chunk being decoded.
 * @return {number} 1 if the buffer ends in the middle of a code unit,
 *     otherwise 0.
 */
function utf16DetectIncompleteChar(buffer) {
  var incomplete = this.charReceived = buffer.length % 2;
  this.charLength = incomplete ? 2 : 0;
  return incomplete;
}
0 commit comments