Skip to content

Commit 9fbd0f0

Browse files
felixge authored and tjfontaine committed
string_decoder: Fix failures from new test cases
This patch simplifies the implementation of StringDecoder, fixes the failures from the new test cases, and also no longer relies on v8's WriteUtf8 function to encode individual surrogates.
1 parent 22b8398 commit 9fbd0f0

1 file changed

Lines changed: 21 additions & 25 deletions

File tree

lib/string_decoder.js

Lines changed: 21 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -57,29 +57,29 @@ var StringDecoder = exports.StringDecoder = function(encoding) {
5757

5858
StringDecoder.prototype.write = function(buffer) {
5959
var charStr = '';
60-
var offset = 0;
61-
6260
// if our last write ended with an incomplete multibyte character
6361
while (this.charLength) {
6462
// determine how many remaining bytes this buffer has to offer for this char
65-
var i = (buffer.length >= this.charLength - this.charReceived) ?
63+
var available = (buffer.length >= this.charLength - this.charReceived) ?
6664
this.charLength - this.charReceived :
6765
buffer.length;
6866

6967
// add the new bytes to the char buffer
70-
buffer.copy(this.charBuffer, this.charReceived, offset, i);
71-
this.charReceived += (i - offset);
72-
offset = i;
68+
buffer.copy(this.charBuffer, this.charReceived, 0, available);
69+
this.charReceived += available;
7370

7471
if (this.charReceived < this.charLength) {
7572
// still not enough chars in this buffer? wait for more ...
7673
return '';
7774
}
7875

76+
// remove bytes belonging to the current character from the buffer
77+
buffer = buffer.slice(available, buffer.length);
78+
7979
// get the character that was split
8080
charStr = this.charBuffer.slice(0, this.charLength).toString(this.encoding);
8181

82-
// lead surrogate (D800-DBFF) is also the incomplete character
82+
// CESU-8: lead surrogate (D800-DBFF) is also the incomplete character
8383
var charCode = charStr.charCodeAt(charStr.length - 1);
8484
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
8585
this.charLength += this.surrogateSize;
@@ -89,34 +89,33 @@ StringDecoder.prototype.write = function(buffer) {
8989
this.charReceived = this.charLength = 0;
9090

9191
// if there are no more bytes in this buffer, just emit our char
92-
if (i == buffer.length) return charStr;
93-
94-
// otherwise cut off the characters end from the beginning of this buffer
95-
buffer = buffer.slice(i, buffer.length);
92+
if (buffer.length === 0) {
93+
return charStr;
94+
}
9695
break;
9796
}
9897

99-
var lenIncomplete = this.detectIncompleteChar(buffer);
98+
// determine and set charLength / charReceived
99+
this.detectIncompleteChar(buffer);
100100

101101
var end = buffer.length;
102102
if (this.charLength) {
103103
// buffer the incomplete character bytes we got
104-
buffer.copy(this.charBuffer, 0, buffer.length - lenIncomplete, end);
105-
this.charReceived = lenIncomplete;
106-
end -= lenIncomplete;
104+
buffer.copy(this.charBuffer, 0, buffer.length - this.charReceived, end);
105+
end -= this.charReceived;
107106
}
108107

109108
charStr += buffer.toString(this.encoding, 0, end);
110109

111110
var end = charStr.length - 1;
112111
var charCode = charStr.charCodeAt(end);
113-
// lead surrogate (D800-DBFF) is also the incomplete character
112+
// CESU-8: lead surrogate (D800-DBFF) is also the incomplete character
114113
if (charCode >= 0xD800 && charCode <= 0xDBFF) {
115114
var size = this.surrogateSize;
116115
this.charLength += size;
117116
this.charReceived += size;
118117
this.charBuffer.copy(this.charBuffer, size, 0, size);
119-
this.charBuffer.write(charStr.charAt(charStr.length - 1), this.encoding);
118+
buffer.copy(this.charBuffer, 0, 0, size);
120119
return charStr.substring(0, end);
121120
}
122121

@@ -153,8 +152,7 @@ StringDecoder.prototype.detectIncompleteChar = function(buffer) {
153152
break;
154153
}
155154
}
156-
157-
return i;
155+
this.charReceived = i;
158156
};
159157

160158
StringDecoder.prototype.end = function(buffer) {
@@ -177,13 +175,11 @@ function passThroughWrite(buffer) {
177175
}
178176

179177
function utf16DetectIncompleteChar(buffer) {
180-
var incomplete = this.charReceived = buffer.length % 2;
181-
this.charLength = incomplete ? 2 : 0;
182-
return incomplete;
178+
this.charReceived = buffer.length % 2;
179+
this.charLength = this.charReceived ? 2 : 0;
183180
}
184181

185182
function base64DetectIncompleteChar(buffer) {
186-
var incomplete = this.charReceived = buffer.length % 3;
187-
this.charLength = incomplete ? 3 : 0;
188-
return incomplete;
183+
this.charReceived = buffer.length % 3;
184+
this.charLength = this.charReceived ? 3 : 0;
189185
}

0 commit comments

Comments
 (0)