mirror of
https://github.com/denoland/deno.git
synced 2025-01-21 21:50:00 -05:00
fix(fetch): Correctly decode multipart/form-data
names and filenames (#19145)
Currently the `multipart/form-data` parser in `Request.prototype.formData` and `Response.prototype.formData` decodes non-ASCII filenames incorrectly, as if they were encoded in Latin-1 rather than UTF-8. This happens because the header section of each `multipart/form-data` entry is decoded as Latin-1 in order to be parsed with `Headers`, which only allows `ByteString`s, but the names and filenames are never decoded correctly. This PR fixes this as a post-processing step. Note that the `multipart/form-data` parsing for this APIs in the Fetch spec is very much underspecified, and it does not specify that names and filenames must be decoded as UTF-8. However, it does require that the bodies of non-`File` entries are decoded as UTF-8, and in browsers, names and filenames always use the same encoding as the body. Closes #19142.
This commit is contained in:
parent
1c74b41855
commit
9ba2c4c42f
2 changed files with 81 additions and 5 deletions
|
@ -53,6 +53,59 @@ Deno.test(
|
|||
},
|
||||
);
|
||||
|
||||
// FormData: non-ASCII names and filenames
|
||||
Deno.test(
|
||||
{ permissions: { net: true } },
|
||||
async function bodyMultipartFormDataNonAsciiNames() {
|
||||
const boundary = "----01230123";
|
||||
const payload = [
|
||||
`--${boundary}`,
|
||||
`Content-Disposition: form-data; name="文字"`,
|
||||
"",
|
||||
"文字",
|
||||
`--${boundary}`,
|
||||
`Content-Disposition: form-data; name="file"; filename="文字"`,
|
||||
"Content-Type: application/octet-stream",
|
||||
"",
|
||||
"",
|
||||
`--${boundary}--`,
|
||||
].join("\r\n");
|
||||
|
||||
const body = buildBody(
|
||||
new TextEncoder().encode(payload),
|
||||
new Headers({
|
||||
"Content-Type": `multipart/form-data; boundary=${boundary}`,
|
||||
}),
|
||||
);
|
||||
|
||||
const formData = await body.formData();
|
||||
assert(formData.has("文字"));
|
||||
assertEquals(formData.get("文字"), "文字");
|
||||
assert(formData.has("file"));
|
||||
assert(formData.get("file") instanceof File);
|
||||
assertEquals((formData.get("file") as File).name, "文字");
|
||||
},
|
||||
);
|
||||
|
||||
// FormData: non-ASCII names and filenames roundtrip
|
||||
Deno.test(
|
||||
{ permissions: { net: true } },
|
||||
async function bodyMultipartFormDataNonAsciiRoundtrip() {
|
||||
const inFormData = new FormData();
|
||||
inFormData.append("文字", "文字");
|
||||
inFormData.append("file", new File([], "文字"));
|
||||
|
||||
const body = buildBody(inFormData);
|
||||
|
||||
const formData = await body.formData();
|
||||
assert(formData.has("文字"));
|
||||
assertEquals(formData.get("文字"), "文字");
|
||||
assert(formData.has("file"));
|
||||
assert(formData.get("file") instanceof File);
|
||||
assertEquals((formData.get("file") as File).name, "文字");
|
||||
},
|
||||
);
|
||||
|
||||
Deno.test(
|
||||
{ permissions: { net: true } },
|
||||
async function bodyURLEncodedFormData() {
|
||||
|
|
|
@ -41,6 +41,7 @@ const {
|
|||
StringPrototypeReplaceAll,
|
||||
TypeError,
|
||||
TypedArrayPrototypeSubarray,
|
||||
Uint8Array,
|
||||
} = primordials;
|
||||
|
||||
const entryList = Symbol("entry list");
|
||||
|
@ -358,6 +359,20 @@ function parseContentDisposition(value) {
|
|||
return params;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes a string containing UTF-8 mistakenly decoded as Latin-1 and
|
||||
* decodes it correctly.
|
||||
* @param {string} latin1String
|
||||
* @returns {string}
|
||||
*/
|
||||
function decodeLatin1StringAsUtf8(latin1String) {
|
||||
const buffer = new Uint8Array(latin1String.length);
|
||||
for (let i = 0; i < latin1String.length; i++) {
|
||||
buffer[i] = latin1String.charCodeAt(i);
|
||||
}
|
||||
return core.decode(buffer);
|
||||
}
|
||||
|
||||
const CRLF = "\r\n";
|
||||
const LF = StringPrototypeCodePointAt(CRLF, 1);
|
||||
const CR = StringPrototypeCodePointAt(CRLF, 0);
|
||||
|
@ -465,23 +480,31 @@ class MultipartParser {
|
|||
i - boundaryIndex - 1,
|
||||
);
|
||||
// https://fetch.spec.whatwg.org/#ref-for-dom-body-formdata
|
||||
const filename = MapPrototypeGet(disposition, "filename");
|
||||
const name = MapPrototypeGet(disposition, "name");
|
||||
// These are UTF-8 decoded as if it was Latin-1.
|
||||
// TODO(@andreubotella): Maybe we shouldn't be parsing entry headers
|
||||
// as Latin-1.
|
||||
const latin1Filename = MapPrototypeGet(disposition, "filename");
|
||||
const latin1Name = MapPrototypeGet(disposition, "name");
|
||||
|
||||
state = 5;
|
||||
// Reset
|
||||
boundaryIndex = 0;
|
||||
headerText = "";
|
||||
|
||||
if (!name) {
|
||||
if (!latin1Name) {
|
||||
continue; // Skip, unknown name
|
||||
}
|
||||
|
||||
if (filename) {
|
||||
const name = decodeLatin1StringAsUtf8(latin1Name);
|
||||
if (latin1Filename) {
|
||||
const blob = new Blob([content], {
|
||||
type: headers.get("Content-Type") || "application/octet-stream",
|
||||
});
|
||||
formData.append(name, blob, filename);
|
||||
formData.append(
|
||||
name,
|
||||
blob,
|
||||
decodeLatin1StringAsUtf8(latin1Filename),
|
||||
);
|
||||
} else {
|
||||
formData.append(name, core.decode(content));
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue