mirror of
https://github.com/denoland/deno.git
synced 2025-03-03 17:34:47 -05:00
fix: parse error when transpiling code with BOM (#11688)
Co-authored-by: David Sherret <dsherret@gmail.com>
This commit is contained in:
parent
02b23e0575
commit
163f2ef571
7 changed files with 44 additions and 17 deletions
|
@ -25,6 +25,7 @@
|
||||||
"cli/tests/testdata/inline_js_source_map*",
|
"cli/tests/testdata/inline_js_source_map*",
|
||||||
"cli/tests/testdata/badly_formatted.md",
|
"cli/tests/testdata/badly_formatted.md",
|
||||||
"cli/tests/testdata/badly_formatted.json",
|
"cli/tests/testdata/badly_formatted.json",
|
||||||
|
"cli/tests/testdata/byte_order_mark.ts",
|
||||||
"cli/tsc/*typescript.js",
|
"cli/tsc/*typescript.js",
|
||||||
"test_util/std",
|
"test_util/std",
|
||||||
"test_util/wpt",
|
"test_util/wpt",
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
use crate::config_file;
|
use crate::config_file;
|
||||||
use crate::media_type::MediaType;
|
use crate::media_type::MediaType;
|
||||||
|
use crate::text_encoding::strip_bom;
|
||||||
|
|
||||||
use deno_core::error::AnyError;
|
use deno_core::error::AnyError;
|
||||||
use deno_core::resolve_url_or_path;
|
use deno_core::resolve_url_or_path;
|
||||||
|
@ -392,10 +393,15 @@ pub fn parse(
|
||||||
source: &str,
|
source: &str,
|
||||||
media_type: &MediaType,
|
media_type: &MediaType,
|
||||||
) -> Result<ParsedModule, AnyError> {
|
) -> Result<ParsedModule, AnyError> {
|
||||||
|
let source = strip_bom(source);
|
||||||
let info = SourceFileInfo::new(specifier, source);
|
let info = SourceFileInfo::new(specifier, source);
|
||||||
let input =
|
let input =
|
||||||
StringInput::new(source, BytePos(0), BytePos(source.len() as u32));
|
StringInput::new(source, BytePos(0), BytePos(source.len() as u32));
|
||||||
let (comments, module) = parse_string_input(&info, input, media_type)?;
|
let (comments, module) =
|
||||||
|
parse_string_input(input, media_type).map_err(|err| Diagnostic {
|
||||||
|
location: info.get_location(err.span().lo),
|
||||||
|
message: err.into_kind().msg().to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
Ok(ParsedModule {
|
Ok(ParsedModule {
|
||||||
info: Arc::new(info),
|
info: Arc::new(info),
|
||||||
|
@ -468,13 +474,17 @@ pub fn transpile_module(
|
||||||
globals: &Globals,
|
globals: &Globals,
|
||||||
cm: Rc<SourceMap>,
|
cm: Rc<SourceMap>,
|
||||||
) -> Result<(Rc<SourceFile>, Module), AnyError> {
|
) -> Result<(Rc<SourceFile>, Module), AnyError> {
|
||||||
let info = SourceFileInfo::new(specifier, source);
|
let source = strip_bom(source);
|
||||||
let source_file = cm.new_source_file(
|
let source_file = cm.new_source_file(
|
||||||
FileName::Custom(specifier.to_string()),
|
FileName::Custom(specifier.to_string()),
|
||||||
source.to_string(),
|
source.to_string(),
|
||||||
);
|
);
|
||||||
let input = StringInput::from(&*source_file);
|
let input = StringInput::from(&*source_file);
|
||||||
let (comments, module) = parse_string_input(&info, input, media_type)?;
|
let (comments, module) =
|
||||||
|
parse_string_input(input, media_type).map_err(|err| Diagnostic {
|
||||||
|
location: cm.lookup_char_pos(err.span().lo).into(),
|
||||||
|
message: err.into_kind().msg().to_string(),
|
||||||
|
})?;
|
||||||
|
|
||||||
let jsx_pass = react::react(
|
let jsx_pass = react::react(
|
||||||
cm,
|
cm,
|
||||||
|
@ -511,19 +521,17 @@ pub fn transpile_module(
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_string_input(
|
fn parse_string_input(
|
||||||
info: &SourceFileInfo,
|
|
||||||
input: StringInput,
|
input: StringInput,
|
||||||
media_type: &MediaType,
|
media_type: &MediaType,
|
||||||
) -> Result<(SingleThreadedComments, Module), AnyError> {
|
) -> Result<
|
||||||
|
(SingleThreadedComments, Module),
|
||||||
|
swc_ecmascript::parser::error::Error,
|
||||||
|
> {
|
||||||
let syntax = get_syntax(media_type);
|
let syntax = get_syntax(media_type);
|
||||||
let comments = SingleThreadedComments::default();
|
let comments = SingleThreadedComments::default();
|
||||||
let lexer = Lexer::new(syntax, TARGET, input, Some(&comments));
|
let lexer = Lexer::new(syntax, TARGET, input, Some(&comments));
|
||||||
let mut parser = swc_ecmascript::parser::Parser::new_from(lexer);
|
let mut parser = swc_ecmascript::parser::Parser::new_from(lexer);
|
||||||
|
let module = parser.parse_module()?;
|
||||||
let module = parser.parse_module().map_err(|err| Diagnostic {
|
|
||||||
location: info.get_location(err.span().lo),
|
|
||||||
message: err.into_kind().msg().to_string(),
|
|
||||||
})?;
|
|
||||||
|
|
||||||
Ok((comments, module))
|
Ok((comments, module))
|
||||||
}
|
}
|
||||||
|
|
|
@ -1788,3 +1788,8 @@ itest!(tls_connecttls {
|
||||||
args: "run --quiet --reload --allow-net --allow-read --cert tls/RootCA.pem tls_connecttls.js",
|
args: "run --quiet --reload --allow-net --allow-read --cert tls/RootCA.pem tls_connecttls.js",
|
||||||
output: "tls.out",
|
output: "tls.out",
|
||||||
});
|
});
|
||||||
|
|
||||||
|
itest!(byte_order_mark {
|
||||||
|
args: "run --no-check byte_order_mark.ts",
|
||||||
|
output: "byte_order_mark.out",
|
||||||
|
});
|
||||||
|
|
1
cli/tests/testdata/byte_order_mark.out
vendored
Normal file
1
cli/tests/testdata/byte_order_mark.out
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
Hello World
|
4
cli/tests/testdata/byte_order_mark.ts
vendored
Normal file
4
cli/tests/testdata/byte_order_mark.ts
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
import "./001_hello.js";
|
||||||
|
// Note this file starts with special byte order mark <U+FEFF>
|
||||||
|
// it's important that this file is a .ts typescript file which is passed to
|
||||||
|
// deno through `--no-check` mode.
|
|
@ -6,6 +6,8 @@ use std::{
|
||||||
io::{Error, ErrorKind},
|
io::{Error, ErrorKind},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pub const BOM_CHAR: char = '\u{FEFF}';
|
||||||
|
|
||||||
/// Attempts to detect the character encoding of the provided bytes.
|
/// Attempts to detect the character encoding of the provided bytes.
|
||||||
///
|
///
|
||||||
/// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian.
|
/// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian.
|
||||||
|
@ -43,6 +45,15 @@ pub fn convert_to_utf8<'a>(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Strips the byte order mark from the provided text if it exists.
|
||||||
|
pub fn strip_bom(text: &str) -> &str {
|
||||||
|
if text.starts_with(BOM_CHAR) {
|
||||||
|
&text[BOM_CHAR.len_utf8()..]
|
||||||
|
} else {
|
||||||
|
text
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
|
@ -28,8 +28,6 @@ use std::path::PathBuf;
|
||||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
const BOM_CHAR: char = '\u{FEFF}';
|
|
||||||
|
|
||||||
/// Format JavaScript/TypeScript files.
|
/// Format JavaScript/TypeScript files.
|
||||||
pub async fn format(
|
pub async fn format(
|
||||||
args: Vec<PathBuf>,
|
args: Vec<PathBuf>,
|
||||||
|
@ -350,12 +348,11 @@ fn read_file_contents(file_path: &Path) -> Result<FileContents, AnyError> {
|
||||||
let file_bytes = fs::read(&file_path)?;
|
let file_bytes = fs::read(&file_path)?;
|
||||||
let charset = text_encoding::detect_charset(&file_bytes);
|
let charset = text_encoding::detect_charset(&file_bytes);
|
||||||
let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?;
|
let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?;
|
||||||
let had_bom = file_text.starts_with(BOM_CHAR);
|
let had_bom = file_text.starts_with(text_encoding::BOM_CHAR);
|
||||||
let text = if had_bom {
|
let text = if had_bom {
|
||||||
// remove the BOM
|
text_encoding::strip_bom(&file_text).to_string()
|
||||||
String::from(&file_text[BOM_CHAR.len_utf8()..])
|
|
||||||
} else {
|
} else {
|
||||||
String::from(file_text)
|
file_text.to_string()
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(FileContents { text, had_bom })
|
Ok(FileContents { text, had_bom })
|
||||||
|
@ -367,7 +364,7 @@ fn write_file_contents(
|
||||||
) -> Result<(), AnyError> {
|
) -> Result<(), AnyError> {
|
||||||
let file_text = if file_contents.had_bom {
|
let file_text = if file_contents.had_bom {
|
||||||
// add back the BOM
|
// add back the BOM
|
||||||
format!("{}{}", BOM_CHAR, file_contents.text)
|
format!("{}{}", text_encoding::BOM_CHAR, file_contents.text)
|
||||||
} else {
|
} else {
|
||||||
file_contents.text
|
file_contents.text
|
||||||
};
|
};
|
||||||
|
|
Loading…
Add table
Reference in a new issue