fix: parse error when transpiling code with BOM (#11688)

Co-authored-by: David Sherret <dsherret@gmail.com>
2025-03-03 17:34:47 -05:00 · 2021-08-16 09:28:29 +02:00 · 2021-08-16 09:28:29 +02:00 · 163f2ef571
commit 163f2ef571
parent 02b23e0575
7 changed files with 44 additions and 17 deletions
--- a/.dprint.json
+++ b/.dprint.json
@ -25,6 +25,7 @@
    "cli/tests/testdata/inline_js_source_map*",
    "cli/tests/testdata/badly_formatted.md",
    "cli/tests/testdata/badly_formatted.json",
    "cli/tests/testdata/byte_order_mark.ts",
    "cli/tsc/*typescript.js",
    "test_util/std",
    "test_util/wpt",
--- a/cli/ast/mod.rs
+++ b/cli/ast/mod.rs
@ -2,6 +2,7 @@
 use crate::config_file;
 use crate::media_type::MediaType;
 use crate::text_encoding::strip_bom;
 use deno_core::error::AnyError;
 use deno_core::resolve_url_or_path;
@ -392,10 +393,15 @@ pub fn parse(
  source: &str,
  media_type: &MediaType,
 ) -> Result<ParsedModule, AnyError> {
  let source = strip_bom(source);
  let info = SourceFileInfo::new(specifier, source);
  let input =
    StringInput::new(source, BytePos(0), BytePos(source.len() as u32));
-  let (comments, module) = parse_string_input(&info, input, media_type)?;
+  let (comments, module) =
    parse_string_input(input, media_type).map_err(|err| Diagnostic {
      location: info.get_location(err.span().lo),
      message: err.into_kind().msg().to_string(),
    })?;
  Ok(ParsedModule {
    info: Arc::new(info),
@ -468,13 +474,17 @@ pub fn transpile_module(
  globals: &Globals,
  cm: Rc<SourceMap>,
 ) -> Result<(Rc<SourceFile>, Module), AnyError> {
-  let info = SourceFileInfo::new(specifier, source);
+  let source = strip_bom(source);
  let source_file = cm.new_source_file(
    FileName::Custom(specifier.to_string()),
    source.to_string(),
  );
  let input = StringInput::from(&*source_file);
-  let (comments, module) = parse_string_input(&info, input, media_type)?;
+  let (comments, module) =
    parse_string_input(input, media_type).map_err(|err| Diagnostic {
      location: cm.lookup_char_pos(err.span().lo).into(),
      message: err.into_kind().msg().to_string(),
    })?;
  let jsx_pass = react::react(
    cm,
@ -511,19 +521,17 @@ pub fn transpile_module(
 }
 fn parse_string_input(
  info: &SourceFileInfo,
  input: StringInput,
  media_type: &MediaType,
-) -> Result<(SingleThreadedComments, Module), AnyError> {
+) -> Result<
  (SingleThreadedComments, Module),
  swc_ecmascript::parser::error::Error,
 > {
  let syntax = get_syntax(media_type);
  let comments = SingleThreadedComments::default();
  let lexer = Lexer::new(syntax, TARGET, input, Some(&comments));
  let mut parser = swc_ecmascript::parser::Parser::new_from(lexer);
-
+  let module = parser.parse_module()?;
  let module = parser.parse_module().map_err(|err| Diagnostic {
    location: info.get_location(err.span().lo),
    message: err.into_kind().msg().to_string(),
  })?;
  Ok((comments, module))
 }
--- a/cli/tests/integration/run_tests.rs
+++ b/cli/tests/integration/run_tests.rs
@ -1788,3 +1788,8 @@ itest!(tls_connecttls {
  args: "run --quiet --reload --allow-net --allow-read --cert tls/RootCA.pem tls_connecttls.js",
  output: "tls.out",
 });
 // Integration case for a TypeScript entry point that begins with a byte
 // order mark: runs `byte_order_mark.ts` with `--no-check`. `output` names the
 // expected-output fixture (presumably compared against the process output —
 // confirm against the `itest!` macro definition).
 itest!(byte_order_mark {
  args: "run --no-check byte_order_mark.ts",
  output: "byte_order_mark.out",
 });
--- a/cli/tests/testdata/byte_order_mark.out
+++ b/cli/tests/testdata/byte_order_mark.out
@ -0,0 +1 @@
 Hello World
--- a/cli/tests/testdata/byte_order_mark.ts
+++ b/cli/tests/testdata/byte_order_mark.ts
@ -0,0 +1,4 @@
 import "./001_hello.js";
 // Note: this file starts with a special byte order mark (U+FEFF).
 // It is important that this is a .ts (TypeScript) file that is passed to
 // deno in `--no-check` mode.
--- a/cli/text_encoding.rs
+++ b/cli/text_encoding.rs
@ -6,6 +6,8 @@ use std::{
  io::{Error, ErrorKind},
 };
 pub const BOM_CHAR: char = '\u{FEFF}';
 /// Attempts to detect the character encoding of the provided bytes.
 ///
 /// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian.
@ -43,6 +45,15 @@ pub fn convert_to_utf8<'a>(
  }
 }
 /// Strips the byte order mark from the provided text if it exists.
 pub fn strip_bom(text: &str) -> &str {
  if text.starts_with(BOM_CHAR) {
    &text[BOM_CHAR.len_utf8()..]
  } else {
    text
  }
 }
 #[cfg(test)]
 mod tests {
  use super::*;
--- a/cli/tools/fmt.rs
+++ b/cli/tools/fmt.rs
@ -28,8 +28,6 @@ use std::path::PathBuf;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
 const BOM_CHAR: char = '\u{FEFF}';
 /// Format JavaScript/TypeScript files.
 pub async fn format(
  args: Vec<PathBuf>,
@ -350,12 +348,11 @@ fn read_file_contents(file_path: &Path) -> Result<FileContents, AnyError> {
  let file_bytes = fs::read(&file_path)?;
  let charset = text_encoding::detect_charset(&file_bytes);
  let file_text = text_encoding::convert_to_utf8(&file_bytes, charset)?;
-  let had_bom = file_text.starts_with(BOM_CHAR);
+  let had_bom = file_text.starts_with(text_encoding::BOM_CHAR);
  let text = if had_bom {
-    // remove the BOM
+    text_encoding::strip_bom(&file_text).to_string()
    String::from(&file_text[BOM_CHAR.len_utf8()..])
  } else {
-    String::from(file_text)
+    file_text.to_string()
  };
  Ok(FileContents { text, had_bom })
@ -367,7 +364,7 @@ fn write_file_contents(
 ) -> Result<(), AnyError> {
  let file_text = if file_contents.had_bom {
    // add back the BOM
-    format!("{}{}", BOM_CHAR, file_contents.text)
+    format!("{}{}", text_encoding::BOM_CHAR, file_contents.text)
  } else {
    file_contents.text
  };