// Copyright 2018-2024 the Deno authors. All rights reserved. MIT license. use std::fmt::Display; use deno_ast::swc::common::Span; use deno_ast::swc::common::DUMMY_SP; use indexmap::IndexMap; /// Each property has this flag to mark what kind of value it holds- /// Plain objects and arrays are not supported yet, but could be easily /// added if needed. #[derive(Debug, PartialEq)] pub enum PropFlags { Ref, RefArr, String, Bool, Null, Undefined, } impl From for u8 { fn from(m: PropFlags) -> u8 { m as u8 } } impl TryFrom for PropFlags { type Error = &'static str; fn try_from(value: u8) -> Result { match value { 0 => Ok(PropFlags::Ref), 1 => Ok(PropFlags::RefArr), 2 => Ok(PropFlags::String), 3 => Ok(PropFlags::Bool), 4 => Ok(PropFlags::Null), 5 => Ok(PropFlags::Undefined), _ => Err("Unknown Prop flag"), } } } const MASK_U32_1: u32 = 0b11111111_00000000_00000000_00000000; const MASK_U32_2: u32 = 0b00000000_11111111_00000000_00000000; const MASK_U32_3: u32 = 0b00000000_00000000_11111111_00000000; const MASK_U32_4: u32 = 0b00000000_00000000_00000000_11111111; // TODO: There is probably a native Rust function to do this. pub fn append_u32(result: &mut Vec, value: u32) { let v1: u8 = ((value & MASK_U32_1) >> 24) as u8; let v2: u8 = ((value & MASK_U32_2) >> 16) as u8; let v3: u8 = ((value & MASK_U32_3) >> 8) as u8; let v4: u8 = (value & MASK_U32_4) as u8; result.push(v1); result.push(v2); result.push(v3); result.push(v4); } pub fn append_usize(result: &mut Vec, value: usize) { let raw = u32::try_from(value).unwrap(); append_u32(result, raw); } pub fn write_usize(result: &mut [u8], value: usize, idx: usize) { let raw = u32::try_from(value).unwrap(); let v1: u8 = ((raw & MASK_U32_1) >> 24) as u8; let v2: u8 = ((raw & MASK_U32_2) >> 16) as u8; let v3: u8 = ((raw & MASK_U32_3) >> 8) as u8; let v4: u8 = (raw & MASK_U32_4) as u8; result[idx] = v1; result[idx + 1] = v2; result[idx + 2] = v3; result[idx + 3] = v4; } #[derive(Debug)] pub struct StringTable { id: usize, table: IndexMap, } impl StringTable { pub fn new() -> Self { Self { id: 0, table: IndexMap::new(), } } pub fn insert(&mut self, s: &str) -> usize { if let Some(id) = self.table.get(s) { return *id; } let id = self.id; self.id += 1; self.table.insert(s.to_string(), id); id } pub fn serialize(&mut self) -> Vec { let mut result: Vec = vec![]; append_u32(&mut result, self.table.len() as u32); // Assume that it's sorted by id for (s, _id) in &self.table { let bytes = s.as_bytes(); append_u32(&mut result, bytes.len() as u32); result.append(&mut bytes.to_vec()); } result } } #[derive(Debug, Clone, Copy, PartialEq)] pub struct NodeRef(pub usize); #[derive(Debug)] pub struct BoolPos(pub usize); #[derive(Debug)] pub struct FieldPos(pub usize); #[derive(Debug)] pub struct FieldArrPos(pub usize); #[derive(Debug)] pub struct StrPos(pub usize); #[derive(Debug)] pub struct UndefPos(pub usize); #[derive(Debug)] pub struct NullPos(pub usize); #[derive(Debug)] pub enum NodePos { Bool(BoolPos), #[allow(dead_code)] Field(FieldPos), #[allow(dead_code)] FieldArr(FieldArrPos), Str(StrPos), Undef(UndefPos), #[allow(dead_code)] Null(NullPos), } pub trait AstBufSerializer where K: Into + Display, P: Into + Display, { fn header( &mut self, kind: K, parent: NodeRef, span: &Span, prop_count: usize, ) -> NodeRef; fn ref_field(&mut self, prop: P) -> FieldPos; fn ref_vec_field(&mut self, prop: P, len: usize) -> FieldArrPos; fn str_field(&mut self, prop: P) -> StrPos; fn bool_field(&mut self, prop: P) -> BoolPos; fn undefined_field(&mut self, prop: P) -> UndefPos; #[allow(dead_code)] fn null_field(&mut self, prop: P) -> NullPos; fn write_ref(&mut self, pos: FieldPos, value: NodeRef); fn write_maybe_ref(&mut self, pos: FieldPos, value: Option); fn write_refs(&mut self, pos: FieldArrPos, value: Vec); fn write_str(&mut self, pos: StrPos, value: &str); fn write_bool(&mut self, pos: BoolPos, value: bool); fn serialize(&mut self) -> Vec; } #[derive(Debug)] pub struct SerializeCtx { buf: Vec, start_buf: NodeRef, str_table: StringTable, kind_map: Vec, prop_map: Vec, } /// This is the internal context used to allocate and fill the buffer. The point /// is to be able to write absolute offsets directly in place. /// /// The typical workflow is to reserve all necessary space for the currrent /// node with placeholders for the offsets of the child nodes. Once child /// nodes have been traversed, we know their offsets and can replace the /// placeholder values with the actual ones. impl SerializeCtx { pub fn new(kind_len: u8, prop_len: u8) -> Self { let kind_size = kind_len as usize; let prop_size = prop_len as usize; let mut ctx = Self { start_buf: NodeRef(0), buf: vec![], str_table: StringTable::new(), kind_map: vec![0; kind_size + 1], prop_map: vec![0; prop_size + 1], }; ctx.str_table.insert(""); // Placeholder node is always 0 ctx.append_node(0, NodeRef(0), &DUMMY_SP, 0); ctx.kind_map[0] = 0; ctx.start_buf = NodeRef(ctx.buf.len()); // Insert default props that are always present let type_str = ctx.str_table.insert("type"); let parent_str = ctx.str_table.insert("parent"); let range_str = ctx.str_table.insert("range"); let length_str = ctx.str_table.insert("length"); // These values are expected to be in this order on the JS side ctx.prop_map[0] = type_str; ctx.prop_map[1] = parent_str; ctx.prop_map[2] = range_str; ctx.prop_map[3] = length_str; ctx } /// Allocate a node's header fn field_header

(&mut self, prop: P, prop_flags: PropFlags) -> usize where P: Into + Display + Clone, { let offset = self.buf.len(); let n: u8 = prop.clone().into(); self.buf.push(n); if let Some(v) = self.prop_map.get::(n.into()) { if *v == 0 { let id = self.str_table.insert(&format!("{prop}")); self.prop_map[n as usize] = id; } } let flags: u8 = prop_flags.into(); self.buf.push(flags); offset } /// Allocate a property pointing to another node. fn field

(&mut self, prop: P, prop_flags: PropFlags) -> usize where P: Into + Display + Clone, { let offset = self.field_header(prop, prop_flags); append_usize(&mut self.buf, 0); offset } fn append_node( &mut self, kind: u8, parent: NodeRef, span: &Span, prop_count: usize, ) -> NodeRef { let offset = self.buf.len(); // Node type fits in a u8 self.buf.push(kind); // Offset to the parent node. Will be 0 if none exists append_usize(&mut self.buf, parent.0); // Span, the start and end location of this node append_u32(&mut self.buf, span.lo.0); append_u32(&mut self.buf, span.hi.0); // No node has more than <10 properties debug_assert!(prop_count < 10); self.buf.push(prop_count as u8); NodeRef(offset) } /// Allocate the node header. It's always the same for every node. /// /// /// /// /// (There is no node with more than 10 properties) pub fn header( &mut self, kind: N, parent: NodeRef, span: &Span, prop_count: usize, ) -> NodeRef where N: Into + Display + Clone, { let n: u8 = kind.clone().into(); if let Some(v) = self.kind_map.get::(n.into()) { if *v == 0 { let id = self.str_table.insert(&format!("{kind}")); self.kind_map[n as usize] = id; } } self.append_node(n, parent, span, prop_count) } /// Allocate a reference property that will hold the offset of /// another node. pub fn ref_field

(&mut self, prop: P) -> usize where P: Into + Display + Clone, { self.field(prop, PropFlags::Ref) } /// Allocate a property that is a vec of node offsets pointing to other /// nodes. pub fn ref_vec_field

(&mut self, prop: P, len: usize) -> usize where P: Into + Display + Clone, { let offset = self.field(prop, PropFlags::RefArr); for _ in 0..len { append_u32(&mut self.buf, 0); } offset } // Allocate a property representing a string. Strings are deduplicated // in the message and the property will only contain the string id. pub fn str_field

(&mut self, prop: P) -> usize where P: Into + Display + Clone, { self.field(prop, PropFlags::String) } /// Allocate a bool field pub fn bool_field

(&mut self, prop: P) -> usize where P: Into + Display + Clone, { let offset = self.field_header(prop, PropFlags::Bool); self.buf.push(0); offset } /// Allocate an undefined field pub fn undefined_field

(&mut self, prop: P) -> usize where P: Into + Display + Clone, { self.field_header(prop, PropFlags::Undefined) } /// Allocate an undefined field #[allow(dead_code)] pub fn null_field

(&mut self, prop: P) -> usize where P: Into + Display + Clone, { self.field_header(prop, PropFlags::Null) } /// Replace the placeholder of a reference field with the actual offset /// to the node we want to point to. pub fn write_ref(&mut self, field_offset: usize, value: NodeRef) { #[cfg(debug_assertions)] { let value_kind = self.buf[field_offset + 1]; if PropFlags::try_from(value_kind).unwrap() != PropFlags::Ref { panic!("Trying to write a ref into a non-ref field") } } write_usize(&mut self.buf, value.0, field_offset + 2); } /// Helper for writing optional node offsets pub fn write_maybe_ref( &mut self, field_offset: usize, value: Option, ) { #[cfg(debug_assertions)] { let value_kind = self.buf[field_offset + 1]; if PropFlags::try_from(value_kind).unwrap() != PropFlags::Ref { panic!("Trying to write a ref into a non-ref field") } } let ref_value = if let Some(v) = value { v } else { NodeRef(0) }; write_usize(&mut self.buf, ref_value.0, field_offset + 2); } /// Write a vec of node offsets into the property. The necessary space /// has been reserved earlier. pub fn write_refs(&mut self, field_offset: usize, value: Vec) { #[cfg(debug_assertions)] { let value_kind = self.buf[field_offset + 1]; if PropFlags::try_from(value_kind).unwrap() != PropFlags::RefArr { panic!("Trying to write a ref into a non-ref array field") } } let mut offset = field_offset + 2; write_usize(&mut self.buf, value.len(), offset); offset += 4; for item in value { write_usize(&mut self.buf, item.0, offset); offset += 4; } } /// Store the string in our string table and save the id of the string /// in the current field. pub fn write_str(&mut self, field_offset: usize, value: &str) { #[cfg(debug_assertions)] { let value_kind = self.buf[field_offset + 1]; if PropFlags::try_from(value_kind).unwrap() != PropFlags::String { panic!("Trying to write a ref into a non-string field") } } let id = self.str_table.insert(value); write_usize(&mut self.buf, id, field_offset + 2); } /// Write a bool to a field. pub fn write_bool(&mut self, field_offset: usize, value: bool) { #[cfg(debug_assertions)] { let value_kind = self.buf[field_offset + 1]; if PropFlags::try_from(value_kind).unwrap() != PropFlags::Bool { panic!("Trying to write a ref into a non-bool field") } } self.buf[field_offset + 2] = if value { 1 } else { 0 }; } /// Serialize all information we have into a buffer that can be sent to JS. /// It has the following structure: /// /// <...ast> /// /// <- node kind id maps to string id /// <- node property id maps to string id /// /// /// pub fn serialize(&mut self) -> Vec { let mut buf: Vec = vec![]; // The buffer starts with the serialized AST first, because that // contains absolute offsets. By butting this at the start of the // message we don't have to waste time updating any offsets. buf.append(&mut self.buf); // Next follows the string table. We'll keep track of the offset // in the message of where the string table begins let offset_str_table = buf.len(); // Serialize string table buf.append(&mut self.str_table.serialize()); // Next, serialize the mappings of kind -> string of encountered // nodes in the AST. We use this additional lookup table to compress // the message so that we can save space by using a u8 . All nodes of // JS, TS and JSX together are <200 let offset_kind_map = buf.len(); // Write the total number of entries in the kind -> str mapping table // TODO: make this a u8 append_usize(&mut buf, self.kind_map.len()); for v in &self.kind_map { append_usize(&mut buf, *v); } // Store offset to prop -> string map. It's the same as with node kind // as the total number of properties is <120 which allows us to store it // as u8. let offset_prop_map = buf.len(); // Write the total number of entries in the kind -> str mapping table append_usize(&mut buf, self.prop_map.len()); for v in &self.prop_map { append_usize(&mut buf, *v); } // Putting offsets of relevant parts of the buffer at the end. This // allows us to hop to the relevant part by merely looking at the last // for values in the message. Each value represents an offset into the // buffer. append_usize(&mut buf, offset_kind_map); append_usize(&mut buf, offset_prop_map); append_usize(&mut buf, offset_str_table); append_usize(&mut buf, self.start_buf.0); buf } }