supa_mdx_lint/
parser.rs
1use std::any::Any;
2
3use anyhow::{anyhow, Result};
4use log::{debug, trace};
5use markdown::{mdast::Node, to_mdast, Constructs, ParseOptions};
6
7use crate::{location::AdjustedOffset, rope::Rope};
8
/// Type-erased container for parsed frontmatter.
///
/// Within this file the boxed value is either a `toml::Value` or a
/// `serde_yaml::Value`, depending on which parser accepted the frontmatter
/// text; consumers recover the concrete type with `downcast_ref`.
type Frontmatter = Box<dyn Any>;
10
/// Information collected while preprocessing the raw input, before the
/// Markdown content itself is parsed.
#[derive(Debug)]
pub(crate) struct ParseMetadata {
    /// Offset into the original input at which the Markdown content starts,
    /// i.e. just past any frontmatter block and the whitespace that follows it.
    content_start_offset: AdjustedOffset,
    // Within this file the field is only read by the unit tests, hence the
    // lint suppression.
    #[allow(unused)]
    /// Parsed frontmatter (TOML or YAML), if a frontmatter block was found and
    /// could be parsed.
    frontmatter: Option<Frontmatter>,
}
17
/// Outcome of parsing one input document.
#[derive(Debug)]
pub(crate) struct ParseResult {
    /// Syntax tree of the Markdown content (frontmatter excluded).
    ast: Node,
    /// Rope built from the complete, unmodified input (frontmatter included).
    rope: Rope,
    /// Preprocessing metadata such as where the content starts in the input.
    metadata: ParseMetadata,
}
24
25impl ParseResult {
26 pub(crate) fn ast(&self) -> &Node {
27 &self.ast
28 }
29
30 pub(crate) fn rope(&self) -> &Rope {
31 &self.rope
32 }
33
34 pub(crate) fn content_start_offset(&self) -> AdjustedOffset {
35 self.metadata.content_start_offset
36 }
37}
38
39pub(crate) fn parse(input: &str) -> Result<ParseResult> {
40 let (content, rope, content_start_offset, frontmatter) = process_raw_content_string(input);
41 let ast = parse_internal(content)?;
42
43 trace!("AST: {:#?}", ast);
44
45 Ok(ParseResult {
46 ast,
47 rope,
48 metadata: ParseMetadata {
49 content_start_offset,
50 frontmatter,
51 },
52 })
53}
54
55fn process_raw_content_string(input: &str) -> (&str, Rope, AdjustedOffset, Option<Frontmatter>) {
56 let rope = Rope::from(input);
57 let mut frontmatter = None;
58 let mut content = input;
59
60 let mut content_start_offset = AdjustedOffset::default();
61
62 if content.trim_start().starts_with("---") {
63 let frontmatter_start_offset: AdjustedOffset = (content.find("---").unwrap() + 3).into();
64
65 if let Some(frontmatter_end_index) = content[frontmatter_start_offset.into()..].find("---")
66 {
67 let mut end_offset: AdjustedOffset =
68 (Into::<usize>::into(frontmatter_start_offset) + frontmatter_end_index).into();
69
70 let frontmatter_str = &content[frontmatter_start_offset.into()..end_offset.into()];
71
72 if let Ok(toml_frontmatter) = toml::from_str::<toml::Value>(frontmatter_str) {
73 debug!("Parsed as TOML: {toml_frontmatter:#?}");
74 frontmatter = Some(Box::new(toml_frontmatter) as Frontmatter);
75 } else if let Ok(yaml_frontmatter) =
76 serde_yaml::from_str::<serde_yaml::Value>(frontmatter_str)
77 {
78 debug!("Parsed as YAML: {yaml_frontmatter:#?}");
79 frontmatter = Some(Box::new(yaml_frontmatter) as Frontmatter);
80 } else {
81 debug!("Failed to parse frontmatter as TOML or YAML: {frontmatter_str}")
82 }
83
84 end_offset.increment(3);
88
89 let mut remaining_index = 0;
91 let remaining = &content[end_offset.into()..];
92 while remaining_index < remaining.len() {
93 if remaining[remaining_index..].starts_with(char::is_whitespace) {
94 remaining_index += 1;
95 } else {
96 break;
97 }
98 }
99 end_offset.increment(remaining_index);
100
101 content_start_offset = end_offset;
102 }
103 }
104
105 content = &input[content_start_offset.into()..];
106
107 (content, rope, content_start_offset, frontmatter)
108}
109
110fn parse_internal(input: &str) -> Result<Node> {
111 let mdast = to_mdast(
112 input,
113 &ParseOptions {
114 constructs: Constructs {
115 autolink: false,
116 code_indented: false,
117 frontmatter: true,
118 gfm_footnote_definition: true,
119 gfm_label_start_footnote: true,
120 gfm_table: true,
121 html_flow: false,
122 html_text: false,
123 mdx_esm: true,
124 mdx_expression_flow: true,
125 mdx_expression_text: true,
126 mdx_jsx_flow: true,
127 mdx_jsx_text: true,
128 ..Default::default()
129 },
130 ..Default::default()
131 },
132 )
133 .map_err(|e| anyhow!("Not valid Markdown: {:?}", e))?;
134
135 Ok(mdast)
136}
137
/// Helpers for treating a string as a `/* ... */` block comment.
pub(crate) trait CommentString {
    /// Returns `true` when the string, ignoring surrounding whitespace, is
    /// wrapped in a `/*` opener and a separate `*/` closer.
    fn is_comment(&self) -> bool;

    /// Returns the trimmed text between the `/*` and `*/` delimiters, or
    /// `None` when the string is not a block comment.
    fn as_comment(&self) -> Option<&str>;
}

impl CommentString for str {
    fn is_comment(&self) -> bool {
        self.as_comment().is_some()
    }

    fn as_comment(&self) -> Option<&str> {
        // Remove exactly one opener and then one closer from what is left.
        // Stripping the closer from the remainder (rather than testing the
        // whole string) rejects inputs such as "/*/", where the two
        // delimiters would have to share the `*`, and stripping only once
        // keeps delimiter-like text inside the comment body intact.
        self.trim()
            .strip_prefix("/*")?
            .strip_suffix("*/")
            .map(str::trim)
    }
}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the first child of the given root node.
    fn first_child(root: &Node) -> &Node {
        root.children().unwrap().first().unwrap()
    }

    /// Asserts that `node` starts at line 1, column 1 of the parsed content.
    fn assert_starts_at_origin(node: &Node) {
        let start = &node.position().unwrap().start;
        assert_eq!(start.line, 1);
        assert_eq!(start.column, 1);
    }

    #[test]
    fn test_parse_markdown_without_frontmatter() {
        let input = r#"# Heading

Content here."#;
        let parsed = parse(input).unwrap();

        // No frontmatter: the content starts at the very beginning.
        assert_eq!(
            parsed.metadata.content_start_offset,
            AdjustedOffset::from(0)
        );
        assert!(parsed.metadata.frontmatter.is_none());

        let heading = first_child(&parsed.ast);
        assert_starts_at_origin(heading);
        assert_eq!(heading.position().unwrap().start.offset, 0);
    }

    #[test]
    fn test_parse_markdown_with_yaml_frontmatter() {
        let input = r#"---
title: Test
---

# Heading

Content here."#;
        let parsed = parse(input).unwrap();

        // 19 bytes of frontmatter including both fences, plus two newlines.
        assert_eq!(
            parsed.metadata.content_start_offset,
            AdjustedOffset::from(21)
        );
        assert!(parsed.metadata.frontmatter.is_some());

        let boxed = parsed.metadata.frontmatter.unwrap();
        let yaml = boxed.downcast_ref::<serde_yaml::Value>().unwrap();
        match yaml {
            serde_yaml::Value::Mapping(map) => {
                assert_eq!(map.len(), 1);
                assert!(map.contains_key(&serde_yaml::Value::String("title".to_string())));
            }
            _ => panic!("Expected YAML frontmatter to be a mapping"),
        }

        // Positions are relative to the content, not to the original input.
        assert_starts_at_origin(first_child(&parsed.ast));
    }

    #[test]
    fn test_parse_markdown_with_toml_frontmatter() {
        let input = r#"---
title = "TOML Test"
[author]
name = "John Doe"
---

# TOML Heading

Content with TOML frontmatter."#;
        let parsed = parse(input).unwrap();

        // 54 bytes of frontmatter including both fences, plus two newlines.
        assert_eq!(
            parsed.metadata.content_start_offset,
            AdjustedOffset::from(56)
        );
        assert!(parsed.metadata.frontmatter.is_some());

        let boxed = parsed.metadata.frontmatter.unwrap();
        let toml = boxed.downcast_ref::<toml::Value>().unwrap();

        assert!(toml.is_table());
        let table = toml.as_table().unwrap();
        assert!(table.contains_key("title"));

        assert_starts_at_origin(first_child(&parsed.ast));
    }

    #[test]
    fn test_parse_markdown_with_frontmatter_and_multiple_newlines() {
        let input = r#"---
title: Test
---


# Heading

Content here."#;
        let parsed = parse(input).unwrap();

        // All three newlines after the closing fence are skipped.
        assert_eq!(
            parsed.metadata.content_start_offset,
            AdjustedOffset::from(22)
        );
        assert!(parsed.metadata.frontmatter.is_some());

        assert_starts_at_origin(first_child(&parsed.ast));
    }
}