supa_mdx_lint/
parser.rs

1use std::any::Any;
2
3use anyhow::{anyhow, Result};
4use log::{debug, trace};
5use markdown::{mdast::Node, to_mdast, Constructs, ParseOptions};
6
7use crate::{location::AdjustedOffset, rope::Rope};
8
/// Type-erased container for parsed frontmatter.
///
/// Within this file the boxed value is either a `toml::Value` or a
/// `serde_yaml::Value`; readers recover the concrete type by downcasting.
type Frontmatter = Box<dyn Any>;
10
/// Information collected while separating frontmatter from the Markdown body.
#[derive(Debug)]
pub(crate) struct ParseMetadata {
    /// Offset into the original input at which the Markdown body begins
    /// (zero when no frontmatter block was stripped).
    content_start_offset: AdjustedOffset,
    // NOTE(review): within this file the field is only read by the unit tests,
    // which is presumably why the unused lint is silenced — confirm.
    #[allow(unused)]
    /// Parsed frontmatter, or `None` when the input has no frontmatter block or
    /// the block could be parsed as neither TOML nor YAML.
    frontmatter: Option<Frontmatter>,
}
17
/// Everything produced by [`parse`]: the syntax tree, a rope over the input,
/// and the metadata describing where the Markdown body starts.
#[derive(Debug)]
pub(crate) struct ParseResult {
    /// Markdown AST built from the body that follows any frontmatter.
    ast: Node,
    /// Rope constructed from the complete, unmodified input (frontmatter included).
    rope: Rope,
    /// Frontmatter and content-offset information.
    metadata: ParseMetadata,
}
24
25impl ParseResult {
26    pub(crate) fn ast(&self) -> &Node {
27        &self.ast
28    }
29
30    pub(crate) fn rope(&self) -> &Rope {
31        &self.rope
32    }
33
34    pub(crate) fn content_start_offset(&self) -> AdjustedOffset {
35        self.metadata.content_start_offset
36    }
37}
38
39pub(crate) fn parse(input: &str) -> Result<ParseResult> {
40    let (content, rope, content_start_offset, frontmatter) = process_raw_content_string(input);
41    let ast = parse_internal(content)?;
42
43    trace!("AST: {:#?}", ast);
44
45    Ok(ParseResult {
46        ast,
47        rope,
48        metadata: ParseMetadata {
49            content_start_offset,
50            frontmatter,
51        },
52    })
53}
54
55fn process_raw_content_string(input: &str) -> (&str, Rope, AdjustedOffset, Option<Frontmatter>) {
56    let rope = Rope::from(input);
57    let mut frontmatter = None;
58    let mut content = input;
59
60    let mut content_start_offset = AdjustedOffset::default();
61
62    if content.trim_start().starts_with("---") {
63        let frontmatter_start_offset: AdjustedOffset = (content.find("---").unwrap() + 3).into();
64
65        if let Some(frontmatter_end_index) = content[frontmatter_start_offset.into()..].find("---")
66        {
67            let mut end_offset: AdjustedOffset =
68                (Into::<usize>::into(frontmatter_start_offset) + frontmatter_end_index).into();
69
70            let frontmatter_str = &content[frontmatter_start_offset.into()..end_offset.into()];
71
72            if let Ok(toml_frontmatter) = toml::from_str::<toml::Value>(frontmatter_str) {
73                debug!("Parsed as TOML: {toml_frontmatter:#?}");
74                frontmatter = Some(Box::new(toml_frontmatter) as Frontmatter);
75            } else if let Ok(yaml_frontmatter) =
76                serde_yaml::from_str::<serde_yaml::Value>(frontmatter_str)
77            {
78                debug!("Parsed as YAML: {yaml_frontmatter:#?}");
79                frontmatter = Some(Box::new(yaml_frontmatter) as Frontmatter);
80            } else {
81                debug!("Failed to parse frontmatter as TOML or YAML: {frontmatter_str}")
82            }
83
84            // Update end_offset to include the closing "---" and following blank lines
85
86            // Move past the closing "---"
87            end_offset.increment(3);
88
89            // Skip all whitespace and newlines after the closing "---"
90            let mut remaining_index = 0;
91            let remaining = &content[end_offset.into()..];
92            while remaining_index < remaining.len() {
93                if remaining[remaining_index..].starts_with(char::is_whitespace) {
94                    remaining_index += 1;
95                } else {
96                    break;
97                }
98            }
99            end_offset.increment(remaining_index);
100
101            content_start_offset = end_offset;
102        }
103    }
104
105    content = &input[content_start_offset.into()..];
106
107    (content, rope, content_start_offset, frontmatter)
108}
109
110fn parse_internal(input: &str) -> Result<Node> {
111    let mdast = to_mdast(
112        input,
113        &ParseOptions {
114            constructs: Constructs {
115                autolink: false,
116                code_indented: false,
117                frontmatter: true,
118                gfm_footnote_definition: true,
119                gfm_label_start_footnote: true,
120                gfm_table: true,
121                html_flow: false,
122                html_text: false,
123                mdx_esm: true,
124                mdx_expression_flow: true,
125                mdx_expression_text: true,
126                mdx_jsx_flow: true,
127                mdx_jsx_text: true,
128                ..Default::default()
129            },
130            ..Default::default()
131        },
132    )
133    .map_err(|e| anyhow!("Not valid Markdown: {:?}", e))?;
134
135    Ok(mdast)
136}
137
/// Recognizes text that consists of a single `/* … */` block comment.
pub(crate) trait CommentString {
    /// Returns `true` when the text, ignoring surrounding whitespace, starts
    /// with `/*` and ends with a separate, non-overlapping `*/`.
    fn is_comment(&self) -> bool;

    /// Returns the comment body with the delimiters and surrounding whitespace
    /// removed, or `None` when the text is not a block comment.
    fn as_comment(&self) -> Option<&str>;
}

impl CommentString for str {
    fn is_comment(&self) -> bool {
        // Derived from `as_comment` so the two methods can never disagree.
        self.as_comment().is_some()
    }

    fn as_comment(&self) -> Option<&str> {
        // Strip exactly one opening and one closing delimiter. Stripping the
        // prefix first guarantees the delimiters do not overlap (so "/*/" is
        // rejected), and stripping only once keeps any delimiter-like text
        // inside the comment body intact.
        let inner = self.trim().strip_prefix("/*")?.strip_suffix("*/")?;
        Some(inner.trim())
    }
}
163
// Unit tests for `parse`: frontmatter detection, the computed content start
// offset, and AST positions (which are relative to the body that remains after
// frontmatter has been stripped).
#[cfg(test)]
mod tests {
    use super::*;

    /// Without frontmatter nothing is stripped: the offset is zero and the
    /// heading sits at the very start of the parsed text.
    #[test]
    fn test_parse_markdown_without_frontmatter() {
        let input = r#"# Heading

Content here."#;
        let result = parse(input).unwrap();

        assert_eq!(
            result.metadata.content_start_offset,
            AdjustedOffset::from(0)
        );
        assert!(result.metadata.frontmatter.is_none());

        let root = result.ast;
        let heading = root.children().unwrap().first().unwrap();
        assert_eq!(heading.position().unwrap().start.line, 1);
        assert_eq!(heading.position().unwrap().start.column, 1);
        assert_eq!(heading.position().unwrap().start.offset, 0);
    }

    /// `key: value` frontmatter is not valid TOML, so it is stored as a YAML value.
    #[test]
    fn test_parse_markdown_with_yaml_frontmatter() {
        let input = r#"---
title: Test
---

# Heading

Content here."#;
        let result = parse(input).unwrap();

        // 21 = 4 ("---\n") + 12 ("title: Test\n") + 3 (closing "---") + 2 (newlines).
        assert_eq!(
            result.metadata.content_start_offset,
            AdjustedOffset::from(21)
        );
        assert!(result.metadata.frontmatter.is_some());

        let frontmatter = result.metadata.frontmatter.unwrap();
        let yaml = frontmatter.downcast_ref::<serde_yaml::Value>().unwrap();
        if let serde_yaml::Value::Mapping(map) = yaml {
            assert_eq!(map.len(), 1);
            assert!(map.contains_key(&serde_yaml::Value::String("title".to_string())));
        } else {
            panic!("Expected YAML frontmatter to be a mapping");
        }

        // The heading is on line 1 because the AST is built from the body only.
        let root = result.ast;
        let heading = root.children().unwrap().first().unwrap();
        assert_eq!(heading.position().unwrap().start.line, 1);
        assert_eq!(heading.position().unwrap().start.column, 1);
    }

    /// TOML is attempted before YAML, so valid TOML frontmatter is stored as a TOML value.
    #[test]
    fn test_parse_markdown_with_toml_frontmatter() {
        let input = r#"---
title = "TOML Test"
[author]
name = "John Doe"
---

# TOML Heading

Content with TOML frontmatter."#;
        let result = parse(input).unwrap();

        // 56 = 4 ("---\n") + 20 + 9 + 18 (the three TOML lines) + 3 (closing "---")
        //      + 2 (newlines).
        assert_eq!(
            result.metadata.content_start_offset,
            AdjustedOffset::from(56)
        );
        assert!(result.metadata.frontmatter.is_some());

        let frontmatter = result.metadata.frontmatter.unwrap();
        let toml = frontmatter.downcast_ref::<toml::Value>().unwrap();

        assert!(toml.is_table());
        let table = toml.as_table().unwrap();

        assert!(table.contains_key("title"));

        let root = result.ast;
        let heading = root.children().unwrap().first().unwrap();
        assert_eq!(heading.position().unwrap().start.line, 1);
        assert_eq!(heading.position().unwrap().start.column, 1);
    }

    /// All whitespace after the closing delimiter is skipped, however many
    /// blank lines there are.
    #[test]
    fn test_parse_markdown_with_frontmatter_and_multiple_newlines() {
        let input = r#"---
title: Test
---


# Heading

Content here."#;
        let result = parse(input).unwrap();
        // 22 = 19 (through the closing "---") + 3 (newlines).
        assert_eq!(
            result.metadata.content_start_offset,
            AdjustedOffset::from(22)
        );
        assert!(result.metadata.frontmatter.is_some());

        let root = result.ast;
        let heading = root.children().unwrap().first().unwrap();
        assert_eq!(heading.position().unwrap().start.line, 1);
        assert_eq!(heading.position().unwrap().start.column, 1);
    }
}
275}