supa_mdx_lint/utils/
regex.rs

1#![allow(dead_code)]
2
3use std::collections::HashSet;
4
5use bon::builder;
6use regex_syntax::ast::{
7    parse::Parser, Ast, ClassPerlKind, ClassSet, ClassSetItem, Concat, RepetitionKind,
8};
9
10use crate::utils::char_tree::CharNode;
11
12/// Expand a regex pattern into a list of strings.
13///
14/// Because of the nature of regex and wanting to return (a) a finite result
15/// in (b) some reasonable amount of time, the returned list of strings is _not_
16/// exhaustive. Even for a valid and theoretically finite regex pattern, None
17/// may be returned if a performant expansion is too difficult.
18///
19/// ```ignore
20/// let result = expand_regex(r"test(s|ed)?");
21/// assert_eq!(result, Some(vec!["test", "tests", "tested"]));
22/// ```
23#[builder]
24pub fn expand_regex(
25    regex: &str,
26    /// Whether to trim non-alphabetic characters from beginning and end of
27    /// expanded string. Defaults to true.
28    trim_non_alphabetic: Option<bool>,
29) -> Option<Vec<String>> {
30    let trim_non_alphabetic = trim_non_alphabetic.unwrap_or(true);
31
32    let ast = Parser::new().parse(regex).ok()?;
33    expand_ast(&ast, trim_non_alphabetic)
34}
35
36fn expand_ast(ast: &Ast, trim_non_alphabetic: bool) -> Option<Vec<String>> {
37    #[derive(Debug)]
38    enum NextNode {
39        Single(CharNode),
40        Multiple(Vec<CharNode>),
41    }
42
43    fn expand_ast_internal(ast: &Ast, char_tree: &mut Option<CharNode>) -> Option<NextNode> {
44        match ast {
45            Ast::Assertion(_) => Some(NextNode::Single(
46                char_tree.clone().unwrap_or_else(CharNode::initiate),
47            )),
48            Ast::Literal(literal) => match char_tree {
49                Some(ref mut node) => {
50                    let new_node = node.append(literal.c);
51                    Some(NextNode::Single(new_node))
52                }
53                None => {
54                    let new_tree = char_tree.insert(CharNode::initiate());
55                    let new_node = new_tree.append(literal.c);
56                    Some(NextNode::Single(new_node))
57                }
58            },
59            Ast::ClassBracketed(class_bracketed) if !class_bracketed.negated => {
60                let new_nodes = expand_class_set(char_tree, &class_bracketed.kind);
61                new_nodes.map(NextNode::Multiple)
62            }
63            Ast::Repetition(repetition)
64                if matches!(repetition.op.kind, RepetitionKind::ZeroOrOne) =>
65            {
66                let mut tree = char_tree.get_or_insert_with(CharNode::initiate).clone();
67                if !tree.is_root() {
68                    tree.mark_finished_word();
69                }
70
71                let mut next_nodes = vec![tree.clone()];
72
73                let alt_branch = expand_ast_internal(repetition.ast.as_ref(), char_tree);
74                match alt_branch {
75                    Some(NextNode::Single(node)) if node != tree => {
76                        next_nodes.push(node);
77                    }
78                    Some(NextNode::Multiple(nodes)) => {
79                        for node in nodes.into_iter() {
80                            if node != tree {
81                                next_nodes.push(node);
82                            }
83                        }
84                    }
85                    _ => {}
86                }
87
88                Some(NextNode::Multiple(next_nodes))
89            }
90            Ast::Group(group) => expand_ast_internal(group.ast.as_ref(), char_tree),
91            Ast::Alternation(alternation) => {
92                let mut next = Vec::new();
93
94                for ast in alternation.asts.iter() {
95                    match expand_ast_internal(ast, char_tree) {
96                        Some(NextNode::Single(node)) => next.push(node),
97                        Some(NextNode::Multiple(nodes)) => next.extend(nodes),
98                        _ => {}
99                    }
100                }
101
102                Some(NextNode::Multiple(next))
103            }
104            Ast::Concat(concat) => expand_concat(char_tree, concat),
105            Ast::ClassPerl(perl_class)
106                if perl_class.kind == ClassPerlKind::Space && !perl_class.negated =>
107            {
108                Some(NextNode::Single(
109                    char_tree.clone().unwrap_or_else(CharNode::initiate),
110                ))
111            }
112            _ => {
113                // Too complex to list all the possibilities, just abort
114                if let Some(ref mut node) = char_tree {
115                    node.abort();
116                }
117                None
118            }
119        }
120    }
121
122    fn expand_concat(char_tree: &mut Option<CharNode>, concat: &Concat) -> Option<NextNode> {
123        let tree = char_tree.get_or_insert_with(CharNode::initiate).clone();
124        let mut next_node = Some(NextNode::Single(tree));
125
126        for ast in concat.asts.iter() {
127            match next_node {
128                Some(NextNode::Single(node)) => {
129                    let mut node = Some(node);
130                    next_node = expand_ast_internal(ast, &mut node);
131                }
132                Some(NextNode::Multiple(nodes)) => {
133                    let mut next = Vec::new();
134
135                    nodes.into_iter().for_each(|node| {
136                        let mut node = Some(node);
137                        match expand_ast_internal(ast, &mut node) {
138                            Some(NextNode::Single(node)) => next.push(node),
139                            Some(NextNode::Multiple(nodes)) => next.extend(nodes),
140                            _ => {}
141                        }
142                    });
143
144                    next_node = Some(NextNode::Multiple(next));
145                }
146                _ => {}
147            }
148        }
149
150        next_node
151    }
152
153    fn expand_class_set(
154        char_tree: &mut Option<CharNode>,
155        class_set: &ClassSet,
156    ) -> Option<Vec<CharNode>> {
157        let mut result = None::<Vec<CharNode>>;
158
159        match class_set {
160            ClassSet::Item(ClassSetItem::Literal(literal)) => {
161                let tree = char_tree.get_or_insert_with(CharNode::initiate);
162                let new_node = tree.append(literal.c);
163                result.get_or_insert_with(Vec::new).push(new_node);
164            }
165            ClassSet::Item(ClassSetItem::Union(union)) => {
166                for item in union.items.iter() {
167                    let class_set = ClassSet::Item(item.clone());
168                    if let Some(new_nodes) = expand_class_set(char_tree, &class_set) {
169                        result.get_or_insert_with(Vec::new).extend(new_nodes);
170                    }
171                }
172            }
173            _ => {
174                // Too complex to list all the possibilities, just abort
175                if let Some(ref mut node) = char_tree {
176                    node.abort();
177                }
178            }
179        }
180
181        result
182    }
183
184    let mut char_tree = None::<CharNode>;
185    expand_ast_internal(ast, &mut char_tree);
186    char_tree.map(|tree| {
187        tree.collect()
188            .into_iter()
189            .map(|s| {
190                if trim_non_alphabetic {
191                    s.trim_matches(|c: char| !c.is_alphabetic()).to_string()
192                } else {
193                    s
194                }
195            })
196            .collect::<HashSet<_>>()
197            .into_iter()
198            .collect::<Vec<_>>()
199    })
200}
201
202#[cfg(test)]
203mod tests {
204    use super::*;
205
206    #[test]
207    fn test_expand_regex_blank_returns_none() {
208        let result = expand_regex().regex("").call();
209        assert_eq!(result, None);
210    }
211
212    #[test]
213    fn test_expand_regex_literal_into_itself() {
214        let result = expand_regex().regex("test").call();
215        assert_eq!(result, Some(vec!["test".to_string()]));
216    }
217
218    #[test]
219    fn test_expand_regex_alternates() {
220        let mut result = expand_regex().regex("test(s|ed)").call().unwrap();
221        result.sort();
222        assert_eq!(result, vec!["tested".to_string(), "tests".to_string()]);
223    }
224
225    #[test]
226    fn test_expand_regex_optional() {
227        let mut result = expand_regex().regex("tests?").call().unwrap();
228        result.sort();
229        assert_eq!(result, vec!["test".to_string(), "tests".to_string()]);
230    }
231
232    #[test]
233    fn test_expand_regex_alternates_optional() {
234        let mut result = expand_regex().regex("test(s|ed)?").call().unwrap();
235        result.sort();
236        assert_eq!(
237            result,
238            vec![
239                "test".to_string(),
240                "tested".to_string(),
241                "tests".to_string(),
242            ]
243        );
244    }
245
246    #[test]
247    fn test_expand_regex_alternates_class_set() {
248        let mut result = expand_regex().regex("[Aa]pple").call().unwrap();
249        result.sort();
250        assert_eq!(result, vec!["Apple".to_string(), "apple".to_string()]);
251    }
252
253    #[test]
254    fn text_expand_regex_initial_optional() {
255        let mut result = expand_regex().regex("(pre)?determine").call().unwrap();
256        result.sort();
257        assert_eq!(
258            result,
259            vec!["determine".to_string(), "predetermine".to_string()]
260        )
261    }
262
263    #[test]
264    fn test_expand_regex_aborted_case() {
265        let result = expand_regex().regex("[^Aa]pple").call().unwrap();
266        assert_eq!(result, Vec::<String>::new());
267
268        let result = expand_regex().regex("a[^Aa]pple").call().unwrap();
269        assert_eq!(result, Vec::<String>::new());
270    }
271
272    #[test]
273    fn test_expand_regex_trim_non_alphabetic() {
274        let result = expand_regex().regex(" test ").call();
275        assert_eq!(result, Some(vec!["test".to_string()]));
276
277        let result = expand_regex().regex("!test!").call();
278        assert_eq!(result, Some(vec!["test".to_string()]));
279
280        let result = expand_regex()
281            .regex("!test!")
282            .trim_non_alphabetic(false)
283            .call();
284        assert_eq!(result, Some(vec!["!test!".to_string()]));
285    }
286
287    #[test]
288    fn test_expand_regex_deduplication() {
289        let result = expand_regex().regex("test|test").call().unwrap();
290        assert_eq!(result, vec!["test".to_string()]);
291    }
292
293    #[test]
294    fn test_expand_regex_with_assertions() {
295        let result = expand_regex().regex("^test$").call().unwrap();
296        assert_eq!(result, vec!["test"]);
297    }
298
299    #[test]
300    fn test_expand_regex_with_perl_classes() {
301        let result = expand_regex().regex("\\stest\\s").call().unwrap();
302        assert_eq!(result, vec!["test"]);
303    }
304}