supa_mdx_lint/utils/
regex.rs
1#![allow(dead_code)]
2
3use std::collections::HashSet;
4
5use bon::builder;
6use regex_syntax::ast::{
7 parse::Parser, Ast, ClassPerlKind, ClassSet, ClassSetItem, Concat, RepetitionKind,
8};
9
10use crate::utils::char_tree::CharNode;
11
12#[builder]
24pub fn expand_regex(
25 regex: &str,
26 trim_non_alphabetic: Option<bool>,
29) -> Option<Vec<String>> {
30 let trim_non_alphabetic = trim_non_alphabetic.unwrap_or(true);
31
32 let ast = Parser::new().parse(regex).ok()?;
33 expand_ast(&ast, trim_non_alphabetic)
34}
35
36fn expand_ast(ast: &Ast, trim_non_alphabetic: bool) -> Option<Vec<String>> {
37 #[derive(Debug)]
38 enum NextNode {
39 Single(CharNode),
40 Multiple(Vec<CharNode>),
41 }
42
43 fn expand_ast_internal(ast: &Ast, char_tree: &mut Option<CharNode>) -> Option<NextNode> {
44 match ast {
45 Ast::Assertion(_) => Some(NextNode::Single(
46 char_tree.clone().unwrap_or_else(CharNode::initiate),
47 )),
48 Ast::Literal(literal) => match char_tree {
49 Some(ref mut node) => {
50 let new_node = node.append(literal.c);
51 Some(NextNode::Single(new_node))
52 }
53 None => {
54 let new_tree = char_tree.insert(CharNode::initiate());
55 let new_node = new_tree.append(literal.c);
56 Some(NextNode::Single(new_node))
57 }
58 },
59 Ast::ClassBracketed(class_bracketed) if !class_bracketed.negated => {
60 let new_nodes = expand_class_set(char_tree, &class_bracketed.kind);
61 new_nodes.map(NextNode::Multiple)
62 }
63 Ast::Repetition(repetition)
64 if matches!(repetition.op.kind, RepetitionKind::ZeroOrOne) =>
65 {
66 let mut tree = char_tree.get_or_insert_with(CharNode::initiate).clone();
67 if !tree.is_root() {
68 tree.mark_finished_word();
69 }
70
71 let mut next_nodes = vec![tree.clone()];
72
73 let alt_branch = expand_ast_internal(repetition.ast.as_ref(), char_tree);
74 match alt_branch {
75 Some(NextNode::Single(node)) if node != tree => {
76 next_nodes.push(node);
77 }
78 Some(NextNode::Multiple(nodes)) => {
79 for node in nodes.into_iter() {
80 if node != tree {
81 next_nodes.push(node);
82 }
83 }
84 }
85 _ => {}
86 }
87
88 Some(NextNode::Multiple(next_nodes))
89 }
90 Ast::Group(group) => expand_ast_internal(group.ast.as_ref(), char_tree),
91 Ast::Alternation(alternation) => {
92 let mut next = Vec::new();
93
94 for ast in alternation.asts.iter() {
95 match expand_ast_internal(ast, char_tree) {
96 Some(NextNode::Single(node)) => next.push(node),
97 Some(NextNode::Multiple(nodes)) => next.extend(nodes),
98 _ => {}
99 }
100 }
101
102 Some(NextNode::Multiple(next))
103 }
104 Ast::Concat(concat) => expand_concat(char_tree, concat),
105 Ast::ClassPerl(perl_class)
106 if perl_class.kind == ClassPerlKind::Space && !perl_class.negated =>
107 {
108 Some(NextNode::Single(
109 char_tree.clone().unwrap_or_else(CharNode::initiate),
110 ))
111 }
112 _ => {
113 if let Some(ref mut node) = char_tree {
115 node.abort();
116 }
117 None
118 }
119 }
120 }
121
122 fn expand_concat(char_tree: &mut Option<CharNode>, concat: &Concat) -> Option<NextNode> {
123 let tree = char_tree.get_or_insert_with(CharNode::initiate).clone();
124 let mut next_node = Some(NextNode::Single(tree));
125
126 for ast in concat.asts.iter() {
127 match next_node {
128 Some(NextNode::Single(node)) => {
129 let mut node = Some(node);
130 next_node = expand_ast_internal(ast, &mut node);
131 }
132 Some(NextNode::Multiple(nodes)) => {
133 let mut next = Vec::new();
134
135 nodes.into_iter().for_each(|node| {
136 let mut node = Some(node);
137 match expand_ast_internal(ast, &mut node) {
138 Some(NextNode::Single(node)) => next.push(node),
139 Some(NextNode::Multiple(nodes)) => next.extend(nodes),
140 _ => {}
141 }
142 });
143
144 next_node = Some(NextNode::Multiple(next));
145 }
146 _ => {}
147 }
148 }
149
150 next_node
151 }
152
153 fn expand_class_set(
154 char_tree: &mut Option<CharNode>,
155 class_set: &ClassSet,
156 ) -> Option<Vec<CharNode>> {
157 let mut result = None::<Vec<CharNode>>;
158
159 match class_set {
160 ClassSet::Item(ClassSetItem::Literal(literal)) => {
161 let tree = char_tree.get_or_insert_with(CharNode::initiate);
162 let new_node = tree.append(literal.c);
163 result.get_or_insert_with(Vec::new).push(new_node);
164 }
165 ClassSet::Item(ClassSetItem::Union(union)) => {
166 for item in union.items.iter() {
167 let class_set = ClassSet::Item(item.clone());
168 if let Some(new_nodes) = expand_class_set(char_tree, &class_set) {
169 result.get_or_insert_with(Vec::new).extend(new_nodes);
170 }
171 }
172 }
173 _ => {
174 if let Some(ref mut node) = char_tree {
176 node.abort();
177 }
178 }
179 }
180
181 result
182 }
183
184 let mut char_tree = None::<CharNode>;
185 expand_ast_internal(ast, &mut char_tree);
186 char_tree.map(|tree| {
187 tree.collect()
188 .into_iter()
189 .map(|s| {
190 if trim_non_alphabetic {
191 s.trim_matches(|c: char| !c.is_alphabetic()).to_string()
192 } else {
193 s
194 }
195 })
196 .collect::<HashSet<_>>()
197 .into_iter()
198 .collect::<Vec<_>>()
199 })
200}
201
202#[cfg(test)]
203mod tests {
204 use super::*;
205
206 #[test]
207 fn test_expand_regex_blank_returns_none() {
208 let result = expand_regex().regex("").call();
209 assert_eq!(result, None);
210 }
211
212 #[test]
213 fn test_expand_regex_literal_into_itself() {
214 let result = expand_regex().regex("test").call();
215 assert_eq!(result, Some(vec!["test".to_string()]));
216 }
217
218 #[test]
219 fn test_expand_regex_alternates() {
220 let mut result = expand_regex().regex("test(s|ed)").call().unwrap();
221 result.sort();
222 assert_eq!(result, vec!["tested".to_string(), "tests".to_string()]);
223 }
224
225 #[test]
226 fn test_expand_regex_optional() {
227 let mut result = expand_regex().regex("tests?").call().unwrap();
228 result.sort();
229 assert_eq!(result, vec!["test".to_string(), "tests".to_string()]);
230 }
231
232 #[test]
233 fn test_expand_regex_alternates_optional() {
234 let mut result = expand_regex().regex("test(s|ed)?").call().unwrap();
235 result.sort();
236 assert_eq!(
237 result,
238 vec![
239 "test".to_string(),
240 "tested".to_string(),
241 "tests".to_string(),
242 ]
243 );
244 }
245
246 #[test]
247 fn test_expand_regex_alternates_class_set() {
248 let mut result = expand_regex().regex("[Aa]pple").call().unwrap();
249 result.sort();
250 assert_eq!(result, vec!["Apple".to_string(), "apple".to_string()]);
251 }
252
253 #[test]
254 fn text_expand_regex_initial_optional() {
255 let mut result = expand_regex().regex("(pre)?determine").call().unwrap();
256 result.sort();
257 assert_eq!(
258 result,
259 vec!["determine".to_string(), "predetermine".to_string()]
260 )
261 }
262
263 #[test]
264 fn test_expand_regex_aborted_case() {
265 let result = expand_regex().regex("[^Aa]pple").call().unwrap();
266 assert_eq!(result, Vec::<String>::new());
267
268 let result = expand_regex().regex("a[^Aa]pple").call().unwrap();
269 assert_eq!(result, Vec::<String>::new());
270 }
271
272 #[test]
273 fn test_expand_regex_trim_non_alphabetic() {
274 let result = expand_regex().regex(" test ").call();
275 assert_eq!(result, Some(vec!["test".to_string()]));
276
277 let result = expand_regex().regex("!test!").call();
278 assert_eq!(result, Some(vec!["test".to_string()]));
279
280 let result = expand_regex()
281 .regex("!test!")
282 .trim_non_alphabetic(false)
283 .call();
284 assert_eq!(result, Some(vec!["!test!".to_string()]));
285 }
286
287 #[test]
288 fn test_expand_regex_deduplication() {
289 let result = expand_regex().regex("test|test").call().unwrap();
290 assert_eq!(result, vec!["test".to_string()]);
291 }
292
293 #[test]
294 fn test_expand_regex_with_assertions() {
295 let result = expand_regex().regex("^test$").call().unwrap();
296 assert_eq!(result, vec!["test"]);
297 }
298
299 #[test]
300 fn test_expand_regex_with_perl_classes() {
301 let result = expand_regex().regex("\\stest\\s").call().unwrap();
302 assert_eq!(result, vec!["test"]);
303 }
304}