supa_mdx_lint/rules/
rule004_exclude_words.rs

1use std::{borrow::Cow, collections::HashMap, iter::Peekable, sync::LazyLock};
2
3use bon::bon;
4use crop::RopeSlice;
5use indexmap::IndexSet;
6use log::{debug, trace};
7use markdown::mdast;
8use regex::Regex;
9use serde::{
10    de::{MapAccess, SeqAccess},
11    ser::{SerializeMap, SerializeTuple},
12    Deserialize, Serialize, Serializer,
13};
14use supa_mdx_macros::RuleName;
15
16use crate::{
17    context::Context,
18    errors::LintError,
19    fix::LintCorrection,
20    location::{AdjustedRange, DenormalizedLocation},
21    rope::Rope,
22    utils::words::{
23        extras::{WordIteratorExtension, WordIteratorPrefix},
24        WordIterator, WordIteratorItem,
25    },
26    LintLevel,
27};
28
29use super::{Rule, RuleName, RuleSettings};
30
/// Lint rule 004: reports words and phrases that the user has configured as
/// excluded, optionally suggesting a replacement.
///
/// Wraps the [`WordExclusionIndex`] that `setup` builds from the rule's
/// settings and that `check` consults for each word of a text node.
#[derive(Debug, Default, RuleName)]
pub struct Rule004ExcludeWords(WordExclusionIndex);
33
/// Provides an index of exclusions to allow for easy lookup and matching based
/// on the first word of the exclusion.
#[derive(Debug, Default)]
struct WordExclusionIndex {
    /// Exclusions grouped by their first word and case sensitivity.
    index: WordExclusionIndexInner,
    /// Metadata of every configured rule. Entries of `index` refer to a rule
    /// by its position in this vector.
    rules: Vec<RuleMeta>,
}
41
/// Map from the first word of an exclusion (together with its case
/// sensitivity) to all exclusions that start with that word.
#[derive(Debug, Default)]
struct WordExclusionIndexInner(HashMap<Prefix<'static>, WordExclusionMeta>);
44
/// Lookup key of the exclusion index: the first word of an exclusion paired
/// with the case sensitivity it is matched under.
///
/// When constructed through the `From<(Cow<str>, CaseSensitivity)>`
/// conversion, the word of a case-insensitive key is stored lowercased.
#[derive(Debug, Default, PartialEq, Eq, Hash)]
struct Prefix<'a>(Cow<'a, str>, CaseSensitivity);
47
/// Whether an exclusion is matched against the text exactly or ignoring
/// letter case.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, Deserialize, Serialize)]
enum CaseSensitivity {
    /// Letter case must match.
    Sensitive,
    /// Letter case is ignored. This is the default.
    #[default]
    Insensitive,
}
54
/// All exclusions that share the same first word, stored as two parallel
/// collections (`remainders` and `details`).
#[derive(Debug, Default)]
struct WordExclusionMeta {
    /// The trailing part of an exclusion, after the first word is stripped.
    /// An exclusion without a trailing part is stored as the empty string.
    remainders: IndexSet<String>,
    /// The rule indexes and replacements associated with these exclusions, if
    /// any. Rule indexes correspond to the position within the rules of the
    /// WordExclusionIndex.
    ///
    /// Invariant: Ordering must correspond to the ordering of `remainders`.
    details: Vec<(usize, Option<String>)>,
}
66
/// The definition of a user-defined rule.
///
/// ## Fields
/// * `String` - A human-readable description of the rule. It is used as the
///   message template of the lint errors raised for the rule.
/// * `LintLevel` - The level at which the rule should be linted
#[derive(Debug, Default, Clone)]
struct RuleMeta(String, LintLevel);
74
/// A structure to allow for deserialization from an easy-to-write rule config
/// format.
///
/// It is converted into a [`WordExclusionIndex`] before being used for
/// matching.
#[derive(Debug, Default)]
struct WordExclusionIndexIntermediate {
    /// The configured rules, keyed by rule name.
    rule: HashMap<String, WordExclusionMetaIntermediate>,
}
81
82impl Serialize for WordExclusionIndexIntermediate {
83    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
84    where
85        S: Serializer,
86    {
87        let mut map = serializer.serialize_map(Some(self.rule.len()))?;
88        for (key, value) in &self.rule {
89            map.serialize_entry(key, value)?;
90        }
91        map.end()
92    }
93}
94
95impl<'de> Deserialize<'de> for WordExclusionIndexIntermediate {
96    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
97    where
98        D: serde::Deserializer<'de>,
99    {
100        struct Visitor;
101
102        impl<'de> serde::de::Visitor<'de> for Visitor {
103            type Value = WordExclusionIndexIntermediate;
104
105            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
106                formatter.write_str("A map of rule names to their exclusion details")
107            }
108
109            fn visit_map<M>(self, mut map: M) -> Result<Self::Value, M::Error>
110            where
111                M: MapAccess<'de>,
112            {
113                let mut rule = HashMap::new();
114
115                while let Some((key, value)) =
116                    map.next_entry::<String, WordExclusionMetaIntermediate>()?
117                {
118                    rule.insert(key, value);
119                }
120
121                Ok(WordExclusionIndexIntermediate { rule })
122            }
123        }
124
125        deserializer.deserialize_any(Visitor)
126    }
127}
128
/// The configuration of a single user-defined rule, as written in the rule
/// settings.
#[derive(Debug, Default, Deserialize, Serialize)]
struct WordExclusionMetaIntermediate {
    /// The level at which matches are reported. Falls back to the default
    /// `LintLevel` when omitted.
    #[serde(default)]
    level: LintLevel,
    /// Whether the words must match the text's letter case. Falls back to
    /// `false` when omitted.
    #[serde(default)]
    case_sensitive: bool,
    /// The excluded words or phrases, each optionally with a replacement.
    words: Vec<ExclusionDefinition>,
    /// The message reported for a match. `%s` and `%r` placeholders are
    /// substituted with the matched text and the replacement when the lint
    /// error is created.
    description: String,
}
138
/// A single excluded word or phrase from the rule configuration.
///
/// Serialized as a plain string (`ExcludeOnly`) or as a two-element tuple
/// (`WithReplace`).
#[derive(Debug)]
enum ExclusionDefinition {
    /// An exclusion without a suggested replacement.
    ExcludeOnly(String),
    /// An exclusion (first field) and its replacement (second field).
    WithReplace(String, String),
}
144
145impl Serialize for ExclusionDefinition {
146    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
147    where
148        S: serde::Serializer,
149    {
150        match self {
151            ExclusionDefinition::ExcludeOnly(s) => serializer.serialize_str(s),
152            ExclusionDefinition::WithReplace(a, b) => {
153                let mut seq = serializer.serialize_tuple(2)?;
154                seq.serialize_element(a)?;
155                seq.serialize_element(b)?;
156                seq.end()
157            }
158        }
159    }
160}
161
162impl<'de> Deserialize<'de> for ExclusionDefinition {
163    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
164    where
165        D: serde::Deserializer<'de>,
166    {
167        struct Visitor;
168
169        impl<'de> serde::de::Visitor<'de> for Visitor {
170            type Value = ExclusionDefinition;
171
172            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
173                formatter.write_str("A string (representing an exclusion) or a tuple of two strings (representing an exclusion and its replacement")
174            }
175
176            fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
177            where
178                E: serde::de::Error,
179            {
180                Ok(ExclusionDefinition::ExcludeOnly(value.to_string()))
181            }
182
183            fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
184            where
185                A: SeqAccess<'de>,
186            {
187                let first: String = seq
188                    .next_element()?
189                    .ok_or_else(|| serde::de::Error::invalid_length(0, &self))?;
190                let second: String = seq
191                    .next_element()?
192                    .ok_or_else(|| serde::de::Error::invalid_length(1, &self))?;
193                Ok(ExclusionDefinition::WithReplace(first, second))
194            }
195        }
196
197        deserializer.deserialize_any(Visitor)
198    }
199}
200
/// The result of looking up a word in the [`WordExclusionIndex`] under both
/// case sensitivities.
#[derive(Debug)]
struct IndexLookupResult<'a> {
    /// Exclusions whose first word equals the looked-up word exactly.
    case_sensitive_details: Option<&'a WordExclusionMeta>,
    /// Exclusions whose first word equals the looked-up word when compared
    /// in lowercase.
    case_insensitive_details: Option<&'a WordExclusionMeta>,
}
206
207impl From<bool> for CaseSensitivity {
208    fn from(case_sensitive: bool) -> CaseSensitivity {
209        if case_sensitive {
210            CaseSensitivity::Sensitive
211        } else {
212            CaseSensitivity::Insensitive
213        }
214    }
215}
216
217impl<'a> From<(Cow<'a, str>, CaseSensitivity)> for Prefix<'a> {
218    fn from((s, case_sensitivity): (Cow<'a, str>, CaseSensitivity)) -> Self {
219        let prefix = match case_sensitivity {
220            CaseSensitivity::Sensitive => s,
221            CaseSensitivity::Insensitive => s.to_lowercase().into(),
222        };
223        Prefix(prefix, case_sensitivity)
224    }
225}
226
227impl RuleMeta {
228    fn description(&self) -> &str {
229        &self.0
230    }
231
232    fn level(&self) -> LintLevel {
233        self.1
234    }
235}
236
237impl ExclusionDefinition {
238    fn into_parts(self) -> (String, Option<String>) {
239        match self {
240            ExclusionDefinition::ExcludeOnly(w) => (w, None),
241            ExclusionDefinition::WithReplace(w, r) => (w, Some(r)),
242        }
243    }
244}
245
246#[bon]
247impl WordExclusionIndex {
248    #[builder]
249    fn insert_exclusion(
250        &mut self,
251        exclusion: ExclusionDefinition,
252        case_sensitivity: CaseSensitivity,
253        rule_index: usize,
254    ) {
255        let (word, replacement) = exclusion.into_parts();
256
257        let rope = Rope::from(word.as_ref());
258        let mut iter = WordIterator::new(rope.byte_slice(..), 0, Default::default());
259
260        let prefix = iter.next();
261        let remainder = iter.collect_remainder();
262
263        if let Some(prefix) = prefix {
264            self.handle_insert_prefix()
265                .prefix(prefix.1.to_string())
266                .maybe_remainder(remainder)
267                .maybe_replacement(replacement)
268                .case_sensitivity(case_sensitivity)
269                .rule_index(rule_index)
270                .call();
271        }
272    }
273
274    #[builder]
275    fn handle_insert_prefix(
276        &mut self,
277        prefix: String,
278        remainder: Option<String>,
279        replacement: Option<String>,
280        case_sensitivity: CaseSensitivity,
281        rule_index: usize,
282    ) {
283        let prefix = Prefix::from((Cow::from(prefix), case_sensitivity));
284        let remainder = remainder.unwrap_or_default();
285
286        let existing = self.index.0.get_mut(&prefix);
287        match existing {
288            Some(existing) => {
289                let (inserted_idx, is_new) = existing.remainders.insert_full(remainder);
290
291                if is_new {
292                    existing.details.push((rule_index, replacement))
293                } else {
294                    let rule_meta = self
295                        .rules
296                        .get(rule_index)
297                        .expect("Rule meta previously inserted into global rule map");
298                    let new_rule_level = rule_meta.level();
299                    match self.rules.get_mut(inserted_idx) {
300                        Some(existing_rule) if existing_rule.level() < new_rule_level => {
301                            if let Some(idx) = existing.details.get_mut(inserted_idx) {
302                                *idx = (rule_index, replacement)
303                            }
304                        }
305                        _ => {
306                            // The new rule doesn't outrank the existing one,
307                            // leave it.
308                        }
309                    }
310                }
311            }
312            None => {
313                let mut remainders = IndexSet::new();
314                remainders.insert(remainder);
315
316                self.index.0.insert(
317                    prefix,
318                    WordExclusionMeta {
319                        remainders,
320                        details: vec![(rule_index, replacement)],
321                    },
322                );
323            }
324        }
325    }
326
327    fn get<'a, 'b: 'a>(&'a self, prefix: &'b str) -> IndexLookupResult<'a> {
328        let case_sensitive_key = Prefix::from((Cow::from(prefix), CaseSensitivity::Sensitive));
329        let case_insensitive_key = Prefix::from((Cow::from(prefix), CaseSensitivity::Insensitive));
330
331        let case_sensitive = self.index.0.get(&case_sensitive_key);
332        let case_insensitive = self.index.0.get(&case_insensitive_key);
333
334        IndexLookupResult {
335            case_sensitive_details: case_sensitive,
336            case_insensitive_details: case_insensitive,
337        }
338    }
339}
340
341impl From<WordExclusionIndexIntermediate> for WordExclusionIndex {
342    fn from(exclude_words: WordExclusionIndexIntermediate) -> Self {
343        let mut this = Self {
344            index: WordExclusionIndexInner::default(),
345            rules: Vec::with_capacity(exclude_words.rule.len()),
346        };
347
348        for (_, rule_details) in exclude_words.rule {
349            let rule_index = this.rules.len();
350            this.rules
351                .push(RuleMeta(rule_details.description, rule_details.level));
352
353            let words = rule_details.words;
354            for word in words {
355                this.insert_exclusion()
356                    .exclusion(word)
357                    .case_sensitivity(rule_details.case_sensitive.into())
358                    .rule_index(rule_index)
359                    .call();
360            }
361        }
362
363        this
364    }
365}
366
367impl Rule for Rule004ExcludeWords {
368    fn default_level(&self) -> LintLevel {
369        // An implementation is required for this trait, but this rule defines
370        // its levels in its own configuration, so this is ignored.
371        LintLevel::default()
372    }
373
374    fn setup(&mut self, settings: Option<&mut RuleSettings>) {
375        trace!("Setting up Rule004ExcludeWords");
376
377        let Some(settings) = settings else {
378            return;
379        };
380
381        let rules = settings.get_deserializable::<WordExclusionIndexIntermediate>("rules");
382        if let Some(rules) = rules {
383            self.0 = rules.into();
384        }
385
386        debug!("Rule 004 is set up: {:#?}", self)
387    }
388
389    fn check(
390        &self,
391        ast: &mdast::Node,
392        context: &Context,
393        _level: LintLevel,
394    ) -> Option<Vec<LintError>> {
395        let mdast::Node::Text(text_node) = ast else {
396            return None;
397        };
398        let Some(position) = &text_node.position else {
399            return None;
400        };
401        debug!("Checking Rule 004 for node {:#?}", ast);
402
403        let mut errors = None::<Vec<LintError>>;
404
405        let range = AdjustedRange::from_unadjusted_position(position, context);
406        let text = context
407            .rope()
408            .byte_slice(Into::<std::ops::Range<usize>>::into(range.clone()));
409        let mut word_iterator: WordIteratorExtension<'_, WordIteratorPrefix> =
410            WordIterator::new(text, range.start.into(), Default::default()).into();
411
412        loop {
413            let Some((offset, word, _)) = word_iterator.next() else {
414                break;
415            };
416            let word = word.to_string();
417
418            let ExclusionMatch {
419                new_iterator,
420                match_: r#match,
421            } = self.match_exclusions(self.0.get(&word), word_iterator);
422            word_iterator = new_iterator;
423
424            if let Some(MatchDetails {
425                last_word,
426                rule,
427                replacement,
428            }) = r#match
429            {
430                let end_offset = match last_word {
431                    Some(last_word) => last_word.0 + last_word.1.len(),
432                    None => offset + word.len(),
433                };
434
435                let error = self
436                    .create_lint_error()
437                    .beginning_offset(offset)
438                    .end_offset(end_offset)
439                    .maybe_replacement(replacement)
440                    .rule(rule)
441                    .range(range.clone())
442                    .context(context)
443                    .call();
444                errors.get_or_insert_with(Vec::new).push(error);
445            }
446        }
447
448        errors
449    }
450}
451
/// What is left of an exclusion candidate after the words matched so far.
enum Suffix<'a> {
    /// Nothing is left: the candidate has been matched completely.
    Finish,
    /// The part of the exclusion that still has to be matched.
    Remaining(&'a str),
}
456
457impl<'a> From<&'a str> for Suffix<'a> {
458    fn from(s: &'a str) -> Self {
459        match s {
460            "" => Suffix::Finish,
461            _ => Suffix::Remaining(s),
462        }
463    }
464}
465
/// The outcome of trying to match exclusions at the current word.
struct ExclusionMatch<'a> {
    /// The word iterator to continue with. Words that were read ahead but
    /// are not part of the match have been put back in front of it.
    new_iterator: WordIteratorExtension<'a, WordIteratorPrefix<'a>>,
    /// Details of the match, or `None` if no exclusion matched.
    match_: Option<MatchDetails>,
}
470
/// A completed exclusion match, ready to be turned into a lint error.
#[derive(Debug)]
struct MatchDetails {
    /// The last word of a multi-word match; `None` when the match consists
    /// of the first word only.
    last_word: Option<LastWordMatched>,
    /// The configured replacement for the matched exclusion, if any.
    replacement: Option<String>,
    /// The rule the matched exclusion belongs to.
    rule: RuleMeta,
}
477
/// The best match found so far while the matching is still in progress.
#[derive(Debug)]
struct MatchDetailsIntermediate<'a> {
    /// How far the match extends.
    match_: MatchDetailsIntermediateInner,
    /// The rule the matched exclusion belongs to.
    rule: RuleMeta,
    /// The configured replacement, borrowed from the exclusion index.
    replacement: &'a Option<String>,
}
484
/// The extent of a match that is tracked while matching is in progress.
#[derive(Debug)]
enum MatchDetailsIntermediateInner {
    /// The match consists only of the first word.
    OneWord,
    /// The match is multiple words long. The position of the last matching
    /// word is tracked to calculate the full match range later. This is the
    /// offset not in the text, but in the vector of matches so far.
    MultipleWords(usize),
}
493
/// The last word of a multi-word match: its offset (first field) and its
/// text (second field). Together they determine where the match ends.
#[derive(Debug)]
struct LastWordMatched(usize, String);
496
497#[bon]
498impl Rule004ExcludeWords {
499    #[builder]
500    fn create_lint_error(
501        &self,
502        beginning_offset: usize,
503        end_offset: usize,
504        range: AdjustedRange,
505        replacement: Option<String>,
506        context: &Context<'_>,
507        rule: RuleMeta,
508    ) -> LintError {
509        trace!("Creating lint error for Rule004. Range: {range:#?}; Beginning offset: {beginning_offset}; End offset: {end_offset}");
510        let narrowed_range = AdjustedRange::new(beginning_offset.into(), end_offset.into());
511        let word = context.rope().byte_slice(narrowed_range.to_usize_range());
512
513        let suggestion = vec![LintCorrection::create_word_splice_correction()
514            .context(context)
515            .outer_range(&range)
516            .splice_range(&narrowed_range)
517            .maybe_replace(replacement.clone().map(Cow::from))
518            .call()];
519        let location = DenormalizedLocation::from_offset_range(narrowed_range, context);
520        let message = substitute_format_string(rule.description().to_string(), word, replacement);
521
522        LintError::from_raw_location()
523            .rule(self.name())
524            .message(message)
525            .level(rule.level())
526            .location(location)
527            .suggestions(suggestion)
528            .call()
529    }
530
531    fn match_exclusions<'a>(
532        &self,
533        IndexLookupResult {
534            case_sensitive_details,
535            case_insensitive_details,
536        }: IndexLookupResult,
537        words: WordIteratorExtension<'a, WordIteratorPrefix<'a>>,
538    ) -> ExclusionMatch<'a> {
539        trace!("Checking for need to match exclusions in Rule 004");
540        if case_sensitive_details.is_none() && case_insensitive_details.is_none() {
541            return ExclusionMatch {
542                new_iterator: words,
543                match_: None,
544            };
545        }
546        debug!("Matching exclusions in Rule 004");
547
548        let mut result_so_far = None::<MatchDetailsIntermediate>;
549        let all = combine_exclusions(case_sensitive_details, case_insensitive_details);
550
551        let mut consumed = vec![];
552        let words = self
553            .match_exclusions_rec()
554            .remaining(all)
555            .consumed(&mut consumed)
556            .words(words)
557            .result(&mut result_so_far)
558            .call();
559
560        let new_iterator = {
561            match result_so_far {
562                Some(MatchDetailsIntermediate {
563                    match_: MatchDetailsIntermediateInner::MultipleWords(end_pos_incl),
564                    ..
565                }) => reattach_unused_words(words, consumed.clone().into_iter(), end_pos_incl + 1),
566                _ => reattach_unused_words(words, consumed.clone().into_iter(), 0),
567            }
568        };
569        ExclusionMatch {
570            new_iterator,
571            match_: result_so_far.map(|res| MatchDetails {
572                last_word: match res.match_ {
573                    MatchDetailsIntermediateInner::OneWord => None,
574                    MatchDetailsIntermediateInner::MultipleWords(end_pos_incl) => {
575                        let last_word = consumed.into_iter().nth(end_pos_incl).expect(
576                            "Saved result only points to actual positions in the list of matches",
577                        );
578                        Some(LastWordMatched(last_word.0, last_word.1.to_string()))
579                    }
580                },
581                rule: res.rule,
582                replacement: res.replacement.clone(),
583            }),
584        }
585    }
586
587    #[builder]
588    fn match_exclusions_rec<'a, 'b>(
589        &self,
590        /// Words that have been consumed so far.
591        consumed: &mut Vec<WordIteratorItem<'b>>,
592        /// The remaining candidates that may still be viable matches. Stored
593        /// alongside their rule index.
594        mut remaining: Peekable<
595            impl Iterator<Item = (usize, Suffix<'a>, CaseSensitivity, &'a Option<String>)>,
596        >,
597        /// The remaining words to match.
598        mut words: WordIteratorExtension<'b, WordIteratorPrefix<'b>>,
599        result: &mut Option<MatchDetailsIntermediate<'a>>,
600    ) -> WordIteratorExtension<'b, WordIteratorPrefix<'b>> {
601        #[cfg(debug_assertions)]
602        trace!("Recursing through the match in Rule004. Consumed: \"{consumed:#?}\"; Current result: {result:#?}");
603
604        match words.next() {
605            None => {
606                // There are no words left in the string to match. If any of
607                // the prior matches were complete matches, then they are the
608                // longest matches. Pick an arbitary one.
609                if let Some((rule_index, _, _, repl)) =
610                    remaining.find(|(_, rem, _, _)| matches!(rem, Suffix::Finish))
611                {
612                    self.save_result()
613                        .matched(consumed)
614                        .rule_index(rule_index)
615                        .replacement(repl)
616                        .result(result)
617                        .call()
618                }
619                words
620            }
621            Some(word_item) => {
622                let mut next_iteration = None;
623                for (rule_index, suffix, case_sensitivity, repl) in remaining {
624                    match suffix {
625                        Suffix::Finish => self
626                            .save_result()
627                            .matched(consumed)
628                            .rule_index(rule_index)
629                            .result(result)
630                            .replacement(repl)
631                            .call(),
632                        Suffix::Remaining(s) => {
633                            if let Some(remainder) =
634                                trim_start((s, case_sensitivity), word_item.1.to_string())
635                            {
636                                // The match could potentially continue. Store the
637                                // candidate to run another iteration.
638                                next_iteration.get_or_insert_with(Vec::new).push((
639                                    rule_index,
640                                    Suffix::from(remainder),
641                                    case_sensitivity,
642                                    repl,
643                                ));
644                            }
645                        }
646                    }
647                }
648
649                consumed.push(word_item);
650                if let Some(next_iteration) = next_iteration {
651                    self.match_exclusions_rec()
652                        .remaining(next_iteration.into_iter().peekable())
653                        .words(words)
654                        .consumed(consumed)
655                        .result(result)
656                        .call()
657                } else {
658                    words
659                }
660            }
661        }
662    }
663
664    #[builder]
665    fn save_result<'a>(
666        &self,
667        matched: &[WordIteratorItem<'_>],
668        rule_index: usize,
669        replacement: &'a Option<String>,
670        result: &mut Option<MatchDetailsIntermediate<'a>>,
671    ) {
672        let match_ = if matched.is_empty() {
673            MatchDetailsIntermediateInner::OneWord
674        } else {
675            MatchDetailsIntermediateInner::MultipleWords(matched.len() - 1)
676        };
677
678        result.replace(MatchDetailsIntermediate {
679            match_,
680            rule: self
681                .0
682                .rules
683                .get(rule_index)
684                .expect("Rule meta added when this linter rule was set up")
685                .clone(),
686            replacement,
687        });
688    }
689}
690
691fn combine_exclusions<'a>(
692    case_sensitive: Option<&'a WordExclusionMeta>,
693    case_insensitive: Option<&'a WordExclusionMeta>,
694) -> Peekable<impl Iterator<Item = (usize, Suffix<'a>, CaseSensitivity, &'a Option<String>)>> {
695    fn remainders_iter(
696        details: &WordExclusionMeta,
697    ) -> impl Iterator<Item = (usize, Suffix, &Option<String>)> {
698        details.remainders.iter().enumerate().map(|(i, rem)| {
699            let (rule_index, replacement) = details
700                .details
701                .get(i)
702                .expect("Details added when setting up rule");
703            (*rule_index, Suffix::from(rem.as_str()), replacement)
704        })
705    }
706
707    let case_sensitive = case_sensitive
708        .map(remainders_iter)
709        .into_iter()
710        .flatten()
711        .map(|(i, rem, repl)| (i, rem, CaseSensitivity::Sensitive, repl));
712    let case_insensitive = case_insensitive
713        .map(remainders_iter)
714        .into_iter()
715        .flatten()
716        .map(|(i, rem, repl)| (i, rem, CaseSensitivity::Insensitive, repl));
717
718    case_sensitive.chain(case_insensitive).peekable()
719}
720
721fn trim_start(hay: (&str, CaseSensitivity), prefix: impl AsRef<str>) -> Option<&str> {
722    let prefix = prefix.as_ref();
723    match hay.1 {
724        CaseSensitivity::Sensitive => {
725            if hay.0.starts_with(prefix) {
726                Some(&hay.0[prefix.len()..])
727            } else {
728                None
729            }
730        }
731        CaseSensitivity::Insensitive => {
732            let hay_lower = hay.0.to_lowercase();
733            let prefix_lower = prefix.to_lowercase();
734            if hay_lower.starts_with(&prefix_lower) {
735                Some(&hay.0[prefix.len()..])
736            } else {
737                None
738            }
739        }
740    }
741}
742
743fn reattach_unused_words<'words>(
744    words: WordIteratorExtension<'words, WordIteratorPrefix<'words>>,
745    consumed: impl Iterator<Item = WordIteratorItem<'words>>,
746    num_used: usize,
747) -> WordIteratorExtension<'words, WordIteratorPrefix<'words>> {
748    #[cfg(debug_assertions)]
749    trace!("Reattaching unused words after matching");
750    words.extend_on_prefix(WordIteratorPrefix::new(consumed.skip(num_used)))
751}
752
753static FORMAT_REGEX: LazyLock<Regex> = LazyLock::new(|| {
754    Regex::new(r"[^%](?<placeholder>%s|%r)").expect("Hardcoded regex should not fail to compile")
755});
756
757fn substitute_format_string(s: String, word: RopeSlice<'_>, replacement: Option<String>) -> String {
758    if FORMAT_REGEX.captures(&s).is_none() {
759        return s;
760    }
761
762    let mut result = String::with_capacity(s.len());
763    let mut last_index = 0;
764    for capture in FORMAT_REGEX.captures_iter(&s) {
765        let placeholder = capture.name("placeholder").unwrap();
766        let range = placeholder.range();
767
768        let substitution = if placeholder.as_str().ends_with('s') {
769            word.to_string()
770        } else {
771            replacement
772                .clone()
773                .unwrap_or("<REPLACEMENT_WORD>".to_string())
774        };
775
776        result.push_str(&s[last_index..range.start]);
777        result.push_str(&substitution);
778        last_index = range.end;
779    }
780    result.push_str(&s[last_index..]);
781    result
782}
783
784#[cfg(test)]
785mod tests {
786    use crate::{
787        fix::LintCorrectionReplace,
788        location::AdjustedOffset,
789        parser::{parse, ParseResult},
790    };
791
792    use super::*;
793
794    fn setup_rule(
795        rules: Vec<(impl Into<String>, WordExclusionMetaIntermediate)>,
796    ) -> Rule004ExcludeWords {
797        let mut rule = Rule004ExcludeWords::default();
798        let mut settings = WordExclusionIndexIntermediate {
799            rule: HashMap::new(),
800        };
801
802        for (rule_description, rule_meta) in rules {
803            settings.rule.insert(rule_description.into(), rule_meta);
804        }
805
806        let mut settings =
807            RuleSettings::with_serializable::<WordExclusionIndexIntermediate>("rules", &settings);
808        rule.setup(Some(&mut settings));
809        rule
810    }
811
812    fn get_simple_ast(
813        md: impl AsRef<str>,
814    ) -> (
815        ParseResult,
816        impl Fn(&ParseResult) -> &mdast::Node,
817        impl Fn(&ParseResult) -> Context<'_>,
818    ) {
819        let parse_result = parse(md.as_ref()).unwrap();
820        (
821            parse_result,
822            |parse_result| {
823                parse_result
824                    .ast()
825                    .children()
826                    .unwrap()
827                    .first()
828                    .unwrap()
829                    .children()
830                    .unwrap()
831                    .first()
832                    .unwrap()
833            },
834            |parse_result| {
835                Context::builder()
836                    .parse_result(parse_result)
837                    .build()
838                    .unwrap()
839            },
840        )
841    }
842
843    #[test]
844    fn test_rule004_exclude_word() {
845        let rules = vec![(
846            "foo".to_string(),
847            WordExclusionMetaIntermediate {
848                description: "Don't use 'Foo'".to_string(),
849                case_sensitive: true,
850                words: vec![ExclusionDefinition::ExcludeOnly("Foo".to_string())],
851                level: LintLevel::Error,
852            },
853        )];
854        let rule = setup_rule(rules);
855
856        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo test.");
857        let result = rule.check(
858            get_ast(&parse_result),
859            &get_context(&parse_result),
860            LintLevel::Error,
861        );
862        assert!(result.is_some());
863
864        let errors = result.unwrap();
865        assert_eq!(errors.len(), 1);
866
867        let error = errors.get(0).unwrap();
868        assert_eq!(error.message, "Don't use 'Foo'");
869        assert_eq!(error.level, LintLevel::Error);
870        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
871        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(13));
872    }
873
874    #[test]
875    fn test_rule004_exclude_and_replace_word() {
876        let rules = vec![(
877            "foo".to_string(),
878            WordExclusionMetaIntermediate {
879                description: "Don't use 'Foo'".to_string(),
880                case_sensitive: true,
881                words: vec![ExclusionDefinition::WithReplace(
882                    "Foo".to_string(),
883                    "Bar".to_string(),
884                )],
885                level: LintLevel::Error,
886            },
887        )];
888        let rule = setup_rule(rules);
889
890        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo test.");
891        let result = rule.check(
892            get_ast(&parse_result),
893            &get_context(&parse_result),
894            LintLevel::Error,
895        );
896        assert!(result.is_some());
897
898        let errors = result.unwrap();
899        assert_eq!(errors.len(), 1);
900
901        let error = errors.get(0).unwrap();
902        assert_eq!(error.message, "Don't use 'Foo'");
903        assert_eq!(error.level, LintLevel::Error);
904        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
905        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(13));
906
907        assert!(error.suggestions.is_some());
908        let suggestions = error.suggestions.as_ref().unwrap();
909        assert_eq!(suggestions.len(), 1);
910        let suggestion = suggestions.get(0).unwrap();
911        assert!(matches!(
912            suggestion,
913            LintCorrection::Replace(LintCorrectionReplace { .. })
914        ));
915    }
916
917    #[test]
918    fn test_rule004_exclude_multiple_words() {
919        let rules = vec![
920            (
921                "foo",
922                WordExclusionMetaIntermediate {
923                    description: "Don't use 'Foo'".to_string(),
924                    case_sensitive: true,
925                    words: vec![ExclusionDefinition::ExcludeOnly("Foo".to_string())],
926                    level: LintLevel::Error,
927                },
928            ),
929            (
930                "bar",
931                WordExclusionMetaIntermediate {
932                    description: "Don't use 'bar'".to_string(),
933                    case_sensitive: true,
934                    words: vec![ExclusionDefinition::ExcludeOnly("bar".to_string())],
935                    level: LintLevel::Error,
936                },
937            ),
938        ];
939        let rule = setup_rule(rules);
940
941        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo test with bar.");
942        let result = rule.check(
943            get_ast(&parse_result),
944            &get_context(&parse_result),
945            LintLevel::Error,
946        );
947        assert!(result.is_some());
948
949        let errors = result.unwrap();
950        assert_eq!(errors.len(), 2);
951
952        let error = errors.get(0).unwrap();
953        assert_eq!(error.message, "Don't use 'Foo'");
954        assert_eq!(error.level, LintLevel::Error);
955        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
956        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(13));
957
958        let error = errors.get(1).unwrap();
959        assert_eq!(error.message, "Don't use 'bar'");
960        assert_eq!(error.level, LintLevel::Error);
961        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(24));
962        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(27));
963    }
964
965    #[test]
966    fn test_rule004_multiword_exclusion() {
967        let rules = vec![(
968            "foo bar".to_string(),
969            WordExclusionMetaIntermediate {
970                description: "Don't use 'Foo bar'".to_string(),
971                case_sensitive: true,
972                words: vec![ExclusionDefinition::ExcludeOnly("Foo bar".to_string())],
973                level: LintLevel::Error,
974            },
975        )];
976        let rule = setup_rule(rules);
977
978        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo bar test.");
979        let result = rule.check(
980            get_ast(&parse_result),
981            &get_context(&parse_result),
982            LintLevel::Error,
983        );
984        assert!(result.is_some());
985
986        let errors = result.unwrap();
987        assert_eq!(errors.len(), 1);
988
989        let error = errors.get(0).unwrap();
990        assert_eq!(error.message, "Don't use 'Foo bar'");
991        assert_eq!(error.level, LintLevel::Error);
992        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
993        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(17));
994    }
995
996    #[test]
997    fn test_rule004_overlapping_exclusions() {
998        let rules = vec![
999            (
1000                "Foo barbie",
1001                WordExclusionMetaIntermediate {
1002                    description: "Don't use 'Foo barbie'".to_string(),
1003                    case_sensitive: true,
1004                    words: vec![ExclusionDefinition::ExcludeOnly("Foo barbie".to_string())],
1005                    level: LintLevel::Error,
1006                },
1007            ),
1008            (
1009                "bartender",
1010                WordExclusionMetaIntermediate {
1011                    description: "Don't use 'bartender'".to_string(),
1012                    case_sensitive: true,
1013                    words: vec![ExclusionDefinition::ExcludeOnly("bartender".to_string())],
1014                    level: LintLevel::Error,
1015                },
1016            ),
1017        ];
1018        let rule = setup_rule(rules);
1019
1020        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo bartender.");
1021        let result = rule.check(
1022            get_ast(&parse_result),
1023            &get_context(&parse_result),
1024            LintLevel::Error,
1025        );
1026        assert!(result.is_some());
1027
1028        let errors = result.unwrap();
1029        assert_eq!(errors.len(), 1);
1030
1031        let error = errors.get(0).unwrap();
1032        assert_eq!(error.message, "Don't use 'bartender'");
1033        assert_eq!(error.level, LintLevel::Error);
1034        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(14));
1035        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(23));
1036    }
1037
1038    #[test]
1039    fn test_rule004_use_longest_overlapping() {
1040        let rules = vec![
1041            (
1042                "Foo bar",
1043                WordExclusionMetaIntermediate {
1044                    description: "Don't use 'Foo bar'".to_string(),
1045                    case_sensitive: true,
1046                    words: vec![ExclusionDefinition::ExcludeOnly("Foo bar".to_string())],
1047                    level: LintLevel::Error,
1048                },
1049            ),
1050            (
1051                "Foo bartender",
1052                WordExclusionMetaIntermediate {
1053                    description: "Don't use 'Foo bartender'".to_string(),
1054                    case_sensitive: true,
1055                    words: vec![ExclusionDefinition::ExcludeOnly(
1056                        "Foo bartender".to_string(),
1057                    )],
1058                    level: LintLevel::Error,
1059                },
1060            ),
1061        ];
1062        let rule = setup_rule(rules);
1063
1064        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo bartender.");
1065        let result = rule.check(
1066            get_ast(&parse_result),
1067            &get_context(&parse_result),
1068            LintLevel::Error,
1069        );
1070        assert!(result.is_some());
1071
1072        let errors = result.unwrap();
1073        assert_eq!(errors.len(), 1);
1074
1075        let error = errors.get(0).unwrap();
1076        assert_eq!(error.message, "Don't use 'Foo bartender'");
1077        assert_eq!(error.level, LintLevel::Error);
1078        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
1079        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(23));
1080    }
1081
1082    #[test]
1083    fn test_rule004_no_exclusions() {
1084        let rules = Vec::<(String, _)>::new();
1085        let rule = setup_rule(rules);
1086
1087        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo bar test.");
1088        let result = rule.check(
1089            get_ast(&parse_result),
1090            &get_context(&parse_result),
1091            LintLevel::Error,
1092        );
1093        assert!(result.is_none());
1094    }
1095
1096    #[test]
1097    fn test_rule004_recover_false_longer_overlap() {
1098        let rules = vec![
1099            (
1100                "Foo bartender",
1101                WordExclusionMetaIntermediate {
1102                    description: "Don't use 'Foo bartender'".to_string(),
1103                    case_sensitive: true,
1104                    words: vec![ExclusionDefinition::ExcludeOnly(
1105                        "Foo bartender".to_string(),
1106                    )],
1107                    level: LintLevel::Error,
1108                },
1109            ),
1110            (
1111                "Foo bartender blah whaaaat",
1112                WordExclusionMetaIntermediate {
1113                    description: "Don't use 'Foo bartender blah whaaaat'".to_string(),
1114                    case_sensitive: true,
1115                    words: vec![ExclusionDefinition::ExcludeOnly(
1116                        "Foo bartender blah whaaaat".to_string(),
1117                    )],
1118                    level: LintLevel::Error,
1119                },
1120            ),
1121        ];
1122        let rule = setup_rule(rules);
1123
1124        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo bartender blah.");
1125        let result = rule.check(
1126            get_ast(&parse_result),
1127            &get_context(&parse_result),
1128            LintLevel::Error,
1129        );
1130        assert!(result.is_some());
1131
1132        let errors = result.unwrap();
1133        assert_eq!(errors.len(), 1);
1134
1135        let error = errors.get(0).unwrap();
1136        assert_eq!(error.message, "Don't use 'Foo bartender'");
1137        assert_eq!(error.level, LintLevel::Error);
1138        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
1139        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(23));
1140    }
1141
1142    #[test]
1143    fn test_rule004_no_matching_exclusions() {
1144        let rules = vec![(
1145            "Foo",
1146            WordExclusionMetaIntermediate {
1147                description: "Don't use 'Foo'".to_string(),
1148                case_sensitive: true,
1149                words: vec![ExclusionDefinition::ExcludeOnly("Foo".to_string())],
1150                level: LintLevel::Error,
1151            },
1152        )];
1153        let rule = setup_rule(rules);
1154
1155        let (parse_result, get_ast, get_context) = get_simple_ast("This is a passing test.");
1156        let result = rule.check(
1157            get_ast(&parse_result),
1158            &get_context(&parse_result),
1159            LintLevel::Error,
1160        );
1161        assert!(result.is_none());
1162    }
1163
1164    #[test]
1165    fn test_rule004_case_insensitive() {
1166        let rules = vec![(
1167            "foo",
1168            WordExclusionMetaIntermediate {
1169                description: "Don't use 'foo'".to_string(),
1170                case_sensitive: false,
1171                words: vec![ExclusionDefinition::ExcludeOnly("foo".to_string())],
1172                level: LintLevel::Error,
1173            },
1174        )];
1175        let rule = setup_rule(rules);
1176
1177        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo test.");
1178        let result = rule.check(
1179            get_ast(&parse_result),
1180            &get_context(&parse_result),
1181            LintLevel::Error,
1182        );
1183        assert!(result.is_some());
1184
1185        let errors = result.unwrap();
1186        assert_eq!(errors.len(), 1);
1187
1188        let error = errors.get(0).unwrap();
1189        assert_eq!(error.message, "Don't use 'foo'");
1190        assert_eq!(error.level, LintLevel::Error);
1191        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
1192        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(13));
1193    }
1194
1195    #[test]
1196    fn test_rule004_lint_level() {
1197        let rules = vec![(
1198            "foo",
1199            WordExclusionMetaIntermediate {
1200                description: "Don't use 'foo'".to_string(),
1201                case_sensitive: false,
1202                words: vec![ExclusionDefinition::ExcludeOnly("foo".to_string())],
1203                level: LintLevel::Warning,
1204            },
1205        )];
1206        let rule = setup_rule(rules);
1207
1208        let (parse_result, get_ast, get_context) = get_simple_ast("This is a Foo test.");
1209        let result = rule.check(
1210            get_ast(&parse_result),
1211            &get_context(&parse_result),
1212            LintLevel::Error,
1213        );
1214        assert!(result.is_some());
1215
1216        let errors = result.unwrap();
1217        assert_eq!(errors.len(), 1);
1218
1219        let error = errors.get(0).unwrap();
1220        assert_eq!(error.message, "Don't use 'foo'");
1221        assert_eq!(error.level, LintLevel::Warning);
1222        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(10));
1223        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(13));
1224    }
1225
1226    #[test]
1227    fn test_rule_004_exclusion_with_apostrophe() {
1228        let rules = vec![(
1229            "blah",
1230            WordExclusionMetaIntermediate {
1231                description: "blah blah blah".to_string(),
1232                case_sensitive: false,
1233                words: vec![ExclusionDefinition::ExcludeOnly("that's it".to_string())],
1234                level: LintLevel::Error,
1235            },
1236        )];
1237        let rule = setup_rule(rules);
1238
1239        let (parse_result, get_ast, get_context) = get_simple_ast("That's it, Bob's your uncle.");
1240        let result = rule.check(
1241            get_ast(&parse_result),
1242            &get_context(&parse_result),
1243            LintLevel::Error,
1244        );
1245        assert!(result.is_some());
1246
1247        let errors = result.unwrap();
1248        assert_eq!(errors.len(), 1);
1249
1250        let error = errors.get(0).unwrap();
1251        assert_eq!(error.message, "blah blah blah");
1252        assert_eq!(error.level, LintLevel::Error);
1253        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(0));
1254        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(9));
1255    }
1256
1257    #[test]
1258    fn test_rule_004_exclusion_with_other_punctuation() {
1259        let rules = vec![(
1260            "blah blah",
1261            WordExclusionMetaIntermediate {
1262                description: "This isn't Reddit.".to_string(),
1263                case_sensitive: false,
1264                words: vec![ExclusionDefinition::ExcludeOnly("tl;dr".to_string())],
1265                level: LintLevel::Error,
1266            },
1267        )];
1268        let rule = setup_rule(rules);
1269
1270        let (parse_result, get_ast, get_context) = get_simple_ast("tl;dr: Just do the thing.");
1271        let result = rule.check(
1272            get_ast(&parse_result),
1273            &get_context(&parse_result),
1274            LintLevel::Error,
1275        );
1276        assert!(result.is_some());
1277
1278        let errors = result.unwrap();
1279        assert_eq!(errors.len(), 1);
1280
1281        let error = errors.get(0).unwrap();
1282        assert_eq!(error.message, "This isn't Reddit.");
1283        assert_eq!(error.level, LintLevel::Error);
1284        assert_eq!(error.location.offset_range.start, AdjustedOffset::from(0));
1285        assert_eq!(error.location.offset_range.end, AdjustedOffset::from(5));
1286    }
1287
1288    #[test]
1289    fn test_rule_004_formatted_message() {
1290        let rules = vec![(
1291            "something",
1292            WordExclusionMetaIntermediate {
1293                description: "Don't use %s".to_string(),
1294                case_sensitive: false,
1295                words: vec![ExclusionDefinition::ExcludeOnly("ladeeda".to_string())],
1296                level: LintLevel::Error,
1297            },
1298        )];
1299        let rule = setup_rule(rules);
1300
1301        let (parse_result, get_ast, get_context) = get_simple_ast("Well, ladeeda.");
1302        let result = rule.check(
1303            get_ast(&parse_result),
1304            &get_context(&parse_result),
1305            LintLevel::Error,
1306        );
1307        assert!(result.is_some());
1308
1309        let errors = result.unwrap();
1310        assert_eq!(errors.len(), 1);
1311
1312        let error = errors.get(0).unwrap();
1313        assert_eq!(error.message, "Don't use ladeeda");
1314    }
1315
1316    #[test]
1317    fn test_rule_004_formatted_message_with_escape() {
1318        let rules = vec![(
1319            "something",
1320            WordExclusionMetaIntermediate {
1321                description: "Don't use %%s".to_string(),
1322                case_sensitive: false,
1323                words: vec![ExclusionDefinition::ExcludeOnly("ladeeda".to_string())],
1324                level: LintLevel::Error,
1325            },
1326        )];
1327        let rule = setup_rule(rules);
1328
1329        let (parse_result, get_ast, get_context) = get_simple_ast("Well, ladeeda.");
1330        let result = rule.check(
1331            get_ast(&parse_result),
1332            &get_context(&parse_result),
1333            LintLevel::Error,
1334        );
1335        assert!(result.is_some());
1336
1337        let errors = result.unwrap();
1338        assert_eq!(errors.len(), 1);
1339
1340        let error = errors.get(0).unwrap();
1341        assert_eq!(error.message, "Don't use %%s");
1342    }
1343
1344    #[test]
1345    fn test_rule_004_formatted_message_with_replacement() {
1346        let rules = vec![(
1347            "something",
1348            WordExclusionMetaIntermediate {
1349                description: "Use %r instead of %s".to_string(),
1350                case_sensitive: false,
1351                words: vec![ExclusionDefinition::WithReplace(
1352                    "PostgreSQL".to_string(),
1353                    "Postgres".to_string(),
1354                )],
1355                level: LintLevel::Error,
1356            },
1357        )];
1358        let rule = setup_rule(rules);
1359
1360        let (parse_result, get_ast, get_context) = get_simple_ast("PostgreSQL is awesome!");
1361        let result = rule.check(
1362            get_ast(&parse_result),
1363            &get_context(&parse_result),
1364            LintLevel::Error,
1365        );
1366        assert!(result.is_some());
1367
1368        let errors = result.unwrap();
1369        assert_eq!(errors.len(), 1);
1370
1371        let error = errors.get(0).unwrap();
1372        assert_eq!(error.message, "Use Postgres instead of PostgreSQL");
1373    }
1374
1375    #[test]
1376    fn test_rule_004_delete_at_beginning() {
1377        let rules = vec![(
1378            "yeah",
1379            WordExclusionMetaIntermediate {
1380                description: "Don't use yeah".to_string(),
1381                case_sensitive: false,
1382                words: vec![ExclusionDefinition::ExcludeOnly("Yeah".to_string())],
1383                level: LintLevel::Error,
1384            },
1385        )];
1386        let rule = setup_rule(rules);
1387
1388        let (parse_result, get_ast, get_context) = get_simple_ast("Yeah this is awesome!");
1389        let result = rule.check(
1390            get_ast(&parse_result),
1391            &get_context(&parse_result),
1392            LintLevel::Error,
1393        );
1394        assert!(result.is_some());
1395
1396        let errors = result.unwrap();
1397        assert_eq!(errors.len(), 1);
1398
1399        let error = errors.get(0).unwrap();
1400        let suggestion = error.suggestions.as_ref().unwrap().get(0).unwrap();
1401        match suggestion {
1402            LintCorrection::Replace(replace) => {
1403                assert_eq!(replace.location.offset_range.start, AdjustedOffset::from(0));
1404                assert_eq!(replace.text(), "T".to_string());
1405            }
1406            other => panic!("Should have been a replacement, got: {other:#?}"),
1407        }
1408    }
1409}