src/pqc_lint/patterns/base.py
2.4 KB · 75 lines · python Raw
1 """Base pattern matcher interface."""
2
3 from __future__ import annotations
4
5 import re
6 from abc import ABC
7 from dataclasses import dataclass
8 from typing import Iterable, Pattern
9
10 from pqc_lint.findings import Finding
11 from pqc_lint.rules import Rule
12
13
@dataclass(frozen=True)
class PatternSpec:
    """Immutable pairing of a compiled regex with the rule it detects.

    Fields are declared in the order ``rule_id``, ``regex``, ``description``,
    which is also the positional order of the generated ``__init__``.
    """

    # Identifier used to look the rule up when the pattern is scanned.
    rule_id: str
    # Compiled regular expression that is searched for in file content.
    regex: Pattern[str]
    # Human-readable label for this pattern.
    description: str
20
21
class PatternMatcher(ABC):
    """Base class for per-language pattern matchers.

    The class attributes default to empty values, so a matcher that does not
    override them matches no file and yields no findings.
    """

    # Language label copied into every Finding produced by scan().
    language: str = ""
    # File-name suffixes (e.g. ".py") that matches_file() accepts.
    file_extensions: tuple[str, ...] = ()
    # Detection patterns that scan() runs over file content.
    patterns: tuple[PatternSpec, ...] = ()

    def matches_file(self, path: str) -> bool:
        """Return True if `path` ends with one of `file_extensions`.

        Both the path and the declared extensions are lowercased before
        comparing, so an extension declared with uppercase letters (for
        example ".R") still matches.
        """
        suffixes = tuple(ext.lower() for ext in self.file_extensions)
        # str.endswith accepts a tuple and returns False for an empty one.
        return path.lower().endswith(suffixes)

    def scan(self, file_path: str, content: str, rules: dict[str, Rule]) -> Iterable[Finding]:
        """Yield Findings for every pattern hit in `content`.

        Args:
            file_path: Path recorded on each Finding; it is not opened here.
            content: Full text to search.
            rules: Mapping from rule id to Rule. Patterns whose `rule_id` is
                not a key of this mapping are skipped.

        Yields:
            One Finding per regex match, in pattern order and then match
            order. `line` and `column` are 1-based and refer to the start of
            the match; `snippet` is the stripped source line containing the
            match start, truncated to 200 characters.
        """
        # Function-scope import so this block does not depend on a change to
        # the module's import section.
        from bisect import bisect_right

        # Offset at which each "\n"-delimited line begins. Built once so every
        # match is located by binary search instead of slicing the prefix and
        # recounting its newlines for each match.
        lines = content.split("\n")
        line_starts: list[int] = []
        offset = 0
        for text in lines:
            line_starts.append(offset)
            offset += len(text) + 1  # +1 for the "\n" removed by split()

        for spec in self.patterns:
            if spec.rule_id not in rules:
                continue
            rule = rules[spec.rule_id]
            for m in spec.regex.finditer(content):
                start = m.start()
                # Index of the last line that begins at or before `start`.
                line_idx = bisect_right(line_starts, start) - 1

                yield Finding(
                    rule_id=rule.id,
                    severity=rule.severity,
                    message=f"{rule.title}: {rule.message}",
                    file=file_path,
                    line=line_idx + 1,
                    column=start - line_starts[line_idx] + 1,
                    snippet=lines[line_idx].strip()[:200],
                    suggestion=rule.suggestion,
                    cwe=rule.cwe,
                    language=self.language,
                )
63
64
def compile_patterns(specs: list[tuple[str, str]]) -> tuple[PatternSpec, ...]:
    """Build PatternSpec entries from (rule_id, regex_source) pairs.

    Each regex source is compiled with ``re.MULTILINE`` and the description is
    set to ``"rule <rule_id>"``. The output preserves the input order.
    """
    return tuple(
        PatternSpec(
            rule_id=rule_id,
            regex=re.compile(source, re.MULTILINE),
            description=f"rule {rule_id}",
        )
        for rule_id, source in specs
    )
75