blob: 500fc80d23e107651d8075603a343e4c13629a64
1 | #include <linux/module.h> |
2 | #include <linux/glob.h> |
3 | |
4 | /* |
5 | * The only reason this code can be compiled as a module is because the |
6 | * ATA code that depends on it can be as well. In practice, they're |
7 | * both usually compiled in and the module overhead goes away. |
8 | */ |
9 | MODULE_DESCRIPTION("glob(7) matching"); |
10 | MODULE_LICENSE("Dual MIT/GPL"); |
11 | |
12 | /** |
13 | * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0) |
14 | * @pat: Shell-style pattern to match, e.g. "*.[ch]". |
15 | * @str: String to match. The pattern must match the entire string. |
16 | * |
17 | * Perform shell-style glob matching, returning true (1) if the match |
18 | * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0). |
19 | * |
20 | * Pattern metacharacters are ?, *, [ and \. |
21 | * (And, inside character classes, !, - and ].) |
22 | * |
23 | * This is small and simple implementation intended for device blacklists |
24 | * where a string is matched against a number of patterns. Thus, it |
25 | * does not preprocess the patterns. It is non-recursive, and run-time |
26 | * is at most quadratic: strlen(@str)*strlen(@pat). |
27 | * |
28 | * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa"); |
29 | * it takes 6 passes over the pattern before matching the string. |
30 | * |
31 | * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT |
32 | * treat / or leading . specially; it isn't actually used for pathnames. |
33 | * |
34 | * Note that according to glob(7) (and unlike bash), character classes |
35 | * are complemented by a leading !; this does not support the regex-style |
36 | * [^a-z] syntax. |
37 | * |
38 | * An opening bracket without a matching close is matched literally. |
39 | */ |
40 | bool __pure glob_match(char const *pat, char const *str) |
41 | { |
42 | /* |
43 | * Backtrack to previous * on mismatch and retry starting one |
44 | * character later in the string. Because * matches all characters |
45 | * (no exception for /), it can be easily proved that there's |
46 | * never a need to backtrack multiple levels. |
47 | */ |
48 | char const *back_pat = NULL, *back_str = back_str; |
49 | |
50 | /* |
51 | * Loop over each token (character or class) in pat, matching |
52 | * it against the remaining unmatched tail of str. Return false |
53 | * on mismatch, or true after matching the trailing nul bytes. |
54 | */ |
55 | for (;;) { |
56 | unsigned char c = *str++; |
57 | unsigned char d = *pat++; |
58 | |
59 | switch (d) { |
60 | case '?': /* Wildcard: anything but nul */ |
61 | if (c == '\0') |
62 | return false; |
63 | break; |
64 | case '*': /* Any-length wildcard */ |
65 | if (*pat == '\0') /* Optimize trailing * case */ |
66 | return true; |
67 | back_pat = pat; |
68 | back_str = --str; /* Allow zero-length match */ |
69 | break; |
70 | case '[': { /* Character class */ |
71 | bool match = false, inverted = (*pat == '!'); |
72 | char const *class = pat + inverted; |
73 | unsigned char a = *class++; |
74 | |
75 | /* |
76 | * Iterate over each span in the character class. |
77 | * A span is either a single character a, or a |
78 | * range a-b. The first span may begin with ']'. |
79 | */ |
80 | do { |
81 | unsigned char b = a; |
82 | |
83 | if (a == '\0') /* Malformed */ |
84 | goto literal; |
85 | |
86 | if (class[0] == '-' && class[1] != ']') { |
87 | b = class[1]; |
88 | |
89 | if (b == '\0') |
90 | goto literal; |
91 | |
92 | class += 2; |
93 | /* Any special action if a > b? */ |
94 | } |
95 | match |= (a <= c && c <= b); |
96 | } while ((a = *class++) != ']'); |
97 | |
98 | if (match == inverted) |
99 | goto backtrack; |
100 | pat = class; |
101 | } |
102 | break; |
103 | case '\\': |
104 | d = *pat++; |
105 | /*FALLTHROUGH*/ |
106 | default: /* Literal character */ |
107 | literal: |
108 | if (c == d) { |
109 | if (d == '\0') |
110 | return true; |
111 | break; |
112 | } |
113 | backtrack: |
114 | if (c == '\0' || !back_pat) |
115 | return false; /* No point continuing */ |
116 | /* Try again from last *, one character later in str. */ |
117 | pat = back_pat; |
118 | str = ++back_str; |
119 | break; |
120 | } |
121 | } |
122 | } |
123 | EXPORT_SYMBOL(glob_match); |
124 | |
125 | |
126 | #ifdef CONFIG_GLOB_SELFTEST |
127 | |
128 | #include <linux/printk.h> |
129 | #include <linux/moduleparam.h> |
130 | |
131 | /* Boot with "glob.verbose=1" to show successful tests, too */ |
132 | static bool verbose = false; |
133 | module_param(verbose, bool, 0); |
134 | |
135 | struct glob_test { |
136 | char const *pat, *str; |
137 | bool expected; |
138 | }; |
139 | |
140 | static bool __pure __init test(char const *pat, char const *str, bool expected) |
141 | { |
142 | bool match = glob_match(pat, str); |
143 | bool success = match == expected; |
144 | |
145 | /* Can't get string literals into a particular section, so... */ |
146 | static char const msg_error[] __initconst = |
147 | KERN_ERR "glob: \"%s\" vs. \"%s\": %s *** ERROR ***\n"; |
148 | static char const msg_ok[] __initconst = |
149 | KERN_DEBUG "glob: \"%s\" vs. \"%s\": %s OK\n"; |
150 | static char const mismatch[] __initconst = "mismatch"; |
151 | char const *message; |
152 | |
153 | if (!success) |
154 | message = msg_error; |
155 | else if (verbose) |
156 | message = msg_ok; |
157 | else |
158 | return success; |
159 | |
160 | printk(message, pat, str, mismatch + 3*match); |
161 | return success; |
162 | } |
163 | |
164 | /* |
165 | * The tests are all jammed together in one array to make it simpler |
166 | * to place that array in the .init.rodata section. The obvious |
167 | * "array of structures containing char *" has no way to force the |
168 | * pointed-to strings to be in a particular section. |
169 | * |
170 | * Anyway, a test consists of: |
171 | * 1. Expected glob_match result: '1' or '0'. |
172 | * 2. Pattern to match: null-terminated string |
173 | * 3. String to match against: null-terminated string |
174 | * |
175 | * The list of tests is terminated with a final '\0' instead of |
176 | * a glob_match result character. |
177 | */ |
178 | static char const glob_tests[] __initconst = |
179 | /* Some basic tests */ |
180 | "1" "a\0" "a\0" |
181 | "0" "a\0" "b\0" |
182 | "0" "a\0" "aa\0" |
183 | "0" "a\0" "\0" |
184 | "1" "\0" "\0" |
185 | "0" "\0" "a\0" |
186 | /* Simple character class tests */ |
187 | "1" "[a]\0" "a\0" |
188 | "0" "[a]\0" "b\0" |
189 | "0" "[!a]\0" "a\0" |
190 | "1" "[!a]\0" "b\0" |
191 | "1" "[ab]\0" "a\0" |
192 | "1" "[ab]\0" "b\0" |
193 | "0" "[ab]\0" "c\0" |
194 | "1" "[!ab]\0" "c\0" |
195 | "1" "[a-c]\0" "b\0" |
196 | "0" "[a-c]\0" "d\0" |
197 | /* Corner cases in character class parsing */ |
198 | "1" "[a-c-e-g]\0" "-\0" |
199 | "0" "[a-c-e-g]\0" "d\0" |
200 | "1" "[a-c-e-g]\0" "f\0" |
201 | "1" "[]a-ceg-ik[]\0" "a\0" |
202 | "1" "[]a-ceg-ik[]\0" "]\0" |
203 | "1" "[]a-ceg-ik[]\0" "[\0" |
204 | "1" "[]a-ceg-ik[]\0" "h\0" |
205 | "0" "[]a-ceg-ik[]\0" "f\0" |
206 | "0" "[!]a-ceg-ik[]\0" "h\0" |
207 | "0" "[!]a-ceg-ik[]\0" "]\0" |
208 | "1" "[!]a-ceg-ik[]\0" "f\0" |
209 | /* Simple wild cards */ |
210 | "1" "?\0" "a\0" |
211 | "0" "?\0" "aa\0" |
212 | "0" "??\0" "a\0" |
213 | "1" "?x?\0" "axb\0" |
214 | "0" "?x?\0" "abx\0" |
215 | "0" "?x?\0" "xab\0" |
216 | /* Asterisk wild cards (backtracking) */ |
217 | "0" "*??\0" "a\0" |
218 | "1" "*??\0" "ab\0" |
219 | "1" "*??\0" "abc\0" |
220 | "1" "*??\0" "abcd\0" |
221 | "0" "??*\0" "a\0" |
222 | "1" "??*\0" "ab\0" |
223 | "1" "??*\0" "abc\0" |
224 | "1" "??*\0" "abcd\0" |
225 | "0" "?*?\0" "a\0" |
226 | "1" "?*?\0" "ab\0" |
227 | "1" "?*?\0" "abc\0" |
228 | "1" "?*?\0" "abcd\0" |
229 | "1" "*b\0" "b\0" |
230 | "1" "*b\0" "ab\0" |
231 | "0" "*b\0" "ba\0" |
232 | "1" "*b\0" "bb\0" |
233 | "1" "*b\0" "abb\0" |
234 | "1" "*b\0" "bab\0" |
235 | "1" "*bc\0" "abbc\0" |
236 | "1" "*bc\0" "bc\0" |
237 | "1" "*bc\0" "bbc\0" |
238 | "1" "*bc\0" "bcbc\0" |
239 | /* Multiple asterisks (complex backtracking) */ |
240 | "1" "*ac*\0" "abacadaeafag\0" |
241 | "1" "*ac*ae*ag*\0" "abacadaeafag\0" |
242 | "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0" |
243 | "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0" |
244 | "1" "*abcd*\0" "abcabcabcabcdefg\0" |
245 | "1" "*ab*cd*\0" "abcabcabcabcdefg\0" |
246 | "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0" |
247 | "0" "*abcd*\0" "abcabcabcabcefg\0" |
248 | "0" "*ab*cd*\0" "abcabcabcabcefg\0"; |
249 | |
250 | static int __init glob_init(void) |
251 | { |
252 | unsigned successes = 0; |
253 | unsigned n = 0; |
254 | char const *p = glob_tests; |
255 | static char const message[] __initconst = |
256 | KERN_INFO "glob: %u self-tests passed, %u failed\n"; |
257 | |
258 | /* |
259 | * Tests are jammed together in a string. The first byte is '1' |
260 | * or '0' to indicate the expected outcome, or '\0' to indicate the |
261 | * end of the tests. Then come two null-terminated strings: the |
262 | * pattern and the string to match it against. |
263 | */ |
264 | while (*p) { |
265 | bool expected = *p++ & 1; |
266 | char const *pat = p; |
267 | |
268 | p += strlen(p) + 1; |
269 | successes += test(pat, p, expected); |
270 | p += strlen(p) + 1; |
271 | n++; |
272 | } |
273 | |
274 | n -= successes; |
275 | printk(message, successes, n); |
276 | |
277 | /* What's the errno for "kernel bug detected"? Guess... */ |
278 | return n ? -ECANCELED : 0; |
279 | } |
280 | |
281 | /* We need a dummy exit function to allow unload */ |
282 | static void __exit glob_fini(void) { } |
283 | |
284 | module_init(glob_init); |
285 | module_exit(glob_fini); |
286 | |
287 | #endif /* CONFIG_GLOB_SELFTEST */ |
288 |