blob: 128775b8b1239468505d609bbfea89fda5f0f776
1 | /* vi: set sw=4 ts=4: */ |
2 | /* |
3 | * Mini expr implementation for busybox |
4 | * |
 * Based on GNU expr by Mike Parker.
6 | * Copyright (C) 86, 1991-1997, 1999 Free Software Foundation, Inc. |
7 | * |
8 | * Busybox modifications |
9 | * Copyright (c) 2000 Edward Betts <edward@debian.org>. |
10 | * Copyright (C) 2003-2005 Vladimir Oleynik <dzo@simtreas.ru> |
11 | * - reduced 464 bytes. |
 * - 64-bit math support
13 | * |
14 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. |
15 | */ |
16 | |
17 | /* This program evaluates expressions. Each token (operator, operand, |
18 | * parenthesis) of the expression must be a separate argument. The |
19 | * parser used is a reasonably general one, though any incarnation of |
20 | * it is language-specific. It is especially nice for expressions. |
21 | * |
22 | * No parse tree is needed; a new node is evaluated immediately. |
23 | * One function can handle multiple operators all of equal precedence, |
24 | * provided they all associate ((x op x) op x). */ |
25 | |
26 | /* no getopt needed */ |
27 | |
28 | //usage:#define expr_trivial_usage |
29 | //usage: "EXPRESSION" |
30 | //usage:#define expr_full_usage "\n\n" |
31 | //usage: "Print the value of EXPRESSION to stdout\n" |
32 | //usage: "\n" |
33 | //usage: "EXPRESSION may be:\n" |
34 | //usage: " ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n" |
35 | //usage: " ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n" |
36 | //usage: " ARG1 < ARG2 1 if ARG1 is less than ARG2, else 0. Similarly:\n" |
37 | //usage: " ARG1 <= ARG2\n" |
38 | //usage: " ARG1 = ARG2\n" |
39 | //usage: " ARG1 != ARG2\n" |
40 | //usage: " ARG1 >= ARG2\n" |
41 | //usage: " ARG1 > ARG2\n" |
42 | //usage: " ARG1 + ARG2 Sum of ARG1 and ARG2. Similarly:\n" |
43 | //usage: " ARG1 - ARG2\n" |
44 | //usage: " ARG1 * ARG2\n" |
45 | //usage: " ARG1 / ARG2\n" |
46 | //usage: " ARG1 % ARG2\n" |
47 | //usage: " STRING : REGEXP Anchored pattern match of REGEXP in STRING\n" |
48 | //usage: " match STRING REGEXP Same as STRING : REGEXP\n" |
49 | //usage: " substr STRING POS LENGTH Substring of STRING, POS counted from 1\n" |
50 | //usage: " index STRING CHARS Index in STRING where any CHARS is found, or 0\n" |
51 | //usage: " length STRING Length of STRING\n" |
52 | //usage: " quote TOKEN Interpret TOKEN as a string, even if\n" |
53 | //usage: " it is a keyword like 'match' or an\n" |
54 | //usage: " operator like '/'\n" |
55 | //usage: " (EXPRESSION) Value of EXPRESSION\n" |
56 | //usage: "\n" |
57 | //usage: "Beware that many operators need to be escaped or quoted for shells.\n" |
58 | //usage: "Comparisons are arithmetic if both ARGs are numbers, else\n" |
59 | //usage: "lexicographical. Pattern matches return the string matched between\n" |
60 | //usage: "\\( and \\) or null; if \\( and \\) are not used, they return the number\n" |
61 | //usage: "of characters matched or 0." |
62 | |
63 | #include "libbb.h" |
64 | #include "xregex.h" |
65 | |
/* Pick the integer type used for every computation, together with the
 * pieces that have to agree with it:
 *   PF_REZ       printf length modifier, used as "%" PF_REZ "d"
 *   PF_REZ_TYPE  cast applied to the printf argument
 *   STRTOL       string-to-integer parser returning that width
 */
#if ENABLE_EXPR_MATH_SUPPORT_64
# define PF_REZ "ll"
# define PF_REZ_TYPE (long long)
# define STRTOL(s, e, b) strtoll(s, e, b)
typedef int64_t arith_t;
#else
# define PF_REZ "l"
# define PF_REZ_TYPE (long)
# define STRTOL(s, e, b) strtol(s, e, b)
typedef long arith_t;
#endif
79 | |
80 | /* TODO: use bb_strtol[l]? It's easier to check for errors... */ |
81 | |
/* Type tags stored in a VALUE.  The numeric values matter: the VALUE
 * constructors skip storing a tag that is 0, because the freshly
 * zeroed allocation already holds it. */
enum {
	INTEGER = 0,
	STRING = 1
};
87 | |
88 | /* A value is.... */ |
89 | struct valinfo { |
90 | smallint type; /* Which kind. */ |
91 | union { /* The value itself. */ |
92 | arith_t i; |
93 | char *s; |
94 | } u; |
95 | }; |
96 | typedef struct valinfo VALUE; |
97 | |
/* Parser state.  args points at the next unconsumed command-line token;
 * the array starts just past the program name and ends with a NULL
 * entry, which the parser treats as "no more tokens". */
struct globals {
	char **args;
} FIX_ALIASING;
/* G overlays struct globals on the bb_common_bufsiz1 buffer. */
#define G (*(struct globals*)&bb_common_bufsiz1)
/* No setup is required here; expr_main() assigns G.args itself. */
#define INIT_G() do { } while (0)
104 | |
/* Forward declaration: eval7() calls eval() for a parenthesised
 * sub-expression, but eval() is defined last in the precedence chain. */
static VALUE *eval(void);
107 | |
108 | |
109 | /* Return a VALUE for I. */ |
110 | |
111 | static VALUE *int_value(arith_t i) |
112 | { |
113 | VALUE *v; |
114 | |
115 | v = xzalloc(sizeof(VALUE)); |
116 | if (INTEGER) /* otherwise xzaaloc did it already */ |
117 | v->type = INTEGER; |
118 | v->u.i = i; |
119 | return v; |
120 | } |
121 | |
122 | /* Return a VALUE for S. */ |
123 | |
124 | static VALUE *str_value(const char *s) |
125 | { |
126 | VALUE *v; |
127 | |
128 | v = xzalloc(sizeof(VALUE)); |
129 | if (STRING) /* otherwise xzaaloc did it already */ |
130 | v->type = STRING; |
131 | v->u.s = xstrdup(s); |
132 | return v; |
133 | } |
134 | |
135 | /* Free VALUE V, including structure components. */ |
136 | |
137 | static void freev(VALUE *v) |
138 | { |
139 | if (v->type == STRING) |
140 | free(v->u.s); |
141 | free(v); |
142 | } |
143 | |
144 | /* Return nonzero if V is a null-string or zero-number. */ |
145 | |
146 | static int null(VALUE *v) |
147 | { |
148 | if (v->type == INTEGER) |
149 | return v->u.i == 0; |
150 | /* STRING: */ |
151 | return v->u.s[0] == '\0' || LONE_CHAR(v->u.s, '0'); |
152 | } |
153 | |
154 | /* Coerce V to a STRING value (can't fail). */ |
155 | |
156 | static void tostring(VALUE *v) |
157 | { |
158 | if (v->type == INTEGER) { |
159 | v->u.s = xasprintf("%" PF_REZ "d", PF_REZ_TYPE v->u.i); |
160 | v->type = STRING; |
161 | } |
162 | } |
163 | |
164 | /* Coerce V to an INTEGER value. Return 1 on success, 0 on failure. */ |
165 | |
166 | static bool toarith(VALUE *v) |
167 | { |
168 | if (v->type == STRING) { |
169 | arith_t i; |
170 | char *e; |
171 | |
172 | /* Don't interpret the empty string as an integer. */ |
173 | /* Currently does not worry about overflow or int/long differences. */ |
174 | i = STRTOL(v->u.s, &e, 10); |
175 | if ((v->u.s == e) || *e) |
176 | return 0; |
177 | free(v->u.s); |
178 | v->u.i = i; |
179 | v->type = INTEGER; |
180 | } |
181 | return 1; |
182 | } |
183 | |
184 | /* Return str[0]+str[1] if the next token matches STR exactly. |
185 | STR must not be NULL. */ |
186 | |
187 | static int nextarg(const char *str) |
188 | { |
189 | if (*G.args == NULL || strcmp(*G.args, str) != 0) |
190 | return 0; |
191 | return (unsigned char)str[0] + (unsigned char)str[1]; |
192 | } |
193 | |
194 | /* The comparison operator handling functions. */ |
195 | |
196 | static int cmp_common(VALUE *l, VALUE *r, int op) |
197 | { |
198 | arith_t ll, rr; |
199 | |
200 | ll = l->u.i; |
201 | rr = r->u.i; |
202 | if (l->type == STRING || r->type == STRING) { |
203 | tostring(l); |
204 | tostring(r); |
205 | ll = strcmp(l->u.s, r->u.s); |
206 | rr = 0; |
207 | } |
208 | /* calculating ll - rr and checking the result is prone to overflows. |
209 | * We'll do it differently: */ |
210 | if (op == '<') |
211 | return ll < rr; |
212 | if (op == ('<' + '=')) |
213 | return ll <= rr; |
214 | if (op == '=' || (op == '=' + '=')) |
215 | return ll == rr; |
216 | if (op == '!' + '=') |
217 | return ll != rr; |
218 | if (op == '>') |
219 | return ll > rr; |
220 | /* >= */ |
221 | return ll >= rr; |
222 | } |
223 | |
224 | /* The arithmetic operator handling functions. */ |
225 | |
226 | static arith_t arithmetic_common(VALUE *l, VALUE *r, int op) |
227 | { |
228 | arith_t li, ri; |
229 | |
230 | if (!toarith(l) || !toarith(r)) |
231 | bb_error_msg_and_die("non-numeric argument"); |
232 | li = l->u.i; |
233 | ri = r->u.i; |
234 | if (op == '+') |
235 | return li + ri; |
236 | if (op == '-') |
237 | return li - ri; |
238 | if (op == '*') |
239 | return li * ri; |
240 | if (ri == 0) |
241 | bb_error_msg_and_die("division by zero"); |
242 | if (op == '/') |
243 | return li / ri; |
244 | return li % ri; |
245 | } |
246 | |
247 | /* Do the : operator. |
248 | SV is the VALUE for the lhs (the string), |
249 | PV is the VALUE for the rhs (the pattern). */ |
250 | |
251 | static VALUE *docolon(VALUE *sv, VALUE *pv) |
252 | { |
253 | enum { NMATCH = 2 }; |
254 | VALUE *v; |
255 | regex_t re_buffer; |
256 | regmatch_t re_regs[NMATCH]; |
257 | |
258 | tostring(sv); |
259 | tostring(pv); |
260 | |
261 | if (pv->u.s[0] == '^') { |
262 | bb_error_msg( |
263 | "warning: '%s': using '^' as the first character\n" |
264 | "of a basic regular expression is not portable; it is ignored", pv->u.s); |
265 | } |
266 | |
267 | memset(&re_buffer, 0, sizeof(re_buffer)); |
268 | memset(re_regs, 0, sizeof(re_regs)); |
269 | xregcomp(&re_buffer, pv->u.s, 0); |
270 | |
271 | /* expr uses an anchored pattern match, so check that there was a |
272 | * match and that the match starts at offset 0. */ |
273 | if (regexec(&re_buffer, sv->u.s, NMATCH, re_regs, 0) != REG_NOMATCH |
274 | && re_regs[0].rm_so == 0 |
275 | ) { |
276 | /* Were \(...\) used? */ |
277 | if (re_buffer.re_nsub > 0 && re_regs[1].rm_so >= 0) { |
278 | sv->u.s[re_regs[1].rm_eo] = '\0'; |
279 | v = str_value(sv->u.s + re_regs[1].rm_so); |
280 | } else { |
281 | v = int_value(re_regs[0].rm_eo); |
282 | } |
283 | } else { |
284 | /* Match failed -- return the right kind of null. */ |
285 | if (re_buffer.re_nsub > 0) |
286 | v = str_value(""); |
287 | else |
288 | v = int_value(0); |
289 | } |
290 | regfree(&re_buffer); |
291 | return v; |
292 | } |
293 | |
294 | /* Handle bare operands and ( expr ) syntax. */ |
295 | |
296 | static VALUE *eval7(void) |
297 | { |
298 | VALUE *v; |
299 | |
300 | if (!*G.args) |
301 | bb_error_msg_and_die("syntax error"); |
302 | |
303 | if (nextarg("(")) { |
304 | G.args++; |
305 | v = eval(); |
306 | if (!nextarg(")")) |
307 | bb_error_msg_and_die("syntax error"); |
308 | G.args++; |
309 | return v; |
310 | } |
311 | |
312 | if (nextarg(")")) |
313 | bb_error_msg_and_die("syntax error"); |
314 | |
315 | return str_value(*G.args++); |
316 | } |
317 | |
318 | /* Handle match, substr, index, length, and quote keywords. */ |
319 | |
320 | static VALUE *eval6(void) |
321 | { |
322 | static const char keywords[] ALIGN1 = |
323 | "quote\0""length\0""match\0""index\0""substr\0"; |
324 | |
325 | VALUE *r, *i1, *i2; |
326 | static VALUE *l = NULL; |
327 | static VALUE *v = NULL; |
328 | int key = *G.args ? index_in_strings(keywords, *G.args) + 1 : 0; |
329 | |
330 | if (key == 0) /* not a keyword */ |
331 | return eval7(); |
332 | G.args++; /* We have a valid token, so get the next argument. */ |
333 | if (key == 1) { /* quote */ |
334 | if (!*G.args) |
335 | bb_error_msg_and_die("syntax error"); |
336 | return str_value(*G.args++); |
337 | } |
338 | if (key == 2) { /* length */ |
339 | r = eval6(); |
340 | tostring(r); |
341 | v = int_value(strlen(r->u.s)); |
342 | freev(r); |
343 | } else |
344 | l = eval6(); |
345 | |
346 | if (key == 3) { /* match */ |
347 | r = eval6(); |
348 | v = docolon(l, r); |
349 | freev(l); |
350 | freev(r); |
351 | } |
352 | if (key == 4) { /* index */ |
353 | r = eval6(); |
354 | tostring(l); |
355 | tostring(r); |
356 | v = int_value(strcspn(l->u.s, r->u.s) + 1); |
357 | if (v->u.i == (arith_t) strlen(l->u.s) + 1) |
358 | v->u.i = 0; |
359 | freev(l); |
360 | freev(r); |
361 | } |
362 | if (key == 5) { /* substr */ |
363 | i1 = eval6(); |
364 | i2 = eval6(); |
365 | tostring(l); |
366 | if (!toarith(i1) || !toarith(i2) |
367 | || i1->u.i > (arith_t) strlen(l->u.s) |
368 | || i1->u.i <= 0 || i2->u.i <= 0) |
369 | v = str_value(""); |
370 | else { |
371 | v = xmalloc(sizeof(VALUE)); |
372 | v->type = STRING; |
373 | v->u.s = xstrndup(l->u.s + i1->u.i - 1, i2->u.i); |
374 | } |
375 | freev(l); |
376 | freev(i1); |
377 | freev(i2); |
378 | } |
379 | return v; |
380 | } |
381 | |
382 | /* Handle : operator (pattern matching). |
383 | Calls docolon to do the real work. */ |
384 | |
385 | static VALUE *eval5(void) |
386 | { |
387 | VALUE *l, *r, *v; |
388 | |
389 | l = eval6(); |
390 | while (nextarg(":")) { |
391 | G.args++; |
392 | r = eval6(); |
393 | v = docolon(l, r); |
394 | freev(l); |
395 | freev(r); |
396 | l = v; |
397 | } |
398 | return l; |
399 | } |
400 | |
401 | /* Handle *, /, % operators. */ |
402 | |
403 | static VALUE *eval4(void) |
404 | { |
405 | VALUE *l, *r; |
406 | int op; |
407 | arith_t val; |
408 | |
409 | l = eval5(); |
410 | while (1) { |
411 | op = nextarg("*"); |
412 | if (!op) { op = nextarg("/"); |
413 | if (!op) { op = nextarg("%"); |
414 | if (!op) return l; |
415 | }} |
416 | G.args++; |
417 | r = eval5(); |
418 | val = arithmetic_common(l, r, op); |
419 | freev(l); |
420 | freev(r); |
421 | l = int_value(val); |
422 | } |
423 | } |
424 | |
425 | /* Handle +, - operators. */ |
426 | |
427 | static VALUE *eval3(void) |
428 | { |
429 | VALUE *l, *r; |
430 | int op; |
431 | arith_t val; |
432 | |
433 | l = eval4(); |
434 | while (1) { |
435 | op = nextarg("+"); |
436 | if (!op) { |
437 | op = nextarg("-"); |
438 | if (!op) return l; |
439 | } |
440 | G.args++; |
441 | r = eval4(); |
442 | val = arithmetic_common(l, r, op); |
443 | freev(l); |
444 | freev(r); |
445 | l = int_value(val); |
446 | } |
447 | } |
448 | |
449 | /* Handle comparisons. */ |
450 | |
451 | static VALUE *eval2(void) |
452 | { |
453 | VALUE *l, *r; |
454 | int op; |
455 | arith_t val; |
456 | |
457 | l = eval3(); |
458 | while (1) { |
459 | op = nextarg("<"); |
460 | if (!op) { op = nextarg("<="); |
461 | if (!op) { op = nextarg("="); |
462 | if (!op) { op = nextarg("=="); |
463 | if (!op) { op = nextarg("!="); |
464 | if (!op) { op = nextarg(">="); |
465 | if (!op) { op = nextarg(">"); |
466 | if (!op) return l; |
467 | }}}}}} |
468 | G.args++; |
469 | r = eval3(); |
470 | toarith(l); |
471 | toarith(r); |
472 | val = cmp_common(l, r, op); |
473 | freev(l); |
474 | freev(r); |
475 | l = int_value(val); |
476 | } |
477 | } |
478 | |
479 | /* Handle &. */ |
480 | |
481 | static VALUE *eval1(void) |
482 | { |
483 | VALUE *l, *r; |
484 | |
485 | l = eval2(); |
486 | while (nextarg("&")) { |
487 | G.args++; |
488 | r = eval2(); |
489 | if (null(l) || null(r)) { |
490 | freev(l); |
491 | freev(r); |
492 | l = int_value(0); |
493 | } else |
494 | freev(r); |
495 | } |
496 | return l; |
497 | } |
498 | |
499 | /* Handle |. */ |
500 | |
501 | static VALUE *eval(void) |
502 | { |
503 | VALUE *l, *r; |
504 | |
505 | l = eval1(); |
506 | while (nextarg("|")) { |
507 | G.args++; |
508 | r = eval1(); |
509 | if (null(l)) { |
510 | freev(l); |
511 | l = r; |
512 | } else |
513 | freev(r); |
514 | } |
515 | return l; |
516 | } |
517 | |
518 | int expr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
519 | int expr_main(int argc UNUSED_PARAM, char **argv) |
520 | { |
521 | VALUE *v; |
522 | |
523 | INIT_G(); |
524 | |
525 | xfunc_error_retval = 2; /* coreutils compat */ |
526 | G.args = argv + 1; |
527 | if (*G.args == NULL) { |
528 | bb_error_msg_and_die("too few arguments"); |
529 | } |
530 | v = eval(); |
531 | if (*G.args) |
532 | bb_error_msg_and_die("syntax error"); |
533 | if (v->type == INTEGER) |
534 | printf("%" PF_REZ "d\n", PF_REZ_TYPE v->u.i); |
535 | else |
536 | puts(v->u.s); |
537 | fflush_stdout_and_exit(null(v)); |
538 | } |
539 |