blob: 55a19aa79c5bdb356f3320d8176a5a5493a42dbc
1 | /* vi: set sw=4 ts=4: */ |
2 | /* |
3 | * Mini expr implementation for busybox |
4 | * |
5 | * based on GNU expr Mike Parker. |
6 | * Copyright (C) 86, 1991-1997, 1999 Free Software Foundation, Inc. |
7 | * |
8 | * Busybox modifications |
9 | * Copyright (c) 2000 Edward Betts <edward@debian.org>. |
10 | * Copyright (C) 2003-2005 Vladimir Oleynik <dzo@simtreas.ru> |
11 | * - reduced 464 bytes. |
12 | * - 64 math support |
13 | * |
14 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. |
15 | */ |
16 | /* This program evaluates expressions. Each token (operator, operand, |
17 | * parenthesis) of the expression must be a separate argument. The |
18 | * parser used is a reasonably general one, though any incarnation of |
19 | * it is language-specific. It is especially nice for expressions. |
20 | * |
21 | * No parse tree is needed; a new node is evaluated immediately. |
22 | * One function can handle multiple operators all of equal precedence, |
23 | * provided they all associate ((x op x) op x). |
24 | */ |
25 | //config:config EXPR |
26 | //config: bool "expr" |
27 | //config: default y |
28 | //config: help |
29 | //config: expr is used to calculate numbers and print the result |
30 | //config: to standard output. |
31 | //config: |
32 | //config:config EXPR_MATH_SUPPORT_64 |
33 | //config: bool "Extend Posix numbers support to 64 bit" |
34 | //config: default y |
35 | //config: depends on EXPR |
36 | //config: help |
37 | //config: Enable 64-bit math support in the expr applet. This will make |
38 | //config: the applet slightly larger, but will allow computation with very |
39 | //config: large numbers. |
40 | |
41 | //applet:IF_EXPR(APPLET(expr, BB_DIR_USR_BIN, BB_SUID_DROP)) |
42 | |
43 | //kbuild:lib-$(CONFIG_EXPR) += expr.o |
44 | |
45 | //usage:#define expr_trivial_usage |
46 | //usage: "EXPRESSION" |
47 | //usage:#define expr_full_usage "\n\n" |
48 | //usage: "Print the value of EXPRESSION to stdout\n" |
49 | //usage: "\n" |
50 | //usage: "EXPRESSION may be:\n" |
51 | //usage: " ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2\n" |
52 | //usage: " ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0\n" |
53 | //usage: " ARG1 < ARG2 1 if ARG1 is less than ARG2, else 0. Similarly:\n" |
54 | //usage: " ARG1 <= ARG2\n" |
55 | //usage: " ARG1 = ARG2\n" |
56 | //usage: " ARG1 != ARG2\n" |
57 | //usage: " ARG1 >= ARG2\n" |
58 | //usage: " ARG1 > ARG2\n" |
59 | //usage: " ARG1 + ARG2 Sum of ARG1 and ARG2. Similarly:\n" |
60 | //usage: " ARG1 - ARG2\n" |
61 | //usage: " ARG1 * ARG2\n" |
62 | //usage: " ARG1 / ARG2\n" |
63 | //usage: " ARG1 % ARG2\n" |
64 | //usage: " STRING : REGEXP Anchored pattern match of REGEXP in STRING\n" |
65 | //usage: " match STRING REGEXP Same as STRING : REGEXP\n" |
66 | //usage: " substr STRING POS LENGTH Substring of STRING, POS counted from 1\n" |
67 | //usage: " index STRING CHARS Index in STRING where any CHARS is found, or 0\n" |
68 | //usage: " length STRING Length of STRING\n" |
69 | //usage: " quote TOKEN Interpret TOKEN as a string, even if\n" |
70 | //usage: " it is a keyword like 'match' or an\n" |
71 | //usage: " operator like '/'\n" |
72 | //usage: " (EXPRESSION) Value of EXPRESSION\n" |
73 | //usage: "\n" |
74 | //usage: "Beware that many operators need to be escaped or quoted for shells.\n" |
75 | //usage: "Comparisons are arithmetic if both ARGs are numbers, else\n" |
76 | //usage: "lexicographical. Pattern matches return the string matched between\n" |
77 | //usage: "\\( and \\) or null; if \\( and \\) are not used, they return the number\n" |
78 | //usage: "of characters matched or 0." |
79 | |
80 | #include "libbb.h" |
81 | #include "common_bufsiz.h" |
82 | #include "xregex.h" |
83 | |
84 | #if ENABLE_EXPR_MATH_SUPPORT_64 |
85 | typedef int64_t arith_t; |
86 | |
87 | #define PF_REZ "ll" |
88 | #define PF_REZ_TYPE (long long) |
89 | #define STRTOL(s, e, b) strtoll(s, e, b) |
90 | #else |
91 | typedef long arith_t; |
92 | |
93 | #define PF_REZ "l" |
94 | #define PF_REZ_TYPE (long) |
95 | #define STRTOL(s, e, b) strtol(s, e, b) |
96 | #endif |
97 | |
98 | /* TODO: use bb_strtol[l]? It's easier to check for errors... */ |
99 | |
100 | /* The kinds of value we can have. */ |
101 | enum { |
102 | INTEGER, |
103 | STRING |
104 | }; |
105 | |
106 | /* A value is.... */ |
107 | struct valinfo { |
108 | smallint type; /* Which kind. */ |
109 | union { /* The value itself. */ |
110 | arith_t i; |
111 | char *s; |
112 | } u; |
113 | }; |
114 | typedef struct valinfo VALUE; |
115 | |
116 | /* The arguments given to the program, minus the program name. */ |
117 | struct globals { |
118 | char **args; |
119 | } FIX_ALIASING; |
120 | #define G (*(struct globals*)bb_common_bufsiz1) |
121 | #define INIT_G() do { setup_common_bufsiz(); } while (0) |
122 | |
123 | /* forward declarations */ |
124 | static VALUE *eval(void); |
125 | |
126 | |
127 | /* Return a VALUE for I. */ |
128 | |
129 | static VALUE *int_value(arith_t i) |
130 | { |
131 | VALUE *v; |
132 | |
133 | v = xzalloc(sizeof(VALUE)); |
134 | if (INTEGER) /* otherwise xzalloc did it already */ |
135 | v->type = INTEGER; |
136 | v->u.i = i; |
137 | return v; |
138 | } |
139 | |
140 | /* Return a VALUE for S. */ |
141 | |
142 | static VALUE *str_value(const char *s) |
143 | { |
144 | VALUE *v; |
145 | |
146 | v = xzalloc(sizeof(VALUE)); |
147 | if (STRING) /* otherwise xzalloc did it already */ |
148 | v->type = STRING; |
149 | v->u.s = xstrdup(s); |
150 | return v; |
151 | } |
152 | |
153 | /* Free VALUE V, including structure components. */ |
154 | |
155 | static void freev(VALUE *v) |
156 | { |
157 | if (v->type == STRING) |
158 | free(v->u.s); |
159 | free(v); |
160 | } |
161 | |
162 | /* Return nonzero if V is a null-string or zero-number. */ |
163 | |
164 | static int null(VALUE *v) |
165 | { |
166 | if (v->type == INTEGER) |
167 | return v->u.i == 0; |
168 | /* STRING: */ |
169 | return v->u.s[0] == '\0' || LONE_CHAR(v->u.s, '0'); |
170 | } |
171 | |
172 | /* Coerce V to a STRING value (can't fail). */ |
173 | |
174 | static void tostring(VALUE *v) |
175 | { |
176 | if (v->type == INTEGER) { |
177 | v->u.s = xasprintf("%" PF_REZ "d", PF_REZ_TYPE v->u.i); |
178 | v->type = STRING; |
179 | } |
180 | } |
181 | |
182 | /* Coerce V to an INTEGER value. Return 1 on success, 0 on failure. */ |
183 | |
184 | static bool toarith(VALUE *v) |
185 | { |
186 | if (v->type == STRING) { |
187 | arith_t i; |
188 | char *e; |
189 | |
190 | /* Don't interpret the empty string as an integer. */ |
191 | /* Currently does not worry about overflow or int/long differences. */ |
192 | i = STRTOL(v->u.s, &e, 10); |
193 | if ((v->u.s == e) || *e) |
194 | return 0; |
195 | free(v->u.s); |
196 | v->u.i = i; |
197 | v->type = INTEGER; |
198 | } |
199 | return 1; |
200 | } |
201 | |
202 | /* Return str[0]+str[1] if the next token matches STR exactly. |
203 | STR must not be NULL. */ |
204 | |
205 | static int nextarg(const char *str) |
206 | { |
207 | if (*G.args == NULL || strcmp(*G.args, str) != 0) |
208 | return 0; |
209 | return (unsigned char)str[0] + (unsigned char)str[1]; |
210 | } |
211 | |
212 | /* The comparison operator handling functions. */ |
213 | |
214 | static int cmp_common(VALUE *l, VALUE *r, int op) |
215 | { |
216 | arith_t ll, rr; |
217 | |
218 | ll = l->u.i; |
219 | rr = r->u.i; |
220 | if (l->type == STRING || r->type == STRING) { |
221 | tostring(l); |
222 | tostring(r); |
223 | ll = strcmp(l->u.s, r->u.s); |
224 | rr = 0; |
225 | } |
226 | /* calculating ll - rr and checking the result is prone to overflows. |
227 | * We'll do it differently: */ |
228 | if (op == '<') |
229 | return ll < rr; |
230 | if (op == ('<' + '=')) |
231 | return ll <= rr; |
232 | if (op == '=' || (op == '=' + '=')) |
233 | return ll == rr; |
234 | if (op == '!' + '=') |
235 | return ll != rr; |
236 | if (op == '>') |
237 | return ll > rr; |
238 | /* >= */ |
239 | return ll >= rr; |
240 | } |
241 | |
242 | /* The arithmetic operator handling functions. */ |
243 | |
244 | static arith_t arithmetic_common(VALUE *l, VALUE *r, int op) |
245 | { |
246 | arith_t li, ri; |
247 | |
248 | if (!toarith(l) || !toarith(r)) |
249 | bb_error_msg_and_die("non-numeric argument"); |
250 | li = l->u.i; |
251 | ri = r->u.i; |
252 | if (op == '+') |
253 | return li + ri; |
254 | if (op == '-') |
255 | return li - ri; |
256 | if (op == '*') |
257 | return li * ri; |
258 | if (ri == 0) |
259 | bb_error_msg_and_die("division by zero"); |
260 | if (op == '/') |
261 | return li / ri; |
262 | return li % ri; |
263 | } |
264 | |
265 | /* Do the : operator. |
266 | SV is the VALUE for the lhs (the string), |
267 | PV is the VALUE for the rhs (the pattern). */ |
268 | |
269 | static VALUE *docolon(VALUE *sv, VALUE *pv) |
270 | { |
271 | enum { NMATCH = 2 }; |
272 | VALUE *v; |
273 | regex_t re_buffer; |
274 | regmatch_t re_regs[NMATCH]; |
275 | |
276 | tostring(sv); |
277 | tostring(pv); |
278 | |
279 | if (pv->u.s[0] == '^') { |
280 | bb_error_msg( |
281 | "warning: '%s': using '^' as the first character\n" |
282 | "of a basic regular expression is not portable; it is ignored", pv->u.s); |
283 | } |
284 | |
285 | memset(&re_buffer, 0, sizeof(re_buffer)); |
286 | memset(re_regs, 0, sizeof(re_regs)); |
287 | xregcomp(&re_buffer, pv->u.s, 0); |
288 | |
289 | /* expr uses an anchored pattern match, so check that there was a |
290 | * match and that the match starts at offset 0. */ |
291 | if (regexec(&re_buffer, sv->u.s, NMATCH, re_regs, 0) != REG_NOMATCH |
292 | && re_regs[0].rm_so == 0 |
293 | ) { |
294 | /* Were \(...\) used? */ |
295 | if (re_buffer.re_nsub > 0 && re_regs[1].rm_so >= 0) { |
296 | sv->u.s[re_regs[1].rm_eo] = '\0'; |
297 | v = str_value(sv->u.s + re_regs[1].rm_so); |
298 | } else { |
299 | v = int_value(re_regs[0].rm_eo); |
300 | } |
301 | } else { |
302 | /* Match failed -- return the right kind of null. */ |
303 | if (re_buffer.re_nsub > 0) |
304 | v = str_value(""); |
305 | else |
306 | v = int_value(0); |
307 | } |
308 | regfree(&re_buffer); |
309 | return v; |
310 | } |
311 | |
312 | /* Handle bare operands and ( expr ) syntax. */ |
313 | |
314 | static VALUE *eval7(void) |
315 | { |
316 | VALUE *v; |
317 | |
318 | if (!*G.args) |
319 | bb_error_msg_and_die("syntax error"); |
320 | |
321 | if (nextarg("(")) { |
322 | G.args++; |
323 | v = eval(); |
324 | if (!nextarg(")")) |
325 | bb_error_msg_and_die("syntax error"); |
326 | G.args++; |
327 | return v; |
328 | } |
329 | |
330 | if (nextarg(")")) |
331 | bb_error_msg_and_die("syntax error"); |
332 | |
333 | return str_value(*G.args++); |
334 | } |
335 | |
336 | /* Handle match, substr, index, length, and quote keywords. */ |
337 | |
338 | static VALUE *eval6(void) |
339 | { |
340 | static const char keywords[] ALIGN1 = |
341 | "quote\0""length\0""match\0""index\0""substr\0"; |
342 | |
343 | VALUE *r, *i1, *i2; |
344 | static VALUE *l = NULL; |
345 | static VALUE *v = NULL; |
346 | int key = *G.args ? index_in_strings(keywords, *G.args) + 1 : 0; |
347 | |
348 | if (key == 0) /* not a keyword */ |
349 | return eval7(); |
350 | G.args++; /* We have a valid token, so get the next argument. */ |
351 | if (key == 1) { /* quote */ |
352 | if (!*G.args) |
353 | bb_error_msg_and_die("syntax error"); |
354 | return str_value(*G.args++); |
355 | } |
356 | if (key == 2) { /* length */ |
357 | r = eval6(); |
358 | tostring(r); |
359 | v = int_value(strlen(r->u.s)); |
360 | freev(r); |
361 | } else |
362 | l = eval6(); |
363 | |
364 | if (key == 3) { /* match */ |
365 | r = eval6(); |
366 | v = docolon(l, r); |
367 | freev(l); |
368 | freev(r); |
369 | } |
370 | if (key == 4) { /* index */ |
371 | r = eval6(); |
372 | tostring(l); |
373 | tostring(r); |
374 | v = int_value(strcspn(l->u.s, r->u.s) + 1); |
375 | if (v->u.i == (arith_t) strlen(l->u.s) + 1) |
376 | v->u.i = 0; |
377 | freev(l); |
378 | freev(r); |
379 | } |
380 | if (key == 5) { /* substr */ |
381 | i1 = eval6(); |
382 | i2 = eval6(); |
383 | tostring(l); |
384 | if (!toarith(i1) || !toarith(i2) |
385 | || i1->u.i > (arith_t) strlen(l->u.s) |
386 | || i1->u.i <= 0 || i2->u.i <= 0) |
387 | v = str_value(""); |
388 | else { |
389 | v = xmalloc(sizeof(VALUE)); |
390 | v->type = STRING; |
391 | v->u.s = xstrndup(l->u.s + i1->u.i - 1, i2->u.i); |
392 | } |
393 | freev(l); |
394 | freev(i1); |
395 | freev(i2); |
396 | } |
397 | return v; |
398 | } |
399 | |
400 | /* Handle : operator (pattern matching). |
401 | Calls docolon to do the real work. */ |
402 | |
403 | static VALUE *eval5(void) |
404 | { |
405 | VALUE *l, *r, *v; |
406 | |
407 | l = eval6(); |
408 | while (nextarg(":")) { |
409 | G.args++; |
410 | r = eval6(); |
411 | v = docolon(l, r); |
412 | freev(l); |
413 | freev(r); |
414 | l = v; |
415 | } |
416 | return l; |
417 | } |
418 | |
419 | /* Handle *, /, % operators. */ |
420 | |
421 | static VALUE *eval4(void) |
422 | { |
423 | VALUE *l, *r; |
424 | int op; |
425 | arith_t val; |
426 | |
427 | l = eval5(); |
428 | while (1) { |
429 | op = nextarg("*"); |
430 | if (!op) { op = nextarg("/"); |
431 | if (!op) { op = nextarg("%"); |
432 | if (!op) return l; |
433 | }} |
434 | G.args++; |
435 | r = eval5(); |
436 | val = arithmetic_common(l, r, op); |
437 | freev(l); |
438 | freev(r); |
439 | l = int_value(val); |
440 | } |
441 | } |
442 | |
443 | /* Handle +, - operators. */ |
444 | |
445 | static VALUE *eval3(void) |
446 | { |
447 | VALUE *l, *r; |
448 | int op; |
449 | arith_t val; |
450 | |
451 | l = eval4(); |
452 | while (1) { |
453 | op = nextarg("+"); |
454 | if (!op) { |
455 | op = nextarg("-"); |
456 | if (!op) return l; |
457 | } |
458 | G.args++; |
459 | r = eval4(); |
460 | val = arithmetic_common(l, r, op); |
461 | freev(l); |
462 | freev(r); |
463 | l = int_value(val); |
464 | } |
465 | } |
466 | |
467 | /* Handle comparisons. */ |
468 | |
469 | static VALUE *eval2(void) |
470 | { |
471 | VALUE *l, *r; |
472 | int op; |
473 | arith_t val; |
474 | |
475 | l = eval3(); |
476 | while (1) { |
477 | op = nextarg("<"); |
478 | if (!op) { op = nextarg("<="); |
479 | if (!op) { op = nextarg("="); |
480 | if (!op) { op = nextarg("=="); |
481 | if (!op) { op = nextarg("!="); |
482 | if (!op) { op = nextarg(">="); |
483 | if (!op) { op = nextarg(">"); |
484 | if (!op) return l; |
485 | }}}}}} |
486 | G.args++; |
487 | r = eval3(); |
488 | toarith(l); |
489 | toarith(r); |
490 | val = cmp_common(l, r, op); |
491 | freev(l); |
492 | freev(r); |
493 | l = int_value(val); |
494 | } |
495 | } |
496 | |
497 | /* Handle &. */ |
498 | |
499 | static VALUE *eval1(void) |
500 | { |
501 | VALUE *l, *r; |
502 | |
503 | l = eval2(); |
504 | while (nextarg("&")) { |
505 | G.args++; |
506 | r = eval2(); |
507 | if (null(l) || null(r)) { |
508 | freev(l); |
509 | freev(r); |
510 | l = int_value(0); |
511 | } else |
512 | freev(r); |
513 | } |
514 | return l; |
515 | } |
516 | |
517 | /* Handle |. */ |
518 | |
519 | static VALUE *eval(void) |
520 | { |
521 | VALUE *l, *r; |
522 | |
523 | l = eval1(); |
524 | while (nextarg("|")) { |
525 | G.args++; |
526 | r = eval1(); |
527 | if (null(l)) { |
528 | freev(l); |
529 | l = r; |
530 | } else |
531 | freev(r); |
532 | } |
533 | return l; |
534 | } |
535 | |
536 | int expr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
537 | int expr_main(int argc UNUSED_PARAM, char **argv) |
538 | { |
539 | VALUE *v; |
540 | |
541 | INIT_G(); |
542 | |
543 | xfunc_error_retval = 2; /* coreutils compat */ |
544 | G.args = argv + 1; |
545 | if (*G.args == NULL) { |
546 | bb_error_msg_and_die("too few arguments"); |
547 | } |
548 | v = eval(); |
549 | if (*G.args) |
550 | bb_error_msg_and_die("syntax error"); |
551 | if (v->type == INTEGER) |
552 | printf("%" PF_REZ "d\n", PF_REZ_TYPE v->u.i); |
553 | else |
554 | puts(v->u.s); |
555 | fflush_stdout_and_exit(null(v)); |
556 | } |
557 |