summaryrefslogtreecommitdiff
path: root/editors/awk.c (plain)
blob: b1dfe9e8761b2c5b77a525f2e162ed8f9c0ca7bc
1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10//config:config AWK
11//config: bool "awk"
12//config: default y
13//config: help
14//config: Awk is used as a pattern scanning and processing language. This is
15//config: the BusyBox implementation of that programming language.
16//config:
17//config:config FEATURE_AWK_LIBM
18//config: bool "Enable math functions (requires libm)"
19//config: default y
20//config: depends on AWK
21//config: help
22//config: Enable math functions of the Awk programming language.
23//config: NOTE: This will require libm to be present for linking.
24//config:
25//config:config FEATURE_AWK_GNU_EXTENSIONS
26//config: bool "Enable a few GNU extensions"
27//config: default y
28//config: depends on AWK
29//config: help
30//config: Enable a few features from gawk:
31//config: * command line option -e AWK_PROGRAM
32//config: * simultaneous use of -f and -e on the command line.
33//config: This enables the use of awk library files.
34//config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
35
36//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
37
38//kbuild:lib-$(CONFIG_AWK) += awk.o
39
40//usage:#define awk_trivial_usage
41//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42//usage:#define awk_full_usage "\n\n"
43//usage: " -v VAR=VAL Set variable"
44//usage: "\n -F SEP Use SEP as field separator"
45//usage: "\n -f FILE Read program from FILE"
46//usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47//usage: "\n -e AWK_PROGRAM"
48//usage: )
49
50#include "libbb.h"
51#include "xregex.h"
52#include <math.h>
53
54/* This is a NOEXEC applet. Be very careful! */
55
56
57/* If you comment out one of these below, it will be #defined later
58 * to perform debug printfs to stderr: */
59#define debug_printf_walker(...) do {} while (0)
60#define debug_printf_eval(...) do {} while (0)
61#define debug_printf_parse(...) do {} while (0)
62
63#ifndef debug_printf_walker
64# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
65#endif
66#ifndef debug_printf_eval
67# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
68#endif
69#ifndef debug_printf_parse
70# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
71#endif
72
73
74#define OPTSTR_AWK \
75 "F:v:*f:*" \
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
77 "W:"
78enum {
79 OPTBIT_F, /* define field separator */
80 OPTBIT_v, /* define variable */
81 OPTBIT_f, /* pull in awk program from file */
82 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
83 OPTBIT_W, /* -W ignored */
84 OPT_F = 1 << OPTBIT_F,
85 OPT_v = 1 << OPTBIT_v,
86 OPT_f = 1 << OPTBIT_f,
87 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
88 OPT_W = 1 << OPTBIT_W
89};
90
91#define MAXVARFMT 240
92#define MINNVBLOCK 64
93
94/* variable flags */
95#define VF_NUMBER 0x0001 /* 1 = primary type is number */
96#define VF_ARRAY 0x0002 /* 1 = it's an array */
97
98#define VF_CACHED 0x0100 /* 1 = num/str value has cached str/num eq */
99#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
100#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
101#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
102#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
103#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
104#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
105
106/* these flags are static, don't change them when value is changed */
107#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
108
109typedef struct walker_list {
110 char *end;
111 char *cur;
112 struct walker_list *prev;
113 char wbuf[1];
114} walker_list;
115
116/* Variable */
117typedef struct var_s {
118 unsigned type; /* flags */
119 double number;
120 char *string;
121 union {
122 int aidx; /* func arg idx (for compilation stage) */
123 struct xhash_s *array; /* array ptr */
124 struct var_s *parent; /* for func args, ptr to actual parameter */
125 walker_list *walker; /* list of array elements (for..in) */
126 } x;
127} var;
128
129/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
130typedef struct chain_s {
131 struct node_s *first;
132 struct node_s *last;
133 const char *programname;
134} chain;
135
136/* Function */
137typedef struct func_s {
138 unsigned nargs;
139 struct chain_s body;
140} func;
141
142/* I/O stream */
143typedef struct rstream_s {
144 FILE *F;
145 char *buffer;
146 int adv;
147 int size;
148 int pos;
149 smallint is_pipe;
150} rstream;
151
/* One entry in a bucket chain of an xhash.
 * `data` must remain the first member: hash_search() hands out &item->data,
 * and hash_find() stores that result back into a hash_item pointer before
 * taking &hi->data again, which only works at offset 0.
 */
152typedef struct hash_item_s {
153 union {
154 struct var_s v; /* payload when the hash holds variables/array elements */
155 struct rstream_s rs; /* payload when the hash holds redirect streams */
156 struct func_s f; /* payload when the hash holds functions */
157 } data;
158 struct hash_item_s *next; /* next item in the same bucket chain */
159 char name[1]; /* key; hash_find() allocates sizeof(item) + strlen(name) + 1 and strcpy()s the name here */
160} hash_item;
161
/* Chained hash table keyed by NUL-terminated names (see hash_init(),
 * hash_find(), hash_rebuild(), hash_remove()).
 */
162typedef struct xhash_s {
163 unsigned nel; /* number of items currently stored */
164 unsigned csize; /* current number of buckets in items[] */
165 unsigned nprime; /* index into PRIMES[] of the size hash_rebuild() will use next */
166 unsigned glen; /* total length of all item names, each counted with its NUL */
167 struct hash_item_s **items; /* csize bucket heads, each a singly linked chain */
168} xhash;
169
170/* Tree node */
171typedef struct node_s {
172 uint32_t info;
173 unsigned lineno;
174 union {
175 struct node_s *n;
176 var *v;
177 int aidx;
178 char *new_progname;
179 regex_t *re;
180 } l;
181 union {
182 struct node_s *n;
183 regex_t *ire;
184 func *f;
185 } r;
186 union {
187 struct node_s *n;
188 } a;
189} node;
190
191/* Block of temporary variables, handed out by nvalloc() and released by
 * nvfree(). Blocks form a doubly linked list; g_cb points at the block
 * currently being allocated from.
 */
192typedef struct nvblock_s {
193 int size; /* capacity of nv[], in vars */
194 var *pos; /* first unused slot in nv[] */
195 struct nvblock_s *prev; /* block allocated before this one, or NULL */
196 struct nvblock_s *next; /* block allocated after this one, or NULL */
197 var nv[]; /* the variables themselves */
198} nvblock;
199
200typedef struct tsplitter_s {
201 node n;
202 regex_t re[2];
203} tsplitter;
204
205/* simple token classes */
206/* Order and hex values are very important!!! See next_token() */
207#define TC_SEQSTART (1 << 0) /* ( */
208#define TC_SEQTERM (1 << 1) /* ) */
209#define TC_REGEXP (1 << 2) /* /.../ */
210#define TC_OUTRDR (1 << 3) /* | > >> */
211#define TC_UOPPOST (1 << 4) /* unary postfix operator */
212#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
213#define TC_BINOPX (1 << 6) /* two-opnd operator */
214#define TC_IN (1 << 7)
215#define TC_COMMA (1 << 8)
216#define TC_PIPE (1 << 9) /* input redirection pipe */
217#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
218#define TC_ARRTERM (1 << 11) /* ] */
219#define TC_GRPSTART (1 << 12) /* { */
220#define TC_GRPTERM (1 << 13) /* } */
221#define TC_SEMICOL (1 << 14)
222#define TC_NEWLINE (1 << 15)
223#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
224#define TC_WHILE (1 << 17)
225#define TC_ELSE (1 << 18)
226#define TC_BUILTIN (1 << 19)
227/* This costs ~50 bytes of code.
228 * A separate class to support deprecated "length" form. If we don't need that
229 * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
230 * can be merged with TC_BUILTIN:
231 */
232#define TC_LENGTH (1 << 20)
233#define TC_GETLINE (1 << 21)
234#define TC_FUNCDECL (1 << 22) /* `function' `func' */
235#define TC_BEGIN (1 << 23)
236#define TC_END (1 << 24)
237#define TC_EOF (1 << 25)
238#define TC_VARIABLE (1 << 26)
239#define TC_ARRAY (1 << 27)
240#define TC_FUNCTION (1 << 28)
241#define TC_STRING (1 << 29)
242#define TC_NUMBER (1 << 30)
243
244#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
245
246/* combined token classes */
247#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
248//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
249#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
250 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
251 | TC_SEQSTART | TC_STRING | TC_NUMBER)
252
253#define TC_STATEMNT (TC_STATX | TC_WHILE)
254#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
255
256/* word tokens, cannot mean something else if not expected */
257#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
258 | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
259 | TC_FUNCDECL | TC_BEGIN | TC_END)
260
261/* discard newlines after these */
262#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
263 | TC_BINOP | TC_OPTERM)
264
265/* what can expression begin with */
266#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
267/* what can group begin with */
268#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
269
270/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
271/* operator is inserted between them */
272#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
273 | TC_STRING | TC_NUMBER | TC_UOPPOST)
274#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
275
276#define OF_RES1 0x010000
277#define OF_RES2 0x020000
278#define OF_STR1 0x040000
279#define OF_STR2 0x080000
280#define OF_NUM1 0x100000
281#define OF_CHECKED 0x200000
282
283/* combined operator flags */
284#define xx 0
285#define xV OF_RES2
286#define xS (OF_RES2 | OF_STR2)
287#define Vx OF_RES1
288#define VV (OF_RES1 | OF_RES2)
289#define Nx (OF_RES1 | OF_NUM1)
290#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
291#define Sx (OF_RES1 | OF_STR1)
292#define SV (OF_RES1 | OF_STR1 | OF_RES2)
293#define SS (OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
294
295#define OPCLSMASK 0xFF00
296#define OPNMASK 0x007F
297
298/* Operator priority is the highest byte (even: r->l, odd: l->r grouping).
299 * For builtins it has a different meaning: n n s3 s2 s1 v3 v2 v1,
300 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
301 */
302#undef P
303#undef PRIMASK
304#undef PRIMASK2
305#define P(x) (x << 24)
306#define PRIMASK 0x7F000000
307#define PRIMASK2 0x7E000000
308
309/* Operation classes */
310
311#define SHIFT_TIL_THIS 0x0600
312#define RECUR_FROM_THIS 0x1000
313
314enum {
315 OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
316 OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
317
318 OC_BR = 0x0700, OC_BREAK = 0x0800, OC_CONTINUE = 0x0900,
319 OC_EXIT = 0x0a00, OC_NEXT = 0x0b00, OC_NEXTFILE = 0x0c00,
320 OC_TEST = 0x0d00, OC_WALKNEXT = 0x0e00,
321
322 OC_BINARY = 0x1000, OC_BUILTIN = 0x1100, OC_COLON = 0x1200,
323 OC_COMMA = 0x1300, OC_COMPARE = 0x1400, OC_CONCAT = 0x1500,
324 OC_FBLTIN = 0x1600, OC_FIELD = 0x1700, OC_FNARG = 0x1800,
325 OC_FUNC = 0x1900, OC_GETLINE = 0x1a00, OC_IN = 0x1b00,
326 OC_LAND = 0x1c00, OC_LOR = 0x1d00, OC_MATCH = 0x1e00,
327 OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100,
328 OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400,
329 OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700,
330 OC_DONE = 0x2800,
331
332 ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200,
333 ST_WHILE = 0x3300
334};
335
336/* simple builtins */
337enum {
338 F_in, F_rn, F_co, F_ex, F_lg, F_si, F_sq, F_sr,
339 F_ti, F_le, F_sy, F_ff, F_cl
340};
341
342/* builtins */
343enum {
344 B_a2, B_ix, B_ma, B_sp, B_ss, B_ti, B_mt, B_lo, B_up,
345 B_ge, B_gs, B_su,
346 B_an, B_co, B_ls, B_or, B_rs, B_xo,
347};
348
349/* tokens and their corresponding info values */
350
351#define NTC "\377" /* switch to next token class (tc<<1) */
352#define NTCC '\377'
353
354static const char tokenlist[] ALIGN1 =
355 "\1(" NTC /* TC_SEQSTART */
356 "\1)" NTC /* TC_SEQTERM */
357 "\1/" NTC /* TC_REGEXP */
358 "\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
359 "\2++" "\2--" NTC /* TC_UOPPOST */
360 "\2++" "\2--" "\1$" NTC /* TC_UOPPRE1 */
361 "\2==" "\1=" "\2+=" "\2-=" /* TC_BINOPX */
362 "\2*=" "\2/=" "\2%=" "\2^="
363 "\1+" "\1-" "\3**=" "\2**"
364 "\1/" "\1%" "\1^" "\1*"
365 "\2!=" "\2>=" "\2<=" "\1>"
366 "\1<" "\2!~" "\1~" "\2&&"
367 "\2||" "\1?" "\1:" NTC
368 "\2in" NTC /* TC_IN */
369 "\1," NTC /* TC_COMMA */
370 "\1|" NTC /* TC_PIPE */
371 "\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
372 "\1]" NTC /* TC_ARRTERM */
373 "\1{" NTC /* TC_GRPSTART */
374 "\1}" NTC /* TC_GRPTERM */
375 "\1;" NTC /* TC_SEMICOL */
376 "\1\n" NTC /* TC_NEWLINE */
377 "\2if" "\2do" "\3for" "\5break" /* TC_STATX */
378 "\10continue" "\6delete" "\5print"
379 "\6printf" "\4next" "\10nextfile"
380 "\6return" "\4exit" NTC
381 "\5while" NTC /* TC_WHILE */
382 "\4else" NTC /* TC_ELSE */
383 "\3and" "\5compl" "\6lshift" "\2or" /* TC_BUILTIN */
384 "\6rshift" "\3xor"
385 "\5close" "\6system" "\6fflush" "\5atan2"
386 "\3cos" "\3exp" "\3int" "\3log"
387 "\4rand" "\3sin" "\4sqrt" "\5srand"
388 "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
389 "\5match" "\5split" "\7sprintf" "\3sub"
390 "\6substr" "\7systime" "\10strftime" "\6mktime"
391 "\7tolower" "\7toupper" NTC
392 "\6length" NTC /* TC_LENGTH */
393 "\7getline" NTC /* TC_GETLINE */
394 "\4func" "\10function" NTC /* TC_FUNCDECL */
395 "\5BEGIN" NTC /* TC_BEGIN */
396 "\3END" /* TC_END */
397 /* compiler adds trailing "\0" */
398 ;
399
400#define OC_B OC_BUILTIN
401
402static const uint32_t tokeninfo[] = {
403 0,
404 0,
405 OC_REGEXP,
406 xS|'a', xS|'w', xS|'|',
407 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
408 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
409 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
410 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
411 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
412 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
413 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
414 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
415 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
416 OC_IN|SV|P(49), /* TC_IN */
417 OC_COMMA|SS|P(80),
418 OC_PGETLINE|SV|P(37),
419 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
420 0, /* ] */
421 0,
422 0,
423 0,
424 0, /* \n */
425 ST_IF, ST_DO, ST_FOR, OC_BREAK,
426 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
427 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
428 OC_RETURN|Vx, OC_EXIT|Nx,
429 ST_WHILE,
430 0, /* else */
431 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
432 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
433 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
434 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
435 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
436 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
437 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
438 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
439 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
440 OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
441 OC_GETLINE|SV|P(0),
442 0, 0,
443 0,
444 0 /* TC_END */
445};
446
447/* internal variable names and their initial values */
448/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
449enum {
450 CONVFMT, OFMT, FS, OFS,
451 ORS, RS, RT, FILENAME,
452 SUBSEP, F0, ARGIND, ARGC,
453 ARGV, ERRNO, FNR, NR,
454 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
455};
456
457static const char vNames[] ALIGN1 =
458 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
459 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
460 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
461 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
462 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
463
464static const char vValues[] ALIGN1 =
465 "%.6g\0" "%.6g\0" " \0" " \0"
466 "\n\0" "\n\0" "\0" "\0"
467 "\034\0" "\0" "\377";
468
469/* hash size may grow to these values */
470#define FIRST_PRIME 61
471static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
472
473
474/* Globals. Split in two parts so that first one is addressed
475 * with (mostly short) negative offsets.
476 * NB: it's unsafe to put members of type "double"
477 * into globals2 (gcc may fail to align them).
478 */
479struct globals {
480 double t_double;
481 chain beginseq, mainseq, endseq;
482 chain *seq;
483 node *break_ptr, *continue_ptr;
484 rstream *iF;
485 xhash *vhash, *ahash, *fdhash, *fnhash;
486 const char *g_progname;
487 int g_lineno;
488 int nfields;
489 int maxfields; /* used in fsrealloc() only */
490 var *Fields;
491 nvblock *g_cb;
492 char *g_pos;
493 char *g_buf;
494 smallint icase;
495 smallint exiting;
496 smallint nextrec;
497 smallint nextfile;
498 smallint is_f0_split;
499 smallint t_rollback;
500};
501struct globals2 {
502 uint32_t t_info; /* often used */
503 uint32_t t_tclass;
504 char *t_string;
505 int t_lineno;
506
507 var *intvar[NUM_INTERNAL_VARS]; /* often used */
508
509 /* former statics from various functions */
510 char *split_f0__fstrings;
511
512 uint32_t next_token__save_tclass;
513 uint32_t next_token__save_info;
514 uint32_t next_token__ltclass;
515 smallint next_token__concat_inserted;
516
517 smallint next_input_file__files_happen;
518 rstream next_input_file__rsm;
519
520 var *evaluate__fnargs;
521 unsigned evaluate__seed;
522 regex_t evaluate__sreg;
523
524 var ptest__v;
525
526 tsplitter exec_builtin__tspl;
527
528 /* biggest and least used members go last */
529 tsplitter fsplitter, rsplitter;
530};
531#define G1 (ptr_to_globals[-1])
532#define G (*(struct globals2 *)ptr_to_globals)
533/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
534/*char G1size[sizeof(G1)]; - 0x74 */
535/*char Gsize[sizeof(G)]; - 0x1c4 */
536/* Trying to keep most of members accessible with short offsets: */
537/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
538#define t_double (G1.t_double )
539#define beginseq (G1.beginseq )
540#define mainseq (G1.mainseq )
541#define endseq (G1.endseq )
542#define seq (G1.seq )
543#define break_ptr (G1.break_ptr )
544#define continue_ptr (G1.continue_ptr)
545#define iF (G1.iF )
546#define vhash (G1.vhash )
547#define ahash (G1.ahash )
548#define fdhash (G1.fdhash )
549#define fnhash (G1.fnhash )
550#define g_progname (G1.g_progname )
551#define g_lineno (G1.g_lineno )
552#define nfields (G1.nfields )
553#define maxfields (G1.maxfields )
554#define Fields (G1.Fields )
555#define g_cb (G1.g_cb )
556#define g_pos (G1.g_pos )
557#define g_buf (G1.g_buf )
558#define icase (G1.icase )
559#define exiting (G1.exiting )
560#define nextrec (G1.nextrec )
561#define nextfile (G1.nextfile )
562#define is_f0_split (G1.is_f0_split )
563#define t_rollback (G1.t_rollback )
564#define t_info (G.t_info )
565#define t_tclass (G.t_tclass )
566#define t_string (G.t_string )
567#define t_lineno (G.t_lineno )
568#define intvar (G.intvar )
569#define fsplitter (G.fsplitter )
570#define rsplitter (G.rsplitter )
571#define INIT_G() do { \
572 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
573 G.next_token__ltclass = TC_OPTERM; \
574 G.evaluate__seed = 1; \
575} while (0)
576
577
578/* function prototypes */
579static void handle_special(var *);
580static node *parse_expr(uint32_t);
581static void chain_group(void);
582static var *evaluate(node *, var *);
583static rstream *next_input_file(void);
584static int fmt_num(char *, int, const char *, double, int);
585static int awk_exit(int) NORETURN;
586
587/* ---- error handling ---- */
588
589static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
590static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
591static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
592static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
593static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
594static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
595static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
596static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
597static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
598static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
599
600static void zero_out_var(var *vp)
601{
602 memset(vp, 0, sizeof(*vp));
603}
604
605static void syntax_error(const char *message) NORETURN;
606static void syntax_error(const char *message)
607{
608 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
609}
610
611/* ---- hash stuff ---- */
612
/* Hash a NUL-terminated name.
 * For every character c: h = c + h*63, written as (h << 6) - h, in
 * unsigned arithmetic (wraps on overflow). The empty name hashes to 0.
 * Callers reduce the result modulo the bucket count.
 */
static unsigned hashidx(const char *name)
{
	unsigned h;

	for (h = 0; *name != '\0'; name++)
		h = *name + (h << 6) - h;
	return h;
}
621
622/* create new hash */
623static xhash *hash_init(void)
624{
625 xhash *newhash;
626
627 newhash = xzalloc(sizeof(*newhash));
628 newhash->csize = FIRST_PRIME;
629 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
630
631 return newhash;
632}
633
634/* find item in hash, return ptr to data, NULL if not found */
635static void *hash_search(xhash *hash, const char *name)
636{
637 hash_item *hi;
638
639 hi = hash->items[hashidx(name) % hash->csize];
640 while (hi) {
641 if (strcmp(hi->name, name) == 0)
642 return &hi->data;
643 hi = hi->next;
644 }
645 return NULL;
646}
647
648/* grow hash if it becomes too big */
649static void hash_rebuild(xhash *hash)
650{
651 unsigned newsize, i, idx;
652 hash_item **newitems, *hi, *thi;
653
654 if (hash->nprime == ARRAY_SIZE(PRIMES))
655 return;
656
657 newsize = PRIMES[hash->nprime++];
658 newitems = xzalloc(newsize * sizeof(newitems[0]));
659
660 for (i = 0; i < hash->csize; i++) {
661 hi = hash->items[i];
662 while (hi) {
663 thi = hi;
664 hi = thi->next;
665 idx = hashidx(thi->name) % newsize;
666 thi->next = newitems[idx];
667 newitems[idx] = thi;
668 }
669 }
670
671 free(hash->items);
672 hash->csize = newsize;
673 hash->items = newitems;
674}
675
676/* find item in hash, add it if necessary. Return ptr to data */
677static void *hash_find(xhash *hash, const char *name)
678{
679 hash_item *hi;
680 unsigned idx;
681 int l;
682
683 hi = hash_search(hash, name);
684 if (!hi) {
685 if (++hash->nel / hash->csize > 10)
686 hash_rebuild(hash);
687
688 l = strlen(name) + 1;
689 hi = xzalloc(sizeof(*hi) + l);
690 strcpy(hi->name, name);
691
692 idx = hashidx(name) % hash->csize;
693 hi->next = hash->items[idx];
694 hash->items[idx] = hi;
695 hash->glen += l;
696 }
697 return &hi->data;
698}
699
/* Typed wrappers around hash_find(): each looks `name` up, creating the
 * entry when it is missing, and casts the result to the payload type.
 * findvar() takes the hash explicitly; newvar(), newfile() and newfunc()
 * always use the global vhash, fdhash and fnhash respectively.
 */
700#define findvar(hash, name) ((var*) hash_find((hash), (name)))
701#define newvar(name) ((var*) hash_find(vhash, (name)))
702#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
703#define newfunc(name) ((func*) hash_find(fnhash, (name)))
704
705static void hash_remove(xhash *hash, const char *name)
706{
707 hash_item *hi, **phi;
708
709 phi = &hash->items[hashidx(name) % hash->csize];
710 while (*phi) {
711 hi = *phi;
712 if (strcmp(hi->name, name) == 0) {
713 hash->glen -= (strlen(name) + 1);
714 hash->nel--;
715 *phi = hi->next;
716 free(hi);
717 break;
718 }
719 phi = &hi->next;
720 }
721}
722
723/* ------ some useful functions ------ */
724
725static char *skip_spaces(char *p)
726{
727 while (1) {
728 if (*p == '\\' && p[1] == '\n') {
729 p++;
730 t_lineno++;
731 } else if (*p != ' ' && *p != '\t') {
732 break;
733 }
734 p++;
735 }
736 return p;
737}
738
/* Return the word *s points at on entry, and move *s to just past
 * that word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;
	char *cursor = word;

	while (*cursor != '\0')
		cursor++;
	*s = cursor + 1;
	return word;
}
747
/* Fetch one (possibly backslash-escaped) character from *s and advance
 * *s past everything that was consumed.
 * A character other than backslash is returned unchanged. After a
 * backslash, bb_process_escape_sequence() decodes the escape; if it
 * hands back '\\' without moving *s, the escape is treated as
 * unrecognized and the character following the backslash is returned.
 */
static char nextchar(char **s)
{
	char ch;
	char *after_first;

	ch = **s;
	after_first = ++*s;
	if (ch != '\\')
		return ch;

	ch = bb_process_escape_sequence((const char**)s);
	/* Example awk statement:
	 *	s = "abc\"def"
	 * here \" has to come out as a plain "
	 */
	if (ch == '\\' && *s == after_first) {
		/* unrecognized \z: yield z itself */
		ch = **s;
		if (ch != '\0')
			(*s)++;	/* but stay put when z is the terminating NUL */
	}
	return ch;
}
767
/* Decode the escapes in s1 with nextchar(), writing the result over
 * the same buffer, terminating NUL included.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char *dst = s1;
	char ch;

	do {
		ch = nextchar(&src);
		*dst++ = ch;
	} while (ch != '\0');
}
776
777static ALWAYS_INLINE int isalnum_(int c)
778{
779 return (isalnum(c) || c == '_');
780}
781
782static double my_strtod(char **pp)
783{
784 char *cp = *pp;
785 if (ENABLE_DESKTOP && cp[0] == '0') {
786 /* Might be hex or octal integer: 0x123abc or 07777 */
787 char c = (cp[1] | 0x20);
788 if (c == 'x' || isdigit(cp[1])) {
789 unsigned long long ull = strtoull(cp, pp, 0);
790 if (c == 'x')
791 return ull;
792 c = **pp;
793 if (!isdigit(c) && c != '.')
794 return ull;
795 /* else: it may be a floating number. Examples:
796 * 009.123 (*pp points to '9')
797 * 000.123 (*pp points to '.')
798 * fall through to strtod.
799 */
800 }
801 }
802 return strtod(cp, pp);
803}
804
805/* -------- working with variables (set/get/copy/etc) -------- */
806
807static xhash *iamarray(var *v)
808{
809 var *a = v;
810
811 while (a->type & VF_CHILD)
812 a = a->x.parent;
813
814 if (!(a->type & VF_ARRAY)) {
815 a->type |= VF_ARRAY;
816 a->x.array = hash_init();
817 }
818 return a->x.array;
819}
820
821static void clear_array(xhash *array)
822{
823 unsigned i;
824 hash_item *hi, *thi;
825
826 for (i = 0; i < array->csize; i++) {
827 hi = array->items[i];
828 while (hi) {
829 thi = hi;
830 hi = hi->next;
831 free(thi->data.v.string);
832 free(thi);
833 }
834 array->items[i] = NULL;
835 }
836 array->glen = array->nel = 0;
837}
838
839/* clear a variable */
840static var *clrvar(var *v)
841{
842 if (!(v->type & VF_FSTR))
843 free(v->string);
844
845 v->type &= VF_DONTTOUCH;
846 v->type |= VF_DIRTY;
847 v->string = NULL;
848 return v;
849}
850
851/* assign string value to variable */
852static var *setvar_p(var *v, char *value)
853{
854 clrvar(v);
855 v->string = value;
856 handle_special(v);
857 return v;
858}
859
860/* same as setvar_p but make a copy of string */
861static var *setvar_s(var *v, const char *value)
862{
863 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
864}
865
866/* same as setvar_s but sets USER flag */
867static var *setvar_u(var *v, const char *value)
868{
869 v = setvar_s(v, value);
870 v->type |= VF_USER;
871 return v;
872}
873
874/* set array element to user string */
875static void setari_u(var *a, int idx, const char *s)
876{
877 var *v;
878
879 v = findvar(iamarray(a), itoa(idx));
880 setvar_u(v, s);
881}
882
883/* assign numeric value to variable */
884static var *setvar_i(var *v, double value)
885{
886 clrvar(v);
887 v->type |= VF_NUMBER;
888 v->number = value;
889 handle_special(v);
890 return v;
891}
892
893static const char *getvar_s(var *v)
894{
895 /* if v is numeric and has no cached string, convert it to string */
896 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
897 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
898 v->string = xstrdup(g_buf);
899 v->type |= VF_CACHED;
900 }
901 return (v->string == NULL) ? "" : v->string;
902}
903
904static double getvar_i(var *v)
905{
906 char *s;
907
908 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
909 v->number = 0;
910 s = v->string;
911 if (s && *s) {
912 debug_printf_eval("getvar_i: '%s'->", s);
913 v->number = my_strtod(&s);
914 debug_printf_eval("%f (s:'%s')\n", v->number, s);
915 if (v->type & VF_USER) {
916 s = skip_spaces(s);
917 if (*s != '\0')
918 v->type &= ~VF_USER;
919 }
920 } else {
921 debug_printf_eval("getvar_i: '%s'->zero\n", s);
922 v->type &= ~VF_USER;
923 }
924 v->type |= VF_CACHED;
925 }
926 debug_printf_eval("getvar_i: %f\n", v->number);
927 return v->number;
928}
929
930/* Used for operands of bitwise ops */
931static unsigned long getvar_i_int(var *v)
932{
933 double d = getvar_i(v);
934
935 /* Casting doubles to longs is undefined for values outside
936 * of target type range. Try to widen it as much as possible */
937 if (d >= 0)
938 return (unsigned long)d;
939 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
940 return - (long) (unsigned long) (-d);
941}
942
943static var *copyvar(var *dest, const var *src)
944{
945 if (dest != src) {
946 clrvar(dest);
947 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
948 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
949 dest->number = src->number;
950 if (src->string)
951 dest->string = xstrdup(src->string);
952 }
953 handle_special(dest);
954 return dest;
955}
956
957static var *incvar(var *v)
958{
959 return setvar_i(v, getvar_i(v) + 1.0);
960}
961
962/* return true if v is number or numeric string */
963static int is_numeric(var *v)
964{
965 getvar_i(v);
966 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
967}
968
969/* return 1 when value of v corresponds to true, 0 otherwise */
970static int istrue(var *v)
971{
972 if (is_numeric(v))
973 return (v->number != 0);
974 return (v->string && v->string[0]);
975}
976
977/* temporary variables allocator. Last allocated should be first freed */
978static var *nvalloc(int n)
979{
980 nvblock *pb = NULL;
981 var *v, *r;
982 int size;
983
984 while (g_cb) {
985 pb = g_cb;
986 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
987 break;
988 g_cb = g_cb->next;
989 }
990
991 if (!g_cb) {
992 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
993 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
994 g_cb->size = size;
995 g_cb->pos = g_cb->nv;
996 g_cb->prev = pb;
997 /*g_cb->next = NULL; - xzalloc did it */
998 if (pb)
999 pb->next = g_cb;
1000 }
1001
1002 v = r = g_cb->pos;
1003 g_cb->pos += n;
1004
1005 while (v < g_cb->pos) {
1006 v->type = 0;
1007 v->string = NULL;
1008 v++;
1009 }
1010
1011 return r;
1012}
1013
1014static void nvfree(var *v)
1015{
1016 var *p;
1017
1018 if (v < g_cb->nv || v >= g_cb->pos)
1019 syntax_error(EMSG_INTERNAL_ERROR);
1020
1021 for (p = v; p < g_cb->pos; p++) {
1022 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1023 clear_array(iamarray(p));
1024 free(p->x.array->items);
1025 free(p->x.array);
1026 }
1027 if (p->type & VF_WALK) {
1028 walker_list *n;
1029 walker_list *w = p->x.walker;
1030 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1031 p->x.walker = NULL;
1032 while (w) {
1033 n = w->prev;
1034 debug_printf_walker(" free(%p)\n", w);
1035 free(w);
1036 w = n;
1037 }
1038 }
1039 clrvar(p);
1040 }
1041
1042 g_cb->pos = v;
1043 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1044 g_cb = g_cb->prev;
1045 }
1046}
1047
1048/* ------- awk program text parsing ------- */
1049
1050/* Parse the next token pointed to by global g_pos, place results into the
1051 * t_* globals (t_tclass, t_info, t_string, t_double). If the token isn't expected, give up. Return token class
1052 */
1053static uint32_t next_token(uint32_t expected)
1054{
1055#define concat_inserted (G.next_token__concat_inserted)
1056#define save_tclass (G.next_token__save_tclass)
1057#define save_info (G.next_token__save_info)
1058/* Initialized to TC_OPTERM: */
1059#define ltclass (G.next_token__ltclass)
1060
1061 char *p, *s;
1062 const char *tl;
1063 uint32_t tc;
1064 const uint32_t *ti;
1065
1066 if (t_rollback) {
1067 t_rollback = FALSE;
1068 } else if (concat_inserted) {
1069 concat_inserted = FALSE;
1070 t_tclass = save_tclass;
1071 t_info = save_info;
1072 } else {
1073 p = g_pos;
1074 readnext:
1075 p = skip_spaces(p);
1076 g_lineno = t_lineno;
1077 if (*p == '#')
1078 while (*p != '\n' && *p != '\0')
1079 p++;
1080
1081 if (*p == '\n')
1082 t_lineno++;
1083
1084 if (*p == '\0') {
1085 tc = TC_EOF;
1086 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1087 } else if (*p == '\"') {
1088 /* it's a string */
1089 t_string = s = ++p;
1090 while (*p != '\"') {
1091 char *pp;
1092 if (*p == '\0' || *p == '\n')
1093 syntax_error(EMSG_UNEXP_EOS);
1094 pp = p;
1095 *s++ = nextchar(&pp);
1096 p = pp;
1097 }
1098 p++;
1099 *s = '\0';
1100 tc = TC_STRING;
1101 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1102 } else if ((expected & TC_REGEXP) && *p == '/') {
1103 /* it's regexp */
1104 t_string = s = ++p;
1105 while (*p != '/') {
1106 if (*p == '\0' || *p == '\n')
1107 syntax_error(EMSG_UNEXP_EOS);
1108 *s = *p++;
1109 if (*s++ == '\\') {
1110 char *pp = p;
1111 s[-1] = bb_process_escape_sequence((const char **)&pp);
1112 if (*p == '\\')
1113 *s++ = '\\';
1114 if (pp == p)
1115 *s++ = *p++;
1116 else
1117 p = pp;
1118 }
1119 }
1120 p++;
1121 *s = '\0';
1122 tc = TC_REGEXP;
1123 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1124
1125 } else if (*p == '.' || isdigit(*p)) {
1126 /* it's a number */
1127 char *pp = p;
1128 t_double = my_strtod(&pp);
1129 p = pp;
1130 if (*p == '.')
1131 syntax_error(EMSG_UNEXP_TOKEN);
1132 tc = TC_NUMBER;
1133 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1134 } else {
1135 /* search for something known */
1136 tl = tokenlist;
1137 tc = 0x00000001;
1138 ti = tokeninfo;
1139 while (*tl) {
1140 int l = (unsigned char) *tl++;
1141 if (l == (unsigned char) NTCC) {
1142 tc <<= 1;
1143 continue;
1144 }
1145 /* if token class is expected,
1146 * token matches,
1147 * and it's not a longer word,
1148 */
1149 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150 && strncmp(p, tl, l) == 0
1151 && !((tc & TC_WORD) && isalnum_(p[l]))
1152 ) {
1153 /* then this is what we are looking for */
1154 t_info = *ti;
1155 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1156 p += l;
1157 goto token_found;
1158 }
1159 ti++;
1160 tl += l;
1161 }
1162 /* not a known token */
1163
1164 /* is it a name? (var/array/function) */
1165 if (!isalnum_(*p))
1166 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1167 /* yes */
1168 t_string = --p;
1169 while (isalnum_(*++p)) {
1170 p[-1] = *p;
1171 }
1172 p[-1] = '\0';
1173 tc = TC_VARIABLE;
1174 /* also consume whitespace between functionname and bracket */
1175 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1176 p = skip_spaces(p);
1177 if (*p == '(') {
1178 tc = TC_FUNCTION;
1179 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1180 } else {
1181 if (*p == '[') {
1182 p++;
1183 tc = TC_ARRAY;
1184 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1185 } else
1186 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1187 }
1188 }
1189 token_found:
1190 g_pos = p;
1191
1192 /* skipping newlines in some cases */
1193 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1194 goto readnext;
1195
1196 /* insert concatenation operator when needed */
1197 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198 concat_inserted = TRUE;
1199 save_tclass = tc;
1200 save_info = t_info;
1201 tc = TC_BINOP;
1202 t_info = OC_CONCAT | SS | P(35);
1203 }
1204
1205 t_tclass = tc;
1206 }
1207 ltclass = t_tclass;
1208
1209 /* Are we ready for this? */
1210 if (!(ltclass & expected)) {
1211 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1213 }
1214
1215 return ltclass;
1216#undef concat_inserted
1217#undef save_tclass
1218#undef save_info
1219#undef ltclass
1220}
1221
1222static void rollback_token(void)
1223{
1224 t_rollback = TRUE;
1225}
1226
1227static node *new_node(uint32_t info)
1228{
1229 node *n;
1230
1231 n = xzalloc(sizeof(node));
1232 n->info = info;
1233 n->lineno = g_lineno;
1234 return n;
1235}
1236
1237static void mk_re_node(const char *s, node *n, regex_t *re)
1238{
1239 n->info = OC_REGEXP;
1240 n->l.re = re;
1241 n->r.ire = re + 1;
1242 xregcomp(re, s, REG_EXTENDED);
1243 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1244}
1245
1246static node *condition(void)
1247{
1248 next_token(TC_SEQSTART);
1249 return parse_expr(TC_SEQTERM);
1250}
1251
1252/* parse expression terminated by given argument, return ptr
1253 * to built subtree. Terminator is eaten by parse_expr */
1254static node *parse_expr(uint32_t iexp)
1255{
1256 node sn;
1257 node *cn = &sn;
1258 node *vn, *glptr;
1259 uint32_t tc, xtc;
1260 var *v;
1261
1262 debug_printf_parse("%s(%x)\n", __func__, iexp);
1263
1264 sn.info = PRIMASK;
1265 sn.r.n = glptr = NULL;
1266 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1267
1268 while (!((tc = next_token(xtc)) & iexp)) {
1269
1270 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1271 /* input redirection (<) attached to glptr node */
1272 debug_printf_parse("%s: input redir\n", __func__);
1273 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1274 cn->a.n = glptr;
1275 xtc = TC_OPERAND | TC_UOPPRE;
1276 glptr = NULL;
1277
1278 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1279 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1280 /* for binary and postfix-unary operators, jump back over
1281 * previous operators with higher priority */
1282 vn = cn;
1283 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1284 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1285 ) {
1286 vn = vn->a.n;
1287 }
1288 if ((t_info & OPCLSMASK) == OC_TERNARY)
1289 t_info += P(6);
1290 cn = vn->a.n->r.n = new_node(t_info);
1291 cn->a.n = vn->a.n;
1292 if (tc & TC_BINOP) {
1293 cn->l.n = vn;
1294 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1295 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1296 /* it's a pipe */
1297 next_token(TC_GETLINE);
1298 /* give maximum priority to this pipe */
1299 cn->info &= ~PRIMASK;
1300 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1301 }
1302 } else {
1303 cn->r.n = vn;
1304 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1305 }
1306 vn->a.n = cn;
1307
1308 } else {
1309 debug_printf_parse("%s: other\n", __func__);
1310 /* for operands and prefix-unary operators, attach them
1311 * to last node */
1312 vn = cn;
1313 cn = vn->r.n = new_node(t_info);
1314 cn->a.n = vn;
1315 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1316 if (tc & (TC_OPERAND | TC_REGEXP)) {
1317 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1318 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1319 /* one should be very careful with switch on tclass -
1320 * only simple tclasses should be used! */
1321 switch (tc) {
1322 case TC_VARIABLE:
1323 case TC_ARRAY:
1324 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1325 cn->info = OC_VAR;
1326 v = hash_search(ahash, t_string);
1327 if (v != NULL) {
1328 cn->info = OC_FNARG;
1329 cn->l.aidx = v->x.aidx;
1330 } else {
1331 cn->l.v = newvar(t_string);
1332 }
1333 if (tc & TC_ARRAY) {
1334 cn->info |= xS;
1335 cn->r.n = parse_expr(TC_ARRTERM);
1336 }
1337 break;
1338
1339 case TC_NUMBER:
1340 case TC_STRING:
1341 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1342 cn->info = OC_VAR;
1343 v = cn->l.v = xzalloc(sizeof(var));
1344 if (tc & TC_NUMBER)
1345 setvar_i(v, t_double);
1346 else
1347 setvar_s(v, t_string);
1348 break;
1349
1350 case TC_REGEXP:
1351 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1352 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1353 break;
1354
1355 case TC_FUNCTION:
1356 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1357 cn->info = OC_FUNC;
1358 cn->r.f = newfunc(t_string);
1359 cn->l.n = condition();
1360 break;
1361
1362 case TC_SEQSTART:
1363 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1364 cn = vn->r.n = parse_expr(TC_SEQTERM);
1365 if (!cn)
1366 syntax_error("Empty sequence");
1367 cn->a.n = vn;
1368 break;
1369
1370 case TC_GETLINE:
1371 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1372 glptr = cn;
1373 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1374 break;
1375
1376 case TC_BUILTIN:
1377 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1378 cn->l.n = condition();
1379 break;
1380
1381 case TC_LENGTH:
1382 debug_printf_parse("%s: TC_LENGTH\n", __func__);
1383 next_token(TC_SEQSTART | TC_OPTERM | TC_GRPTERM);
1384 rollback_token();
1385 if (t_tclass & TC_SEQSTART) {
1386 /* It was a "(" token. Handle just like TC_BUILTIN */
1387 cn->l.n = condition();
1388 }
1389 break;
1390 }
1391 }
1392 }
1393 }
1394
1395 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1396 return sn.r.n;
1397}
1398
1399/* add node to chain. Return ptr to alloc'd node */
1400static node *chain_node(uint32_t info)
1401{
1402 node *n;
1403
1404 if (!seq->first)
1405 seq->first = seq->last = new_node(0);
1406
1407 if (seq->programname != g_progname) {
1408 seq->programname = g_progname;
1409 n = chain_node(OC_NEWSOURCE);
1410 n->l.new_progname = xstrdup(g_progname);
1411 }
1412
1413 n = seq->last;
1414 n->info = info;
1415 seq->last = n->a.n = new_node(OC_DONE);
1416
1417 return n;
1418}
1419
1420static void chain_expr(uint32_t info)
1421{
1422 node *n;
1423
1424 n = chain_node(info);
1425 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1426 if (t_tclass & TC_GRPTERM)
1427 rollback_token();
1428}
1429
1430static node *chain_loop(node *nn)
1431{
1432 node *n, *n2, *save_brk, *save_cont;
1433
1434 save_brk = break_ptr;
1435 save_cont = continue_ptr;
1436
1437 n = chain_node(OC_BR | Vx);
1438 continue_ptr = new_node(OC_EXEC);
1439 break_ptr = new_node(OC_EXEC);
1440 chain_group();
1441 n2 = chain_node(OC_EXEC | Vx);
1442 n2->l.n = nn;
1443 n2->a.n = n;
1444 continue_ptr->a.n = n2;
1445 break_ptr->a.n = n->r.n = seq->last;
1446
1447 continue_ptr = save_cont;
1448 break_ptr = save_brk;
1449
1450 return n;
1451}
1452
1453/* parse group and attach it to chain */
1454static void chain_group(void)
1455{
1456 uint32_t c;
1457 node *n, *n2, *n3;
1458
1459 do {
1460 c = next_token(TC_GRPSEQ);
1461 } while (c & TC_NEWLINE);
1462
1463 if (c & TC_GRPSTART) {
1464 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1465 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1466 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1467 if (t_tclass & TC_NEWLINE)
1468 continue;
1469 rollback_token();
1470 chain_group();
1471 }
1472 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1473 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1474 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1475 rollback_token();
1476 chain_expr(OC_EXEC | Vx);
1477 } else {
1478 /* TC_STATEMNT */
1479 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1480 switch (t_info & OPCLSMASK) {
1481 case ST_IF:
1482 debug_printf_parse("%s: ST_IF\n", __func__);
1483 n = chain_node(OC_BR | Vx);
1484 n->l.n = condition();
1485 chain_group();
1486 n2 = chain_node(OC_EXEC);
1487 n->r.n = seq->last;
1488 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1489 chain_group();
1490 n2->a.n = seq->last;
1491 } else {
1492 rollback_token();
1493 }
1494 break;
1495
1496 case ST_WHILE:
1497 debug_printf_parse("%s: ST_WHILE\n", __func__);
1498 n2 = condition();
1499 n = chain_loop(NULL);
1500 n->l.n = n2;
1501 break;
1502
1503 case ST_DO:
1504 debug_printf_parse("%s: ST_DO\n", __func__);
1505 n2 = chain_node(OC_EXEC);
1506 n = chain_loop(NULL);
1507 n2->a.n = n->a.n;
1508 next_token(TC_WHILE);
1509 n->l.n = condition();
1510 break;
1511
1512 case ST_FOR:
1513 debug_printf_parse("%s: ST_FOR\n", __func__);
1514 next_token(TC_SEQSTART);
1515 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1516 if (t_tclass & TC_SEQTERM) { /* for-in */
1517 if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
1518 syntax_error(EMSG_UNEXP_TOKEN);
1519 n = chain_node(OC_WALKINIT | VV);
1520 n->l.n = n2->l.n;
1521 n->r.n = n2->r.n;
1522 n = chain_loop(NULL);
1523 n->info = OC_WALKNEXT | Vx;
1524 n->l.n = n2->l.n;
1525 } else { /* for (;;) */
1526 n = chain_node(OC_EXEC | Vx);
1527 n->l.n = n2;
1528 n2 = parse_expr(TC_SEMICOL);
1529 n3 = parse_expr(TC_SEQTERM);
1530 n = chain_loop(n3);
1531 n->l.n = n2;
1532 if (!n2)
1533 n->info = OC_EXEC;
1534 }
1535 break;
1536
1537 case OC_PRINT:
1538 case OC_PRINTF:
1539 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1540 n = chain_node(t_info);
1541 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1542 if (t_tclass & TC_OUTRDR) {
1543 n->info |= t_info;
1544 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1545 }
1546 if (t_tclass & TC_GRPTERM)
1547 rollback_token();
1548 break;
1549
1550 case OC_BREAK:
1551 debug_printf_parse("%s: OC_BREAK\n", __func__);
1552 n = chain_node(OC_EXEC);
1553 n->a.n = break_ptr;
1554 chain_expr(t_info);
1555 break;
1556
1557 case OC_CONTINUE:
1558 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1559 n = chain_node(OC_EXEC);
1560 n->a.n = continue_ptr;
1561 chain_expr(t_info);
1562 break;
1563
1564 /* delete, next, nextfile, return, exit */
1565 default:
1566 debug_printf_parse("%s: default\n", __func__);
1567 chain_expr(t_info);
1568 }
1569 }
1570}
1571
1572static void parse_program(char *p)
1573{
1574 uint32_t tclass;
1575 node *cn;
1576 func *f;
1577 var *v;
1578
1579 g_pos = p;
1580 t_lineno = 1;
1581 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1582 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1583
1584 if (tclass & TC_OPTERM) {
1585 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1586 continue;
1587 }
1588
1589 seq = &mainseq;
1590 if (tclass & TC_BEGIN) {
1591 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1592 seq = &beginseq;
1593 chain_group();
1594 } else if (tclass & TC_END) {
1595 debug_printf_parse("%s: TC_END\n", __func__);
1596 seq = &endseq;
1597 chain_group();
1598 } else if (tclass & TC_FUNCDECL) {
1599 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1600 next_token(TC_FUNCTION);
1601 g_pos++;
1602 f = newfunc(t_string);
1603 f->body.first = NULL;
1604 f->nargs = 0;
1605 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1606 v = findvar(ahash, t_string);
1607 v->x.aidx = f->nargs++;
1608
1609 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1610 break;
1611 }
1612 seq = &f->body;
1613 chain_group();
1614 clear_array(ahash);
1615 } else if (tclass & TC_OPSEQ) {
1616 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1617 rollback_token();
1618 cn = chain_node(OC_TEST);
1619 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1620 if (t_tclass & TC_GRPSTART) {
1621 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1622 rollback_token();
1623 chain_group();
1624 } else {
1625 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1626 chain_node(OC_PRINT);
1627 }
1628 cn->r.n = mainseq.last;
1629 } else /* if (tclass & TC_GRPSTART) */ {
1630 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1631 rollback_token();
1632 chain_group();
1633 }
1634 }
1635 debug_printf_parse("%s: TC_EOF\n", __func__);
1636}
1637
1638
1639/* -------- program execution part -------- */
1640
1641static node *mk_splitter(const char *s, tsplitter *spl)
1642{
1643 regex_t *re, *ire;
1644 node *n;
1645
1646 re = &spl->re[0];
1647 ire = &spl->re[1];
1648 n = &spl->n;
1649 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1650 regfree(re);
1651 regfree(ire); // TODO: nuke ire, use re+1?
1652 }
1653 if (s[0] && s[1]) { /* strlen(s) > 1 */
1654 mk_re_node(s, n, re);
1655 } else {
1656 n->info = (uint32_t) s[0];
1657 }
1658
1659 return n;
1660}
1661
1662/* use node as a regular expression. Supplied with node ptr and regex_t
1663 * storage space. Return ptr to regex (if result points to preg, it should
1664 * be later regfree'd manually
1665 */
1666static regex_t *as_regex(node *op, regex_t *preg)
1667{
1668 int cflags;
1669 var *v;
1670 const char *s;
1671
1672 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1673 return icase ? op->r.ire : op->l.re;
1674 }
1675 v = nvalloc(1);
1676 s = getvar_s(evaluate(op, v));
1677
1678 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1679 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1680 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1681 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1682 * (maybe gsub is not supposed to use REG_EXTENDED?).
1683 */
1684 if (regcomp(preg, s, cflags)) {
1685 cflags &= ~REG_EXTENDED;
1686 xregcomp(preg, s, cflags);
1687 }
1688 nvfree(v);
1689 return preg;
1690}
1691
/* Gradually growing buffer.
 * Makes sure that buffer b (whose capacity is tracked in *size) is
 * larger than n bytes. Reallocation happens even when n == *size,
 * so there is always at least one spare byte beyond n.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1704
1705/* resize field storage space */
1706static void fsrealloc(int size)
1707{
1708 int i;
1709
1710 if (size >= maxfields) {
1711 i = maxfields;
1712 maxfields = size + 16;
1713 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1714 for (; i < maxfields; i++) {
1715 Fields[i].type = VF_SPECIAL;
1716 Fields[i].string = NULL;
1717 }
1718 }
1719 /* if size < nfields, clear extra field variables */
1720 for (i = size; i < nfields; i++) {
1721 clrvar(Fields + i);
1722 }
1723 nfields = size;
1724}
1725
1726static int awk_split(const char *s, node *spl, char **slist)
1727{
1728 int l, n;
1729 char c[4];
1730 char *s1;
1731 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1732
1733 /* in worst case, each char would be a separate field */
1734 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1735 strcpy(s1, s);
1736
1737 c[0] = c[1] = (char)spl->info;
1738 c[2] = c[3] = '\0';
1739 if (*getvar_s(intvar[RS]) == '\0')
1740 c[2] = '\n';
1741
1742 n = 0;
1743 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1744 if (!*s)
1745 return n; /* "": zero fields */
1746 n++; /* at least one field will be there */
1747 do {
1748 l = strcspn(s, c+2); /* len till next NUL or \n */
1749 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1750 && pmatch[0].rm_so <= l
1751 ) {
1752 l = pmatch[0].rm_so;
1753 if (pmatch[0].rm_eo == 0) {
1754 l++;
1755 pmatch[0].rm_eo++;
1756 }
1757 n++; /* we saw yet another delimiter */
1758 } else {
1759 pmatch[0].rm_eo = l;
1760 if (s[l])
1761 pmatch[0].rm_eo++;
1762 }
1763 memcpy(s1, s, l);
1764 /* make sure we remove *all* of the separator chars */
1765 do {
1766 s1[l] = '\0';
1767 } while (++l < pmatch[0].rm_eo);
1768 nextword(&s1);
1769 s += pmatch[0].rm_eo;
1770 } while (*s);
1771 return n;
1772 }
1773 if (c[0] == '\0') { /* null split */
1774 while (*s) {
1775 *s1++ = *s++;
1776 *s1++ = '\0';
1777 n++;
1778 }
1779 return n;
1780 }
1781 if (c[0] != ' ') { /* single-character split */
1782 if (icase) {
1783 c[0] = toupper(c[0]);
1784 c[1] = tolower(c[1]);
1785 }
1786 if (*s1)
1787 n++;
1788 while ((s1 = strpbrk(s1, c)) != NULL) {
1789 *s1++ = '\0';
1790 n++;
1791 }
1792 return n;
1793 }
1794 /* space split */
1795 while (*s) {
1796 s = skip_whitespace(s);
1797 if (!*s)
1798 break;
1799 n++;
1800 while (*s && !isspace(*s))
1801 *s1++ = *s++;
1802 *s1++ = '\0';
1803 }
1804 return n;
1805}
1806
1807static void split_f0(void)
1808{
1809/* static char *fstrings; */
1810#define fstrings (G.split_f0__fstrings)
1811
1812 int i, n;
1813 char *s;
1814
1815 if (is_f0_split)
1816 return;
1817
1818 is_f0_split = TRUE;
1819 free(fstrings);
1820 fsrealloc(0);
1821 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1822 fsrealloc(n);
1823 s = fstrings;
1824 for (i = 0; i < n; i++) {
1825 Fields[i].string = nextword(&s);
1826 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1827 }
1828
1829 /* set NF manually to avoid side effects */
1830 clrvar(intvar[NF]);
1831 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1832 intvar[NF]->number = nfields;
1833#undef fstrings
1834}
1835
1836/* perform additional actions when some internal variables changed */
1837static void handle_special(var *v)
1838{
1839 int n;
1840 char *b;
1841 const char *sep, *s;
1842 int sl, l, len, i, bsize;
1843
1844 if (!(v->type & VF_SPECIAL))
1845 return;
1846
1847 if (v == intvar[NF]) {
1848 n = (int)getvar_i(v);
1849 fsrealloc(n);
1850
1851 /* recalculate $0 */
1852 sep = getvar_s(intvar[OFS]);
1853 sl = strlen(sep);
1854 b = NULL;
1855 len = 0;
1856 for (i = 0; i < n; i++) {
1857 s = getvar_s(&Fields[i]);
1858 l = strlen(s);
1859 if (b) {
1860 memcpy(b+len, sep, sl);
1861 len += sl;
1862 }
1863 b = qrealloc(b, len+l+sl, &bsize);
1864 memcpy(b+len, s, l);
1865 len += l;
1866 }
1867 if (b)
1868 b[len] = '\0';
1869 setvar_p(intvar[F0], b);
1870 is_f0_split = TRUE;
1871
1872 } else if (v == intvar[F0]) {
1873 is_f0_split = FALSE;
1874
1875 } else if (v == intvar[FS]) {
1876 /*
1877 * The POSIX-2008 standard says that changing FS should have no effect on the
1878 * current input line, but only on the next one. The language is:
1879 *
1880 * > Before the first reference to a field in the record is evaluated, the record
1881 * > shall be split into fields, according to the rules in Regular Expressions,
1882 * > using the value of FS that was current at the time the record was read.
1883 *
1884 * So, split up current line before assignment to FS:
1885 */
1886 split_f0();
1887
1888 mk_splitter(getvar_s(v), &fsplitter);
1889 } else if (v == intvar[RS]) {
1890 mk_splitter(getvar_s(v), &rsplitter);
1891 } else if (v == intvar[IGNORECASE]) {
1892 icase = istrue(v);
1893 } else { /* $n */
1894 n = getvar_i(intvar[NF]);
1895 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1896 /* right here v is invalid. Just to note... */
1897 }
1898}
1899
1900/* step through func/builtin/etc arguments */
1901static node *nextarg(node **pn)
1902{
1903 node *n;
1904
1905 n = *pn;
1906 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1907 *pn = n->r.n;
1908 n = n->l.n;
1909 } else {
1910 *pn = NULL;
1911 }
1912 return n;
1913}
1914
1915static void hashwalk_init(var *v, xhash *array)
1916{
1917 hash_item *hi;
1918 unsigned i;
1919 walker_list *w;
1920 walker_list *prev_walker;
1921
1922 if (v->type & VF_WALK) {
1923 prev_walker = v->x.walker;
1924 } else {
1925 v->type |= VF_WALK;
1926 prev_walker = NULL;
1927 }
1928 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1929
1930 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1931 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1932 w->cur = w->end = w->wbuf;
1933 w->prev = prev_walker;
1934 for (i = 0; i < array->csize; i++) {
1935 hi = array->items[i];
1936 while (hi) {
1937 strcpy(w->end, hi->name);
1938 nextword(&w->end);
1939 hi = hi->next;
1940 }
1941 }
1942}
1943
1944static int hashwalk_next(var *v)
1945{
1946 walker_list *w = v->x.walker;
1947
1948 if (w->cur >= w->end) {
1949 walker_list *prev_walker = w->prev;
1950
1951 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1952 free(w);
1953 v->x.walker = prev_walker;
1954 return FALSE;
1955 }
1956
1957 setvar_s(v, nextword(&w->cur));
1958 return TRUE;
1959}
1960
1961/* evaluate node, return 1 when result is true, 0 otherwise */
1962static int ptest(node *pattern)
1963{
1964 /* ptest__v is "static": to save stack space? */
1965 return istrue(evaluate(pattern, &G.ptest__v));
1966}
1967
1968/* read next record from stream rsm into a variable v */
1969static int awk_getline(rstream *rsm, var *v)
1970{
1971 char *b;
1972 regmatch_t pmatch[2];
1973 int size, a, p, pp = 0;
1974 int fd, so, eo, r, rp;
1975 char c, *m, *s;
1976
1977 debug_printf_eval("entered %s()\n", __func__);
1978
1979 /* we're using our own buffer since we need access to accumulating
1980 * characters
1981 */
1982 fd = fileno(rsm->F);
1983 m = rsm->buffer;
1984 a = rsm->adv;
1985 p = rsm->pos;
1986 size = rsm->size;
1987 c = (char) rsplitter.n.info;
1988 rp = 0;
1989
1990 if (!m)
1991 m = qrealloc(m, 256, &size);
1992
1993 do {
1994 b = m + a;
1995 so = eo = p;
1996 r = 1;
1997 if (p > 0) {
1998 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1999 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
2000 b, 1, pmatch, 0) == 0) {
2001 so = pmatch[0].rm_so;
2002 eo = pmatch[0].rm_eo;
2003 if (b[eo] != '\0')
2004 break;
2005 }
2006 } else if (c != '\0') {
2007 s = strchr(b+pp, c);
2008 if (!s)
2009 s = memchr(b+pp, '\0', p - pp);
2010 if (s) {
2011 so = eo = s-b;
2012 eo++;
2013 break;
2014 }
2015 } else {
2016 while (b[rp] == '\n')
2017 rp++;
2018 s = strstr(b+rp, "\n\n");
2019 if (s) {
2020 so = eo = s-b;
2021 while (b[eo] == '\n')
2022 eo++;
2023 if (b[eo] != '\0')
2024 break;
2025 }
2026 }
2027 }
2028
2029 if (a > 0) {
2030 memmove(m, m+a, p+1);
2031 b = m;
2032 a = 0;
2033 }
2034
2035 m = qrealloc(m, a+p+128, &size);
2036 b = m + a;
2037 pp = p;
2038 p += safe_read(fd, b+p, size-p-1);
2039 if (p < pp) {
2040 p = 0;
2041 r = 0;
2042 setvar_i(intvar[ERRNO], errno);
2043 }
2044 b[p] = '\0';
2045
2046 } while (p > pp);
2047
2048 if (p == 0) {
2049 r--;
2050 } else {
2051 c = b[so]; b[so] = '\0';
2052 setvar_s(v, b+rp);
2053 v->type |= VF_USER;
2054 b[so] = c;
2055 c = b[eo]; b[eo] = '\0';
2056 setvar_s(intvar[RT], b+so);
2057 b[eo] = c;
2058 }
2059
2060 rsm->buffer = m;
2061 rsm->adv = a + eo;
2062 rsm->pos = p - eo;
2063 rsm->size = size;
2064
2065 debug_printf_eval("returning from %s(): %d\n", __func__, r);
2066
2067 return r;
2068}
2069
2070static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2071{
2072 int r = 0;
2073 char c;
2074 const char *s = format;
2075
2076 if (int_as_int && n == (long long)n) {
2077 r = snprintf(b, size, "%lld", (long long)n);
2078 } else {
2079 do { c = *s; } while (c && *++s);
2080 if (strchr("diouxX", c)) {
2081 r = snprintf(b, size, format, (int)n);
2082 } else if (strchr("eEfgG", c)) {
2083 r = snprintf(b, size, format, n);
2084 } else {
2085 syntax_error(EMSG_INV_FMT);
2086 }
2087 }
2088 return r;
2089}
2090
2091/* formatted output into an allocated buffer, return ptr to buffer */
2092static char *awk_printf(node *n)
2093{
2094 char *b = NULL;
2095 char *fmt, *s, *f;
2096 const char *s1;
2097 int i, j, incr, bsize = 0;
2098 char c, c1;
2099 var *v, *arg;
2100
2101 v = nvalloc(1);
2102 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2103
2104 i = 0;
2105 while (*f) {
2106 s = f;
2107 while (*f && (*f != '%' || *++f == '%'))
2108 f++;
2109 while (*f && !isalpha(*f)) {
2110 if (*f == '*')
2111 syntax_error("%*x formats are not supported");
2112 f++;
2113 }
2114
2115 incr = (f - s) + MAXVARFMT;
2116 b = qrealloc(b, incr + i, &bsize);
2117 c = *f;
2118 if (c != '\0')
2119 f++;
2120 c1 = *f;
2121 *f = '\0';
2122 arg = evaluate(nextarg(&n), v);
2123
2124 j = i;
2125 if (c == 'c' || !c) {
2126 i += sprintf(b+i, s, is_numeric(arg) ?
2127 (char)getvar_i(arg) : *getvar_s(arg));
2128 } else if (c == 's') {
2129 s1 = getvar_s(arg);
2130 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2131 i += sprintf(b+i, s, s1);
2132 } else {
2133 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2134 }
2135 *f = c1;
2136
2137 /* if there was an error while sprintf, return value is negative */
2138 if (i < j)
2139 i = j;
2140 }
2141
2142 free(fmt);
2143 nvfree(v);
2144 b = xrealloc(b, i + 1);
2145 b[i] = '\0';
2146 return b;
2147}
2148
2149/* Common substitution routine.
2150 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2151 * store result into (dest), return number of substitutions.
2152 * If nm = 0, replace all matches.
2153 * If src or dst is NULL, use $0.
2154 * If subexp != 0, enable subexpression matching (\1-\9).
2155 */
2156static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2157{
2158 char *resbuf;
2159 const char *sp;
2160 int match_no, residx, replen, resbufsize;
2161 int regexec_flags;
2162 regmatch_t pmatch[10];
2163 regex_t sreg, *regex;
2164
2165 resbuf = NULL;
2166 residx = 0;
2167 match_no = 0;
2168 regexec_flags = 0;
2169 regex = as_regex(rn, &sreg);
2170 sp = getvar_s(src ? src : intvar[F0]);
2171 replen = strlen(repl);
2172 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2173 int so = pmatch[0].rm_so;
2174 int eo = pmatch[0].rm_eo;
2175
2176 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2177 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2178 memcpy(resbuf + residx, sp, eo);
2179 residx += eo;
2180 if (++match_no >= nm) {
2181 const char *s;
2182 int nbs;
2183
2184 /* replace */
2185 residx -= (eo - so);
2186 nbs = 0;
2187 for (s = repl; *s; s++) {
2188 char c = resbuf[residx++] = *s;
2189 if (c == '\\') {
2190 nbs++;
2191 continue;
2192 }
2193 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2194 int j;
2195 residx -= ((nbs + 3) >> 1);
2196 j = 0;
2197 if (c != '&') {
2198 j = c - '0';
2199 nbs++;
2200 }
2201 if (nbs % 2) {
2202 resbuf[residx++] = c;
2203 } else {
2204 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2205 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2206 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2207 residx += n;
2208 }
2209 }
2210 nbs = 0;
2211 }
2212 }
2213
2214 regexec_flags = REG_NOTBOL;
2215 sp += eo;
2216 if (match_no == nm)
2217 break;
2218 if (eo == so) {
2219 /* Empty match (e.g. "b*" will match anywhere).
2220 * Advance by one char. */
2221//BUG (bug 1333):
2222//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2223//... and will erroneously match "b" even though it is NOT at the word start.
2224//we need REG_NOTBOW but it does not exist...
2225//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2226//it should be able to do it correctly.
2227 /* Subtle: this is safe only because
2228 * qrealloc allocated at least one extra byte */
2229 resbuf[residx] = *sp;
2230 if (*sp == '\0')
2231 goto ret;
2232 sp++;
2233 residx++;
2234 }
2235 }
2236
2237 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2238 strcpy(resbuf + residx, sp);
2239 ret:
2240 //bb_error_msg("end sp:'%s'%p", sp,sp);
2241 setvar_p(dest ? dest : intvar[F0], resbuf);
2242 if (regex == &sreg)
2243 regfree(regex);
2244 return match_no;
2245}
2246
2247static NOINLINE int do_mktime(const char *ds)
2248{
2249 struct tm then;
2250 int count;
2251
2252 /*memset(&then, 0, sizeof(then)); - not needed */
2253 then.tm_isdst = -1; /* default is unknown */
2254
2255 /* manpage of mktime says these fields are ints,
2256 * so we can sscanf stuff directly into them */
2257 count = sscanf(ds, "%u %u %u %u %u %u %d",
2258 &then.tm_year, &then.tm_mon, &then.tm_mday,
2259 &then.tm_hour, &then.tm_min, &then.tm_sec,
2260 &then.tm_isdst);
2261
2262 if (count < 6
2263 || (unsigned)then.tm_mon < 1
2264 || (unsigned)then.tm_year < 1900
2265 ) {
2266 return -1;
2267 }
2268
2269 then.tm_mon -= 1;
2270 then.tm_year -= 1900;
2271
2272 return mktime(&then);
2273}
2274
2275static NOINLINE var *exec_builtin(node *op, var *res)
2276{
2277#define tspl (G.exec_builtin__tspl)
2278
2279 var *tv;
2280 node *an[4];
2281 var *av[4];
2282 const char *as[4];
2283 regmatch_t pmatch[2];
2284 regex_t sreg, *re;
2285 node *spl;
2286 uint32_t isr, info;
2287 int nargs;
2288 time_t tt;
2289 int i, l, ll, n;
2290
2291 tv = nvalloc(4);
2292 isr = info = op->info;
2293 op = op->l.n;
2294
2295 av[2] = av[3] = NULL;
2296 for (i = 0; i < 4 && op; i++) {
2297 an[i] = nextarg(&op);
2298 if (isr & 0x09000000)
2299 av[i] = evaluate(an[i], &tv[i]);
2300 if (isr & 0x08000000)
2301 as[i] = getvar_s(av[i]);
2302 isr >>= 1;
2303 }
2304
2305 nargs = i;
2306 if ((uint32_t)nargs < (info >> 30))
2307 syntax_error(EMSG_TOO_FEW_ARGS);
2308
2309 info &= OPNMASK;
2310 switch (info) {
2311
2312 case B_a2:
2313 if (ENABLE_FEATURE_AWK_LIBM)
2314 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2315 else
2316 syntax_error(EMSG_NO_MATH);
2317 break;
2318
2319 case B_sp: {
2320 char *s, *s1;
2321
2322 if (nargs > 2) {
2323 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2324 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2325 } else {
2326 spl = &fsplitter.n;
2327 }
2328
2329 n = awk_split(as[0], spl, &s);
2330 s1 = s;
2331 clear_array(iamarray(av[1]));
2332 for (i = 1; i <= n; i++)
2333 setari_u(av[1], i, nextword(&s));
2334 free(s1);
2335 setvar_i(res, n);
2336 break;
2337 }
2338
2339 case B_ss: {
2340 char *s;
2341
2342 l = strlen(as[0]);
2343 i = getvar_i(av[1]) - 1;
2344 if (i > l)
2345 i = l;
2346 if (i < 0)
2347 i = 0;
2348 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2349 if (n < 0)
2350 n = 0;
2351 s = xstrndup(as[0]+i, n);
2352 setvar_p(res, s);
2353 break;
2354 }
2355
2356 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2357 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2358 case B_an:
2359 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2360 break;
2361
2362 case B_co:
2363 setvar_i(res, ~getvar_i_int(av[0]));
2364 break;
2365
2366 case B_ls:
2367 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2368 break;
2369
2370 case B_or:
2371 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2372 break;
2373
2374 case B_rs:
2375 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2376 break;
2377
2378 case B_xo:
2379 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2380 break;
2381
2382 case B_lo:
2383 case B_up: {
2384 char *s, *s1;
2385 s1 = s = xstrdup(as[0]);
2386 while (*s1) {
2387 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2388 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2389 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2390 s1++;
2391 }
2392 setvar_p(res, s);
2393 break;
2394 }
2395
2396 case B_ix:
2397 n = 0;
2398 ll = strlen(as[1]);
2399 l = strlen(as[0]) - ll;
2400 if (ll > 0 && l >= 0) {
2401 if (!icase) {
2402 char *s = strstr(as[0], as[1]);
2403 if (s)
2404 n = (s - as[0]) + 1;
2405 } else {
2406 /* this piece of code is terribly slow and
2407 * really should be rewritten
2408 */
2409 for (i = 0; i <= l; i++) {
2410 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2411 n = i+1;
2412 break;
2413 }
2414 }
2415 }
2416 }
2417 setvar_i(res, n);
2418 break;
2419
2420 case B_ti:
2421 if (nargs > 1)
2422 tt = getvar_i(av[1]);
2423 else
2424 time(&tt);
2425 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2426 i = strftime(g_buf, MAXVARFMT,
2427 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2428 localtime(&tt));
2429 g_buf[i] = '\0';
2430 setvar_s(res, g_buf);
2431 break;
2432
2433 case B_mt:
2434 setvar_i(res, do_mktime(as[0]));
2435 break;
2436
2437 case B_ma:
2438 re = as_regex(an[1], &sreg);
2439 n = regexec(re, as[0], 1, pmatch, 0);
2440 if (n == 0) {
2441 pmatch[0].rm_so++;
2442 pmatch[0].rm_eo++;
2443 } else {
2444 pmatch[0].rm_so = 0;
2445 pmatch[0].rm_eo = -1;
2446 }
2447 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2448 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2449 setvar_i(res, pmatch[0].rm_so);
2450 if (re == &sreg)
2451 regfree(re);
2452 break;
2453
2454 case B_ge:
2455 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2456 break;
2457
2458 case B_gs:
2459 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2460 break;
2461
2462 case B_su:
2463 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2464 break;
2465 }
2466
2467 nvfree(tv);
2468 return res;
2469#undef tspl
2470}
2471
2472/*
2473 * Evaluate node - the heart of the program. Supplied with subtree
2474 * and place where to store result. returns ptr to result.
2475 */
2476#define XC(n) ((n) >> 8)
2477
2478static var *evaluate(node *op, var *res)
2479{
2480/* This procedure is recursive so we should count every byte */
2481#define fnargs (G.evaluate__fnargs)
2482/* seed is initialized to 1 */
2483#define seed (G.evaluate__seed)
2484#define sreg (G.evaluate__sreg)
2485
2486 var *v1;
2487
2488 if (!op)
2489 return setvar_s(res, NULL);
2490
2491 debug_printf_eval("entered %s()\n", __func__);
2492
2493 v1 = nvalloc(2);
2494
2495 while (op) {
2496 struct {
2497 var *v;
2498 const char *s;
2499 } L = { NULL, NULL };
2500 struct {
2501 var *v;
2502 const char *s;
2503 } R = { NULL, NULL };
2504 static double L_d;
2505 uint32_t opinfo;
2506 int opn;
2507 node *op1;
2508
2509 opinfo = op->info;
2510 opn = (opinfo & OPNMASK);
2511 g_lineno = op->lineno;
2512 op1 = op->l.n;
2513 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2514
2515 /* execute inevitable things */
2516 if (opinfo & OF_RES1)
2517 L.v = evaluate(op1, v1);
2518 if (opinfo & OF_RES2)
2519 R.v = evaluate(op->r.n, v1+1);
2520 if (opinfo & OF_STR1) {
2521 L.s = getvar_s(L.v);
2522 debug_printf_eval("L.s:'%s'\n", L.s);
2523 }
2524 if (opinfo & OF_STR2) {
2525 R.s = getvar_s(R.v);
2526 debug_printf_eval("R.s:'%s'\n", R.s);
2527 }
2528 if (opinfo & OF_NUM1) {
2529 L_d = getvar_i(L.v);
2530 debug_printf_eval("L_d:%f\n", L_d);
2531 }
2532
2533 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2534 switch (XC(opinfo & OPCLSMASK)) {
2535
2536 /* -- iterative node type -- */
2537
2538 /* test pattern */
2539 case XC( OC_TEST ):
2540 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2541 /* it's range pattern */
2542 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2543 op->info |= OF_CHECKED;
2544 if (ptest(op1->r.n))
2545 op->info &= ~OF_CHECKED;
2546 op = op->a.n;
2547 } else {
2548 op = op->r.n;
2549 }
2550 } else {
2551 op = ptest(op1) ? op->a.n : op->r.n;
2552 }
2553 break;
2554
2555 /* just evaluate an expression, also used as unconditional jump */
2556 case XC( OC_EXEC ):
2557 break;
2558
2559 /* branch, used in if-else and various loops */
2560 case XC( OC_BR ):
2561 op = istrue(L.v) ? op->a.n : op->r.n;
2562 break;
2563
2564 /* initialize for-in loop */
2565 case XC( OC_WALKINIT ):
2566 hashwalk_init(L.v, iamarray(R.v));
2567 break;
2568
2569 /* get next array item */
2570 case XC( OC_WALKNEXT ):
2571 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2572 break;
2573
2574 case XC( OC_PRINT ):
2575 case XC( OC_PRINTF ): {
2576 FILE *F = stdout;
2577
2578 if (op->r.n) {
2579 rstream *rsm = newfile(R.s);
2580 if (!rsm->F) {
2581 if (opn == '|') {
2582 rsm->F = popen(R.s, "w");
2583 if (rsm->F == NULL)
2584 bb_perror_msg_and_die("popen");
2585 rsm->is_pipe = 1;
2586 } else {
2587 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2588 }
2589 }
2590 F = rsm->F;
2591 }
2592
2593 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2594 if (!op1) {
2595 fputs(getvar_s(intvar[F0]), F);
2596 } else {
2597 while (op1) {
2598 var *v = evaluate(nextarg(&op1), v1);
2599 if (v->type & VF_NUMBER) {
2600 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2601 getvar_i(v), TRUE);
2602 fputs(g_buf, F);
2603 } else {
2604 fputs(getvar_s(v), F);
2605 }
2606
2607 if (op1)
2608 fputs(getvar_s(intvar[OFS]), F);
2609 }
2610 }
2611 fputs(getvar_s(intvar[ORS]), F);
2612
2613 } else { /* OC_PRINTF */
2614 char *s = awk_printf(op1);
2615 fputs(s, F);
2616 free(s);
2617 }
2618 fflush(F);
2619 break;
2620 }
2621
2622 case XC( OC_DELETE ): {
2623 uint32_t info = op1->info & OPCLSMASK;
2624 var *v;
2625
2626 if (info == OC_VAR) {
2627 v = op1->l.v;
2628 } else if (info == OC_FNARG) {
2629 v = &fnargs[op1->l.aidx];
2630 } else {
2631 syntax_error(EMSG_NOT_ARRAY);
2632 }
2633
2634 if (op1->r.n) {
2635 const char *s;
2636 clrvar(L.v);
2637 s = getvar_s(evaluate(op1->r.n, v1));
2638 hash_remove(iamarray(v), s);
2639 } else {
2640 clear_array(iamarray(v));
2641 }
2642 break;
2643 }
2644
2645 case XC( OC_NEWSOURCE ):
2646 g_progname = op->l.new_progname;
2647 break;
2648
2649 case XC( OC_RETURN ):
2650 copyvar(res, L.v);
2651 break;
2652
2653 case XC( OC_NEXTFILE ):
2654 nextfile = TRUE;
2655 case XC( OC_NEXT ):
2656 nextrec = TRUE;
2657 case XC( OC_DONE ):
2658 clrvar(res);
2659 break;
2660
2661 case XC( OC_EXIT ):
2662 awk_exit(L_d);
2663
2664 /* -- recursive node type -- */
2665
2666 case XC( OC_VAR ):
2667 L.v = op->l.v;
2668 if (L.v == intvar[NF])
2669 split_f0();
2670 goto v_cont;
2671
2672 case XC( OC_FNARG ):
2673 L.v = &fnargs[op->l.aidx];
2674 v_cont:
2675 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2676 break;
2677
2678 case XC( OC_IN ):
2679 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2680 break;
2681
2682 case XC( OC_REGEXP ):
2683 op1 = op;
2684 L.s = getvar_s(intvar[F0]);
2685 goto re_cont;
2686
2687 case XC( OC_MATCH ):
2688 op1 = op->r.n;
2689 re_cont:
2690 {
2691 regex_t *re = as_regex(op1, &sreg);
2692 int i = regexec(re, L.s, 0, NULL, 0);
2693 if (re == &sreg)
2694 regfree(re);
2695 setvar_i(res, (i == 0) ^ (opn == '!'));
2696 }
2697 break;
2698
2699 case XC( OC_MOVE ):
2700 debug_printf_eval("MOVE\n");
2701 /* if source is a temporary string, jusk relink it to dest */
2702//Disabled: if R.v is numeric but happens to have cached R.v->string,
2703//then L.v ends up being a string, which is wrong
2704// if (R.v == v1+1 && R.v->string) {
2705// res = setvar_p(L.v, R.v->string);
2706// R.v->string = NULL;
2707// } else {
2708 res = copyvar(L.v, R.v);
2709// }
2710 break;
2711
2712 case XC( OC_TERNARY ):
2713 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2714 syntax_error(EMSG_POSSIBLE_ERROR);
2715 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2716 break;
2717
2718 case XC( OC_FUNC ): {
2719 var *vbeg, *v;
2720 const char *sv_progname;
2721
2722 /* The body might be empty, still has to eval the args */
2723 if (!op->r.n->info && !op->r.f->body.first)
2724 syntax_error(EMSG_UNDEF_FUNC);
2725
2726 vbeg = v = nvalloc(op->r.f->nargs + 1);
2727 while (op1) {
2728 var *arg = evaluate(nextarg(&op1), v1);
2729 copyvar(v, arg);
2730 v->type |= VF_CHILD;
2731 v->x.parent = arg;
2732 if (++v - vbeg >= (int) op->r.f->nargs)
2733 break;
2734 }
2735
2736 v = fnargs;
2737 fnargs = vbeg;
2738 sv_progname = g_progname;
2739
2740 res = evaluate(op->r.f->body.first, res);
2741
2742 g_progname = sv_progname;
2743 nvfree(fnargs);
2744 fnargs = v;
2745
2746 break;
2747 }
2748
2749 case XC( OC_GETLINE ):
2750 case XC( OC_PGETLINE ): {
2751 rstream *rsm;
2752 int i;
2753
2754 if (op1) {
2755 rsm = newfile(L.s);
2756 if (!rsm->F) {
2757 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2758 rsm->F = popen(L.s, "r");
2759 rsm->is_pipe = TRUE;
2760 } else {
2761 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2762 }
2763 }
2764 } else {
2765 if (!iF)
2766 iF = next_input_file();
2767 rsm = iF;
2768 }
2769
2770 if (!rsm || !rsm->F) {
2771 setvar_i(intvar[ERRNO], errno);
2772 setvar_i(res, -1);
2773 break;
2774 }
2775
2776 if (!op->r.n)
2777 R.v = intvar[F0];
2778
2779 i = awk_getline(rsm, R.v);
2780 if (i > 0 && !op1) {
2781 incvar(intvar[FNR]);
2782 incvar(intvar[NR]);
2783 }
2784 setvar_i(res, i);
2785 break;
2786 }
2787
2788 /* simple builtins */
2789 case XC( OC_FBLTIN ): {
2790 static double R_d;
2791
2792 switch (opn) {
2793 case F_in:
2794 R_d = (long long)L_d;
2795 break;
2796
2797 case F_rn:
2798 R_d = (double)rand() / (double)RAND_MAX;
2799 break;
2800
2801 case F_co:
2802 if (ENABLE_FEATURE_AWK_LIBM) {
2803 R_d = cos(L_d);
2804 break;
2805 }
2806
2807 case F_ex:
2808 if (ENABLE_FEATURE_AWK_LIBM) {
2809 R_d = exp(L_d);
2810 break;
2811 }
2812
2813 case F_lg:
2814 if (ENABLE_FEATURE_AWK_LIBM) {
2815 R_d = log(L_d);
2816 break;
2817 }
2818
2819 case F_si:
2820 if (ENABLE_FEATURE_AWK_LIBM) {
2821 R_d = sin(L_d);
2822 break;
2823 }
2824
2825 case F_sq:
2826 if (ENABLE_FEATURE_AWK_LIBM) {
2827 R_d = sqrt(L_d);
2828 break;
2829 }
2830
2831 syntax_error(EMSG_NO_MATH);
2832 break;
2833
2834 case F_sr:
2835 R_d = (double)seed;
2836 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2837 srand(seed);
2838 break;
2839
2840 case F_ti:
2841 R_d = time(NULL);
2842 break;
2843
2844 case F_le:
2845 debug_printf_eval("length: L.s:'%s'\n", L.s);
2846 if (!op1) {
2847 L.s = getvar_s(intvar[F0]);
2848 debug_printf_eval("length: L.s='%s'\n", L.s);
2849 }
2850 else if (L.v->type & VF_ARRAY) {
2851 R_d = L.v->x.array->nel;
2852 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2853 break;
2854 }
2855 R_d = strlen(L.s);
2856 break;
2857
2858 case F_sy:
2859 fflush_all();
2860 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2861 ? (system(L.s) >> 8) : 0;
2862 break;
2863
2864 case F_ff:
2865 if (!op1) {
2866 fflush(stdout);
2867 } else if (L.s && *L.s) {
2868 rstream *rsm = newfile(L.s);
2869 fflush(rsm->F);
2870 } else {
2871 fflush_all();
2872 }
2873 break;
2874
2875 case F_cl: {
2876 rstream *rsm;
2877 int err = 0;
2878 rsm = (rstream *)hash_search(fdhash, L.s);
2879 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2880 if (rsm) {
2881 debug_printf_eval("OC_FBLTIN F_cl "
2882 "rsm->is_pipe:%d, ->F:%p\n",
2883 rsm->is_pipe, rsm->F);
2884 /* Can be NULL if open failed. Example:
2885 * getline line <"doesnt_exist";
2886 * close("doesnt_exist"); <--- here rsm->F is NULL
2887 */
2888 if (rsm->F)
2889 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2890 free(rsm->buffer);
2891 hash_remove(fdhash, L.s);
2892 }
2893 if (err)
2894 setvar_i(intvar[ERRNO], errno);
2895 R_d = (double)err;
2896 break;
2897 }
2898 } /* switch */
2899 setvar_i(res, R_d);
2900 break;
2901 }
2902
2903 case XC( OC_BUILTIN ):
2904 res = exec_builtin(op, res);
2905 break;
2906
2907 case XC( OC_SPRINTF ):
2908 setvar_p(res, awk_printf(op1));
2909 break;
2910
2911 case XC( OC_UNARY ): {
2912 double Ld, R_d;
2913
2914 Ld = R_d = getvar_i(R.v);
2915 switch (opn) {
2916 case 'P':
2917 Ld = ++R_d;
2918 goto r_op_change;
2919 case 'p':
2920 R_d++;
2921 goto r_op_change;
2922 case 'M':
2923 Ld = --R_d;
2924 goto r_op_change;
2925 case 'm':
2926 R_d--;
2927 r_op_change:
2928 setvar_i(R.v, R_d);
2929 break;
2930 case '!':
2931 Ld = !istrue(R.v);
2932 break;
2933 case '-':
2934 Ld = -R_d;
2935 break;
2936 }
2937 setvar_i(res, Ld);
2938 break;
2939 }
2940
2941 case XC( OC_FIELD ): {
2942 int i = (int)getvar_i(R.v);
2943 if (i == 0) {
2944 res = intvar[F0];
2945 } else {
2946 split_f0();
2947 if (i > nfields)
2948 fsrealloc(i);
2949 res = &Fields[i - 1];
2950 }
2951 break;
2952 }
2953
2954 /* concatenation (" ") and index joining (",") */
2955 case XC( OC_CONCAT ):
2956 case XC( OC_COMMA ): {
2957 const char *sep = "";
2958 if ((opinfo & OPCLSMASK) == OC_COMMA)
2959 sep = getvar_s(intvar[SUBSEP]);
2960 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2961 break;
2962 }
2963
2964 case XC( OC_LAND ):
2965 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2966 break;
2967
2968 case XC( OC_LOR ):
2969 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2970 break;
2971
2972 case XC( OC_BINARY ):
2973 case XC( OC_REPLACE ): {
2974 double R_d = getvar_i(R.v);
2975 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2976 switch (opn) {
2977 case '+':
2978 L_d += R_d;
2979 break;
2980 case '-':
2981 L_d -= R_d;
2982 break;
2983 case '*':
2984 L_d *= R_d;
2985 break;
2986 case '/':
2987 if (R_d == 0)
2988 syntax_error(EMSG_DIV_BY_ZERO);
2989 L_d /= R_d;
2990 break;
2991 case '&':
2992 if (ENABLE_FEATURE_AWK_LIBM)
2993 L_d = pow(L_d, R_d);
2994 else
2995 syntax_error(EMSG_NO_MATH);
2996 break;
2997 case '%':
2998 if (R_d == 0)
2999 syntax_error(EMSG_DIV_BY_ZERO);
3000 L_d -= (long long)(L_d / R_d) * R_d;
3001 break;
3002 }
3003 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3004 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3005 break;
3006 }
3007
3008 case XC( OC_COMPARE ): {
3009 static int i;
3010 double Ld;
3011
3012 if (is_numeric(L.v) && is_numeric(R.v)) {
3013 Ld = getvar_i(L.v) - getvar_i(R.v);
3014 } else {
3015 const char *l = getvar_s(L.v);
3016 const char *r = getvar_s(R.v);
3017 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3018 }
3019 switch (opn & 0xfe) {
3020 case 0:
3021 i = (Ld > 0);
3022 break;
3023 case 2:
3024 i = (Ld >= 0);
3025 break;
3026 case 4:
3027 i = (Ld == 0);
3028 break;
3029 }
3030 setvar_i(res, (i == 0) ^ (opn & 1));
3031 break;
3032 }
3033
3034 default:
3035 syntax_error(EMSG_POSSIBLE_ERROR);
3036 }
3037 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3038 op = op->a.n;
3039 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3040 break;
3041 if (nextrec)
3042 break;
3043 } /* while (op) */
3044
3045 nvfree(v1);
3046 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3047 return res;
3048#undef fnargs
3049#undef seed
3050#undef sreg
3051}
3052
3053
3054/* -------- main & co. -------- */
3055
3056static int awk_exit(int r)
3057{
3058 var tv;
3059 unsigned i;
3060 hash_item *hi;
3061
3062 zero_out_var(&tv);
3063
3064 if (!exiting) {
3065 exiting = TRUE;
3066 nextrec = FALSE;
3067 evaluate(endseq.first, &tv);
3068 }
3069
3070 /* waiting for children */
3071 for (i = 0; i < fdhash->csize; i++) {
3072 hi = fdhash->items[i];
3073 while (hi) {
3074 if (hi->data.rs.F && hi->data.rs.is_pipe)
3075 pclose(hi->data.rs.F);
3076 hi = hi->next;
3077 }
3078 }
3079
3080 exit(r);
3081}
3082
3083/* if expr looks like "var=value", perform assignment and return 1,
3084 * otherwise return 0 */
3085static int is_assignment(const char *expr)
3086{
3087 char *exprc, *val;
3088
3089 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3090 return FALSE;
3091 }
3092
3093 exprc = xstrdup(expr);
3094 val = exprc + (val - expr);
3095 *val++ = '\0';
3096
3097 unescape_string_in_place(val);
3098 setvar_u(newvar(exprc), val);
3099 free(exprc);
3100 return TRUE;
3101}
3102
3103/* switch to next input file */
3104static rstream *next_input_file(void)
3105{
3106#define rsm (G.next_input_file__rsm)
3107#define files_happen (G.next_input_file__files_happen)
3108
3109 FILE *F;
3110 const char *fname, *ind;
3111
3112 if (rsm.F)
3113 fclose(rsm.F);
3114 rsm.F = NULL;
3115 rsm.pos = rsm.adv = 0;
3116
3117 for (;;) {
3118 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3119 if (files_happen)
3120 return NULL;
3121 fname = "-";
3122 F = stdin;
3123 break;
3124 }
3125 ind = getvar_s(incvar(intvar[ARGIND]));
3126 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3127 if (fname && *fname && !is_assignment(fname)) {
3128 F = xfopen_stdin(fname);
3129 break;
3130 }
3131 }
3132
3133 files_happen = TRUE;
3134 setvar_s(intvar[FILENAME], fname);
3135 rsm.F = F;
3136 return &rsm;
3137#undef rsm
3138#undef files_happen
3139}
3140
3141int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3142int awk_main(int argc, char **argv)
3143{
3144 unsigned opt;
3145 char *opt_F;
3146 llist_t *list_v = NULL;
3147 llist_t *list_f = NULL;
3148#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3149 llist_t *list_e = NULL;
3150#endif
3151 int i, j;
3152 var *v;
3153 var tv;
3154 char **envp;
3155 char *vnames = (char *)vNames; /* cheat */
3156 char *vvalues = (char *)vValues;
3157
3158 INIT_G();
3159
3160 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3161 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3162 if (ENABLE_LOCALE_SUPPORT)
3163 setlocale(LC_NUMERIC, "C");
3164
3165 zero_out_var(&tv);
3166
3167 /* allocate global buffer */
3168 g_buf = xmalloc(MAXVARFMT + 1);
3169
3170 vhash = hash_init();
3171 ahash = hash_init();
3172 fdhash = hash_init();
3173 fnhash = hash_init();
3174
3175 /* initialize variables */
3176 for (i = 0; *vnames; i++) {
3177 intvar[i] = v = newvar(nextword(&vnames));
3178 if (*vvalues != '\377')
3179 setvar_s(v, nextword(&vvalues));
3180 else
3181 setvar_i(v, 0);
3182
3183 if (*vnames == '*') {
3184 v->type |= VF_SPECIAL;
3185 vnames++;
3186 }
3187 }
3188
3189 handle_special(intvar[FS]);
3190 handle_special(intvar[RS]);
3191
3192 newfile("/dev/stdin")->F = stdin;
3193 newfile("/dev/stdout")->F = stdout;
3194 newfile("/dev/stderr")->F = stderr;
3195
3196 /* Huh, people report that sometimes environ is NULL. Oh well. */
3197 if (environ) for (envp = environ; *envp; envp++) {
3198 /* environ is writable, thus we don't strdup it needlessly */
3199 char *s = *envp;
3200 char *s1 = strchr(s, '=');
3201 if (s1) {
3202 *s1 = '\0';
3203 /* Both findvar and setvar_u take const char*
3204 * as 2nd arg -> environment is not trashed */
3205 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3206 *s1 = '=';
3207 }
3208 }
3209 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3210 argv += optind;
3211 argc -= optind;
3212 if (opt & OPT_W)
3213 bb_error_msg("warning: option -W is ignored");
3214 if (opt & OPT_F) {
3215 unescape_string_in_place(opt_F);
3216 setvar_s(intvar[FS], opt_F);
3217 }
3218 while (list_v) {
3219 if (!is_assignment(llist_pop(&list_v)))
3220 bb_show_usage();
3221 }
3222 while (list_f) {
3223 char *s = NULL;
3224 FILE *from_file;
3225
3226 g_progname = llist_pop(&list_f);
3227 from_file = xfopen_stdin(g_progname);
3228 /* one byte is reserved for some trick in next_token */
3229 for (i = j = 1; j > 0; i += j) {
3230 s = xrealloc(s, i + 4096);
3231 j = fread(s + i, 1, 4094, from_file);
3232 }
3233 s[i] = '\0';
3234 fclose(from_file);
3235 parse_program(s + 1);
3236 free(s);
3237 }
3238 g_progname = "cmd. line";
3239#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3240 while (list_e) {
3241 parse_program(llist_pop(&list_e));
3242 }
3243#endif
3244 if (!(opt & (OPT_f | OPT_e))) {
3245 if (!*argv)
3246 bb_show_usage();
3247 parse_program(*argv++);
3248 argc--;
3249 }
3250
3251 /* fill in ARGV array */
3252 setvar_i(intvar[ARGC], argc + 1);
3253 setari_u(intvar[ARGV], 0, "awk");
3254 i = 0;
3255 while (*argv)
3256 setari_u(intvar[ARGV], ++i, *argv++);
3257
3258 evaluate(beginseq.first, &tv);
3259 if (!mainseq.first && !endseq.first)
3260 awk_exit(EXIT_SUCCESS);
3261
3262 /* input file could already be opened in BEGIN block */
3263 if (!iF)
3264 iF = next_input_file();
3265
3266 /* passing through input files */
3267 while (iF) {
3268 nextfile = FALSE;
3269 setvar_i(intvar[FNR], 0);
3270
3271 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3272 nextrec = FALSE;
3273 incvar(intvar[NR]);
3274 incvar(intvar[FNR]);
3275 evaluate(mainseq.first, &tv);
3276
3277 if (nextfile)
3278 break;
3279 }
3280
3281 if (i < 0)
3282 syntax_error(strerror(errno));
3283
3284 iF = next_input_file();
3285 }
3286
3287 awk_exit(EXIT_SUCCESS);
3288 /*return 0;*/
3289}
3290