summaryrefslogtreecommitdiff
path: root/editors/awk.c (plain)
blob: e2527ff80252c4a6dcfb6ba4ba89d37a2c913384
1/* vi: set sw=4 ts=4: */
2/*
3 * awk implementation for busybox
4 *
5 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6 *
7 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8 */
9
10//config:config AWK
11//config: bool "awk"
12//config: default y
13//config: help
14//config: Awk is used as a pattern scanning and processing language. This is
15//config: the BusyBox implementation of that programming language.
16//config:
17//config:config FEATURE_AWK_LIBM
18//config: bool "Enable math functions (requires libm)"
19//config: default y
20//config: depends on AWK
21//config: help
22//config: Enable math functions of the Awk programming language.
23//config: NOTE: This will require libm to be present for linking.
24//config:
25//config:config FEATURE_AWK_GNU_EXTENSIONS
26//config: bool "Enable a few GNU extensions"
27//config: default y
28//config: depends on AWK
29//config: help
30//config: Enable a few features from gawk:
31//config: * command line option -e AWK_PROGRAM
32//config: * simultaneous use of -f and -e on the command line.
33//config: This enables the use of awk library files.
34//config: Ex: awk -f mylib.awk -e '{print myfunction($1);}' ...
35
36//applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
37
38//kbuild:lib-$(CONFIG_AWK) += awk.o
39
40//usage:#define awk_trivial_usage
41//usage: "[OPTIONS] [AWK_PROGRAM] [FILE]..."
42//usage:#define awk_full_usage "\n\n"
43//usage: " -v VAR=VAL Set variable"
44//usage: "\n -F SEP Use SEP as field separator"
45//usage: "\n -f FILE Read program from FILE"
46//usage: IF_FEATURE_AWK_GNU_EXTENSIONS(
47//usage: "\n -e AWK_PROGRAM"
48//usage: )
49
50#include "libbb.h"
51#include "xregex.h"
52#include <math.h>
53
54/* This is a NOEXEC applet. Be very careful! */
55
56
/*
 * Debug tracing hooks. Each of the three macros below is defined as a
 * no-op. Comment one of them out and the matching fallback further down
 * turns it into an fprintf() to stderr.
 */
#define debug_printf_walker(...)  do {} while (0)
#define debug_printf_eval(...)    do {} while (0)
#define debug_printf_parse(...)   do {} while (0)

#if !defined(debug_printf_walker)
# define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
#endif
#if !defined(debug_printf_eval)
# define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
#endif
#if !defined(debug_printf_parse)
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
#endif
72
73
74#define OPTSTR_AWK \
75 "F:v:f:" \
76 IF_FEATURE_AWK_GNU_EXTENSIONS("e:") \
77 "W:"
78#define OPTCOMPLSTR_AWK \
79 "v::f::" \
80 IF_FEATURE_AWK_GNU_EXTENSIONS("e::")
81enum {
82 OPTBIT_F, /* define field separator */
83 OPTBIT_v, /* define variable */
84 OPTBIT_f, /* pull in awk program from file */
85 IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
86 OPTBIT_W, /* -W ignored */
87 OPT_F = 1 << OPTBIT_F,
88 OPT_v = 1 << OPTBIT_v,
89 OPT_f = 1 << OPTBIT_f,
90 OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
91 OPT_W = 1 << OPTBIT_W
92};
93
#define MAXVARFMT	240	/* buffer limit handed to fmt_num() by getvar_s() */
#define MINNVBLOCK	64	/* smallest block of temporaries nvalloc() creates */

/* variable flags (var::type) */
#define VF_NUMBER	(1 << 0)	/* 1 = primary type is number */
#define VF_ARRAY	(1 << 1)	/* 1 = it's an array */

#define VF_CACHED	(1 << 8)	/* 1 = num/str value has cached str/num eq */
#define VF_USER		(1 << 9)	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL	(1 << 10)	/* 1 = requires extra handling when changed */
#define VF_WALK		(1 << 11)	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR		(1 << 12)	/* 1 = var::string points to fstring buffer */
#define VF_CHILD	(1 << 13)	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY	(1 << 14)	/* 1 = variable was set explicitly */

/* flags that describe the variable itself rather than its value:
 * they survive when the value is replaced (see clrvar()) */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
111
/* One level of for..in iteration state; levels are chained through prev */
typedef struct walker_list {
	char *end;			/* end position within wbuf */
	char *cur;			/* current position within wbuf */
	struct walker_list *prev;	/* previously pushed walker, freed along with this one */
	char wbuf[1];			/* start of the buffer; allocated larger than declared */
} walker_list;
118
119/* Variable */
120typedef struct var_s {
121 unsigned type; /* flags */
122 double number;
123 char *string;
124 union {
125 int aidx; /* func arg idx (for compilation stage) */
126 struct xhash_s *array; /* array ptr */
127 struct var_s *parent; /* for func args, ptr to actual parameter */
128 walker_list *walker; /* list of array elements (for..in) */
129 } x;
130} var;
131
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;		/* head of the chain */
	struct node_s *last;		/* tail of the chain */
	const char *programname;	/* program name associated with this chain */
} chain;
138
139/* Function */
140typedef struct func_s {
141 unsigned nargs;
142 struct chain_s body;
143} func;
144
145/* I/O stream */
146typedef struct rstream_s {
147 FILE *F;
148 char *buffer;
149 int adv;
150 int size;
151 int pos;
152 smallint is_pipe;
153} rstream;
154
155typedef struct hash_item_s {
156 union {
157 struct var_s v; /* variable/array hash */
158 struct rstream_s rs; /* redirect streams hash */
159 struct func_s f; /* functions hash */
160 } data;
161 struct hash_item_s *next; /* next in chain */
162 char name[1]; /* really it's longer */
163} hash_item;
164
/* Chained hash table keyed by name */
typedef struct xhash_s {
	unsigned nel;			/* num of elements */
	unsigned csize;			/* current hash size */
	unsigned nprime;		/* next hash size in PRIMES[] */
	unsigned glen;			/* summary length of item names */
	struct hash_item_s **items;	/* bucket array, csize entries */
} xhash;
172
173/* Tree node */
174typedef struct node_s {
175 uint32_t info;
176 unsigned lineno;
177 union {
178 struct node_s *n;
179 var *v;
180 int aidx;
181 char *new_progname;
182 regex_t *re;
183 } l;
184 union {
185 struct node_s *n;
186 regex_t *ire;
187 func *f;
188 } r;
189 union {
190 struct node_s *n;
191 } a;
192} node;
193
194/* Block of temporary variables */
195typedef struct nvblock_s {
196 int size;
197 var *pos;
198 struct nvblock_s *prev;
199 struct nvblock_s *next;
200 var nv[];
201} nvblock;
202
203typedef struct tsplitter_s {
204 node n;
205 regex_t re[2];
206} tsplitter;
207
/* simple token classes */
/* Order and hex values are very important!!! See next_token() */
#define TC_SEQSTART	(1 << 0)		/* ( */
#define TC_SEQTERM	(1 << 1)		/* ) */
#define TC_REGEXP	(1 << 2)		/* /.../ */
#define TC_OUTRDR	(1 << 3)		/* | > >> */
#define TC_UOPPOST	(1 << 4)		/* unary postfix operator */
#define TC_UOPPRE1	(1 << 5)		/* unary prefix operator */
#define TC_BINOPX	(1 << 6)		/* two-opnd operator */
#define TC_IN		(1 << 7)
#define TC_COMMA	(1 << 8)
#define TC_PIPE		(1 << 9)		/* input redirection pipe */
#define TC_UOPPRE2	(1 << 10)		/* unary prefix operator */
#define TC_ARRTERM	(1 << 11)		/* ] */
#define TC_GRPSTART	(1 << 12)		/* { */
#define TC_GRPTERM	(1 << 13)		/* } */
#define TC_SEMICOL	(1 << 14)
#define TC_NEWLINE	(1 << 15)
#define TC_STATX	(1 << 16)		/* ctl statement (for, next...) */
#define TC_WHILE	(1 << 17)
#define TC_ELSE		(1 << 18)
#define TC_BUILTIN	(1 << 19)
#define TC_GETLINE	(1 << 20)
#define TC_FUNCDECL	(1 << 21)		/* `function' `func' */
#define TC_BEGIN	(1 << 22)
#define TC_END		(1 << 23)
#define TC_EOF		(1 << 24)
#define TC_VARIABLE	(1 << 25)
#define TC_ARRAY	(1 << 26)
#define TC_FUNCTION	(1 << 27)
#define TC_STRING	(1 << 28)
#define TC_NUMBER	(1 << 29)

/* either flavour of unary prefix operator */
#define TC_UOPPRE	(TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP	(TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
//#define TC_UNARYOP	(TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND	(TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
			| TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT	(TC_STATX | TC_WHILE)
#define TC_OPTERM	(TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD		(TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN \
			| TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM	(TC_COMMA | TC_GRPSTART | TC_GRPTERM \
			| TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ	(TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ	(TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* when the previous token class is in CONCAT1 and the next one is in
 * CONCAT2, next_token() inserts a concatenation operator between them */
#define TC_CONCAT1	(TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
			| TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2	(TC_OPERAND | TC_UOPPRE)
270
/* operator flags */
#define OF_RES1		(1 << 16)
#define OF_RES2		(1 << 17)
#define OF_STR1		(1 << 18)
#define OF_STR2		(1 << 19)
#define OF_NUM1		(1 << 20)
#define OF_CHECKED	(1 << 21)

/* combined operator flags: the first letter selects the flags for
 * operand 1, the second for operand 2 (x = none) */
#define xx	0
#define xV	OF_RES2
#define xS	(OF_RES2 | OF_STR2)
#define Vx	OF_RES1
#define VV	(OF_RES1 | OF_RES2)
#define Nx	(OF_RES1 | OF_NUM1)
#define NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx	(OF_RES1 | OF_STR1)
#define SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define OPCLSMASK	0xFF00	/* operation class bits (OC_xxx / ST_xxx) */
#define OPNMASK		0x007F	/* low bits: operation number/character */
292
/* Operator priority lives in the highest byte (even: r->l, odd: l->r grouping).
 * For builtins that byte has a different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * P() parenthesizes its argument and shifts as uint32_t: values with the
 * top bit set (e.g. P(0x83)) would otherwise shift into the sign bit of a
 * signed int, and an argument such as "a | b" would bind incorrectly.
 */
#undef P
#undef PRIMASK
#undef PRIMASK2
#define P(x)      ((uint32_t)(x) << 24)
#define PRIMASK   0x7F000000
#define PRIMASK2  0x7E000000
303
/* Operation classes (stored in the OPCLSMASK bits) */

#define SHIFT_TIL_THIS	0x0600
#define RECUR_FROM_THIS	0x1000

enum {
	OC_DELETE    = 0x0100,
	OC_EXEC      = 0x0200,
	OC_NEWSOURCE = 0x0300,
	OC_PRINT     = 0x0400,
	OC_PRINTF    = 0x0500,
	OC_WALKINIT  = 0x0600,

	OC_BR        = 0x0700,
	OC_BREAK     = 0x0800,
	OC_CONTINUE  = 0x0900,
	OC_EXIT      = 0x0a00,
	OC_NEXT      = 0x0b00,
	OC_NEXTFILE  = 0x0c00,
	OC_TEST      = 0x0d00,
	OC_WALKNEXT  = 0x0e00,

	OC_BINARY    = 0x1000,
	OC_BUILTIN   = 0x1100,
	OC_COLON     = 0x1200,
	OC_COMMA     = 0x1300,
	OC_COMPARE   = 0x1400,
	OC_CONCAT    = 0x1500,
	OC_FBLTIN    = 0x1600,
	OC_FIELD     = 0x1700,
	OC_FNARG     = 0x1800,
	OC_FUNC      = 0x1900,
	OC_GETLINE   = 0x1a00,
	OC_IN        = 0x1b00,
	OC_LAND      = 0x1c00,
	OC_LOR       = 0x1d00,
	OC_MATCH     = 0x1e00,
	OC_MOVE      = 0x1f00,
	OC_PGETLINE  = 0x2000,
	OC_REGEXP    = 0x2100,
	OC_REPLACE   = 0x2200,
	OC_RETURN    = 0x2300,
	OC_SPRINTF   = 0x2400,
	OC_TERNARY   = 0x2500,
	OC_UNARY     = 0x2600,
	OC_VAR       = 0x2700,
	OC_DONE      = 0x2800,

	ST_IF        = 0x3000,
	ST_DO        = 0x3100,
	ST_FOR       = 0x3200,
	ST_WHILE     = 0x3300
};
330
/* simple builtins (OC_FBLTIN); the builtin named in each comment is the
 * one tokeninfo[] pairs with the constant */
enum {
	F_in,	/* int */
	F_rn,	/* rand */
	F_co,	/* cos */
	F_ex,	/* exp */
	F_lg,	/* log */
	F_si,	/* sin */
	F_sq,	/* sqrt */
	F_sr,	/* srand */
	F_ti,	/* systime */
	F_le,	/* length */
	F_sy,	/* system */
	F_ff,	/* fflush */
	F_cl	/* close */
};

/* builtins (OC_BUILTIN) */
enum {
	B_a2,	/* atan2 */
	B_ix,	/* index */
	B_ma,	/* match */
	B_sp,	/* split */
	B_ss,	/* substr */
	B_ti,	/* strftime */
	B_mt,	/* mktime */
	B_lo,	/* tolower */
	B_up,	/* toupper */
	B_ge,	/* gensub */
	B_gs,	/* gsub */
	B_su,	/* sub */
	B_an,	/* and */
	B_co,	/* compl */
	B_ls,	/* lshift */
	B_or,	/* or */
	B_rs,	/* rshift */
	B_xo,	/* xor */
};
343
/*
 * Tokens and their corresponding info values.
 *
 * tokenlist[] holds the token spellings grouped by token class. Every
 * entry is a length byte followed by that many characters; NTC closes
 * the current class and moves on to the next one (tc<<1 in next_token()).
 * next_token() steps through tokeninfo[] in parallel, so both tables
 * must list the tokens in exactly the same order.
 */

#define NTC   "\377"	/* switch to next token class (tc<<1) */
#define NTCC  '\377'

#define OC_B  OC_BUILTIN

static const char tokenlist[] ALIGN1 =
	/* TC_SEQSTART */
	"\1(" NTC
	/* TC_SEQTERM */
	"\1)" NTC
	/* TC_REGEXP */
	"\1/" NTC
	/* TC_OUTRDR */
	"\2>>" "\1>" "\1|" NTC
	/* TC_UOPPOST */
	"\2++" "\2--" NTC
	/* TC_UOPPRE1 */
	"\2++" "\2--" "\1$" NTC
	/* TC_BINOPX */
	"\2==" "\1=" "\2+=" "\2-="
	"\2*=" "\2/=" "\2%=" "\2^="
	"\1+" "\1-" "\3**=" "\2**"
	"\1/" "\1%" "\1^" "\1*"
	"\2!=" "\2>=" "\2<=" "\1>"
	"\1<" "\2!~" "\1~" "\2&&"
	"\2||" "\1?" "\1:" NTC
	/* TC_IN */
	"\2in" NTC
	/* TC_COMMA */
	"\1," NTC
	/* TC_PIPE */
	"\1|" NTC
	/* TC_UOPPRE2 */
	"\1+" "\1-" "\1!" NTC
	/* TC_ARRTERM */
	"\1]" NTC
	/* TC_GRPSTART */
	"\1{" NTC
	/* TC_GRPTERM */
	"\1}" NTC
	/* TC_SEMICOL */
	"\1;" NTC
	/* TC_NEWLINE */
	"\1\n" NTC
	/* TC_STATX */
	"\2if" "\2do" "\3for" "\5break"
	"\10continue" "\6delete" "\5print"
	"\6printf" "\4next" "\10nextfile"
	"\6return" "\4exit" NTC
	/* TC_WHILE */
	"\5while" NTC
	/* TC_ELSE */
	"\4else" NTC
	/* TC_BUILTIN */
	"\3and" "\5compl" "\6lshift" "\2or"
	"\6rshift" "\3xor"
	"\5close" "\6system" "\6fflush" "\5atan2"
	"\3cos" "\3exp" "\3int" "\3log"
	"\4rand" "\3sin" "\4sqrt" "\5srand"
	"\6gensub" "\4gsub" "\5index" "\6length"
	"\5match" "\5split" "\7sprintf" "\3sub"
	"\6substr" "\7systime" "\10strftime" "\6mktime"
	"\7tolower" "\7toupper" NTC
	/* TC_GETLINE */
	"\7getline" NTC
	/* TC_FUNCDECL */
	"\4func" "\10function" NTC
	/* TC_BEGIN */
	"\5BEGIN" NTC
	/* TC_END */
	"\3END"
	/* compiler adds trailing "\0" */
	;
396
397static const uint32_t tokeninfo[] = {
398 0,
399 0,
400 OC_REGEXP,
401 xS|'a', xS|'w', xS|'|',
402 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
403 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
404 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
405 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
406 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
407 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
408 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
409 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
410 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
411 OC_IN|SV|P(49), /* in */
412 OC_COMMA|SS|P(80),
413 OC_PGETLINE|SV|P(37),
414 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
415 0, /* ] */
416 0,
417 0,
418 0,
419 0, /* \n */
420 ST_IF, ST_DO, ST_FOR, OC_BREAK,
421 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
422 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
423 OC_RETURN|Vx, OC_EXIT|Nx,
424 ST_WHILE,
425 0, /* else */
426
427 OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
428 OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
429 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
430 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
431 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
432 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
433 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
434 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
435 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
436 OC_GETLINE|SV|P(0),
437 0, 0,
438 0,
439 0 /* END */
440};
441
442/* internal variable names and their initial values */
443/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
444enum {
445 CONVFMT, OFMT, FS, OFS,
446 ORS, RS, RT, FILENAME,
447 SUBSEP, F0, ARGIND, ARGC,
448 ARGV, ERRNO, FNR, NR,
449 NF, IGNORECASE, ENVIRON, NUM_INTERNAL_VARS
450};
451
452static const char vNames[] ALIGN1 =
453 "CONVFMT\0" "OFMT\0" "FS\0*" "OFS\0"
454 "ORS\0" "RS\0*" "RT\0" "FILENAME\0"
455 "SUBSEP\0" "$\0*" "ARGIND\0" "ARGC\0"
456 "ARGV\0" "ERRNO\0" "FNR\0" "NR\0"
457 "NF\0*" "IGNORECASE\0*" "ENVIRON\0" "\0";
458
459static const char vValues[] ALIGN1 =
460 "%.6g\0" "%.6g\0" " \0" " \0"
461 "\n\0" "\n\0" "\0" "\0"
462 "\034\0" "\0" "\377";
463
464/* hash size may grow to these values */
465#define FIRST_PRIME 61
466static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
467
468
469/* Globals. Split in two parts so that first one is addressed
470 * with (mostly short) negative offsets.
471 * NB: it's unsafe to put members of type "double"
472 * into globals2 (gcc may fail to align them).
473 */
474struct globals {
475 double t_double;
476 chain beginseq, mainseq, endseq;
477 chain *seq;
478 node *break_ptr, *continue_ptr;
479 rstream *iF;
480 xhash *vhash, *ahash, *fdhash, *fnhash;
481 const char *g_progname;
482 int g_lineno;
483 int nfields;
484 int maxfields; /* used in fsrealloc() only */
485 var *Fields;
486 nvblock *g_cb;
487 char *g_pos;
488 char *g_buf;
489 smallint icase;
490 smallint exiting;
491 smallint nextrec;
492 smallint nextfile;
493 smallint is_f0_split;
494 smallint t_rollback;
495};
496struct globals2 {
497 uint32_t t_info; /* often used */
498 uint32_t t_tclass;
499 char *t_string;
500 int t_lineno;
501
502 var *intvar[NUM_INTERNAL_VARS]; /* often used */
503
504 /* former statics from various functions */
505 char *split_f0__fstrings;
506
507 uint32_t next_token__save_tclass;
508 uint32_t next_token__save_info;
509 uint32_t next_token__ltclass;
510 smallint next_token__concat_inserted;
511
512 smallint next_input_file__files_happen;
513 rstream next_input_file__rsm;
514
515 var *evaluate__fnargs;
516 unsigned evaluate__seed;
517 regex_t evaluate__sreg;
518
519 var ptest__v;
520
521 tsplitter exec_builtin__tspl;
522
523 /* biggest and least used members go last */
524 tsplitter fsplitter, rsplitter;
525};
526#define G1 (ptr_to_globals[-1])
527#define G (*(struct globals2 *)ptr_to_globals)
528/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
529/*char G1size[sizeof(G1)]; - 0x74 */
530/*char Gsize[sizeof(G)]; - 0x1c4 */
531/* Trying to keep most of members accessible with short offsets: */
532/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
533#define t_double (G1.t_double )
534#define beginseq (G1.beginseq )
535#define mainseq (G1.mainseq )
536#define endseq (G1.endseq )
537#define seq (G1.seq )
538#define break_ptr (G1.break_ptr )
539#define continue_ptr (G1.continue_ptr)
540#define iF (G1.iF )
541#define vhash (G1.vhash )
542#define ahash (G1.ahash )
543#define fdhash (G1.fdhash )
544#define fnhash (G1.fnhash )
545#define g_progname (G1.g_progname )
546#define g_lineno (G1.g_lineno )
547#define nfields (G1.nfields )
548#define maxfields (G1.maxfields )
549#define Fields (G1.Fields )
550#define g_cb (G1.g_cb )
551#define g_pos (G1.g_pos )
552#define g_buf (G1.g_buf )
553#define icase (G1.icase )
554#define exiting (G1.exiting )
555#define nextrec (G1.nextrec )
556#define nextfile (G1.nextfile )
557#define is_f0_split (G1.is_f0_split )
558#define t_rollback (G1.t_rollback )
559#define t_info (G.t_info )
560#define t_tclass (G.t_tclass )
561#define t_string (G.t_string )
562#define t_lineno (G.t_lineno )
563#define intvar (G.intvar )
564#define fsplitter (G.fsplitter )
565#define rsplitter (G.rsplitter )
566#define INIT_G() do { \
567 SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
568 G.next_token__ltclass = TC_OPTERM; \
569 G.evaluate__seed = 1; \
570} while (0)
571
572
573/* function prototypes */
574static void handle_special(var *);
575static node *parse_expr(uint32_t);
576static void chain_group(void);
577static var *evaluate(node *, var *);
578static rstream *next_input_file(void);
579static int fmt_num(char *, int, const char *, double, int);
580static int awk_exit(int) NORETURN;
581
582/* ---- error handling ---- */
583
584static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
585static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
586static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
587static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
588static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
589static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments for builtin";
590static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
591static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
592static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
593static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
594
595static void zero_out_var(var *vp)
596{
597 memset(vp, 0, sizeof(*vp));
598}
599
600static void syntax_error(const char *message) NORETURN;
601static void syntax_error(const char *message)
602{
603 bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
604}
605
606/* ---- hash stuff ---- */
607
/* Hash a NUL-terminated name: for each character, idx = idx * 63 + char
 * (written as a shift and a subtraction). The caller reduces the result
 * modulo the table size. */
static unsigned hashidx(const char *name)
{
	unsigned idx;

	for (idx = 0; *name != '\0'; name++)
		idx = (idx << 6) - idx + *name;
	return idx;
}
616
617/* create new hash */
618static xhash *hash_init(void)
619{
620 xhash *newhash;
621
622 newhash = xzalloc(sizeof(*newhash));
623 newhash->csize = FIRST_PRIME;
624 newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
625
626 return newhash;
627}
628
629/* find item in hash, return ptr to data, NULL if not found */
630static void *hash_search(xhash *hash, const char *name)
631{
632 hash_item *hi;
633
634 hi = hash->items[hashidx(name) % hash->csize];
635 while (hi) {
636 if (strcmp(hi->name, name) == 0)
637 return &hi->data;
638 hi = hi->next;
639 }
640 return NULL;
641}
642
643/* grow hash if it becomes too big */
644static void hash_rebuild(xhash *hash)
645{
646 unsigned newsize, i, idx;
647 hash_item **newitems, *hi, *thi;
648
649 if (hash->nprime == ARRAY_SIZE(PRIMES))
650 return;
651
652 newsize = PRIMES[hash->nprime++];
653 newitems = xzalloc(newsize * sizeof(newitems[0]));
654
655 for (i = 0; i < hash->csize; i++) {
656 hi = hash->items[i];
657 while (hi) {
658 thi = hi;
659 hi = thi->next;
660 idx = hashidx(thi->name) % newsize;
661 thi->next = newitems[idx];
662 newitems[idx] = thi;
663 }
664 }
665
666 free(hash->items);
667 hash->csize = newsize;
668 hash->items = newitems;
669}
670
671/* find item in hash, add it if necessary. Return ptr to data */
672static void *hash_find(xhash *hash, const char *name)
673{
674 hash_item *hi;
675 unsigned idx;
676 int l;
677
678 hi = hash_search(hash, name);
679 if (!hi) {
680 if (++hash->nel / hash->csize > 10)
681 hash_rebuild(hash);
682
683 l = strlen(name) + 1;
684 hi = xzalloc(sizeof(*hi) + l);
685 strcpy(hi->name, name);
686
687 idx = hashidx(name) % hash->csize;
688 hi->next = hash->items[idx];
689 hash->items[idx] = hi;
690 hash->glen += l;
691 }
692 return &hi->data;
693}
694
695#define findvar(hash, name) ((var*) hash_find((hash), (name)))
696#define newvar(name) ((var*) hash_find(vhash, (name)))
697#define newfile(name) ((rstream*)hash_find(fdhash, (name)))
698#define newfunc(name) ((func*) hash_find(fnhash, (name)))
699
700static void hash_remove(xhash *hash, const char *name)
701{
702 hash_item *hi, **phi;
703
704 phi = &hash->items[hashidx(name) % hash->csize];
705 while (*phi) {
706 hi = *phi;
707 if (strcmp(hi->name, name) == 0) {
708 hash->glen -= (strlen(name) + 1);
709 hash->nel--;
710 *phi = hi->next;
711 free(hi);
712 break;
713 }
714 phi = &hi->next;
715 }
716}
717
718/* ------ some useful functions ------ */
719
720static char *skip_spaces(char *p)
721{
722 while (1) {
723 if (*p == '\\' && p[1] == '\n') {
724 p++;
725 t_lineno++;
726 } else if (*p != ' ' && *p != '\t') {
727 break;
728 }
729 p++;
730 }
731 return p;
732}
733
/* Return the word *s points at and move *s just past its terminating NUL */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
742
/* Fetch the next character from *s, decoding a backslash escape through
 * bb_process_escape_sequence(), and advance *s past what was consumed.
 *
 * Example awk statement:
 *   s = "abc\"def"
 * we must treat \" as "
 * so when the escape is not recognized (a backslash comes back and *s did
 * not move), the character after the backslash is returned as is.
 */
static char nextchar(char **s)
{
	char ch, *after;

	ch = **s;
	(*s)++;
	if (ch != '\\')
		return ch;

	after = *s;
	ch = bb_process_escape_sequence((const char**)s);
	if (ch == '\\' && *s == after) { /* unrecognized \z? */
		ch = *after; /* yes, fetch z */
		if (ch != '\0')
			*s = after + 1; /* advance unless z = NUL */
	}
	return ch;
}
762
/* Rewrite s1 in place, replacing each escape sequence by the character
 * nextchar() decodes it to.
 * TODO: merge with strcpy_and_process_escape_sequences()?
 */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char *dst = s1;
	char ch;

	do {
		ch = nextchar(&src);
		*dst++ = ch;
	} while (ch != '\0');
}
771
/* Return nonzero when c is a letter, a digit or an underscore.
 * c is converted to unsigned char before the isalnum() test, so a
 * negative value coming from a plain char (a byte >= 0x80) is never
 * handed to the ctype function. */
static ALWAYS_INLINE int isalnum_(int c)
{
	return (isalnum((unsigned char)c) || c == '_');
}
776
/* Convert the number at *pp and leave *pp at the first unparsed character.
 * With ENABLE_DESKTOP, input starting with '0' is also tried as a hex
 * (0x123abc) or octal (07777) integer. */
static double my_strtod(char **pp)
{
	char *start = *pp;

	if (ENABLE_DESKTOP && start[0] == '0') {
		/* Might be hex or octal integer: 0x123abc or 07777 */
		char second = (start[1] | 0x20);
		int is_hex = (second == 'x');

		if (is_hex || isdigit(start[1])) {
			unsigned long long ull = strtoull(start, pp, 0);
			char stop;

			if (is_hex)
				return ull;
			stop = **pp;
			if (stop != '.' && !isdigit(stop))
				return ull;
			/* else: it may be a floating number. Examples:
			 * 009.123 (*pp points to '9')
			 * 000.123 (*pp points to '.')
			 * fall through to strtod.
			 */
		}
	}
	return strtod(start, pp);
}
799
800/* -------- working with variables (set/get/copy/etc) -------- */
801
802static xhash *iamarray(var *v)
803{
804 var *a = v;
805
806 while (a->type & VF_CHILD)
807 a = a->x.parent;
808
809 if (!(a->type & VF_ARRAY)) {
810 a->type |= VF_ARRAY;
811 a->x.array = hash_init();
812 }
813 return a->x.array;
814}
815
816static void clear_array(xhash *array)
817{
818 unsigned i;
819 hash_item *hi, *thi;
820
821 for (i = 0; i < array->csize; i++) {
822 hi = array->items[i];
823 while (hi) {
824 thi = hi;
825 hi = hi->next;
826 free(thi->data.v.string);
827 free(thi);
828 }
829 array->items[i] = NULL;
830 }
831 array->glen = array->nel = 0;
832}
833
834/* clear a variable */
835static var *clrvar(var *v)
836{
837 if (!(v->type & VF_FSTR))
838 free(v->string);
839
840 v->type &= VF_DONTTOUCH;
841 v->type |= VF_DIRTY;
842 v->string = NULL;
843 return v;
844}
845
846/* assign string value to variable */
847static var *setvar_p(var *v, char *value)
848{
849 clrvar(v);
850 v->string = value;
851 handle_special(v);
852 return v;
853}
854
855/* same as setvar_p but make a copy of string */
856static var *setvar_s(var *v, const char *value)
857{
858 return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
859}
860
861/* same as setvar_s but sets USER flag */
862static var *setvar_u(var *v, const char *value)
863{
864 v = setvar_s(v, value);
865 v->type |= VF_USER;
866 return v;
867}
868
869/* set array element to user string */
870static void setari_u(var *a, int idx, const char *s)
871{
872 var *v;
873
874 v = findvar(iamarray(a), itoa(idx));
875 setvar_u(v, s);
876}
877
878/* assign numeric value to variable */
879static var *setvar_i(var *v, double value)
880{
881 clrvar(v);
882 v->type |= VF_NUMBER;
883 v->number = value;
884 handle_special(v);
885 return v;
886}
887
888static const char *getvar_s(var *v)
889{
890 /* if v is numeric and has no cached string, convert it to string */
891 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
892 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
893 v->string = xstrdup(g_buf);
894 v->type |= VF_CACHED;
895 }
896 return (v->string == NULL) ? "" : v->string;
897}
898
899static double getvar_i(var *v)
900{
901 char *s;
902
903 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
904 v->number = 0;
905 s = v->string;
906 if (s && *s) {
907 debug_printf_eval("getvar_i: '%s'->", s);
908 v->number = my_strtod(&s);
909 debug_printf_eval("%f (s:'%s')\n", v->number, s);
910 if (v->type & VF_USER) {
911 s = skip_spaces(s);
912 if (*s != '\0')
913 v->type &= ~VF_USER;
914 }
915 } else {
916 debug_printf_eval("getvar_i: '%s'->zero\n", s);
917 v->type &= ~VF_USER;
918 }
919 v->type |= VF_CACHED;
920 }
921 debug_printf_eval("getvar_i: %f\n", v->number);
922 return v->number;
923}
924
925/* Used for operands of bitwise ops */
926static unsigned long getvar_i_int(var *v)
927{
928 double d = getvar_i(v);
929
930 /* Casting doubles to longs is undefined for values outside
931 * of target type range. Try to widen it as much as possible */
932 if (d >= 0)
933 return (unsigned long)d;
934 /* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
935 return - (long) (unsigned long) (-d);
936}
937
938static var *copyvar(var *dest, const var *src)
939{
940 if (dest != src) {
941 clrvar(dest);
942 dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
943 debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
944 dest->number = src->number;
945 if (src->string)
946 dest->string = xstrdup(src->string);
947 }
948 handle_special(dest);
949 return dest;
950}
951
952static var *incvar(var *v)
953{
954 return setvar_i(v, getvar_i(v) + 1.0);
955}
956
957/* return true if v is number or numeric string */
958static int is_numeric(var *v)
959{
960 getvar_i(v);
961 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
962}
963
964/* return 1 when value of v corresponds to true, 0 otherwise */
965static int istrue(var *v)
966{
967 if (is_numeric(v))
968 return (v->number != 0);
969 return (v->string && v->string[0]);
970}
971
972/* temporary variables allocator. Last allocated should be first freed */
973static var *nvalloc(int n)
974{
975 nvblock *pb = NULL;
976 var *v, *r;
977 int size;
978
979 while (g_cb) {
980 pb = g_cb;
981 if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
982 break;
983 g_cb = g_cb->next;
984 }
985
986 if (!g_cb) {
987 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
988 g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
989 g_cb->size = size;
990 g_cb->pos = g_cb->nv;
991 g_cb->prev = pb;
992 /*g_cb->next = NULL; - xzalloc did it */
993 if (pb)
994 pb->next = g_cb;
995 }
996
997 v = r = g_cb->pos;
998 g_cb->pos += n;
999
1000 while (v < g_cb->pos) {
1001 v->type = 0;
1002 v->string = NULL;
1003 v++;
1004 }
1005
1006 return r;
1007}
1008
1009static void nvfree(var *v)
1010{
1011 var *p;
1012
1013 if (v < g_cb->nv || v >= g_cb->pos)
1014 syntax_error(EMSG_INTERNAL_ERROR);
1015
1016 for (p = v; p < g_cb->pos; p++) {
1017 if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
1018 clear_array(iamarray(p));
1019 free(p->x.array->items);
1020 free(p->x.array);
1021 }
1022 if (p->type & VF_WALK) {
1023 walker_list *n;
1024 walker_list *w = p->x.walker;
1025 debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
1026 p->x.walker = NULL;
1027 while (w) {
1028 n = w->prev;
1029 debug_printf_walker(" free(%p)\n", w);
1030 free(w);
1031 w = n;
1032 }
1033 }
1034 clrvar(p);
1035 }
1036
1037 g_cb->pos = v;
1038 while (g_cb->prev && g_cb->pos == g_cb->nv) {
1039 g_cb = g_cb->prev;
1040 }
1041}
1042
1043/* ------- awk program text parsing ------- */
1044
1045/* Parse next token pointed by global pos, place results into global ttt.
1046 * If token isn't expected, give away. Return token class
1047 */
1048static uint32_t next_token(uint32_t expected)
1049{
1050#define concat_inserted (G.next_token__concat_inserted)
1051#define save_tclass (G.next_token__save_tclass)
1052#define save_info (G.next_token__save_info)
1053/* Initialized to TC_OPTERM: */
1054#define ltclass (G.next_token__ltclass)
1055
1056 char *p, *s;
1057 const char *tl;
1058 uint32_t tc;
1059 const uint32_t *ti;
1060
1061 if (t_rollback) {
1062 t_rollback = FALSE;
1063
1064 } else if (concat_inserted) {
1065 concat_inserted = FALSE;
1066 t_tclass = save_tclass;
1067 t_info = save_info;
1068
1069 } else {
1070 p = g_pos;
1071 readnext:
1072 p = skip_spaces(p);
1073 g_lineno = t_lineno;
1074 if (*p == '#')
1075 while (*p != '\n' && *p != '\0')
1076 p++;
1077
1078 if (*p == '\n')
1079 t_lineno++;
1080
1081 if (*p == '\0') {
1082 tc = TC_EOF;
1083 debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1084
1085 } else if (*p == '\"') {
1086 /* it's a string */
1087 t_string = s = ++p;
1088 while (*p != '\"') {
1089 char *pp;
1090 if (*p == '\0' || *p == '\n')
1091 syntax_error(EMSG_UNEXP_EOS);
1092 pp = p;
1093 *s++ = nextchar(&pp);
1094 p = pp;
1095 }
1096 p++;
1097 *s = '\0';
1098 tc = TC_STRING;
1099 debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1100
1101 } else if ((expected & TC_REGEXP) && *p == '/') {
1102 /* it's regexp */
1103 t_string = s = ++p;
1104 while (*p != '/') {
1105 if (*p == '\0' || *p == '\n')
1106 syntax_error(EMSG_UNEXP_EOS);
1107 *s = *p++;
1108 if (*s++ == '\\') {
1109 char *pp = p;
1110 s[-1] = bb_process_escape_sequence((const char **)&pp);
1111 if (*p == '\\')
1112 *s++ = '\\';
1113 if (pp == p)
1114 *s++ = *p++;
1115 else
1116 p = pp;
1117 }
1118 }
1119 p++;
1120 *s = '\0';
1121 tc = TC_REGEXP;
1122 debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1123
1124 } else if (*p == '.' || isdigit(*p)) {
1125 /* it's a number */
1126 char *pp = p;
1127 t_double = my_strtod(&pp);
1128 p = pp;
1129 if (*p == '.')
1130 syntax_error(EMSG_UNEXP_TOKEN);
1131 tc = TC_NUMBER;
1132 debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1133
1134 } else {
1135 /* search for something known */
1136 tl = tokenlist;
1137 tc = 0x00000001;
1138 ti = tokeninfo;
1139 while (*tl) {
1140 int l = (unsigned char) *tl++;
1141 if (l == (unsigned char) NTCC) {
1142 tc <<= 1;
1143 continue;
1144 }
1145 /* if token class is expected,
1146 * token matches,
1147 * and it's not a longer word,
1148 */
1149 if ((tc & (expected | TC_WORD | TC_NEWLINE))
1150 && strncmp(p, tl, l) == 0
1151 && !((tc & TC_WORD) && isalnum_(p[l]))
1152 ) {
1153 /* then this is what we are looking for */
1154 t_info = *ti;
1155 debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1156 p += l;
1157 goto token_found;
1158 }
1159 ti++;
1160 tl += l;
1161 }
1162 /* not a known token */
1163
1164 /* is it a name? (var/array/function) */
1165 if (!isalnum_(*p))
1166 syntax_error(EMSG_UNEXP_TOKEN); /* no */
1167 /* yes */
1168 t_string = --p;
1169 while (isalnum_(*++p)) {
1170 p[-1] = *p;
1171 }
1172 p[-1] = '\0';
1173 tc = TC_VARIABLE;
1174 /* also consume whitespace between functionname and bracket */
1175 if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
1176 p = skip_spaces(p);
1177 if (*p == '(') {
1178 tc = TC_FUNCTION;
1179 debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1180 } else {
1181 if (*p == '[') {
1182 p++;
1183 tc = TC_ARRAY;
1184 debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1185 } else
1186 debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1187 }
1188 }
1189 token_found:
1190 g_pos = p;
1191
1192 /* skipping newlines in some cases */
1193 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
1194 goto readnext;
1195
1196 /* insert concatenation operator when needed */
1197 if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)) {
1198 concat_inserted = TRUE;
1199 save_tclass = tc;
1200 save_info = t_info;
1201 tc = TC_BINOP;
1202 t_info = OC_CONCAT | SS | P(35);
1203 }
1204
1205 t_tclass = tc;
1206 }
1207 ltclass = t_tclass;
1208
1209 /* Are we ready for this? */
1210 if (!(ltclass & expected))
1211 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
1212 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1213
1214 return ltclass;
1215#undef concat_inserted
1216#undef save_tclass
1217#undef save_info
1218#undef ltclass
1219}
1220
1221static void rollback_token(void)
1222{
1223 t_rollback = TRUE;
1224}
1225
1226static node *new_node(uint32_t info)
1227{
1228 node *n;
1229
1230 n = xzalloc(sizeof(node));
1231 n->info = info;
1232 n->lineno = g_lineno;
1233 return n;
1234}
1235
1236static void mk_re_node(const char *s, node *n, regex_t *re)
1237{
1238 n->info = OC_REGEXP;
1239 n->l.re = re;
1240 n->r.ire = re + 1;
1241 xregcomp(re, s, REG_EXTENDED);
1242 xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
1243}
1244
1245static node *condition(void)
1246{
1247 next_token(TC_SEQSTART);
1248 return parse_expr(TC_SEQTERM);
1249}
1250
1251/* parse expression terminated by given argument, return ptr
1252 * to built subtree. Terminator is eaten by parse_expr */
1253static node *parse_expr(uint32_t iexp)
1254{
1255 node sn;
1256 node *cn = &sn;
1257 node *vn, *glptr;
1258 uint32_t tc, xtc;
1259 var *v;
1260
1261 debug_printf_parse("%s(%x)\n", __func__, iexp);
1262
1263 sn.info = PRIMASK;
1264 sn.r.n = glptr = NULL;
1265 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1266
1267 while (!((tc = next_token(xtc)) & iexp)) {
1268
1269 if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
1270 /* input redirection (<) attached to glptr node */
1271 debug_printf_parse("%s: input redir\n", __func__);
1272 cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
1273 cn->a.n = glptr;
1274 xtc = TC_OPERAND | TC_UOPPRE;
1275 glptr = NULL;
1276
1277 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1278 debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
1279 /* for binary and postfix-unary operators, jump back over
1280 * previous operators with higher priority */
1281 vn = cn;
1282 while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
1283 || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
1284 ) {
1285 vn = vn->a.n;
1286 }
1287 if ((t_info & OPCLSMASK) == OC_TERNARY)
1288 t_info += P(6);
1289 cn = vn->a.n->r.n = new_node(t_info);
1290 cn->a.n = vn->a.n;
1291 if (tc & TC_BINOP) {
1292 cn->l.n = vn;
1293 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1294 if ((t_info & OPCLSMASK) == OC_PGETLINE) {
1295 /* it's a pipe */
1296 next_token(TC_GETLINE);
1297 /* give maximum priority to this pipe */
1298 cn->info &= ~PRIMASK;
1299 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1300 }
1301 } else {
1302 cn->r.n = vn;
1303 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1304 }
1305 vn->a.n = cn;
1306
1307 } else {
1308 debug_printf_parse("%s: other\n", __func__);
1309 /* for operands and prefix-unary operators, attach them
1310 * to last node */
1311 vn = cn;
1312 cn = vn->r.n = new_node(t_info);
1313 cn->a.n = vn;
1314 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1315 if (tc & (TC_OPERAND | TC_REGEXP)) {
1316 debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
1317 xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
1318 /* one should be very careful with switch on tclass -
1319 * only simple tclasses should be used! */
1320 switch (tc) {
1321 case TC_VARIABLE:
1322 case TC_ARRAY:
1323 debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
1324 cn->info = OC_VAR;
1325 v = hash_search(ahash, t_string);
1326 if (v != NULL) {
1327 cn->info = OC_FNARG;
1328 cn->l.aidx = v->x.aidx;
1329 } else {
1330 cn->l.v = newvar(t_string);
1331 }
1332 if (tc & TC_ARRAY) {
1333 cn->info |= xS;
1334 cn->r.n = parse_expr(TC_ARRTERM);
1335 }
1336 break;
1337
1338 case TC_NUMBER:
1339 case TC_STRING:
1340 debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
1341 cn->info = OC_VAR;
1342 v = cn->l.v = xzalloc(sizeof(var));
1343 if (tc & TC_NUMBER)
1344 setvar_i(v, t_double);
1345 else
1346 setvar_s(v, t_string);
1347 break;
1348
1349 case TC_REGEXP:
1350 debug_printf_parse("%s: TC_REGEXP\n", __func__);
1351 mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
1352 break;
1353
1354 case TC_FUNCTION:
1355 debug_printf_parse("%s: TC_FUNCTION\n", __func__);
1356 cn->info = OC_FUNC;
1357 cn->r.f = newfunc(t_string);
1358 cn->l.n = condition();
1359 break;
1360
1361 case TC_SEQSTART:
1362 debug_printf_parse("%s: TC_SEQSTART\n", __func__);
1363 cn = vn->r.n = parse_expr(TC_SEQTERM);
1364 if (!cn)
1365 syntax_error("Empty sequence");
1366 cn->a.n = vn;
1367 break;
1368
1369 case TC_GETLINE:
1370 debug_printf_parse("%s: TC_GETLINE\n", __func__);
1371 glptr = cn;
1372 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1373 break;
1374
1375 case TC_BUILTIN:
1376 debug_printf_parse("%s: TC_BUILTIN\n", __func__);
1377 cn->l.n = condition();
1378 break;
1379 }
1380 }
1381 }
1382 }
1383
1384 debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
1385 return sn.r.n;
1386}
1387
1388/* add node to chain. Return ptr to alloc'd node */
1389static node *chain_node(uint32_t info)
1390{
1391 node *n;
1392
1393 if (!seq->first)
1394 seq->first = seq->last = new_node(0);
1395
1396 if (seq->programname != g_progname) {
1397 seq->programname = g_progname;
1398 n = chain_node(OC_NEWSOURCE);
1399 n->l.new_progname = xstrdup(g_progname);
1400 }
1401
1402 n = seq->last;
1403 n->info = info;
1404 seq->last = n->a.n = new_node(OC_DONE);
1405
1406 return n;
1407}
1408
1409static void chain_expr(uint32_t info)
1410{
1411 node *n;
1412
1413 n = chain_node(info);
1414 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1415 if (t_tclass & TC_GRPTERM)
1416 rollback_token();
1417}
1418
1419static node *chain_loop(node *nn)
1420{
1421 node *n, *n2, *save_brk, *save_cont;
1422
1423 save_brk = break_ptr;
1424 save_cont = continue_ptr;
1425
1426 n = chain_node(OC_BR | Vx);
1427 continue_ptr = new_node(OC_EXEC);
1428 break_ptr = new_node(OC_EXEC);
1429 chain_group();
1430 n2 = chain_node(OC_EXEC | Vx);
1431 n2->l.n = nn;
1432 n2->a.n = n;
1433 continue_ptr->a.n = n2;
1434 break_ptr->a.n = n->r.n = seq->last;
1435
1436 continue_ptr = save_cont;
1437 break_ptr = save_brk;
1438
1439 return n;
1440}
1441
1442/* parse group and attach it to chain */
1443static void chain_group(void)
1444{
1445 uint32_t c;
1446 node *n, *n2, *n3;
1447
1448 do {
1449 c = next_token(TC_GRPSEQ);
1450 } while (c & TC_NEWLINE);
1451
1452 if (c & TC_GRPSTART) {
1453 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1454 while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1455 debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
1456 if (t_tclass & TC_NEWLINE)
1457 continue;
1458 rollback_token();
1459 chain_group();
1460 }
1461 debug_printf_parse("%s: TC_GRPTERM\n", __func__);
1462 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1463 debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
1464 rollback_token();
1465 chain_expr(OC_EXEC | Vx);
1466 } else {
1467 /* TC_STATEMNT */
1468 debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
1469 switch (t_info & OPCLSMASK) {
1470 case ST_IF:
1471 debug_printf_parse("%s: ST_IF\n", __func__);
1472 n = chain_node(OC_BR | Vx);
1473 n->l.n = condition();
1474 chain_group();
1475 n2 = chain_node(OC_EXEC);
1476 n->r.n = seq->last;
1477 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
1478 chain_group();
1479 n2->a.n = seq->last;
1480 } else {
1481 rollback_token();
1482 }
1483 break;
1484
1485 case ST_WHILE:
1486 debug_printf_parse("%s: ST_WHILE\n", __func__);
1487 n2 = condition();
1488 n = chain_loop(NULL);
1489 n->l.n = n2;
1490 break;
1491
1492 case ST_DO:
1493 debug_printf_parse("%s: ST_DO\n", __func__);
1494 n2 = chain_node(OC_EXEC);
1495 n = chain_loop(NULL);
1496 n2->a.n = n->a.n;
1497 next_token(TC_WHILE);
1498 n->l.n = condition();
1499 break;
1500
1501 case ST_FOR:
1502 debug_printf_parse("%s: ST_FOR\n", __func__);
1503 next_token(TC_SEQSTART);
1504 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1505 if (t_tclass & TC_SEQTERM) { /* for-in */
1506 if ((n2->info & OPCLSMASK) != OC_IN)
1507 syntax_error(EMSG_UNEXP_TOKEN);
1508 n = chain_node(OC_WALKINIT | VV);
1509 n->l.n = n2->l.n;
1510 n->r.n = n2->r.n;
1511 n = chain_loop(NULL);
1512 n->info = OC_WALKNEXT | Vx;
1513 n->l.n = n2->l.n;
1514 } else { /* for (;;) */
1515 n = chain_node(OC_EXEC | Vx);
1516 n->l.n = n2;
1517 n2 = parse_expr(TC_SEMICOL);
1518 n3 = parse_expr(TC_SEQTERM);
1519 n = chain_loop(n3);
1520 n->l.n = n2;
1521 if (!n2)
1522 n->info = OC_EXEC;
1523 }
1524 break;
1525
1526 case OC_PRINT:
1527 case OC_PRINTF:
1528 debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
1529 n = chain_node(t_info);
1530 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1531 if (t_tclass & TC_OUTRDR) {
1532 n->info |= t_info;
1533 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1534 }
1535 if (t_tclass & TC_GRPTERM)
1536 rollback_token();
1537 break;
1538
1539 case OC_BREAK:
1540 debug_printf_parse("%s: OC_BREAK\n", __func__);
1541 n = chain_node(OC_EXEC);
1542 n->a.n = break_ptr;
1543 break;
1544
1545 case OC_CONTINUE:
1546 debug_printf_parse("%s: OC_CONTINUE\n", __func__);
1547 n = chain_node(OC_EXEC);
1548 n->a.n = continue_ptr;
1549 break;
1550
1551 /* delete, next, nextfile, return, exit */
1552 default:
1553 debug_printf_parse("%s: default\n", __func__);
1554 chain_expr(t_info);
1555 }
1556 }
1557}
1558
1559static void parse_program(char *p)
1560{
1561 uint32_t tclass;
1562 node *cn;
1563 func *f;
1564 var *v;
1565
1566 g_pos = p;
1567 t_lineno = 1;
1568 while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1569 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1570
1571 if (tclass & TC_OPTERM) {
1572 debug_printf_parse("%s: TC_OPTERM\n", __func__);
1573 continue;
1574 }
1575
1576 seq = &mainseq;
1577 if (tclass & TC_BEGIN) {
1578 debug_printf_parse("%s: TC_BEGIN\n", __func__);
1579 seq = &beginseq;
1580 chain_group();
1581
1582 } else if (tclass & TC_END) {
1583 debug_printf_parse("%s: TC_END\n", __func__);
1584 seq = &endseq;
1585 chain_group();
1586
1587 } else if (tclass & TC_FUNCDECL) {
1588 debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
1589 next_token(TC_FUNCTION);
1590 g_pos++;
1591 f = newfunc(t_string);
1592 f->body.first = NULL;
1593 f->nargs = 0;
1594 while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1595 v = findvar(ahash, t_string);
1596 v->x.aidx = f->nargs++;
1597
1598 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1599 break;
1600 }
1601 seq = &f->body;
1602 chain_group();
1603 clear_array(ahash);
1604
1605 } else if (tclass & TC_OPSEQ) {
1606 debug_printf_parse("%s: TC_OPSEQ\n", __func__);
1607 rollback_token();
1608 cn = chain_node(OC_TEST);
1609 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1610 if (t_tclass & TC_GRPSTART) {
1611 debug_printf_parse("%s: TC_GRPSTART\n", __func__);
1612 rollback_token();
1613 chain_group();
1614 } else {
1615 debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
1616 chain_node(OC_PRINT);
1617 }
1618 cn->r.n = mainseq.last;
1619
1620 } else /* if (tclass & TC_GRPSTART) */ {
1621 debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
1622 rollback_token();
1623 chain_group();
1624 }
1625 }
1626 debug_printf_parse("%s: TC_EOF\n", __func__);
1627}
1628
1629
1630/* -------- program execution part -------- */
1631
1632static node *mk_splitter(const char *s, tsplitter *spl)
1633{
1634 regex_t *re, *ire;
1635 node *n;
1636
1637 re = &spl->re[0];
1638 ire = &spl->re[1];
1639 n = &spl->n;
1640 if ((n->info & OPCLSMASK) == OC_REGEXP) {
1641 regfree(re);
1642 regfree(ire); // TODO: nuke ire, use re+1?
1643 }
1644 if (s[0] && s[1]) { /* strlen(s) > 1 */
1645 mk_re_node(s, n, re);
1646 } else {
1647 n->info = (uint32_t) s[0];
1648 }
1649
1650 return n;
1651}
1652
1653/* use node as a regular expression. Supplied with node ptr and regex_t
1654 * storage space. Return ptr to regex (if result points to preg, it should
1655 * be later regfree'd manually
1656 */
1657static regex_t *as_regex(node *op, regex_t *preg)
1658{
1659 int cflags;
1660 var *v;
1661 const char *s;
1662
1663 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1664 return icase ? op->r.ire : op->l.re;
1665 }
1666 v = nvalloc(1);
1667 s = getvar_s(evaluate(op, v));
1668
1669 cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1670 /* Testcase where REG_EXTENDED fails (unpaired '{'):
1671 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1672 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1673 * (maybe gsub is not supposed to use REG_EXTENDED?).
1674 */
1675 if (regcomp(preg, s, cflags)) {
1676 cflags &= ~REG_EXTENDED;
1677 xregcomp(preg, s, cflags);
1678 }
1679 nvfree(v);
1680 return preg;
1681}
1682
/* Growing buffer helper: make sure b can hold more than n bytes.
 * A reallocation happens when b is NULL or n >= *size (so also when n
 * equals the old size), which guarantees at least one spare byte beyond n.
 * The new capacity, n + n/2 + 80, is stored in *size.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b && n < *size)
		return b;

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1695
1696/* resize field storage space */
1697static void fsrealloc(int size)
1698{
1699 int i;
1700
1701 if (size >= maxfields) {
1702 i = maxfields;
1703 maxfields = size + 16;
1704 Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
1705 for (; i < maxfields; i++) {
1706 Fields[i].type = VF_SPECIAL;
1707 Fields[i].string = NULL;
1708 }
1709 }
1710 /* if size < nfields, clear extra field variables */
1711 for (i = size; i < nfields; i++) {
1712 clrvar(Fields + i);
1713 }
1714 nfields = size;
1715}
1716
1717static int awk_split(const char *s, node *spl, char **slist)
1718{
1719 int l, n;
1720 char c[4];
1721 char *s1;
1722 regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
1723
1724 /* in worst case, each char would be a separate field */
1725 *slist = s1 = xzalloc(strlen(s) * 2 + 3);
1726 strcpy(s1, s);
1727
1728 c[0] = c[1] = (char)spl->info;
1729 c[2] = c[3] = '\0';
1730 if (*getvar_s(intvar[RS]) == '\0')
1731 c[2] = '\n';
1732
1733 n = 0;
1734 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1735 if (!*s)
1736 return n; /* "": zero fields */
1737 n++; /* at least one field will be there */
1738 do {
1739 l = strcspn(s, c+2); /* len till next NUL or \n */
1740 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0
1741 && pmatch[0].rm_so <= l
1742 ) {
1743 l = pmatch[0].rm_so;
1744 if (pmatch[0].rm_eo == 0) {
1745 l++;
1746 pmatch[0].rm_eo++;
1747 }
1748 n++; /* we saw yet another delimiter */
1749 } else {
1750 pmatch[0].rm_eo = l;
1751 if (s[l])
1752 pmatch[0].rm_eo++;
1753 }
1754 memcpy(s1, s, l);
1755 /* make sure we remove *all* of the separator chars */
1756 do {
1757 s1[l] = '\0';
1758 } while (++l < pmatch[0].rm_eo);
1759 nextword(&s1);
1760 s += pmatch[0].rm_eo;
1761 } while (*s);
1762 return n;
1763 }
1764 if (c[0] == '\0') { /* null split */
1765 while (*s) {
1766 *s1++ = *s++;
1767 *s1++ = '\0';
1768 n++;
1769 }
1770 return n;
1771 }
1772 if (c[0] != ' ') { /* single-character split */
1773 if (icase) {
1774 c[0] = toupper(c[0]);
1775 c[1] = tolower(c[1]);
1776 }
1777 if (*s1)
1778 n++;
1779 while ((s1 = strpbrk(s1, c)) != NULL) {
1780 *s1++ = '\0';
1781 n++;
1782 }
1783 return n;
1784 }
1785 /* space split */
1786 while (*s) {
1787 s = skip_whitespace(s);
1788 if (!*s)
1789 break;
1790 n++;
1791 while (*s && !isspace(*s))
1792 *s1++ = *s++;
1793 *s1++ = '\0';
1794 }
1795 return n;
1796}
1797
1798static void split_f0(void)
1799{
1800/* static char *fstrings; */
1801#define fstrings (G.split_f0__fstrings)
1802
1803 int i, n;
1804 char *s;
1805
1806 if (is_f0_split)
1807 return;
1808
1809 is_f0_split = TRUE;
1810 free(fstrings);
1811 fsrealloc(0);
1812 n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
1813 fsrealloc(n);
1814 s = fstrings;
1815 for (i = 0; i < n; i++) {
1816 Fields[i].string = nextword(&s);
1817 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1818 }
1819
1820 /* set NF manually to avoid side effects */
1821 clrvar(intvar[NF]);
1822 intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
1823 intvar[NF]->number = nfields;
1824#undef fstrings
1825}
1826
1827/* perform additional actions when some internal variables changed */
1828static void handle_special(var *v)
1829{
1830 int n;
1831 char *b;
1832 const char *sep, *s;
1833 int sl, l, len, i, bsize;
1834
1835 if (!(v->type & VF_SPECIAL))
1836 return;
1837
1838 if (v == intvar[NF]) {
1839 n = (int)getvar_i(v);
1840 fsrealloc(n);
1841
1842 /* recalculate $0 */
1843 sep = getvar_s(intvar[OFS]);
1844 sl = strlen(sep);
1845 b = NULL;
1846 len = 0;
1847 for (i = 0; i < n; i++) {
1848 s = getvar_s(&Fields[i]);
1849 l = strlen(s);
1850 if (b) {
1851 memcpy(b+len, sep, sl);
1852 len += sl;
1853 }
1854 b = qrealloc(b, len+l+sl, &bsize);
1855 memcpy(b+len, s, l);
1856 len += l;
1857 }
1858 if (b)
1859 b[len] = '\0';
1860 setvar_p(intvar[F0], b);
1861 is_f0_split = TRUE;
1862
1863 } else if (v == intvar[F0]) {
1864 is_f0_split = FALSE;
1865
1866 } else if (v == intvar[FS]) {
1867 /*
1868 * The POSIX-2008 standard says that changing FS should have no effect on the
1869 * current input line, but only on the next one. The language is:
1870 *
1871 * > Before the first reference to a field in the record is evaluated, the record
1872 * > shall be split into fields, according to the rules in Regular Expressions,
1873 * > using the value of FS that was current at the time the record was read.
1874 *
1875 * So, split up current line before assignment to FS:
1876 */
1877 split_f0();
1878
1879 mk_splitter(getvar_s(v), &fsplitter);
1880
1881 } else if (v == intvar[RS]) {
1882 mk_splitter(getvar_s(v), &rsplitter);
1883
1884 } else if (v == intvar[IGNORECASE]) {
1885 icase = istrue(v);
1886
1887 } else { /* $n */
1888 n = getvar_i(intvar[NF]);
1889 setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
1890 /* right here v is invalid. Just to note... */
1891 }
1892}
1893
1894/* step through func/builtin/etc arguments */
1895static node *nextarg(node **pn)
1896{
1897 node *n;
1898
1899 n = *pn;
1900 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1901 *pn = n->r.n;
1902 n = n->l.n;
1903 } else {
1904 *pn = NULL;
1905 }
1906 return n;
1907}
1908
1909static void hashwalk_init(var *v, xhash *array)
1910{
1911 hash_item *hi;
1912 unsigned i;
1913 walker_list *w;
1914 walker_list *prev_walker;
1915
1916 if (v->type & VF_WALK) {
1917 prev_walker = v->x.walker;
1918 } else {
1919 v->type |= VF_WALK;
1920 prev_walker = NULL;
1921 }
1922 debug_printf_walker("hashwalk_init: prev_walker:%p\n", prev_walker);
1923
1924 w = v->x.walker = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
1925 debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
1926 w->cur = w->end = w->wbuf;
1927 w->prev = prev_walker;
1928 for (i = 0; i < array->csize; i++) {
1929 hi = array->items[i];
1930 while (hi) {
1931 strcpy(w->end, hi->name);
1932 nextword(&w->end);
1933 hi = hi->next;
1934 }
1935 }
1936}
1937
1938static int hashwalk_next(var *v)
1939{
1940 walker_list *w = v->x.walker;
1941
1942 if (w->cur >= w->end) {
1943 walker_list *prev_walker = w->prev;
1944
1945 debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, prev_walker);
1946 free(w);
1947 v->x.walker = prev_walker;
1948 return FALSE;
1949 }
1950
1951 setvar_s(v, nextword(&w->cur));
1952 return TRUE;
1953}
1954
1955/* evaluate node, return 1 when result is true, 0 otherwise */
1956static int ptest(node *pattern)
1957{
1958 /* ptest__v is "static": to save stack space? */
1959 return istrue(evaluate(pattern, &G.ptest__v));
1960}
1961
1962/* read next record from stream rsm into a variable v */
1963static int awk_getline(rstream *rsm, var *v)
1964{
1965 char *b;
1966 regmatch_t pmatch[2];
1967 int size, a, p, pp = 0;
1968 int fd, so, eo, r, rp;
1969 char c, *m, *s;
1970
1971 debug_printf_eval("entered %s()\n", __func__);
1972
1973 /* we're using our own buffer since we need access to accumulating
1974 * characters
1975 */
1976 fd = fileno(rsm->F);
1977 m = rsm->buffer;
1978 a = rsm->adv;
1979 p = rsm->pos;
1980 size = rsm->size;
1981 c = (char) rsplitter.n.info;
1982 rp = 0;
1983
1984 if (!m)
1985 m = qrealloc(m, 256, &size);
1986
1987 do {
1988 b = m + a;
1989 so = eo = p;
1990 r = 1;
1991 if (p > 0) {
1992 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1993 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1994 b, 1, pmatch, 0) == 0) {
1995 so = pmatch[0].rm_so;
1996 eo = pmatch[0].rm_eo;
1997 if (b[eo] != '\0')
1998 break;
1999 }
2000 } else if (c != '\0') {
2001 s = strchr(b+pp, c);
2002 if (!s)
2003 s = memchr(b+pp, '\0', p - pp);
2004 if (s) {
2005 so = eo = s-b;
2006 eo++;
2007 break;
2008 }
2009 } else {
2010 while (b[rp] == '\n')
2011 rp++;
2012 s = strstr(b+rp, "\n\n");
2013 if (s) {
2014 so = eo = s-b;
2015 while (b[eo] == '\n')
2016 eo++;
2017 if (b[eo] != '\0')
2018 break;
2019 }
2020 }
2021 }
2022
2023 if (a > 0) {
2024 memmove(m, m+a, p+1);
2025 b = m;
2026 a = 0;
2027 }
2028
2029 m = qrealloc(m, a+p+128, &size);
2030 b = m + a;
2031 pp = p;
2032 p += safe_read(fd, b+p, size-p-1);
2033 if (p < pp) {
2034 p = 0;
2035 r = 0;
2036 setvar_i(intvar[ERRNO], errno);
2037 }
2038 b[p] = '\0';
2039
2040 } while (p > pp);
2041
2042 if (p == 0) {
2043 r--;
2044 } else {
2045 c = b[so]; b[so] = '\0';
2046 setvar_s(v, b+rp);
2047 v->type |= VF_USER;
2048 b[so] = c;
2049 c = b[eo]; b[eo] = '\0';
2050 setvar_s(intvar[RT], b+so);
2051 b[eo] = c;
2052 }
2053
2054 rsm->buffer = m;
2055 rsm->adv = a + eo;
2056 rsm->pos = p - eo;
2057 rsm->size = size;
2058
2059 debug_printf_eval("returning from %s(): %d\n", __func__, r);
2060
2061 return r;
2062}
2063
2064static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
2065{
2066 int r = 0;
2067 char c;
2068 const char *s = format;
2069
2070 if (int_as_int && n == (long long)n) {
2071 r = snprintf(b, size, "%lld", (long long)n);
2072 } else {
2073 do { c = *s; } while (c && *++s);
2074 if (strchr("diouxX", c)) {
2075 r = snprintf(b, size, format, (int)n);
2076 } else if (strchr("eEfgG", c)) {
2077 r = snprintf(b, size, format, n);
2078 } else {
2079 syntax_error(EMSG_INV_FMT);
2080 }
2081 }
2082 return r;
2083}
2084
2085/* formatted output into an allocated buffer, return ptr to buffer */
2086static char *awk_printf(node *n)
2087{
2088 char *b = NULL;
2089 char *fmt, *s, *f;
2090 const char *s1;
2091 int i, j, incr, bsize = 0;
2092 char c, c1;
2093 var *v, *arg;
2094
2095 v = nvalloc(1);
2096 fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
2097
2098 i = 0;
2099 while (*f) {
2100 s = f;
2101 while (*f && (*f != '%' || *++f == '%'))
2102 f++;
2103 while (*f && !isalpha(*f)) {
2104 if (*f == '*')
2105 syntax_error("%*x formats are not supported");
2106 f++;
2107 }
2108
2109 incr = (f - s) + MAXVARFMT;
2110 b = qrealloc(b, incr + i, &bsize);
2111 c = *f;
2112 if (c != '\0')
2113 f++;
2114 c1 = *f;
2115 *f = '\0';
2116 arg = evaluate(nextarg(&n), v);
2117
2118 j = i;
2119 if (c == 'c' || !c) {
2120 i += sprintf(b+i, s, is_numeric(arg) ?
2121 (char)getvar_i(arg) : *getvar_s(arg));
2122 } else if (c == 's') {
2123 s1 = getvar_s(arg);
2124 b = qrealloc(b, incr+i+strlen(s1), &bsize);
2125 i += sprintf(b+i, s, s1);
2126 } else {
2127 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
2128 }
2129 *f = c1;
2130
2131 /* if there was an error while sprintf, return value is negative */
2132 if (i < j)
2133 i = j;
2134 }
2135
2136 free(fmt);
2137 nvfree(v);
2138 b = xrealloc(b, i + 1);
2139 b[i] = '\0';
2140 return b;
2141}
2142
2143/* Common substitution routine.
2144 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
2145 * store result into (dest), return number of substitutions.
2146 * If nm = 0, replace all matches.
2147 * If src or dst is NULL, use $0.
2148 * If subexp != 0, enable subexpression matching (\1-\9).
2149 */
2150static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
2151{
2152 char *resbuf;
2153 const char *sp;
2154 int match_no, residx, replen, resbufsize;
2155 int regexec_flags;
2156 regmatch_t pmatch[10];
2157 regex_t sreg, *regex;
2158
2159 resbuf = NULL;
2160 residx = 0;
2161 match_no = 0;
2162 regexec_flags = 0;
2163 regex = as_regex(rn, &sreg);
2164 sp = getvar_s(src ? src : intvar[F0]);
2165 replen = strlen(repl);
2166 while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
2167 int so = pmatch[0].rm_so;
2168 int eo = pmatch[0].rm_eo;
2169
2170 //bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
2171 resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
2172 memcpy(resbuf + residx, sp, eo);
2173 residx += eo;
2174 if (++match_no >= nm) {
2175 const char *s;
2176 int nbs;
2177
2178 /* replace */
2179 residx -= (eo - so);
2180 nbs = 0;
2181 for (s = repl; *s; s++) {
2182 char c = resbuf[residx++] = *s;
2183 if (c == '\\') {
2184 nbs++;
2185 continue;
2186 }
2187 if (c == '&' || (subexp && c >= '0' && c <= '9')) {
2188 int j;
2189 residx -= ((nbs + 3) >> 1);
2190 j = 0;
2191 if (c != '&') {
2192 j = c - '0';
2193 nbs++;
2194 }
2195 if (nbs % 2) {
2196 resbuf[residx++] = c;
2197 } else {
2198 int n = pmatch[j].rm_eo - pmatch[j].rm_so;
2199 resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
2200 memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
2201 residx += n;
2202 }
2203 }
2204 nbs = 0;
2205 }
2206 }
2207
2208 regexec_flags = REG_NOTBOL;
2209 sp += eo;
2210 if (match_no == nm)
2211 break;
2212 if (eo == so) {
2213 /* Empty match (e.g. "b*" will match anywhere).
2214 * Advance by one char. */
2215//BUG (bug 1333):
2216//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
2217//... and will erroneously match "b" even though it is NOT at the word start.
2218//we need REG_NOTBOW but it does not exist...
2219//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
2220//it should be able to do it correctly.
2221 /* Subtle: this is safe only because
2222 * qrealloc allocated at least one extra byte */
2223 resbuf[residx] = *sp;
2224 if (*sp == '\0')
2225 goto ret;
2226 sp++;
2227 residx++;
2228 }
2229 }
2230
2231 resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
2232 strcpy(resbuf + residx, sp);
2233 ret:
2234 //bb_error_msg("end sp:'%s'%p", sp,sp);
2235 setvar_p(dest ? dest : intvar[F0], resbuf);
2236 if (regex == &sreg)
2237 regfree(regex);
2238 return match_no;
2239}
2240
2241static NOINLINE int do_mktime(const char *ds)
2242{
2243 struct tm then;
2244 int count;
2245
2246 /*memset(&then, 0, sizeof(then)); - not needed */
2247 then.tm_isdst = -1; /* default is unknown */
2248
2249 /* manpage of mktime says these fields are ints,
2250 * so we can sscanf stuff directly into them */
2251 count = sscanf(ds, "%u %u %u %u %u %u %d",
2252 &then.tm_year, &then.tm_mon, &then.tm_mday,
2253 &then.tm_hour, &then.tm_min, &then.tm_sec,
2254 &then.tm_isdst);
2255
2256 if (count < 6
2257 || (unsigned)then.tm_mon < 1
2258 || (unsigned)then.tm_year < 1900
2259 ) {
2260 return -1;
2261 }
2262
2263 then.tm_mon -= 1;
2264 then.tm_year -= 1900;
2265
2266 return mktime(&then);
2267}
2268
2269static NOINLINE var *exec_builtin(node *op, var *res)
2270{
2271#define tspl (G.exec_builtin__tspl)
2272
2273 var *tv;
2274 node *an[4];
2275 var *av[4];
2276 const char *as[4];
2277 regmatch_t pmatch[2];
2278 regex_t sreg, *re;
2279 node *spl;
2280 uint32_t isr, info;
2281 int nargs;
2282 time_t tt;
2283 int i, l, ll, n;
2284
2285 tv = nvalloc(4);
2286 isr = info = op->info;
2287 op = op->l.n;
2288
2289 av[2] = av[3] = NULL;
2290 for (i = 0; i < 4 && op; i++) {
2291 an[i] = nextarg(&op);
2292 if (isr & 0x09000000)
2293 av[i] = evaluate(an[i], &tv[i]);
2294 if (isr & 0x08000000)
2295 as[i] = getvar_s(av[i]);
2296 isr >>= 1;
2297 }
2298
2299 nargs = i;
2300 if ((uint32_t)nargs < (info >> 30))
2301 syntax_error(EMSG_TOO_FEW_ARGS);
2302
2303 info &= OPNMASK;
2304 switch (info) {
2305
2306 case B_a2:
2307 if (ENABLE_FEATURE_AWK_LIBM)
2308 setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
2309 else
2310 syntax_error(EMSG_NO_MATH);
2311 break;
2312
2313 case B_sp: {
2314 char *s, *s1;
2315
2316 if (nargs > 2) {
2317 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
2318 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
2319 } else {
2320 spl = &fsplitter.n;
2321 }
2322
2323 n = awk_split(as[0], spl, &s);
2324 s1 = s;
2325 clear_array(iamarray(av[1]));
2326 for (i = 1; i <= n; i++)
2327 setari_u(av[1], i, nextword(&s));
2328 free(s1);
2329 setvar_i(res, n);
2330 break;
2331 }
2332
2333 case B_ss: {
2334 char *s;
2335
2336 l = strlen(as[0]);
2337 i = getvar_i(av[1]) - 1;
2338 if (i > l)
2339 i = l;
2340 if (i < 0)
2341 i = 0;
2342 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
2343 if (n < 0)
2344 n = 0;
2345 s = xstrndup(as[0]+i, n);
2346 setvar_p(res, s);
2347 break;
2348 }
2349
2350 /* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
2351 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
2352 case B_an:
2353 setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
2354 break;
2355
2356 case B_co:
2357 setvar_i(res, ~getvar_i_int(av[0]));
2358 break;
2359
2360 case B_ls:
2361 setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
2362 break;
2363
2364 case B_or:
2365 setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
2366 break;
2367
2368 case B_rs:
2369 setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
2370 break;
2371
2372 case B_xo:
2373 setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
2374 break;
2375
2376 case B_lo:
2377 case B_up: {
2378 char *s, *s1;
2379 s1 = s = xstrdup(as[0]);
2380 while (*s1) {
2381 //*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
2382 if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
2383 *s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
2384 s1++;
2385 }
2386 setvar_p(res, s);
2387 break;
2388 }
2389
2390 case B_ix:
2391 n = 0;
2392 ll = strlen(as[1]);
2393 l = strlen(as[0]) - ll;
2394 if (ll > 0 && l >= 0) {
2395 if (!icase) {
2396 char *s = strstr(as[0], as[1]);
2397 if (s)
2398 n = (s - as[0]) + 1;
2399 } else {
2400 /* this piece of code is terribly slow and
2401 * really should be rewritten
2402 */
2403 for (i = 0; i <= l; i++) {
2404 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
2405 n = i+1;
2406 break;
2407 }
2408 }
2409 }
2410 }
2411 setvar_i(res, n);
2412 break;
2413
2414 case B_ti:
2415 if (nargs > 1)
2416 tt = getvar_i(av[1]);
2417 else
2418 time(&tt);
2419 //s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
2420 i = strftime(g_buf, MAXVARFMT,
2421 ((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
2422 localtime(&tt));
2423 g_buf[i] = '\0';
2424 setvar_s(res, g_buf);
2425 break;
2426
2427 case B_mt:
2428 setvar_i(res, do_mktime(as[0]));
2429 break;
2430
2431 case B_ma:
2432 re = as_regex(an[1], &sreg);
2433 n = regexec(re, as[0], 1, pmatch, 0);
2434 if (n == 0) {
2435 pmatch[0].rm_so++;
2436 pmatch[0].rm_eo++;
2437 } else {
2438 pmatch[0].rm_so = 0;
2439 pmatch[0].rm_eo = -1;
2440 }
2441 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
2442 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
2443 setvar_i(res, pmatch[0].rm_so);
2444 if (re == &sreg)
2445 regfree(re);
2446 break;
2447
2448 case B_ge:
2449 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2450 break;
2451
2452 case B_gs:
2453 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2454 break;
2455
2456 case B_su:
2457 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2458 break;
2459 }
2460
2461 nvfree(tv);
2462 return res;
2463#undef tspl
2464}
2465
2466/*
2467 * Evaluate node - the heart of the program. Supplied with subtree
2468 * and place where to store result. returns ptr to result.
2469 */
2470#define XC(n) ((n) >> 8)
2471
2472static var *evaluate(node *op, var *res)
2473{
2474/* This procedure is recursive so we should count every byte */
2475#define fnargs (G.evaluate__fnargs)
2476/* seed is initialized to 1 */
2477#define seed (G.evaluate__seed)
2478#define sreg (G.evaluate__sreg)
2479
2480 var *v1;
2481
2482 if (!op)
2483 return setvar_s(res, NULL);
2484
2485 debug_printf_eval("entered %s()\n", __func__);
2486
2487 v1 = nvalloc(2);
2488
2489 while (op) {
2490 struct {
2491 var *v;
2492 const char *s;
2493 } L = { NULL, NULL };
2494 struct {
2495 var *v;
2496 const char *s;
2497 } R = { NULL, NULL };
2498 static double L_d;
2499 uint32_t opinfo;
2500 int opn;
2501 node *op1;
2502
2503 opinfo = op->info;
2504 opn = (opinfo & OPNMASK);
2505 g_lineno = op->lineno;
2506 op1 = op->l.n;
2507 debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2508
2509 /* execute inevitable things */
2510 if (opinfo & OF_RES1)
2511 L.v = evaluate(op1, v1);
2512 if (opinfo & OF_RES2)
2513 R.v = evaluate(op->r.n, v1+1);
2514 if (opinfo & OF_STR1) {
2515 L.s = getvar_s(L.v);
2516 debug_printf_eval("L.s:'%s'\n", L.s);
2517 }
2518 if (opinfo & OF_STR2) {
2519 R.s = getvar_s(R.v);
2520 debug_printf_eval("R.s:'%s'\n", R.s);
2521 }
2522 if (opinfo & OF_NUM1) {
2523 L_d = getvar_i(L.v);
2524 debug_printf_eval("L_d:%f\n", L_d);
2525 }
2526
2527 debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2528 switch (XC(opinfo & OPCLSMASK)) {
2529
2530 /* -- iterative node type -- */
2531
2532 /* test pattern */
2533 case XC( OC_TEST ):
2534 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2535 /* it's range pattern */
2536 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2537 op->info |= OF_CHECKED;
2538 if (ptest(op1->r.n))
2539 op->info &= ~OF_CHECKED;
2540 op = op->a.n;
2541 } else {
2542 op = op->r.n;
2543 }
2544 } else {
2545 op = ptest(op1) ? op->a.n : op->r.n;
2546 }
2547 break;
2548
2549 /* just evaluate an expression, also used as unconditional jump */
2550 case XC( OC_EXEC ):
2551 break;
2552
2553 /* branch, used in if-else and various loops */
2554 case XC( OC_BR ):
2555 op = istrue(L.v) ? op->a.n : op->r.n;
2556 break;
2557
2558 /* initialize for-in loop */
2559 case XC( OC_WALKINIT ):
2560 hashwalk_init(L.v, iamarray(R.v));
2561 break;
2562
2563 /* get next array item */
2564 case XC( OC_WALKNEXT ):
2565 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2566 break;
2567
2568 case XC( OC_PRINT ):
2569 case XC( OC_PRINTF ): {
2570 FILE *F = stdout;
2571
2572 if (op->r.n) {
2573 rstream *rsm = newfile(R.s);
2574 if (!rsm->F) {
2575 if (opn == '|') {
2576 rsm->F = popen(R.s, "w");
2577 if (rsm->F == NULL)
2578 bb_perror_msg_and_die("popen");
2579 rsm->is_pipe = 1;
2580 } else {
2581 rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2582 }
2583 }
2584 F = rsm->F;
2585 }
2586
2587 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2588 if (!op1) {
2589 fputs(getvar_s(intvar[F0]), F);
2590 } else {
2591 while (op1) {
2592 var *v = evaluate(nextarg(&op1), v1);
2593 if (v->type & VF_NUMBER) {
2594 fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
2595 getvar_i(v), TRUE);
2596 fputs(g_buf, F);
2597 } else {
2598 fputs(getvar_s(v), F);
2599 }
2600
2601 if (op1)
2602 fputs(getvar_s(intvar[OFS]), F);
2603 }
2604 }
2605 fputs(getvar_s(intvar[ORS]), F);
2606
2607 } else { /* OC_PRINTF */
2608 char *s = awk_printf(op1);
2609 fputs(s, F);
2610 free(s);
2611 }
2612 fflush(F);
2613 break;
2614 }
2615
2616 case XC( OC_DELETE ): {
2617 uint32_t info = op1->info & OPCLSMASK;
2618 var *v;
2619
2620 if (info == OC_VAR) {
2621 v = op1->l.v;
2622 } else if (info == OC_FNARG) {
2623 v = &fnargs[op1->l.aidx];
2624 } else {
2625 syntax_error(EMSG_NOT_ARRAY);
2626 }
2627
2628 if (op1->r.n) {
2629 const char *s;
2630 clrvar(L.v);
2631 s = getvar_s(evaluate(op1->r.n, v1));
2632 hash_remove(iamarray(v), s);
2633 } else {
2634 clear_array(iamarray(v));
2635 }
2636 break;
2637 }
2638
2639 case XC( OC_NEWSOURCE ):
2640 g_progname = op->l.new_progname;
2641 break;
2642
2643 case XC( OC_RETURN ):
2644 copyvar(res, L.v);
2645 break;
2646
2647 case XC( OC_NEXTFILE ):
2648 nextfile = TRUE;
2649 case XC( OC_NEXT ):
2650 nextrec = TRUE;
2651 case XC( OC_DONE ):
2652 clrvar(res);
2653 break;
2654
2655 case XC( OC_EXIT ):
2656 awk_exit(L_d);
2657
2658 /* -- recursive node type -- */
2659
2660 case XC( OC_VAR ):
2661 L.v = op->l.v;
2662 if (L.v == intvar[NF])
2663 split_f0();
2664 goto v_cont;
2665
2666 case XC( OC_FNARG ):
2667 L.v = &fnargs[op->l.aidx];
2668 v_cont:
2669 res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
2670 break;
2671
2672 case XC( OC_IN ):
2673 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2674 break;
2675
2676 case XC( OC_REGEXP ):
2677 op1 = op;
2678 L.s = getvar_s(intvar[F0]);
2679 goto re_cont;
2680
2681 case XC( OC_MATCH ):
2682 op1 = op->r.n;
2683 re_cont:
2684 {
2685 regex_t *re = as_regex(op1, &sreg);
2686 int i = regexec(re, L.s, 0, NULL, 0);
2687 if (re == &sreg)
2688 regfree(re);
2689 setvar_i(res, (i == 0) ^ (opn == '!'));
2690 }
2691 break;
2692
2693 case XC( OC_MOVE ):
2694 debug_printf_eval("MOVE\n");
2695 /* if source is a temporary string, jusk relink it to dest */
2696//Disabled: if R.v is numeric but happens to have cached R.v->string,
2697//then L.v ends up being a string, which is wrong
2698// if (R.v == v1+1 && R.v->string) {
2699// res = setvar_p(L.v, R.v->string);
2700// R.v->string = NULL;
2701// } else {
2702 res = copyvar(L.v, R.v);
2703// }
2704 break;
2705
2706 case XC( OC_TERNARY ):
2707 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2708 syntax_error(EMSG_POSSIBLE_ERROR);
2709 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2710 break;
2711
2712 case XC( OC_FUNC ): {
2713 var *vbeg, *v;
2714 const char *sv_progname;
2715
2716 /* The body might be empty, still has to eval the args */
2717 if (!op->r.n->info && !op->r.f->body.first)
2718 syntax_error(EMSG_UNDEF_FUNC);
2719
2720 vbeg = v = nvalloc(op->r.f->nargs + 1);
2721 while (op1) {
2722 var *arg = evaluate(nextarg(&op1), v1);
2723 copyvar(v, arg);
2724 v->type |= VF_CHILD;
2725 v->x.parent = arg;
2726 if (++v - vbeg >= (int) op->r.f->nargs)
2727 break;
2728 }
2729
2730 v = fnargs;
2731 fnargs = vbeg;
2732 sv_progname = g_progname;
2733
2734 res = evaluate(op->r.f->body.first, res);
2735
2736 g_progname = sv_progname;
2737 nvfree(fnargs);
2738 fnargs = v;
2739
2740 break;
2741 }
2742
2743 case XC( OC_GETLINE ):
2744 case XC( OC_PGETLINE ): {
2745 rstream *rsm;
2746 int i;
2747
2748 if (op1) {
2749 rsm = newfile(L.s);
2750 if (!rsm->F) {
2751 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2752 rsm->F = popen(L.s, "r");
2753 rsm->is_pipe = TRUE;
2754 } else {
2755 rsm->F = fopen_for_read(L.s); /* not xfopen! */
2756 }
2757 }
2758 } else {
2759 if (!iF)
2760 iF = next_input_file();
2761 rsm = iF;
2762 }
2763
2764 if (!rsm || !rsm->F) {
2765 setvar_i(intvar[ERRNO], errno);
2766 setvar_i(res, -1);
2767 break;
2768 }
2769
2770 if (!op->r.n)
2771 R.v = intvar[F0];
2772
2773 i = awk_getline(rsm, R.v);
2774 if (i > 0 && !op1) {
2775 incvar(intvar[FNR]);
2776 incvar(intvar[NR]);
2777 }
2778 setvar_i(res, i);
2779 break;
2780 }
2781
2782 /* simple builtins */
2783 case XC( OC_FBLTIN ): {
2784 static double R_d;
2785
2786 switch (opn) {
2787 case F_in:
2788 R_d = (long long)L_d;
2789 break;
2790
2791 case F_rn:
2792 R_d = (double)rand() / (double)RAND_MAX;
2793 break;
2794
2795 case F_co:
2796 if (ENABLE_FEATURE_AWK_LIBM) {
2797 R_d = cos(L_d);
2798 break;
2799 }
2800
2801 case F_ex:
2802 if (ENABLE_FEATURE_AWK_LIBM) {
2803 R_d = exp(L_d);
2804 break;
2805 }
2806
2807 case F_lg:
2808 if (ENABLE_FEATURE_AWK_LIBM) {
2809 R_d = log(L_d);
2810 break;
2811 }
2812
2813 case F_si:
2814 if (ENABLE_FEATURE_AWK_LIBM) {
2815 R_d = sin(L_d);
2816 break;
2817 }
2818
2819 case F_sq:
2820 if (ENABLE_FEATURE_AWK_LIBM) {
2821 R_d = sqrt(L_d);
2822 break;
2823 }
2824
2825 syntax_error(EMSG_NO_MATH);
2826 break;
2827
2828 case F_sr:
2829 R_d = (double)seed;
2830 seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
2831 srand(seed);
2832 break;
2833
2834 case F_ti:
2835 R_d = time(NULL);
2836 break;
2837
2838 case F_le:
2839 debug_printf_eval("length: L.s:'%s'\n", L.s);
2840 if (!op1) {
2841 L.s = getvar_s(intvar[F0]);
2842 debug_printf_eval("length: L.s='%s'\n", L.s);
2843 }
2844 else if (L.v->type & VF_ARRAY) {
2845 R_d = L.v->x.array->nel;
2846 debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
2847 break;
2848 }
2849 R_d = strlen(L.s);
2850 break;
2851
2852 case F_sy:
2853 fflush_all();
2854 R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
2855 ? (system(L.s) >> 8) : 0;
2856 break;
2857
2858 case F_ff:
2859 if (!op1) {
2860 fflush(stdout);
2861 } else if (L.s && *L.s) {
2862 rstream *rsm = newfile(L.s);
2863 fflush(rsm->F);
2864 } else {
2865 fflush_all();
2866 }
2867 break;
2868
2869 case F_cl: {
2870 rstream *rsm;
2871 int err = 0;
2872 rsm = (rstream *)hash_search(fdhash, L.s);
2873 debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
2874 if (rsm) {
2875 debug_printf_eval("OC_FBLTIN F_cl "
2876 "rsm->is_pipe:%d, ->F:%p\n",
2877 rsm->is_pipe, rsm->F);
2878 /* Can be NULL if open failed. Example:
2879 * getline line <"doesnt_exist";
2880 * close("doesnt_exist"); <--- here rsm->F is NULL
2881 */
2882 if (rsm->F)
2883 err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
2884 free(rsm->buffer);
2885 hash_remove(fdhash, L.s);
2886 }
2887 if (err)
2888 setvar_i(intvar[ERRNO], errno);
2889 R_d = (double)err;
2890 break;
2891 }
2892 } /* switch */
2893 setvar_i(res, R_d);
2894 break;
2895 }
2896
2897 case XC( OC_BUILTIN ):
2898 res = exec_builtin(op, res);
2899 break;
2900
2901 case XC( OC_SPRINTF ):
2902 setvar_p(res, awk_printf(op1));
2903 break;
2904
2905 case XC( OC_UNARY ): {
2906 double Ld, R_d;
2907
2908 Ld = R_d = getvar_i(R.v);
2909 switch (opn) {
2910 case 'P':
2911 Ld = ++R_d;
2912 goto r_op_change;
2913 case 'p':
2914 R_d++;
2915 goto r_op_change;
2916 case 'M':
2917 Ld = --R_d;
2918 goto r_op_change;
2919 case 'm':
2920 R_d--;
2921 r_op_change:
2922 setvar_i(R.v, R_d);
2923 break;
2924 case '!':
2925 Ld = !istrue(R.v);
2926 break;
2927 case '-':
2928 Ld = -R_d;
2929 break;
2930 }
2931 setvar_i(res, Ld);
2932 break;
2933 }
2934
2935 case XC( OC_FIELD ): {
2936 int i = (int)getvar_i(R.v);
2937 if (i == 0) {
2938 res = intvar[F0];
2939 } else {
2940 split_f0();
2941 if (i > nfields)
2942 fsrealloc(i);
2943 res = &Fields[i - 1];
2944 }
2945 break;
2946 }
2947
2948 /* concatenation (" ") and index joining (",") */
2949 case XC( OC_CONCAT ):
2950 case XC( OC_COMMA ): {
2951 const char *sep = "";
2952 if ((opinfo & OPCLSMASK) == OC_COMMA)
2953 sep = getvar_s(intvar[SUBSEP]);
2954 setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
2955 break;
2956 }
2957
2958 case XC( OC_LAND ):
2959 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2960 break;
2961
2962 case XC( OC_LOR ):
2963 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2964 break;
2965
2966 case XC( OC_BINARY ):
2967 case XC( OC_REPLACE ): {
2968 double R_d = getvar_i(R.v);
2969 debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
2970 switch (opn) {
2971 case '+':
2972 L_d += R_d;
2973 break;
2974 case '-':
2975 L_d -= R_d;
2976 break;
2977 case '*':
2978 L_d *= R_d;
2979 break;
2980 case '/':
2981 if (R_d == 0)
2982 syntax_error(EMSG_DIV_BY_ZERO);
2983 L_d /= R_d;
2984 break;
2985 case '&':
2986 if (ENABLE_FEATURE_AWK_LIBM)
2987 L_d = pow(L_d, R_d);
2988 else
2989 syntax_error(EMSG_NO_MATH);
2990 break;
2991 case '%':
2992 if (R_d == 0)
2993 syntax_error(EMSG_DIV_BY_ZERO);
2994 L_d -= (long long)(L_d / R_d) * R_d;
2995 break;
2996 }
2997 debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
2998 res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
2999 break;
3000 }
3001
3002 case XC( OC_COMPARE ): {
3003 static int i;
3004 double Ld;
3005
3006 if (is_numeric(L.v) && is_numeric(R.v)) {
3007 Ld = getvar_i(L.v) - getvar_i(R.v);
3008 } else {
3009 const char *l = getvar_s(L.v);
3010 const char *r = getvar_s(R.v);
3011 Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3012 }
3013 switch (opn & 0xfe) {
3014 case 0:
3015 i = (Ld > 0);
3016 break;
3017 case 2:
3018 i = (Ld >= 0);
3019 break;
3020 case 4:
3021 i = (Ld == 0);
3022 break;
3023 }
3024 setvar_i(res, (i == 0) ^ (opn & 1));
3025 break;
3026 }
3027
3028 default:
3029 syntax_error(EMSG_POSSIBLE_ERROR);
3030 }
3031 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3032 op = op->a.n;
3033 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3034 break;
3035 if (nextrec)
3036 break;
3037 } /* while (op) */
3038
3039 nvfree(v1);
3040 debug_printf_eval("returning from %s(): %p\n", __func__, res);
3041 return res;
3042#undef fnargs
3043#undef seed
3044#undef sreg
3045}
3046
3047
3048/* -------- main & co. -------- */
3049
3050static int awk_exit(int r)
3051{
3052 var tv;
3053 unsigned i;
3054 hash_item *hi;
3055
3056 zero_out_var(&tv);
3057
3058 if (!exiting) {
3059 exiting = TRUE;
3060 nextrec = FALSE;
3061 evaluate(endseq.first, &tv);
3062 }
3063
3064 /* waiting for children */
3065 for (i = 0; i < fdhash->csize; i++) {
3066 hi = fdhash->items[i];
3067 while (hi) {
3068 if (hi->data.rs.F && hi->data.rs.is_pipe)
3069 pclose(hi->data.rs.F);
3070 hi = hi->next;
3071 }
3072 }
3073
3074 exit(r);
3075}
3076
3077/* if expr looks like "var=value", perform assignment and return 1,
3078 * otherwise return 0 */
3079static int is_assignment(const char *expr)
3080{
3081 char *exprc, *val;
3082
3083 if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
3084 return FALSE;
3085 }
3086
3087 exprc = xstrdup(expr);
3088 val = exprc + (val - expr);
3089 *val++ = '\0';
3090
3091 unescape_string_in_place(val);
3092 setvar_u(newvar(exprc), val);
3093 free(exprc);
3094 return TRUE;
3095}
3096
3097/* switch to next input file */
3098static rstream *next_input_file(void)
3099{
3100#define rsm (G.next_input_file__rsm)
3101#define files_happen (G.next_input_file__files_happen)
3102
3103 FILE *F;
3104 const char *fname, *ind;
3105
3106 if (rsm.F)
3107 fclose(rsm.F);
3108 rsm.F = NULL;
3109 rsm.pos = rsm.adv = 0;
3110
3111 for (;;) {
3112 if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
3113 if (files_happen)
3114 return NULL;
3115 fname = "-";
3116 F = stdin;
3117 break;
3118 }
3119 ind = getvar_s(incvar(intvar[ARGIND]));
3120 fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
3121 if (fname && *fname && !is_assignment(fname)) {
3122 F = xfopen_stdin(fname);
3123 break;
3124 }
3125 }
3126
3127 files_happen = TRUE;
3128 setvar_s(intvar[FILENAME], fname);
3129 rsm.F = F;
3130 return &rsm;
3131#undef rsm
3132#undef files_happen
3133}
3134
3135int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
3136int awk_main(int argc, char **argv)
3137{
3138 unsigned opt;
3139 char *opt_F;
3140 llist_t *list_v = NULL;
3141 llist_t *list_f = NULL;
3142#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3143 llist_t *list_e = NULL;
3144#endif
3145 int i, j;
3146 var *v;
3147 var tv;
3148 char **envp;
3149 char *vnames = (char *)vNames; /* cheat */
3150 char *vvalues = (char *)vValues;
3151
3152 INIT_G();
3153
3154 /* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3155 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3156 if (ENABLE_LOCALE_SUPPORT)
3157 setlocale(LC_NUMERIC, "C");
3158
3159 zero_out_var(&tv);
3160
3161 /* allocate global buffer */
3162 g_buf = xmalloc(MAXVARFMT + 1);
3163
3164 vhash = hash_init();
3165 ahash = hash_init();
3166 fdhash = hash_init();
3167 fnhash = hash_init();
3168
3169 /* initialize variables */
3170 for (i = 0; *vnames; i++) {
3171 intvar[i] = v = newvar(nextword(&vnames));
3172 if (*vvalues != '\377')
3173 setvar_s(v, nextword(&vvalues));
3174 else
3175 setvar_i(v, 0);
3176
3177 if (*vnames == '*') {
3178 v->type |= VF_SPECIAL;
3179 vnames++;
3180 }
3181 }
3182
3183 handle_special(intvar[FS]);
3184 handle_special(intvar[RS]);
3185
3186 newfile("/dev/stdin")->F = stdin;
3187 newfile("/dev/stdout")->F = stdout;
3188 newfile("/dev/stderr")->F = stderr;
3189
3190 /* Huh, people report that sometimes environ is NULL. Oh well. */
3191 if (environ) for (envp = environ; *envp; envp++) {
3192 /* environ is writable, thus we don't strdup it needlessly */
3193 char *s = *envp;
3194 char *s1 = strchr(s, '=');
3195 if (s1) {
3196 *s1 = '\0';
3197 /* Both findvar and setvar_u take const char*
3198 * as 2nd arg -> environment is not trashed */
3199 setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3200 *s1 = '=';
3201 }
3202 }
3203 opt_complementary = OPTCOMPLSTR_AWK;
3204 opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3205 argv += optind;
3206 argc -= optind;
3207 if (opt & OPT_W)
3208 bb_error_msg("warning: option -W is ignored");
3209 if (opt & OPT_F) {
3210 unescape_string_in_place(opt_F);
3211 setvar_s(intvar[FS], opt_F);
3212 }
3213 while (list_v) {
3214 if (!is_assignment(llist_pop(&list_v)))
3215 bb_show_usage();
3216 }
3217 while (list_f) {
3218 char *s = NULL;
3219 FILE *from_file;
3220
3221 g_progname = llist_pop(&list_f);
3222 from_file = xfopen_stdin(g_progname);
3223 /* one byte is reserved for some trick in next_token */
3224 for (i = j = 1; j > 0; i += j) {
3225 s = xrealloc(s, i + 4096);
3226 j = fread(s + i, 1, 4094, from_file);
3227 }
3228 s[i] = '\0';
3229 fclose(from_file);
3230 parse_program(s + 1);
3231 free(s);
3232 }
3233 g_progname = "cmd. line";
3234#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3235 while (list_e) {
3236 parse_program(llist_pop(&list_e));
3237 }
3238#endif
3239 if (!(opt & (OPT_f | OPT_e))) {
3240 if (!*argv)
3241 bb_show_usage();
3242 parse_program(*argv++);
3243 argc--;
3244 }
3245
3246 /* fill in ARGV array */
3247 setvar_i(intvar[ARGC], argc + 1);
3248 setari_u(intvar[ARGV], 0, "awk");
3249 i = 0;
3250 while (*argv)
3251 setari_u(intvar[ARGV], ++i, *argv++);
3252
3253 evaluate(beginseq.first, &tv);
3254 if (!mainseq.first && !endseq.first)
3255 awk_exit(EXIT_SUCCESS);
3256
3257 /* input file could already be opened in BEGIN block */
3258 if (!iF)
3259 iF = next_input_file();
3260
3261 /* passing through input files */
3262 while (iF) {
3263 nextfile = FALSE;
3264 setvar_i(intvar[FNR], 0);
3265
3266 while ((i = awk_getline(iF, intvar[F0])) > 0) {
3267 nextrec = FALSE;
3268 incvar(intvar[NR]);
3269 incvar(intvar[FNR]);
3270 evaluate(mainseq.first, &tv);
3271
3272 if (nextfile)
3273 break;
3274 }
3275
3276 if (i < 0)
3277 syntax_error(strerror(errno));
3278
3279 iF = next_input_file();
3280 }
3281
3282 awk_exit(EXIT_SUCCESS);
3283 /*return 0;*/
3284}
3285