summaryrefslogtreecommitdiff
path: root/networking/wget.c (plain)
blob: cf0769e5ecd0ac7e76dd0f0c309a01ca9f959dab
1/* vi: set sw=4 ts=4: */
2/*
3 * wget - retrieve a file using HTTP or FTP
4 *
5 * Chip Rosenthal Covad Communications <chip@laserlink.net>
6 * Licensed under GPLv2, see file LICENSE in this source tree.
7 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
9 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
10 */
11
12//config:config WGET
13//config: bool "wget"
14//config: default y
15//config: help
16//config: wget is a utility for non-interactive download of files from HTTP
17//config: and FTP servers.
18//config:
19//config:config FEATURE_WGET_STATUSBAR
20//config: bool "Enable a nifty progress meter (+2k)"
21//config: default y
22//config: depends on WGET
23//config: help
24//config: Enable the transfer progress bar for wget transfers.
25//config:
26//config:config FEATURE_WGET_AUTHENTICATION
27//config: bool "Enable HTTP authentication"
28//config: default y
29//config: depends on WGET
30//config: help
31//config: Support authenticated HTTP transfers.
32//config:
33//config:config FEATURE_WGET_LONG_OPTIONS
34//config: bool "Enable long options"
35//config: default y
36//config: depends on WGET && LONG_OPTS
37//config: help
38//config: Support long options for the wget applet.
39//config:
40//config:config FEATURE_WGET_TIMEOUT
41//config: bool "Enable timeout option -T SEC"
42//config: default y
43//config: depends on WGET
44//config: help
45//config: Supports network read and connect timeouts for wget,
46//config: so that wget will give up and timeout, through the -T
47//config: command line option.
48//config:
49//config: Currently only connect and network data read timeout are
50//config: supported (i.e., timeout is not applied to the DNS query). When
51//config: FEATURE_WGET_LONG_OPTIONS is also enabled, the --timeout option
52//config: will work in addition to -T.
53//config:
54//config:config FEATURE_WGET_OPENSSL
55//config: bool "Try to connect to HTTPS using openssl"
56//config: default y
57//config: depends on WGET
58//config: help
59//config: Choose how wget establishes SSL connection for https:// URLs.
60//config:
61//config: Busybox itself contains no SSL code. wget will spawn
62//config: a helper program to talk over HTTPS.
63//config:
64//config: OpenSSL has a simple SSL client for debug purposes.
65//config: If you select "openssl" helper, wget will effectively run:
66//config: "openssl s_client -quiet -connect hostname:443
67//config: -servername hostname 2>/dev/null" and pipe its data
68//config: through it. -servername is not used if hostname is numeric.
69//config: Note inconvenient API: host resolution is done twice,
70//config: and there is no guarantee openssl's idea of IPv6 address
71//config: format is the same as ours.
72//config: Another problem is that s_client prints debug information
73//config: to stderr, and it needs to be suppressed. This means
74//config: all error messages get suppressed too.
75//config: openssl is also a big binary, often dynamically linked
76//config: against ~15 libraries.
77//config:
78//config:config FEATURE_WGET_SSL_HELPER
79//config: bool "Try to connect to HTTPS using ssl_helper"
80//config: default y
81//config: depends on WGET
82//config: help
83//config: Choose how wget establishes SSL connection for https:// URLs.
84//config:
85//config: Busybox itself contains no SSL code. wget will spawn
86//config: a helper program to talk over HTTPS.
87//config:
88//config: ssl_helper is a tool which can be built statically
89//config: from busybox sources against a small embedded SSL library.
90//config: Please see networking/ssl_helper/README.
91//config: It does not require double host resolution and emits
92//config: error messages to stderr.
93//config:
94//config: Precompiled static binary may be available at
95//config: http://busybox.net/downloads/binaries/
96
97//applet:IF_WGET(APPLET(wget, BB_DIR_USR_BIN, BB_SUID_DROP))
98
99//kbuild:lib-$(CONFIG_WGET) += wget.o
100
101//usage:#define wget_trivial_usage
102//usage: IF_FEATURE_WGET_LONG_OPTIONS(
103//usage: "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
104//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
105/* Since we ignore these opts, we don't show them in --help */
106/* //usage: " [--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
107/* //usage: " [-nv] [-nc] [-nH] [-np]" */
108//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
109//usage: )
110//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
111//usage: "[-cq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
112//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
113//usage: )
114//usage:#define wget_full_usage "\n\n"
115//usage: "Retrieve files via HTTP or FTP\n"
116//usage: IF_FEATURE_WGET_LONG_OPTIONS(
117//usage: "\n --spider Spider mode - only check file existence"
118//usage: )
119//usage: "\n -c Continue retrieval of aborted transfer"
120//usage: "\n -q Quiet"
121//usage: "\n -P DIR Save to DIR (default .)"
122//usage: IF_FEATURE_WGET_TIMEOUT(
123//usage: "\n -T SEC Network read timeout is SEC seconds"
124//usage: )
125//usage: "\n -O FILE Save to FILE ('-' for stdout)"
126//usage: "\n -U STR Use STR for User-Agent header"
127//usage: "\n -Y on/off Use proxy"
128
129#include "libbb.h"
130
/*
 * Protocol I/O tracing helpers.
 *
 * Change "#if 0" to "#if 1" to have every line passed to log_io()/SENDFMT()
 * reported through bb_error_msg(). In the default build log_io() expands
 * to nothing and SENDFMT() is a plain fprintf().
 *
 * The tracing variant of SENDFMT() is wrapped in do { } while (0) WITHOUT
 * a trailing semicolon, so that "if (x) SENDFMT(...); else ..." parses
 * the same way in both variants.
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
# define SENDFMT(fp, fmt, ...) \
	do { \
		log_io("> " fmt, ##__VA_ARGS__); \
		fprintf(fp, fmt, ##__VA_ARGS__); \
	} while (0)
#else
# define log_io(...) ((void)0)
# define SENDFMT(fp, fmt, ...) fprintf(fp, fmt, ##__VA_ARGS__)
#endif
142
143
/*
 * Result of parse_url(): the pieces of one URL.
 * parse_url() makes a private heap copy of the URL and points
 * 'host' (and usually 'path') into that copy.
 */
struct host_info {
	char *allocated;        /* heap copy of the URL; freed on the next parse_url() */
	const char *path;       /* part after the host, without leading '/'; may be "" */
	char *user;             /* separately allocated "user[:password]", or NULL */
	const char *protocol;   /* one of the P_xxx constants */
	char *host;             /* host[:port] part */
	int port;               /* default port for the protocol */
};
152static const char P_FTP[] ALIGN1 = "ftp";
153static const char P_HTTP[] ALIGN1 = "http";
154#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
155static const char P_HTTPS[] ALIGN1 = "https";
156#endif
157
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* User-specified headers prevent using our corresponding built-in headers. */
enum {
	HDR_HOST       = (1<<0),
	HDR_USER_AGENT = (1<<1),
	HDR_RANGE      = (1<<2),
	/* These two become 0 (never set) when authentication is compiled out */
	HDR_AUTH       = (1<<3) * ENABLE_FEATURE_WGET_AUTHENTICATION,
	HDR_PROXY_AUTH = (1<<4) * ENABLE_FEATURE_WGET_AUTHENTICATION,
};
/*
 * NUL-separated list of header names.
 * NOTE(review): the order presumably has to match the HDR_xxx bit numbers
 * above (the code which scans this list is not in this part of the file).
 */
static const char wget_user_headers[] ALIGN1 =
	"Host:\0"
	"User-Agent:\0"
	"Range:\0"
# if ENABLE_FEATURE_WGET_AUTHENTICATION
	"Authorization:\0"
	"Proxy-Authorization:\0"
# endif
	;
/* Nonzero if the user supplied this header, so we must not send our own */
# define USR_HEADER_HOST       (G.user_headers & HDR_HOST)
# define USR_HEADER_USER_AGENT (G.user_headers & HDR_USER_AGENT)
# define USR_HEADER_RANGE      (G.user_headers & HDR_RANGE)
# define USR_HEADER_AUTH       (G.user_headers & HDR_AUTH)
# define USR_HEADER_PROXY_AUTH (G.user_headers & HDR_PROXY_AUTH)
#else /* No long options, no user-headers :( */
# define USR_HEADER_HOST       0
# define USR_HEADER_USER_AGENT 0
# define USR_HEADER_RANGE      0
# define USR_HEADER_AUTH       0
# define USR_HEADER_PROXY_AUTH 0
#endif
188
189/* Globals */
190struct globals {
191 off_t content_len; /* Content-length of the file */
192 off_t beg_range; /* Range at which continue begins */
193#if ENABLE_FEATURE_WGET_STATUSBAR
194 off_t transferred; /* Number of bytes transferred so far */
195 const char *curfile; /* Name of current file being transferred */
196 bb_progress_t pmt;
197#endif
198 char *dir_prefix;
199#if ENABLE_FEATURE_WGET_LONG_OPTIONS
200 char *post_data;
201 char *extra_headers;
202 unsigned char user_headers; /* Headers mentioned by the user */
203#endif
204 char *fname_out; /* where to direct output (-O) */
205 const char *proxy_flag; /* Use proxies if env vars are set */
206 const char *user_agent; /* "User-Agent" header field */
207#if ENABLE_FEATURE_WGET_TIMEOUT
208 unsigned timeout_seconds;
209 bool die_if_timed_out;
210#endif
211 int output_fd;
212 int o_flags;
213 smallint chunked; /* chunked transfer encoding */
214 smallint got_clen; /* got content-length: from server */
215 /* Local downloads do benefit from big buffer.
216 * With 512 byte buffer, it was measured to be
217 * an order of magnitude slower than with big one.
218 */
219 uint64_t just_to_align_next_member;
220 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
221} FIX_ALIASING;
222#define G (*ptr_to_globals)
223#define INIT_G() do { \
224 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
225} while (0)
226#define FINI_G() do { \
227 FREE_PTR_TO_GLOBALS(); \
228} while (0)
229
230
231/* Must match option string! */
232enum {
233 WGET_OPT_CONTINUE = (1 << 0),
234 WGET_OPT_QUIET = (1 << 1),
235 WGET_OPT_OUTNAME = (1 << 2),
236 WGET_OPT_PREFIX = (1 << 3),
237 WGET_OPT_PROXY = (1 << 4),
238 WGET_OPT_USER_AGENT = (1 << 5),
239 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 6),
240 WGET_OPT_RETRIES = (1 << 7),
241 WGET_OPT_nsomething = (1 << 8),
242 WGET_OPT_HEADER = (1 << 9) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
243 WGET_OPT_POST_DATA = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
244 WGET_OPT_SPIDER = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
245};
246
247enum {
248 PROGRESS_START = -1,
249 PROGRESS_END = 0,
250 PROGRESS_BUMP = 1,
251};
252#if ENABLE_FEATURE_WGET_STATUSBAR
253static void progress_meter(int flag)
254{
255 if (option_mask32 & WGET_OPT_QUIET)
256 return;
257
258 if (flag == PROGRESS_START)
259 bb_progress_init(&G.pmt, G.curfile);
260
261 bb_progress_update(&G.pmt,
262 G.beg_range,
263 G.transferred,
264 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
265 );
266
267 if (flag == PROGRESS_END) {
268 bb_progress_free(&G.pmt);
269 bb_putchar_stderr('\n');
270 G.transferred = 0;
271 }
272}
273#else
274static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
275#endif
276
277
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits 'host' in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Strings which are not of the form "[xx]" or "[xx]:nn", or which have
 * no '%' inside the brackets, are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* The zone identifier must sit inside the brackets.
	 * A '%' found after the ']' (as in "[::1]:%x") is not a scope id;
	 * copying "]..." forward onto it would overlap the wrong way
	 * and run past the end of the string.
	 */
	scope = strchr(host, '%');
	if (!scope || scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
313
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode character string.
 * The result is stored in (and returned as a pointer to) G.wget_buf,
 * so it must be consumed before G.wget_buf is reused.
 * Overly long input is silently truncated so that the output fits.
 */
static char *base64enc(const char *str)
{
	unsigned len = strlen(str);

	if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
		len = sizeof(G.wget_buf)/4*3 - 10;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
325
/*
 * Cut string 's' at the first byte below ' ' (any control character).
 * Bytes >= 0x80 are kept. Used before echoing server-supplied text.
 * Modifies 's' in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;

	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}
334
#if ENABLE_FEATURE_WGET_TIMEOUT
/*
 * Signal handler: abort the program, but only if the alarm fired
 * while a set_alarm()..clear_alarm() section was active.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	if (G.die_if_timed_out)
		bb_error_msg_and_die("download timed out");
}
/* Arm the timeout (if -T was given) before a potentially blocking call */
static void set_alarm(void)
{
	if (G.timeout_seconds) {
		alarm(G.timeout_seconds);
		G.die_if_timed_out = 1;
	}
}
/* The pending alarm is not cancelled, it is merely made harmless */
# define clear_alarm() ((void)(G.die_if_timed_out = 0))
#else
# define set_alarm()   ((void)0)
# define clear_alarm() ((void)0)
#endif
354
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * is_ip_address() attempts to verify whether or not a string
 * contains an IPv4 or IPv6 address (vs. an FQDN). The result
 * of inet_pton() can be used to determine this.
 *
 * Returns 1 if 'string' parses as a numeric address, 0 otherwise.
 *
 * TODO add proper error checking when inet_pton() returns -1
 * (some form of system error has occurred, and errno is set)
 */
static int is_ip_address(const char *string)
{
	struct in_addr addr4;
	int result;

	result = inet_pton(AF_INET, string, &addr4);
# if ENABLE_FEATURE_IPV6
	if (result == 0) {
		/* Not dotted-quad, maybe it is IPv6 notation */
		struct in6_addr addr6;
		result = inet_pton(AF_INET6, string, &addr6);
	}
# endif
	return (result == 1);
}
#endif
378
379static FILE *open_socket(len_and_sockaddr *lsa)
380{
381 int fd;
382 FILE *fp;
383
384 set_alarm();
385 fd = xconnect_stream(lsa);
386 clear_alarm();
387
388 /* glibc 2.4 seems to try seeking on it - ??! */
389 /* hopefully it understands what ESPIPE means... */
390 fp = fdopen(fd, "r+");
391 if (!fp)
392 bb_perror_msg_and_die("%s", bb_msg_memory_exhausted);
393
394 return fp;
395}
396
397/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
398static char fgets_and_trim(FILE *fp)
399{
400 char c;
401 char *buf_ptr;
402
403 set_alarm();
404 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
405 bb_perror_msg_and_die("error getting response");
406 clear_alarm();
407
408 buf_ptr = strchrnul(G.wget_buf, '\n');
409 c = *buf_ptr;
410 *buf_ptr = '\0';
411 buf_ptr = strchrnul(G.wget_buf, '\r');
412 *buf_ptr = '\0';
413
414 log_io("< %s", G.wget_buf);
415
416 return c;
417}
418
419static int ftpcmd(const char *s1, const char *s2, FILE *fp)
420{
421 int result;
422 if (s1) {
423 if (!s2)
424 s2 = "";
425 fprintf(fp, "%s%s\r\n", s1, s2);
426 fflush(fp);
427 log_io("> %s%s", s1, s2);
428 }
429
430 do {
431 fgets_and_trim(fp);
432 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
433
434 G.wget_buf[3] = '\0';
435 result = xatoi_positive(G.wget_buf);
436 G.wget_buf[3] = ' ';
437 return result;
438}
439
440static void parse_url(const char *src_url, struct host_info *h)
441{
442 char *url, *p, *sp;
443
444 free(h->allocated);
445 h->allocated = url = xstrdup(src_url);
446
447 h->protocol = P_FTP;
448 p = strstr(url, "://");
449 if (p) {
450 *p = '\0';
451 h->host = p + 3;
452 if (strcmp(url, P_FTP) == 0) {
453 h->port = bb_lookup_port(P_FTP, "tcp", 21);
454 } else
455#if ENABLE_FEATURE_WGET_OPENSSL || ENABLE_FEATURE_WGET_SSL_HELPER
456 if (strcmp(url, P_HTTPS) == 0) {
457 h->port = bb_lookup_port(P_HTTPS, "tcp", 443);
458 h->protocol = P_HTTPS;
459 } else
460#endif
461 if (strcmp(url, P_HTTP) == 0) {
462 http:
463 h->port = bb_lookup_port(P_HTTP, "tcp", 80);
464 h->protocol = P_HTTP;
465 } else {
466 *p = ':';
467 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
468 }
469 } else {
470 // GNU wget is user-friendly and falls back to http://
471 h->host = url;
472 goto http;
473 }
474
475 // FYI:
476 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
477 // 'GET /?var=a/b HTTP 1.0'
478 // and saves 'index.html?var=a%2Fb' (we save 'b')
479 // wget 'http://busybox.net?login=john@doe':
480 // request: 'GET /?login=john@doe HTTP/1.0'
481 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
482 // wget 'http://busybox.net#test/test':
483 // request: 'GET / HTTP/1.0'
484 // saves: 'index.html' (we save 'test')
485 //
486 // We also don't add unique .N suffix if file exists...
487 sp = strchr(h->host, '/');
488 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
489 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
490 if (!sp) {
491 h->path = "";
492 } else if (*sp == '/') {
493 *sp = '\0';
494 h->path = sp + 1;
495 } else { // '#' or '?'
496 // http://busybox.net?login=john@doe is a valid URL
497 // memmove converts to:
498 // http:/busybox.nett?login=john@doe...
499 memmove(h->host - 1, h->host, sp - h->host);
500 h->host--;
501 sp[-1] = '\0';
502 h->path = sp;
503 }
504
505 sp = strrchr(h->host, '@');
506 if (sp != NULL) {
507 // URL-decode "user:password" string before base64-encoding:
508 // wget http://test:my%20pass@example.com should send
509 // Authorization: Basic dGVzdDpteSBwYXNz
510 // which decodes to "test:my pass".
511 // Standard wget and curl do this too.
512 *sp = '\0';
513 free(h->user);
514 h->user = xstrdup(percent_decode_in_place(h->host, /*strict:*/ 0));
515 h->host = sp + 1;
516 }
517 /* else: h->user remains NULL, or as set by original request
518 * before redirect (if we are here after a redirect).
519 */
520}
521
522static char *gethdr(FILE *fp)
523{
524 char *s, *hdrval;
525 int c;
526
527 /* retrieve header line */
528 c = fgets_and_trim(fp);
529
530 /* end of the headers? */
531 if (G.wget_buf[0] == '\0')
532 return NULL;
533
534 /* convert the header name to lower case */
535 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
536 /*
537 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
538 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
539 * "A-Z" maps to "a-z".
540 * "@[\]" can't occur in header names.
541 * "^_" maps to "~,DEL" (which is wrong).
542 * "^" was never seen yet, "_" was seen from web.archive.org
543 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
544 */
545 *s |= 0x20;
546 }
547
548 /* verify we are at the end of the header name */
549 if (*s != ':')
550 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
551
552 /* locate the start of the header value */
553 *s++ = '\0';
554 hdrval = skip_whitespace(s);
555
556 if (c != '\n') {
557 /* Rats! The buffer isn't big enough to hold the entire header value */
558 while (c = getc(fp), c != EOF && c != '\n')
559 continue;
560 }
561
562 return hdrval;
563}
564
565static void reset_beg_range_to_zero(void)
566{
567 bb_error_msg("restart failed");
568 G.beg_range = 0;
569 xlseek(G.output_fd, 0, SEEK_SET);
570 /* Done at the end instead: */
571 /* ftruncate(G.output_fd, 0); */
572}
573
574static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
575{
576 FILE *sfp;
577 char *str;
578 int port;
579
580 if (!target->user)
581 target->user = xstrdup("anonymous:busybox@");
582
583 sfp = open_socket(lsa);
584 if (ftpcmd(NULL, NULL, sfp) != 220)
585 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
586
587 /*
588 * Splitting username:password pair,
589 * trying to log in
590 */
591 str = strchr(target->user, ':');
592 if (str)
593 *str++ = '\0';
594 switch (ftpcmd("USER ", target->user, sfp)) {
595 case 230:
596 break;
597 case 331:
598 if (ftpcmd("PASS ", str, sfp) == 230)
599 break;
600 /* fall through (failed login) */
601 default:
602 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
603 }
604
605 ftpcmd("TYPE I", NULL, sfp);
606
607 /*
608 * Querying file size
609 */
610 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
611 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
612 if (G.content_len < 0 || errno) {
613 bb_error_msg_and_die("SIZE value is garbage");
614 }
615 G.got_clen = 1;
616 }
617
618 /*
619 * Entering passive mode
620 */
621 if (ftpcmd("PASV", NULL, sfp) != 227) {
622 pasv_error:
623 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
624 }
625 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
626 // Server's IP is N1.N2.N3.N4 (we ignore it)
627 // Server's port for data connection is P1*256+P2
628 str = strrchr(G.wget_buf, ')');
629 if (str) str[0] = '\0';
630 str = strrchr(G.wget_buf, ',');
631 if (!str) goto pasv_error;
632 port = xatou_range(str+1, 0, 255);
633 *str = '\0';
634 str = strrchr(G.wget_buf, ',');
635 if (!str) goto pasv_error;
636 port += xatou_range(str+1, 0, 255) * 256;
637 set_nport(&lsa->u.sa, htons(port));
638
639 *dfpp = open_socket(lsa);
640
641 if (G.beg_range != 0) {
642 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
643 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
644 G.content_len -= G.beg_range;
645 else
646 reset_beg_range_to_zero();
647 }
648
649 if (ftpcmd("RETR ", target->path, sfp) > 150)
650 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
651
652 return sfp;
653}
654
#if ENABLE_FEATURE_WGET_OPENSSL
/*
 * Run "openssl s_client -quiet -connect host:port [-servername host]"
 * with its stdin/stdout attached to one end of a socketpair.
 *
 * Returns the other end of the socketpair. If ssl_helper support is
 * also configured, returns -1 when openssl could not be executed.
 */
static int spawn_https_helper_openssl(const char *host, unsigned port)
{
	char *allocated = NULL;
	char *servername;
	int sp[2];
	int pid;
	/* Set by the vfork'ed child, which shares memory with us */
	IF_FEATURE_WGET_SSL_HELPER(volatile int child_failed = 0;)

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	/* -connect wants "host:port", -servername wants just "host" */
	if (!strchr(host, ':'))
		host = allocated = xasprintf("%s:%u", host, port);
	servername = xstrdup(host);
	strrchr(servername, ':')[0] = '\0';

	fflush_all();
	pid = xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[8];

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		/*
		 * openssl s_client -quiet -connect www.kernel.org:443 2>/dev/null
		 * It prints some debug stuff on stderr, don't know how to suppress it.
		 * Work around by dev-nulling stderr. We lose all error messages :(
		 */
		/* Park the real stderr on fd 3; the open below is then
		 * expected to land on the freed fd 2 */
		xmove_fd(2, 3);
		xopen("/dev/null", O_RDWR);
		memset(&argv, 0, sizeof(argv));
		argv[0] = (char*)"openssl";
		argv[1] = (char*)"s_client";
		argv[2] = (char*)"-quiet";
		argv[3] = (char*)"-connect";
		argv[4] = (char*)host;
		/*
		 * Per RFC 6066 Section 3, the only permitted values in the
		 * TLS server_name (SNI) field are FQDNs (DNS hostnames).
		 * IPv4 and IPv6 addresses, port numbers are not allowed.
		 */
		if (!is_ip_address(servername)) {
			argv[5] = (char*)"-servername";
			argv[6] = (char*)servername;
		}

		BB_EXECVP(argv[0], argv);
		/* exec failed: get the real stderr back */
		xmove_fd(3, 2);
# if ENABLE_FEATURE_WGET_SSL_HELPER
		child_failed = 1;
		xfunc_die();
# else
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
# endif
		/* notreached */
	}

	/* Parent */
	free(servername);
	free(allocated);
	close(sp[1]);
# if ENABLE_FEATURE_WGET_SSL_HELPER
	if (child_failed) {
		close(sp[0]);
		return -1;
	}
# endif
	return sp[0];
}
#endif
729
/* See networking/ssl_helper/README how to build one */
#if ENABLE_FEATURE_WGET_SSL_HELPER
/*
 * Run "ssl_helper -d3": the child gets the already connected
 * 'network_fd' as its fd 3 and one end of a socketpair as stdin/stdout.
 * In the parent, 'network_fd' is replaced by the other end of the
 * socketpair, so the caller keeps using the same fd number.
 */
static void spawn_https_helper_small(int network_fd)
{
	int sp[2];
	int pid;

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sp) != 0)
		/* Kernel can have AF_UNIX support disabled */
		bb_perror_msg_and_die("socketpair");

	pid = BB_MMU ? xfork() : xvfork();
	if (pid == 0) {
		/* Child */
		char *argv[3];

		close(sp[0]);
		xmove_fd(sp[1], 0);
		xdup2(0, 1);
		xmove_fd(network_fd, 3);
		/*
		 * A simple ssl/tls helper
		 */
		argv[0] = (char*)"ssl_helper";
		argv[1] = (char*)"-d3";
		argv[2] = NULL;
		BB_EXECVP(argv[0], argv);
		bb_perror_msg_and_die("can't execute '%s'", argv[0]);
		/* notreached */
	}

	/* Parent */
	close(sp[1]);
	xmove_fd(sp[0], network_fd);
}
#endif
766
767static void NOINLINE retrieve_file_data(FILE *dfp)
768{
769#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
770# if ENABLE_FEATURE_WGET_TIMEOUT
771 unsigned second_cnt = G.timeout_seconds;
772# endif
773 struct pollfd polldata;
774
775 polldata.fd = fileno(dfp);
776 polldata.events = POLLIN | POLLPRI;
777#endif
778 progress_meter(PROGRESS_START);
779
780 if (G.chunked)
781 goto get_clen;
782
783 /* Loops only if chunked */
784 while (1) {
785
786#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
787 /* Must use nonblocking I/O, otherwise fread will loop
788 * and *block* until it reads full buffer,
789 * which messes up progress bar and/or timeout logic.
790 * Because of nonblocking I/O, we need to dance
791 * very carefully around EAGAIN. See explanation at
792 * clearerr() calls.
793 */
794 ndelay_on(polldata.fd);
795#endif
796 while (1) {
797 int n;
798 unsigned rdsz;
799
800#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
801 /* fread internally uses read loop, which in our case
802 * is usually exited when we get EAGAIN.
803 * In this case, libc sets error marker on the stream.
804 * Need to clear it before next fread to avoid possible
805 * rare false positive ferror below. Rare because usually
806 * fread gets more than zero bytes, and we don't fall
807 * into if (n <= 0) ...
808 */
809 clearerr(dfp);
810#endif
811 errno = 0;
812 rdsz = sizeof(G.wget_buf);
813 if (G.got_clen) {
814 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
815 if ((int)G.content_len <= 0)
816 break;
817 rdsz = (unsigned)G.content_len;
818 }
819 }
820 n = fread(G.wget_buf, 1, rdsz, dfp);
821
822 if (n > 0) {
823 xwrite(G.output_fd, G.wget_buf, n);
824#if ENABLE_FEATURE_WGET_STATUSBAR
825 G.transferred += n;
826#endif
827 if (G.got_clen) {
828 G.content_len -= n;
829 if (G.content_len == 0)
830 break;
831 }
832#if ENABLE_FEATURE_WGET_TIMEOUT
833 second_cnt = G.timeout_seconds;
834#endif
835 goto bump;
836 }
837
838 /* n <= 0.
839 * man fread:
840 * If error occurs, or EOF is reached, the return value
841 * is a short item count (or zero).
842 * fread does not distinguish between EOF and error.
843 */
844 if (errno != EAGAIN) {
845 if (ferror(dfp)) {
846 progress_meter(PROGRESS_END);
847 bb_perror_msg_and_die(bb_msg_read_error);
848 }
849 break; /* EOF, not error */
850 }
851
852#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
853 /* It was EAGAIN. There is no data. Wait up to one second
854 * then abort if timed out, or update the bar and try reading again.
855 */
856 if (safe_poll(&polldata, 1, 1000) == 0) {
857# if ENABLE_FEATURE_WGET_TIMEOUT
858 if (second_cnt != 0 && --second_cnt == 0) {
859 progress_meter(PROGRESS_END);
860 bb_error_msg_and_die("download timed out");
861 }
862# endif
863 /* We used to loop back to poll here,
864 * but there is no great harm in letting fread
865 * to try reading anyway.
866 */
867 }
868#endif
869 bump:
870 /* Need to do it _every_ second for "stalled" indicator
871 * to be shown properly.
872 */
873 progress_meter(PROGRESS_BUMP);
874 } /* while (reading data) */
875
876#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
877 clearerr(dfp);
878 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
879#endif
880 if (!G.chunked)
881 break;
882
883 fgets_and_trim(dfp); /* Eat empty line */
884 get_clen:
885 fgets_and_trim(dfp);
886 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
887 /* FIXME: error check? */
888 if (G.content_len == 0)
889 break; /* all done! */
890 G.got_clen = 1;
891 /*
892 * Note that fgets may result in some data being buffered in dfp.
893 * We loop back to fread, which will retrieve this data.
894 * Also note that code has to be arranged so that fread
895 * is done _before_ one-second poll wait - poll doesn't know
896 * about stdio buffering and can result in spurious one second waits!
897 */
898 }
899
900 /* If -c failed, we restart from the beginning,
901 * but we do not truncate file then, we do it only now, at the end.
902 * This lets user to ^C if his 99% complete 10 GB file download
903 * failed to restart *without* losing the almost complete file.
904 */
905 {
906 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
907 if (pos != (off_t)-1)
908 ftruncate(G.output_fd, pos);
909 }
910
911 /* Draw full bar and free its resources */
912 G.chunked = 0; /* makes it show 100% even for chunked download */
913 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
914 progress_meter(PROGRESS_END);
915}
916
917static void download_one_url(const char *url)
918{
919 bool use_proxy; /* Use proxies if env vars are set */
920 int redir_limit;
921 len_and_sockaddr *lsa;
922 FILE *sfp; /* socket to web/ftp server */
923 FILE *dfp; /* socket to ftp server (data) */
924 char *proxy = NULL;
925 char *fname_out_alloc;
926 char *redirected_path = NULL;
927 struct host_info server;
928 struct host_info target;
929
930 server.allocated = NULL;
931 target.allocated = NULL;
932 server.user = NULL;
933 target.user = NULL;
934
935 parse_url(url, &target);
936
937 /* Use the proxy if necessary */
938 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
939 if (use_proxy) {
940 proxy = getenv(target.protocol == P_FTP ? "ftp_proxy" : "http_proxy");
941//FIXME: what if protocol is https? Ok to use http_proxy?
942 use_proxy = (proxy && proxy[0]);
943 if (use_proxy)
944 parse_url(proxy, &server);
945 }
946 if (!use_proxy) {
947 server.port = target.port;
948 if (ENABLE_FEATURE_IPV6) {
949 //free(server.allocated); - can't be non-NULL
950 server.host = server.allocated = xstrdup(target.host);
951 } else {
952 server.host = target.host;
953 }
954 }
955
956 if (ENABLE_FEATURE_IPV6)
957 strip_ipv6_scope_id(target.host);
958
959 /* If there was no -O FILE, guess output filename */
960 fname_out_alloc = NULL;
961 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
962 G.fname_out = bb_get_last_path_component_nostrip(target.path);
963 /* handle "wget http://kernel.org//" */
964 if (G.fname_out[0] == '/' || !G.fname_out[0])
965 G.fname_out = (char*)"index.html";
966 /* -P DIR is considered only if there was no -O FILE */
967 if (G.dir_prefix)
968 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
969 else {
970 /* redirects may free target.path later, need to make a copy */
971 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
972 }
973 }
974#if ENABLE_FEATURE_WGET_STATUSBAR
975 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
976#endif
977
978 /* Determine where to start transfer */
979 G.beg_range = 0;
980 if (option_mask32 & WGET_OPT_CONTINUE) {
981 G.output_fd = open(G.fname_out, O_WRONLY);
982 if (G.output_fd >= 0) {
983 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
984 }
985 /* File doesn't exist. We do not create file here yet.
986 * We are not sure it exists on remote side */
987 }
988
989 redir_limit = 5;
990 resolve_lsa:
991 lsa = xhost2sockaddr(server.host, server.port);
992 if (!(option_mask32 & WGET_OPT_QUIET)) {
993 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
994 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
995 free(s);
996 }
997 establish_session:
998 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
999 G.got_clen = 0;
1000 G.chunked = 0;
1001 if (use_proxy || target.protocol != P_FTP) {
1002 /*
1003 * HTTP session
1004 */
1005 char *str;
1006 int status;
1007
1008 /* Open socket to http(s) server */
1009#if ENABLE_FEATURE_WGET_OPENSSL
1010 /* openssl (and maybe ssl_helper) support is configured */
1011 if (target.protocol == P_HTTPS) {
1012 /* openssl-based helper
1013 * Inconvenient API since we can't give it an open fd
1014 */
1015 int fd = spawn_https_helper_openssl(server.host, server.port);
1016# if ENABLE_FEATURE_WGET_SSL_HELPER
1017 if (fd < 0) { /* no openssl? try ssl_helper */
1018 sfp = open_socket(lsa);
1019 spawn_https_helper_small(fileno(sfp));
1020 goto socket_opened;
1021 }
1022# else
1023 /* We don't check for exec("openssl") failure in this case */
1024# endif
1025 sfp = fdopen(fd, "r+");
1026 if (!sfp)
1027 bb_perror_msg_and_die("%s", bb_msg_memory_exhausted);
1028 goto socket_opened;
1029 }
1030 sfp = open_socket(lsa);
1031 socket_opened:
1032#elif ENABLE_FEATURE_WGET_SSL_HELPER
1033 /* Only ssl_helper support is configured */
1034 sfp = open_socket(lsa);
1035 if (target.protocol == P_HTTPS)
1036 spawn_https_helper_small(fileno(sfp));
1037#else
1038 /* ssl (https) support is not configured */
1039 sfp = open_socket(lsa);
1040#endif
1041 /* Send HTTP request */
1042 if (use_proxy) {
1043 SENDFMT(sfp, "GET %s://%s/%s HTTP/1.1\r\n",
1044 target.protocol, target.host,
1045 target.path);
1046 } else {
1047 SENDFMT(sfp, "%s /%s HTTP/1.1\r\n",
1048 (option_mask32 & WGET_OPT_POST_DATA) ? "POST" : "GET",
1049 target.path);
1050 }
1051 if (!USR_HEADER_HOST)
1052 SENDFMT(sfp, "Host: %s\r\n", target.host);
1053 if (!USR_HEADER_USER_AGENT)
1054 SENDFMT(sfp, "User-Agent: %s\r\n", G.user_agent);
1055
1056 /* Ask server to close the connection as soon as we are done
1057 * (IOW: we do not intend to send more requests)
1058 */
1059 SENDFMT(sfp, "Connection: close\r\n");
1060
1061#if ENABLE_FEATURE_WGET_AUTHENTICATION
1062 if (target.user && !USR_HEADER_AUTH) {
1063 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
1064 base64enc(target.user));
1065 }
1066 if (use_proxy && server.user && !USR_HEADER_PROXY_AUTH) {
1067 SENDFMT(sfp, "Proxy-Authorization: Basic %s\r\n",
1068 base64enc(server.user));
1069 }
1070#endif
1071
1072 if (G.beg_range != 0 && !USR_HEADER_RANGE)
1073 SENDFMT(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
1074
1075#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1076 if (G.extra_headers) {
1077 log_io(G.extra_headers);
1078 fputs(G.extra_headers, sfp);
1079 }
1080
1081 if (option_mask32 & WGET_OPT_POST_DATA) {
1082 SENDFMT(sfp,
1083 "Content-Type: application/x-www-form-urlencoded\r\n"
1084 "Content-Length: %u\r\n"
1085 "\r\n"
1086 "%s",
1087 (int) strlen(G.post_data), G.post_data
1088 );
1089 } else
1090#endif
1091 {
1092 SENDFMT(sfp, "\r\n");
1093 }
1094
1095 fflush(sfp);
1096 /* If we use SSL helper, keeping our end of the socket open for writing
1097 * makes our end (i.e. the same fd!) readable (EAGAIN instead of EOF)
1098 * even after child closes its copy of the fd.
1099 * This helps:
1100 */
1101 shutdown(fileno(sfp), SHUT_WR);
1102
1103 /*
1104 * Retrieve HTTP response line and check for "200" status code.
1105 */
1106 read_response:
1107 fgets_and_trim(sfp);
1108
1109 str = G.wget_buf;
1110 str = skip_non_whitespace(str);
1111 str = skip_whitespace(str);
1112 // FIXME: no error check
1113 // xatou wouldn't work: "200 OK"
1114 status = atoi(str);
1115 switch (status) {
1116 case 0:
1117 case 100:
1118 while (gethdr(sfp) != NULL)
1119 /* eat all remaining headers */;
1120 goto read_response;
1121
1122 /* Success responses */
1123 case 200:
1124 /* fall through */
1125 case 201: /* 201 Created */
1126/* "The request has been fulfilled and resulted in a new resource being created" */
1127 /* Standard wget is reported to treat this as success */
1128 /* fall through */
1129 case 202: /* 202 Accepted */
1130/* "The request has been accepted for processing, but the processing has not been completed" */
1131 /* Treat as success: fall through */
1132 case 203: /* 203 Non-Authoritative Information */
1133/* "Use of this response code is not required and is only appropriate when the response would otherwise be 200 (OK)" */
1134 /* fall through */
1135 case 204: /* 204 No Content */
1136/*
1137Response 204 doesn't say "null file", it says "metadata
1138has changed but data didn't":
1139
1140"10.2.5 204 No Content
1141The server has fulfilled the request but does not need to return
1142an entity-body, and might want to return updated metainformation.
1143The response MAY include new or updated metainformation in the form
1144of entity-headers, which if present SHOULD be associated with
1145the requested variant.
1146
1147If the client is a user agent, it SHOULD NOT change its document
1148view from that which caused the request to be sent. This response
1149is primarily intended to allow input for actions to take place
1150without causing a change to the user agent's active document view,
1151although any new or updated metainformation SHOULD be applied
1152to the document currently in the user agent's active view.
1153
1154The 204 response MUST NOT include a message-body, and thus
1155is always terminated by the first empty line after the header fields."
1156
1157However, in real world it was observed that some web servers
1158(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
1159*/
1160 if (G.beg_range != 0) {
1161 /* "Range:..." was not honored by the server.
1162 * Restart download from the beginning.
1163 */
1164 reset_beg_range_to_zero();
1165 }
1166 break;
1167 /* 205 Reset Content ?? what to do on this ?? */
1168
1169 case 300: /* redirection */
1170 case 301:
1171 case 302:
1172 case 303:
1173 break;
1174
1175 case 206: /* Partial Content */
1176 if (G.beg_range != 0)
1177 /* "Range:..." worked. Good. */
1178 break;
1179 /* Partial Content even though we did not ask for it??? */
1180 /* fall through */
1181 default:
1182 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
1183 }
1184
1185 /*
1186 * Retrieve HTTP headers.
1187 */
1188 while ((str = gethdr(sfp)) != NULL) {
1189 static const char keywords[] ALIGN1 =
1190 "content-length\0""transfer-encoding\0""location\0";
1191 enum {
1192 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
1193 };
1194 smalluint key;
1195
1196 /* gethdr converted "FOO:" string to lowercase */
1197
1198 /* strip trailing whitespace */
1199 char *s = strchrnul(str, '\0') - 1;
1200 while (s >= str && (*s == ' ' || *s == '\t')) {
1201 *s = '\0';
1202 s--;
1203 }
1204 key = index_in_strings(keywords, G.wget_buf) + 1;
1205 if (key == KEY_content_length) {
1206 G.content_len = BB_STRTOOFF(str, NULL, 10);
1207 if (G.content_len < 0 || errno) {
1208 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
1209 }
1210 G.got_clen = 1;
1211 continue;
1212 }
1213 if (key == KEY_transfer_encoding) {
1214 if (strcmp(str_tolower(str), "chunked") != 0)
1215 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
1216 G.chunked = 1;
1217 }
1218 if (key == KEY_location && status >= 300) {
1219 if (--redir_limit == 0)
1220 bb_error_msg_and_die("too many redirections");
1221 fclose(sfp);
1222 if (str[0] == '/') {
1223 free(redirected_path);
1224 target.path = redirected_path = xstrdup(str+1);
1225 /* lsa stays the same: it's on the same server */
1226 } else {
1227 parse_url(str, &target);
1228 if (!use_proxy) {
1229 /* server.user remains untouched */
1230 free(server.allocated);
1231 server.allocated = NULL;
1232 server.host = target.host;
1233 /* strip_ipv6_scope_id(target.host); - no! */
1234 /* we assume remote never gives us IPv6 addr with scope id */
1235 server.port = target.port;
1236 free(lsa);
1237 goto resolve_lsa;
1238 } /* else: lsa stays the same: we use proxy */
1239 }
1240 goto establish_session;
1241 }
1242 }
1243// if (status >= 300)
1244// bb_error_msg_and_die("bad redirection (no Location: header from server)");
1245
1246 /* For HTTP, data is pumped over the same connection */
1247 dfp = sfp;
1248 } else {
1249 /*
1250 * FTP session
1251 */
1252 sfp = prepare_ftp_session(&dfp, &target, lsa);
1253 }
1254
1255 free(lsa);
1256
1257 if (!(option_mask32 & WGET_OPT_SPIDER)) {
1258 if (G.output_fd < 0)
1259 G.output_fd = xopen(G.fname_out, G.o_flags);
1260 retrieve_file_data(dfp);
1261 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
1262 xclose(G.output_fd);
1263 G.output_fd = -1;
1264 }
1265 }
1266
1267 if (dfp != sfp) {
1268 /* It's ftp. Close data connection properly */
1269 fclose(dfp);
1270 if (ftpcmd(NULL, NULL, sfp) != 226)
1271 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
1272 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
1273 }
1274 fclose(sfp);
1275
1276 free(server.allocated);
1277 free(target.allocated);
1278 free(server.user);
1279 free(target.user);
1280 free(fname_out_alloc);
1281 free(redirected_path);
1282}
1283
/*
 * wget applet entry point.
 *
 * Parses the command line with getopt32(), joins all --header values into
 * the single string G.extra_headers, chooses the flags used to open the
 * output file, and then calls download_one_url() once for each remaining
 * command-line argument.
 *
 * argc is unused; argv is the argument vector handed to getopt32().
 * Returns EXIT_SUCCESS once the URL loop has finished. (The download code
 * visible in this file reports failures through the *_and_die helpers
 * rather than a return value - presumably those exit; confirm in libbb.)
 */
1284int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
1285int wget_main(int argc UNUSED_PARAM, char **argv)
1286{
1287#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 /* Long option table: each entry pairs a long name with the short
  * option letter (or a private \xNN code) it stands for.
  */
1288 static const char wget_longopts[] ALIGN1 =
1289 /* name, has_arg, val */
1290 "continue\0" No_argument "c"
1291 "quiet\0" No_argument "q"
1292 "output-document\0" Required_argument "O"
1293 "directory-prefix\0" Required_argument "P"
1294 "proxy\0" Required_argument "Y"
1295 "user-agent\0" Required_argument "U"
1296IF_FEATURE_WGET_TIMEOUT(
1297 "timeout\0" Required_argument "T")
1298 /* Ignored: */
1299IF_DESKTOP( "tries\0" Required_argument "t")
1300 "header\0" Required_argument "\xff"
1301 "post-data\0" Required_argument "\xfe"
1302 "spider\0" No_argument "\xfd"
1303 /* Ignored (we always use PASV): */
1304IF_DESKTOP( "passive-ftp\0" No_argument "\xf0")
1305 /* Ignored (we don't do ssl) */
1306IF_DESKTOP( "no-check-certificate\0" No_argument "\xf0")
1307 /* Ignored (we don't support caching) */
1308IF_DESKTOP( "no-cache\0" No_argument "\xf0")
1309IF_DESKTOP( "no-verbose\0" No_argument "\xf0")
1310IF_DESKTOP( "no-clobber\0" No_argument "\xf0")
1311IF_DESKTOP( "no-host-directories\0" No_argument "\xf0")
1312IF_DESKTOP( "no-parent\0" No_argument "\xf0")
1313 ;
1314#endif
1315
1316#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 /* Receives every --header value from getopt32(); drained below */
1317 llist_t *headers_llist = NULL;
1318#endif
1319
1320 INIT_G();
1321
1322#if ENABLE_FEATURE_WGET_TIMEOUT
 /* Default timeout; -T stores its value over it via getopt32() below */
1323 G.timeout_seconds = 900;
1324 signal(SIGALRM, alarm_handler);
1325#endif
 /* Defaults for -Y and -U; getopt32() below overwrites them if given */
1326 G.proxy_flag = "on"; /* use proxies if env vars are set */
1327 G.user_agent = "Wget"; /* "User-Agent" header field */
1328
1329#if ENABLE_FEATURE_WGET_LONG_OPTIONS
1330 applet_long_options = wget_longopts;
1331#endif
1332 opt_complementary = "-1" /* at least one URL */
1333 IF_FEATURE_WGET_LONG_OPTIONS(":\xff::"); /* --header is a list */
 /* The pointer arguments line up with the argument-taking options in
  * option-string order (O, P, Y, U, T, t, n), followed by the long-only
  * --header and --post-data. Keep both lists in step when editing.
  */
1334 getopt32(argv, "cqO:P:Y:U:T:+"
1335 /*ignored:*/ "t:"
1336 /*ignored:*/ "n::"
1337 /* wget has exactly four -n<letter> opts, all of which we can ignore:
1338 * -nv --no-verbose: be moderately quiet (-q is full quiet)
1339 * -nc --no-clobber: abort if exists, neither download to FILE.n nor overwrite FILE
1340 * -nH --no-host-directories: wget -r http://host/ won't create host/
1341 * -np --no-parent
1342 * "n::" above says that we accept -n[ARG].
1343 * Specifying "n:" would be a bug: "-n ARG" would eat ARG!
1344 */
1345 , &G.fname_out, &G.dir_prefix,
1346 &G.proxy_flag, &G.user_agent,
1347 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
1348 NULL, /* -t RETRIES */
1349 NULL /* -n[ARG] */
1350 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
1351 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
1352 );
1353#if 0 /* option bits debug */
1354 if (option_mask32 & WGET_OPT_RETRIES) bb_error_msg("-t NUM");
1355 if (option_mask32 & WGET_OPT_nsomething) bb_error_msg("-nsomething");
1356 if (option_mask32 & WGET_OPT_HEADER) bb_error_msg("--header");
1357 if (option_mask32 & WGET_OPT_POST_DATA) bb_error_msg("--post-data");
1358 if (option_mask32 & WGET_OPT_SPIDER) bb_error_msg("--spider");
1359 exit(0);
1360#endif
 /* Step past the parsed options; what remains is the list of URLs */
1361 argv += optind;
1362
1363#if ENABLE_FEATURE_WGET_LONG_OPTIONS
 /* Flatten the --header list into one "HDR\r\nHDR\r\n..." string and
  * record in G.user_headers which entries of wget_user_headers the
  * user supplied.
  */
1364 if (headers_llist) {
1365 int size = 0;
1366 char *hdr;
1367 llist_t *ll = headers_llist;
 /* Pass 1: total length, counting 2 extra bytes per header for "\r\n" */
1368 while (ll) {
1369 size += strlen(ll->data) + 2;
1370 ll = ll->link;
1371 }
 /* +1 leaves room for the NUL written by the last sprintf() */
1372 G.extra_headers = hdr = xmalloc(size + 1);
 /* Pass 2: pop each header, append it at hdr, then classify it */
1373 while (headers_llist) {
1374 int bit;
1375 const char *words;
1376
 /* size is reused here: number of bytes just written at hdr */
1377 size = sprintf(hdr, "%s\r\n",
1378 (char*)llist_pop(&headers_llist));
1379 /* a bit like index_in_substrings but don't match full key */
 /* wget_user_headers is walked as a run of NUL-terminated strings
  * ending at an empty string; the N-th string owns bit (1 << N).
  * strstr(...) == hdr is a prefix test and is case-sensitive.
  * NOTE(review): HTTP field names are case-insensitive, so a
  * differently-cased --header may not set its bit - confirm
  * against the contents of wget_user_headers.
  */
1380 bit = 1;
1381 words = wget_user_headers;
1382 while (*words) {
1383 if (strstr(hdr, words) == hdr) {
1384 G.user_headers |= bit;
1385 break;
1386 }
1387 bit <<= 1;
1388 words += strlen(words) + 1;
1389 }
 /* Advance to where the next header will be appended */
1390 hdr += size;
1391 }
1392 }
1393#endif
1394
 /* No output fd yet. By default O_EXCL makes the later open fail
  * if the file already exists.
  */
1395 G.output_fd = -1;
1396 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1397 if (G.fname_out) { /* -O FILE ? */
1398 if (LONE_DASH(G.fname_out)) { /* -O - ? */
 /* Write to fd 1 (stdout) and drop the -c (continue) option bit */
1399 G.output_fd = 1;
1400 option_mask32 &= ~WGET_OPT_CONTINUE;
1401 }
1402 /* compat with wget: -O FILE can overwrite */
1403 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1404 }
1405
 /* Fetch each URL in command-line order */
1406 while (*argv)
1407 download_one_url(*argv++);
1408
 /* Non-negative here when it was set to stdout above or was left open
  * by the download code.
  */
1409 if (G.output_fd >= 0)
1410 xclose(G.output_fd);
1411
1412#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1413 free(G.extra_headers);
1414#endif
1415 FINI_G();
1416
1417 return EXIT_SUCCESS;
1418}
1419