blob: c540485ac91524fbeaff1a423be10907d81b56b2
1 | /* vi: set sw=4 ts=4: */ |
2 | /* |
3 | * Mini unzip implementation for busybox |
4 | * |
5 | * Copyright (C) 2004 by Ed Clark |
6 | * |
7 | * Loosely based on original busybox unzip applet by Laurence Anderson. |
8 | * All options and features should work in this version. |
9 | * |
10 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. |
11 | */ |
12 | /* For reference see |
13 | * http://www.pkware.com/company/standards/appnote/ |
14 | * http://www.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip |
15 | * |
16 | * TODO |
17 | * Zip64 + other methods |
18 | */ |
19 | |
20 | //config:config UNZIP |
21 | //config: bool "unzip" |
22 | //config: default y |
23 | //config: help |
24 | //config: unzip will list or extract files from a ZIP archive, |
25 | //config: commonly found on DOS/WIN systems. The default behavior |
26 | //config: (with no options) is to extract the archive into the |
27 | //config: current directory. Use the `-d' option to extract to a |
28 | //config: directory of your choice. |
29 | |
30 | //applet:IF_UNZIP(APPLET(unzip, BB_DIR_USR_BIN, BB_SUID_DROP)) |
31 | //kbuild:lib-$(CONFIG_UNZIP) += unzip.o |
32 | |
33 | //usage:#define unzip_trivial_usage |
34 | //usage: "[-lnopq] FILE[.zip] [FILE]... [-x FILE...] [-d DIR]" |
35 | //usage:#define unzip_full_usage "\n\n" |
36 | //usage: "Extract FILEs from ZIP archive\n" |
37 | //usage: "\n -l List contents (with -q for short form)" |
38 | //usage: "\n -n Never overwrite files (default: ask)" |
39 | //usage: "\n -o Overwrite" |
40 | //usage: "\n -p Print to stdout" |
41 | //usage: "\n -q Quiet" |
42 | //usage: "\n -x FILE Exclude FILEs" |
43 | //usage: "\n -d DIR Extract into DIR" |
44 | |
45 | #include "libbb.h" |
46 | #include "bb_archive.h" |
47 | |
/* Debug tracing switch: with the condition below changed to 1, dbg()
 * forwards its arguments to bb_error_msg(); as shipped it expands to
 * a no-op expression, so dbg() calls cost nothing.
 */
#if 0
# define dbg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg(...) ((void)0)
#endif
53 | |
/* Record signatures ('P' 'K' plus two type bytes).
 * The constants are spelled per host byte order so that they can be
 * compared directly with a 32-bit value read raw from the archive,
 * which is how unzip_main uses them.
 */
enum {
#if BB_BIG_ENDIAN
	ZIP_FILEHEADER_MAGIC = 0x504b0304, /* file header preceding each entry's data */
	ZIP_CDF_MAGIC = 0x504b0102, /* central directory's file header */
	ZIP_CDE_MAGIC = 0x504b0506, /* "end of central directory" record */
	ZIP_DD_MAGIC = 0x504b0708, /* data descriptor (follows a streamed entry) */
#else
	/* Same four signatures, byte-swapped for little-endian hosts */
	ZIP_FILEHEADER_MAGIC = 0x04034b50,
	ZIP_CDF_MAGIC = 0x02014b50,
	ZIP_CDE_MAGIC = 0x06054b50,
	ZIP_DD_MAGIC = 0x08074b50,
#endif
};
67 | |
/* Number of bytes unzip_main reads into zip_header_t.raw right after
 * the 4-byte ZIP_FILEHEADER_MAGIC signature. */
#define ZIP_HEADER_LEN 26

/* Fixed-size part of a per-entry file header.
 * .raw is the buffer the bytes are read into, .formatted overlays the
 * named fields on the same storage. The trailing comments give each
 * field's byte range within .raw.
 */
typedef union {
	uint8_t raw[ZIP_HEADER_LEN];
	struct {
		uint16_t version; /* 0-1 */
		uint16_t zip_flags; /* 2-3 */ /* tested in unzip_main: 0x0001 encrypted, 0x0008 streaming */
		uint16_t method; /* 4-5 */ /* 0 = stored, 8 = deflate; anything else is rejected */
		uint16_t modtime; /* 6-7 */ /* packed time, decoded by the listing code */
		uint16_t moddate; /* 8-9 */ /* packed date, year counted from 1980 */
		uint32_t crc32 PACKED; /* 10-13 */
		uint32_t cmpsize PACKED; /* 14-17 */
		uint32_t ucmpsize PACKED; /* 18-21 */
		uint16_t filename_len; /* 22-23 */
		uint16_t extra_len; /* 24-25 */
	} formatted PACKED;
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
85 | |
/* Compile-time layout check for zip_header_t: the array size below
 * evaluates to -1, which does not compile, unless the last field ends
 * exactly at byte ZIP_HEADER_LEN.
 *
 * Check the offset of the last element, not the length. This leniency
 * allows for poor packing, whereby the overall struct may be too long,
 * even though the elements are all in the right place.
 */
struct BUG_zip_header_must_be_26_bytes {
	char BUG_zip_header_must_be_26_bytes[
		offsetof(zip_header_t, formatted.extra_len) + 2
			== ZIP_HEADER_LEN ? 1 : -1];
};
95 | |
/* Pass the multi-byte fields of a zip_header_t through SWAP_LE16 /
 * SWAP_LE32, in place.
 * zip_flags is not converted here: the code that tests it compares
 * against SWAP_LE16() constants instead.
 * The argument is expanded many times, so it must be free of side effects.
 */
#define FIX_ENDIANNESS_ZIP(zip_header) do { \
	(zip_header).formatted.version = SWAP_LE16((zip_header).formatted.version ); \
	(zip_header).formatted.method = SWAP_LE16((zip_header).formatted.method ); \
	(zip_header).formatted.modtime = SWAP_LE16((zip_header).formatted.modtime ); \
	(zip_header).formatted.moddate = SWAP_LE16((zip_header).formatted.moddate ); \
	(zip_header).formatted.crc32 = SWAP_LE32((zip_header).formatted.crc32 ); \
	(zip_header).formatted.cmpsize = SWAP_LE32((zip_header).formatted.cmpsize ); \
	(zip_header).formatted.ucmpsize = SWAP_LE32((zip_header).formatted.ucmpsize ); \
	(zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \
	(zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \
} while (0)
107 | |
/* Number of bytes read_next_cdf reads into cdf_header_t.raw; the read
 * starts 4 bytes past the record offset, i.e. after the signature. */
#define CDF_HEADER_LEN 42

/* Fixed-size part of a central directory file header (one per entry).
 * .raw receives the bytes, .formatted names them. The trailing comments
 * give each field's byte range within .raw.
 */
typedef union {
	uint8_t raw[CDF_HEADER_LEN];
	struct {
		/* uint32_t signature; 50 4b 01 02 */
		uint16_t version_made_by; /* 0-1 */ /* high byte 3 is treated as "created on Unix" */
		uint16_t version_needed; /* 2-3 */
		uint16_t cdf_flags; /* 4-5 */
		uint16_t method; /* 6-7 */
		uint16_t mtime; /* 8-9 */
		uint16_t mdate; /* 10-11 */
		uint32_t crc32; /* 12-15 */
		uint32_t cmpsize; /* 16-19 */
		uint32_t ucmpsize; /* 20-23 */
		uint16_t file_name_length; /* 24-25 */
		uint16_t extra_field_length; /* 26-27 */
		uint16_t file_comment_length; /* 28-29 */
		uint16_t disk_number_start; /* 30-31 */
		uint16_t internal_file_attributes; /* 32-33 */
		uint32_t external_file_attributes PACKED; /* 34-37 */ /* upper 16 bits are used as the file mode for Unix-made archives */
		uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
	} formatted PACKED;
} cdf_header_t;
132 | |
/* Compile-time layout check for cdf_header_t: the array size becomes -1
 * (a compile error) unless the last field ends at byte CDF_HEADER_LEN. */
struct BUG_cdf_header_must_be_42_bytes {
	char BUG_cdf_header_must_be_42_bytes[
		offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4
			== CDF_HEADER_LEN ? 1 : -1];
};
138 | |
/* Pass selected fields of a cdf_header_t through SWAP_LE16 / SWAP_LE32,
 * in place: crc32, both sizes and the three length fields always;
 * version_made_by and external_file_attributes only inside IF_DESKTOP().
 * Fields not listed here are left exactly as read from the file.
 * The argument is expanded many times, so it must be free of side effects.
 */
#define FIX_ENDIANNESS_CDF(cdf_header) do { \
	(cdf_header).formatted.crc32 = SWAP_LE32((cdf_header).formatted.crc32 ); \
	(cdf_header).formatted.cmpsize = SWAP_LE32((cdf_header).formatted.cmpsize ); \
	(cdf_header).formatted.ucmpsize = SWAP_LE32((cdf_header).formatted.ucmpsize ); \
	(cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \
	(cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \
	(cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \
	IF_DESKTOP( \
	(cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \
	(cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \
	) \
} while (0)
151 | |
/* Number of bytes find_cdf_offset copies into cde_header_t.raw, taken
 * from just after the 4-byte "PK\5\6" signature. */
#define CDE_HEADER_LEN 16

/* Fixed-size part of the "end of central directory" (CDE) record.
 * The trailing comment-length field and the comment itself are not
 * part of this structure (see the commented-out members).
 */
typedef union {
	uint8_t raw[CDE_HEADER_LEN];
	struct {
		/* uint32_t signature; 50 4b 05 06 */
		uint16_t this_disk_no;
		uint16_t disk_with_cdf_no;
		uint16_t cdf_entries_on_this_disk;
		uint16_t cdf_entries_total;
		uint32_t cdf_size;
		uint32_t cdf_offset; /* where the central directory starts; the one field this file acts on */
		/* uint16_t file_comment_length; */
		/* .ZIP file comment (variable size) */
	} formatted PACKED;
} cde_header_t;
168 | |
/* Compile-time size check for cde_header_t: the array size becomes -1
 * (a compile error) unless the union is exactly CDE_HEADER_LEN bytes.
 * Unlike the two header checks above, this one tests sizeof directly. */
struct BUG_cde_header_must_be_16_bytes {
	char BUG_cde_header_must_be_16_bytes[
		sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1];
};
173 | |
/* Pass cdf_offset of a cde_header_t through SWAP_LE32, in place.
 * No other CDE field is converted; find_cdf_offset applies SWAP_LE32
 * to cdf_size by hand where it traces that value. */
#define FIX_ENDIANNESS_CDE(cde_header) do { \
	(cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \
} while (0)
177 | |
/* Descriptor number through which the archive is always accessed:
 * unzip_main duplicates stdin onto it, or moves the opened file to it,
 * so the helpers in this file need no descriptor parameter. */
enum { zip_fd = 3 };
179 | |
180 | |
181 | #if ENABLE_DESKTOP |
182 | |
/* How many bytes at the tail of the archive find_cdf_offset() scans
 * for a CDE record.
 *
 * Seen in the wild:
 * Self-extracting PRO2K3XP_32.exe contains 19078464 byte zip archive,
 * where CDE was nearly 48 kbytes before EOF.
 * (Surprisingly, it also apparently has *another* CDE structure
 * closer to the end, with bogus cdf_offset).
 * To make extraction work, bumped PEEK_FROM_END from 16k to 64k.
 */
#define PEEK_FROM_END (64*1024)

/* This value means that we failed to find CDF.
 * It is what find_cdf_offset() returns when no usable CDE was seen. */
#define BAD_CDF_OFFSET ((uint32_t)0xffffffff)
194 | |
195 | /* NB: does not preserve file position! */ |
196 | static uint32_t find_cdf_offset(void) |
197 | { |
198 | cde_header_t cde_header; |
199 | unsigned char *p; |
200 | off_t end; |
201 | unsigned char *buf = xzalloc(PEEK_FROM_END); |
202 | uint32_t found; |
203 | |
204 | end = xlseek(zip_fd, 0, SEEK_END); |
205 | end -= PEEK_FROM_END; |
206 | if (end < 0) |
207 | end = 0; |
208 | dbg("Looking for cdf_offset starting from 0x%"OFF_FMT"x", end); |
209 | xlseek(zip_fd, end, SEEK_SET); |
210 | full_read(zip_fd, buf, PEEK_FROM_END); |
211 | |
212 | found = BAD_CDF_OFFSET; |
213 | p = buf; |
214 | while (p <= buf + PEEK_FROM_END - CDE_HEADER_LEN - 4) { |
215 | if (*p != 'P') { |
216 | p++; |
217 | continue; |
218 | } |
219 | if (*++p != 'K') |
220 | continue; |
221 | if (*++p != 5) |
222 | continue; |
223 | if (*++p != 6) |
224 | continue; |
225 | /* we found CDE! */ |
226 | memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN); |
227 | FIX_ENDIANNESS_CDE(cde_header); |
228 | /* |
229 | * I've seen .ZIP files with seemingly valid CDEs |
230 | * where cdf_offset points past EOF - ?? |
231 | * This check ignores such CDEs: |
232 | */ |
233 | if (cde_header.formatted.cdf_offset < end + (p - buf)) { |
234 | found = cde_header.formatted.cdf_offset; |
235 | dbg("Possible cdf_offset:0x%x at 0x%"OFF_FMT"x", |
236 | (unsigned)found, end + (p-3 - buf)); |
237 | dbg(" cdf_offset+cdf_size:0x%x", |
238 | (unsigned)(found + SWAP_LE32(cde_header.formatted.cdf_size))); |
239 | /* |
240 | * We do not "break" here because only the last CDE is valid. |
241 | * I've seen a .zip archive which contained a .zip file, |
242 | * uncompressed, and taking the first CDE was using |
243 | * the CDE inside that file! |
244 | */ |
245 | } |
246 | } |
247 | free(buf); |
248 | dbg("Found cdf_offset:0x%x", (unsigned)found); |
249 | return found; |
250 | }; |
251 | |
252 | static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr) |
253 | { |
254 | off_t org; |
255 | |
256 | org = xlseek(zip_fd, 0, SEEK_CUR); |
257 | |
258 | if (!cdf_offset) |
259 | cdf_offset = find_cdf_offset(); |
260 | |
261 | if (cdf_offset != BAD_CDF_OFFSET) { |
262 | dbg("Reading CDF at 0x%x", (unsigned)cdf_offset); |
263 | xlseek(zip_fd, cdf_offset + 4, SEEK_SET); |
264 | xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN); |
265 | FIX_ENDIANNESS_CDF(*cdf_ptr); |
266 | dbg(" file_name_length:%u extra_field_length:%u file_comment_length:%u", |
267 | (unsigned)cdf_ptr->formatted.file_name_length, |
268 | (unsigned)cdf_ptr->formatted.extra_field_length, |
269 | (unsigned)cdf_ptr->formatted.file_comment_length |
270 | ); |
271 | cdf_offset += 4 + CDF_HEADER_LEN |
272 | + cdf_ptr->formatted.file_name_length |
273 | + cdf_ptr->formatted.extra_field_length |
274 | + cdf_ptr->formatted.file_comment_length; |
275 | } |
276 | |
277 | dbg("Returning file position to 0x%"OFF_FMT"x", org); |
278 | xlseek(zip_fd, org, SEEK_SET); |
279 | return cdf_offset; |
280 | }; |
281 | #endif |
282 | |
283 | static void unzip_skip(off_t skip) |
284 | { |
285 | if (skip != 0) |
286 | if (lseek(zip_fd, skip, SEEK_CUR) == (off_t)-1) |
287 | bb_copyfd_exact_size(zip_fd, -1, skip); |
288 | } |
289 | |
290 | static void unzip_create_leading_dirs(const char *fn) |
291 | { |
292 | /* Create all leading directories */ |
293 | char *name = xstrdup(fn); |
294 | if (bb_make_directory(dirname(name), 0777, FILEUTILS_RECUR)) { |
295 | xfunc_die(); /* bb_make_directory is noisy */ |
296 | } |
297 | free(name); |
298 | } |
299 | |
300 | static void unzip_extract(zip_header_t *zip_header, int dst_fd) |
301 | { |
302 | if (zip_header->formatted.method == 0) { |
303 | /* Method 0 - stored (not compressed) */ |
304 | off_t size = zip_header->formatted.ucmpsize; |
305 | if (size) |
306 | bb_copyfd_exact_size(zip_fd, dst_fd, size); |
307 | } else { |
308 | /* Method 8 - inflate */ |
309 | transformer_state_t xstate; |
310 | init_transformer_state(&xstate); |
311 | xstate.bytes_in = zip_header->formatted.cmpsize; |
312 | xstate.src_fd = zip_fd; |
313 | xstate.dst_fd = dst_fd; |
314 | if (inflate_unzip(&xstate) < 0) |
315 | bb_error_msg_and_die("inflate error"); |
316 | /* Validate decompression - crc */ |
317 | if (zip_header->formatted.crc32 != (xstate.crc32 ^ 0xffffffffL)) { |
318 | bb_error_msg_and_die("crc error"); |
319 | } |
320 | /* Validate decompression - size */ |
321 | if (zip_header->formatted.ucmpsize != xstate.bytes_out) { |
322 | /* Don't die. Who knows, maybe len calculation |
323 | * was botched somewhere. After all, crc matched! */ |
324 | bb_error_msg("bad length"); |
325 | } |
326 | } |
327 | } |
328 | |
/*
 * Read one line of at most 79 characters from stdin into buf80.
 * buf80 must have room for 80 bytes. Pending output is flushed with
 * fflush_all() before reading. If fgets() returns NULL the process
 * is terminated with an error message.
 */
static void my_fgets80(char *buf80)
{
	char *line;

	fflush_all();
	line = fgets(buf80, 80, stdin);
	if (line == NULL)
		bb_perror_msg_and_die("can't read standard input");
}
336 | |
337 | int unzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
338 | int unzip_main(int argc, char **argv) |
339 | { |
340 | enum { O_PROMPT, O_NEVER, O_ALWAYS }; |
341 | |
342 | zip_header_t zip_header; |
343 | smallint quiet = 0; |
344 | IF_NOT_DESKTOP(const) smallint verbose = 0; |
345 | smallint listing = 0; |
346 | smallint overwrite = O_PROMPT; |
347 | smallint x_opt_seen; |
348 | #if ENABLE_DESKTOP |
349 | uint32_t cdf_offset; |
350 | #endif |
351 | unsigned long total_usize; |
352 | unsigned long total_size; |
353 | unsigned total_entries; |
354 | int dst_fd = -1; |
355 | char *src_fn = NULL; |
356 | char *dst_fn = NULL; |
357 | llist_t *zaccept = NULL; |
358 | llist_t *zreject = NULL; |
359 | char *base_dir = NULL; |
360 | int i, opt; |
361 | char key_buf[80]; /* must match size used by my_fgets80 */ |
362 | struct stat stat_buf; |
363 | |
364 | /* -q, -l and -v: UnZip 5.52 of 28 February 2005, by Info-ZIP: |
365 | * |
366 | * # /usr/bin/unzip -qq -v decompress_unlzma.i.zip |
367 | * 204372 Defl:N 35278 83% 09-06-09 14:23 0d056252 decompress_unlzma.i |
368 | * # /usr/bin/unzip -q -v decompress_unlzma.i.zip |
369 | * Length Method Size Ratio Date Time CRC-32 Name |
370 | * -------- ------ ------- ----- ---- ---- ------ ---- |
371 | * 204372 Defl:N 35278 83% 09-06-09 14:23 0d056252 decompress_unlzma.i |
372 | * -------- ------- --- ------- |
373 | * 204372 35278 83% 1 file |
374 | * # /usr/bin/unzip -v decompress_unlzma.i.zip |
375 | * Archive: decompress_unlzma.i.zip |
376 | * Length Method Size Ratio Date Time CRC-32 Name |
377 | * -------- ------ ------- ----- ---- ---- ------ ---- |
378 | * 204372 Defl:N 35278 83% 09-06-09 14:23 0d056252 decompress_unlzma.i |
379 | * -------- ------- --- ------- |
380 | * 204372 35278 83% 1 file |
381 | * # unzip -v decompress_unlzma.i.zip |
382 | * Archive: decompress_unlzma.i.zip |
383 | * Length Date Time Name |
384 | * -------- ---- ---- ---- |
385 | * 204372 09-06-09 14:23 decompress_unlzma.i |
386 | * -------- ------- |
387 | * 204372 1 files |
388 | * # /usr/bin/unzip -l -qq decompress_unlzma.i.zip |
389 | * 204372 09-06-09 14:23 decompress_unlzma.i |
390 | * # /usr/bin/unzip -l -q decompress_unlzma.i.zip |
391 | * Length Date Time Name |
392 | * -------- ---- ---- ---- |
393 | * 204372 09-06-09 14:23 decompress_unlzma.i |
394 | * -------- ------- |
395 | * 204372 1 file |
396 | * # /usr/bin/unzip -l decompress_unlzma.i.zip |
397 | * Archive: decompress_unlzma.i.zip |
398 | * Length Date Time Name |
399 | * -------- ---- ---- ---- |
400 | * 204372 09-06-09 14:23 decompress_unlzma.i |
401 | * -------- ------- |
402 | * 204372 1 file |
403 | */ |
404 | |
405 | x_opt_seen = 0; |
406 | /* '-' makes getopt return 1 for non-options */ |
407 | while ((opt = getopt(argc, argv, "-d:lnopqxv")) != -1) { |
408 | switch (opt) { |
409 | case 'd': /* Extract to base directory */ |
410 | base_dir = optarg; |
411 | break; |
412 | |
413 | case 'l': /* List */ |
414 | listing = 1; |
415 | break; |
416 | |
417 | case 'n': /* Never overwrite existing files */ |
418 | overwrite = O_NEVER; |
419 | break; |
420 | |
421 | case 'o': /* Always overwrite existing files */ |
422 | overwrite = O_ALWAYS; |
423 | break; |
424 | |
425 | case 'p': /* Extract files to stdout and fall through to set verbosity */ |
426 | dst_fd = STDOUT_FILENO; |
427 | |
428 | case 'q': /* Be quiet */ |
429 | quiet++; |
430 | break; |
431 | |
432 | case 'v': /* Verbose list */ |
433 | IF_DESKTOP(verbose++;) |
434 | listing = 1; |
435 | break; |
436 | |
437 | case 'x': |
438 | x_opt_seen = 1; |
439 | break; |
440 | |
441 | case 1: |
442 | if (!src_fn) { |
443 | /* The zip file */ |
444 | /* +5: space for ".zip" and NUL */ |
445 | src_fn = xmalloc(strlen(optarg) + 5); |
446 | strcpy(src_fn, optarg); |
447 | } else if (!x_opt_seen) { |
448 | /* Include files */ |
449 | llist_add_to(&zaccept, optarg); |
450 | } else { |
451 | /* Exclude files */ |
452 | llist_add_to(&zreject, optarg); |
453 | } |
454 | break; |
455 | |
456 | default: |
457 | bb_show_usage(); |
458 | } |
459 | } |
460 | |
461 | #ifndef __GLIBC__ |
462 | /* |
463 | * This code is needed for non-GNU getopt |
464 | * which doesn't understand "-" in option string. |
465 | * The -x option won't work properly in this case: |
466 | * "unzip a.zip q -x w e" will be interpreted as |
467 | * "unzip a.zip q w e -x" = "unzip a.zip q w e" |
468 | */ |
469 | argv += optind; |
470 | if (argv[0]) { |
471 | /* +5: space for ".zip" and NUL */ |
472 | src_fn = xmalloc(strlen(argv[0]) + 5); |
473 | strcpy(src_fn, argv[0]); |
474 | while (*++argv) |
475 | llist_add_to(&zaccept, *argv); |
476 | } |
477 | #endif |
478 | |
479 | if (!src_fn) { |
480 | bb_show_usage(); |
481 | } |
482 | |
483 | /* Open input file */ |
484 | if (LONE_DASH(src_fn)) { |
485 | xdup2(STDIN_FILENO, zip_fd); |
486 | /* Cannot use prompt mode since zip data is arriving on STDIN */ |
487 | if (overwrite == O_PROMPT) |
488 | overwrite = O_NEVER; |
489 | } else { |
490 | static const char extn[][5] ALIGN1 = { ".zip", ".ZIP" }; |
491 | char *ext = src_fn + strlen(src_fn); |
492 | int src_fd; |
493 | |
494 | i = 0; |
495 | for (;;) { |
496 | src_fd = open(src_fn, O_RDONLY); |
497 | if (src_fd >= 0) |
498 | break; |
499 | if (++i > 2) { |
500 | *ext = '\0'; |
501 | bb_error_msg_and_die("can't open %s[.zip]", src_fn); |
502 | } |
503 | strcpy(ext, extn[i - 1]); |
504 | } |
505 | xmove_fd(src_fd, zip_fd); |
506 | } |
507 | |
508 | /* Change dir if necessary */ |
509 | if (base_dir) |
510 | xchdir(base_dir); |
511 | |
512 | if (quiet <= 1) { /* not -qq */ |
513 | if (quiet == 0) |
514 | printf("Archive: %s\n", src_fn); |
515 | if (listing) { |
516 | puts(verbose ? |
517 | " Length Method Size Cmpr Date Time CRC-32 Name\n" |
518 | "-------- ------ ------- ---- ---------- ----- -------- ----" |
519 | : |
520 | " Length Date Time Name\n" |
521 | "--------- ---------- ----- ----" |
522 | ); |
523 | } |
524 | } |
525 | |
526 | /* Example of an archive with one 0-byte long file named 'z' |
527 | * created by Zip 2.31 on Unix: |
528 | * 0000 [50 4b]03 04 0a 00 00 00 00 00 42 1a b8 3c 00 00 |PK........B..<..| |
529 | * sig........ vneed flags compr mtime mdate crc32> |
530 | * 0010 00 00 00 00 00 00 00 00 00 00 01 00 15 00 7a 55 |..............zU| |
531 | * >..... csize...... usize...... fnlen exlen fn ex> |
532 | * 0020 54 09 00 03 cc d3 f9 4b cc d3 f9 4b 55 78 04 00 |T......K...KUx..| |
533 | * >tra_field...................................... |
534 | * 0030 00 00 00 00[50 4b]01 02 17 03 0a 00 00 00 00 00 |....PK..........| |
535 | * ........... sig........ vmade vneed flags compr |
536 | * 0040 42 1a b8 3c 00 00 00 00 00 00 00 00 00 00 00 00 |B..<............| |
537 | * mtime mdate crc32...... csize...... usize...... |
538 | * 0050 01 00 0d 00 00 00 00 00 00 00 00 00 a4 81 00 00 |................| |
539 | * fnlen exlen clen. dnum. iattr eattr...... relofs> (eattr = rw-r--r--) |
540 | * 0060 00 00 7a 55 54 05 00 03 cc d3 f9 4b 55 78 00 00 |..zUT......KUx..| |
541 | * >..... fn extra_field........................... |
542 | * 0070 [50 4b]05 06 00 00 00 00 01 00 01 00 3c 00 00 00 |PK..........<...| |
543 | * 0080 34 00 00 00 00 00 |4.....| |
544 | */ |
545 | total_usize = 0; |
546 | total_size = 0; |
547 | total_entries = 0; |
548 | #if ENABLE_DESKTOP |
549 | cdf_offset = 0; |
550 | #endif |
551 | while (1) { |
552 | uint32_t magic; |
553 | mode_t dir_mode = 0777; |
554 | #if ENABLE_DESKTOP |
555 | mode_t file_mode = 0666; |
556 | #endif |
557 | |
558 | /* Check magic number */ |
559 | xread(zip_fd, &magic, 4); |
560 | /* Central directory? It's at the end, so exit */ |
561 | if (magic == ZIP_CDF_MAGIC) { |
562 | dbg("got ZIP_CDF_MAGIC"); |
563 | break; |
564 | } |
565 | #if ENABLE_DESKTOP |
566 | /* Data descriptor? It was a streaming file, go on */ |
567 | if (magic == ZIP_DD_MAGIC) { |
568 | dbg("got ZIP_DD_MAGIC"); |
569 | /* skip over duplicate crc32, cmpsize and ucmpsize */ |
570 | unzip_skip(3 * 4); |
571 | continue; |
572 | } |
573 | #endif |
574 | if (magic != ZIP_FILEHEADER_MAGIC) |
575 | bb_error_msg_and_die("invalid zip magic %08X", (int)magic); |
576 | dbg("got ZIP_FILEHEADER_MAGIC"); |
577 | |
578 | /* Read the file header */ |
579 | xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); |
580 | FIX_ENDIANNESS_ZIP(zip_header); |
581 | if ((zip_header.formatted.method != 0) && (zip_header.formatted.method != 8)) { |
582 | bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method); |
583 | } |
584 | #if !ENABLE_DESKTOP |
585 | if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) { |
586 | bb_error_msg_and_die("zip flags 1 and 8 are not supported"); |
587 | } |
588 | #else |
589 | if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) { |
590 | /* 0x0001 - encrypted */ |
591 | bb_error_msg_and_die("zip flag 1 (encryption) is not supported"); |
592 | } |
593 | |
594 | if (cdf_offset != BAD_CDF_OFFSET) { |
595 | cdf_header_t cdf_header; |
596 | cdf_offset = read_next_cdf(cdf_offset, &cdf_header); |
597 | /* |
598 | * Note: cdf_offset can become BAD_CDF_OFFSET after the above call. |
599 | */ |
600 | if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) { |
601 | /* 0x0008 - streaming. [u]cmpsize can be reliably gotten |
602 | * only from Central Directory. See unzip_doc.txt |
603 | */ |
604 | zip_header.formatted.crc32 = cdf_header.formatted.crc32; |
605 | zip_header.formatted.cmpsize = cdf_header.formatted.cmpsize; |
606 | zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize; |
607 | } |
608 | if ((cdf_header.formatted.version_made_by >> 8) == 3) { |
609 | /* This archive is created on Unix */ |
610 | dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16); |
611 | } |
612 | } |
613 | if (cdf_offset == BAD_CDF_OFFSET |
614 | && (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) |
615 | ) { |
616 | /* If it's a streaming zip, we _require_ CDF */ |
617 | bb_error_msg_and_die("can't find file table"); |
618 | } |
619 | #endif |
620 | dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x", |
621 | (unsigned)zip_header.formatted.cmpsize, |
622 | (unsigned)zip_header.formatted.extra_len, |
623 | (unsigned)zip_header.formatted.ucmpsize |
624 | ); |
625 | |
626 | /* Read filename */ |
627 | free(dst_fn); |
628 | dst_fn = xzalloc(zip_header.formatted.filename_len + 1); |
629 | xread(zip_fd, dst_fn, zip_header.formatted.filename_len); |
630 | |
631 | /* Skip extra header bytes */ |
632 | unzip_skip(zip_header.formatted.extra_len); |
633 | |
634 | /* Guard against "/abspath", "/../" and similar attacks */ |
635 | overlapping_strcpy(dst_fn, strip_unsafe_prefix(dst_fn)); |
636 | |
637 | /* Filter zip entries */ |
638 | if (find_list_entry(zreject, dst_fn) |
639 | || (zaccept && !find_list_entry(zaccept, dst_fn)) |
640 | ) { /* Skip entry */ |
641 | i = 'n'; |
642 | } else { |
643 | if (listing) { |
644 | /* List entry */ |
645 | char dtbuf[sizeof("mm-dd-yyyy hh:mm")]; |
646 | sprintf(dtbuf, "%02u-%02u-%04u %02u:%02u", |
647 | (zip_header.formatted.moddate >> 5) & 0xf, // mm: 0x01e0 |
648 | (zip_header.formatted.moddate) & 0x1f, // dd: 0x001f |
649 | (zip_header.formatted.moddate >> 9) + 1980, // yy: 0xfe00 |
650 | (zip_header.formatted.modtime >> 11), // hh: 0xf800 |
651 | (zip_header.formatted.modtime >> 5) & 0x3f // mm: 0x07e0 |
652 | // seconds/2 are not shown, encoded in ----------- 0x001f |
653 | ); |
654 | if (!verbose) { |
655 | // " Length Date Time Name\n" |
656 | // "--------- ---------- ----- ----" |
657 | printf( "%9u " "%s " "%s\n", |
658 | (unsigned)zip_header.formatted.ucmpsize, |
659 | dtbuf, |
660 | dst_fn); |
661 | } else { |
662 | unsigned long percents = zip_header.formatted.ucmpsize - zip_header.formatted.cmpsize; |
663 | if ((int32_t)percents < 0) |
664 | percents = 0; /* happens if ucmpsize < cmpsize */ |
665 | percents = percents * 100; |
666 | if (zip_header.formatted.ucmpsize) |
667 | percents /= zip_header.formatted.ucmpsize; |
668 | // " Length Method Size Cmpr Date Time CRC-32 Name\n" |
669 | // "-------- ------ ------- ---- ---------- ----- -------- ----" |
670 | printf( "%8u %s" "%9u%4u%% " "%s " "%08x " "%s\n", |
671 | (unsigned)zip_header.formatted.ucmpsize, |
672 | zip_header.formatted.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */ |
673 | /* TODO: show other methods? |
674 | * 1 - Shrunk |
675 | * 2 - Reduced with compression factor 1 |
676 | * 3 - Reduced with compression factor 2 |
677 | * 4 - Reduced with compression factor 3 |
678 | * 5 - Reduced with compression factor 4 |
679 | * 6 - Imploded |
680 | * 7 - Reserved for Tokenizing compression algorithm |
681 | * 9 - Deflate64 |
682 | * 10 - PKWARE Data Compression Library Imploding |
683 | * 11 - Reserved by PKWARE |
684 | * 12 - BZIP2 |
685 | */ |
686 | (unsigned)zip_header.formatted.cmpsize, |
687 | (unsigned)percents, |
688 | dtbuf, |
689 | zip_header.formatted.crc32, |
690 | dst_fn); |
691 | total_size += zip_header.formatted.cmpsize; |
692 | } |
693 | total_usize += zip_header.formatted.ucmpsize; |
694 | i = 'n'; |
695 | } else if (dst_fd == STDOUT_FILENO) { |
696 | /* Extracting to STDOUT */ |
697 | i = -1; |
698 | } else if (last_char_is(dst_fn, '/')) { |
699 | /* Extract directory */ |
700 | if (stat(dst_fn, &stat_buf) == -1) { |
701 | if (errno != ENOENT) { |
702 | bb_perror_msg_and_die("can't stat '%s'", dst_fn); |
703 | } |
704 | if (!quiet) { |
705 | printf(" creating: %s\n", dst_fn); |
706 | } |
707 | unzip_create_leading_dirs(dst_fn); |
708 | if (bb_make_directory(dst_fn, dir_mode, FILEUTILS_IGNORE_CHMOD_ERR)) { |
709 | xfunc_die(); |
710 | } |
711 | } else { |
712 | if (!S_ISDIR(stat_buf.st_mode)) { |
713 | bb_error_msg_and_die("'%s' exists but is not a %s", |
714 | dst_fn, "directory"); |
715 | } |
716 | } |
717 | i = 'n'; |
718 | } else { |
719 | /* Extract file */ |
720 | check_file: |
721 | if (stat(dst_fn, &stat_buf) == -1) { |
722 | /* File does not exist */ |
723 | if (errno != ENOENT) { |
724 | bb_perror_msg_and_die("can't stat '%s'", dst_fn); |
725 | } |
726 | i = 'y'; |
727 | } else { |
728 | /* File already exists */ |
729 | if (overwrite == O_NEVER) { |
730 | i = 'n'; |
731 | } else if (S_ISREG(stat_buf.st_mode)) { |
732 | /* File is regular file */ |
733 | if (overwrite == O_ALWAYS) { |
734 | i = 'y'; |
735 | } else { |
736 | printf("replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ", dst_fn); |
737 | my_fgets80(key_buf); |
738 | i = key_buf[0]; |
739 | } |
740 | } else { |
741 | /* File is not regular file */ |
742 | bb_error_msg_and_die("'%s' exists but is not a %s", |
743 | dst_fn, "regular file"); |
744 | } |
745 | } |
746 | } |
747 | } |
748 | |
749 | switch (i) { |
750 | case 'A': |
751 | overwrite = O_ALWAYS; |
752 | case 'y': /* Open file and fall into unzip */ |
753 | unzip_create_leading_dirs(dst_fn); |
754 | #if ENABLE_DESKTOP |
755 | dst_fd = xopen3(dst_fn, O_WRONLY | O_CREAT | O_TRUNC, file_mode); |
756 | #else |
757 | dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC); |
758 | #endif |
759 | case -1: /* Unzip */ |
760 | if (!quiet) { |
761 | printf(" inflating: %s\n", dst_fn); |
762 | } |
763 | unzip_extract(&zip_header, dst_fd); |
764 | if (dst_fd != STDOUT_FILENO) { |
765 | /* closing STDOUT is potentially bad for future business */ |
766 | close(dst_fd); |
767 | } |
768 | break; |
769 | |
770 | case 'N': |
771 | overwrite = O_NEVER; |
772 | case 'n': |
773 | /* Skip entry data */ |
774 | unzip_skip(zip_header.formatted.cmpsize); |
775 | break; |
776 | |
777 | case 'r': |
778 | /* Prompt for new name */ |
779 | printf("new name: "); |
780 | my_fgets80(key_buf); |
781 | free(dst_fn); |
782 | dst_fn = xstrdup(key_buf); |
783 | chomp(dst_fn); |
784 | goto check_file; |
785 | |
786 | default: |
787 | printf("error: invalid response [%c]\n", (char)i); |
788 | goto check_file; |
789 | } |
790 | |
791 | total_entries++; |
792 | } |
793 | |
794 | if (listing && quiet <= 1) { |
795 | if (!verbose) { |
796 | // " Length Date Time Name\n" |
797 | // "--------- ---------- ----- ----" |
798 | printf( " --------%21s" "-------\n" |
799 | "%9lu%21s" "%u files\n", |
800 | "", |
801 | total_usize, "", total_entries); |
802 | } else { |
803 | unsigned long percents = total_usize - total_size; |
804 | if ((long)percents < 0) |
805 | percents = 0; /* happens if usize < size */ |
806 | percents = percents * 100; |
807 | if (total_usize) |
808 | percents /= total_usize; |
809 | // " Length Method Size Cmpr Date Time CRC-32 Name\n" |
810 | // "-------- ------ ------- ---- ---------- ----- -------- ----" |
811 | printf( "-------- ------- ----%28s" "----\n" |
812 | "%8lu" "%17lu%4u%%%28s" "%u files\n", |
813 | "", |
814 | total_usize, total_size, (unsigned)percents, "", |
815 | total_entries); |
816 | } |
817 | } |
818 | |
819 | return 0; |
820 | } |
821 |