blob: e97bc0130b595b244c78a555411e13881b1a61ee
1 | /* vi: set sw=4 ts=4: */ |
2 | /* |
3 | * reformime: parse MIME-encoded message |
4 | * |
5 | * Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com> |
6 | * |
7 | * Licensed under GPLv2, see file LICENSE in this source tree. |
8 | */ |
9 | //config:config REFORMIME |
10 | //config: bool "reformime" |
11 | //config: default y |
12 | //config: help |
13 | //config: Parse MIME-formatted messages. |
14 | //config: |
15 | //config:config FEATURE_REFORMIME_COMPAT |
16 | //config: bool "Accept and ignore options other than -x and -X" |
17 | //config: default y |
18 | //config: depends on REFORMIME |
19 | //config: help |
20 | //config: Accept (for compatibility only) and ignore options |
21 | //config: other than -x and -X. |
22 | |
23 | //applet:IF_REFORMIME(APPLET(reformime, BB_DIR_BIN, BB_SUID_DROP)) |
24 | |
25 | //kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o |
26 | |
27 | #include "libbb.h" |
28 | #include "mail.h" |
29 | |
/*
 * Debug tracing switch.
 * Set REFORMIME_DEBUG to 1 to route dbg_error_msg() through bb_error_msg();
 * with 0 the macro expands to a no-op and its arguments are never evaluated.
 */
#define REFORMIME_DEBUG 0

#if REFORMIME_DEBUG
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
35 | |
/*
 * Look up "key" (case-insensitively) in a NULL-terminated token array and
 * return the token that immediately follows the first match.
 *
 * Returns defvalue when the key is absent, or when the first match is the
 * last token before the NULL terminator (i.e. it has no value after it).
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	const char *const *cur;

	for (cur = string_array; *cur != NULL; cur++) {
		if (strcasecmp(*cur, key) != 0)
			continue;
		/* Only the first match counts; its successor may be the terminator */
		if (cur[1] != NULL)
			return cur[1];
		return defvalue;
	}
	return defvalue;
}
48 | |
/*
 * Same lookup as find_token(), but a missing key is fatal:
 * reports "not found: '<key>'" via bb_error_msg_and_die().
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *value = find_token(string_array, key, NULL);

	if (value == NULL)
		bb_error_msg_and_die("not found: '%s'", key);
	return value;
}
56 | |
/*
 * Option bits as stored in "opts" by getopt32().
 * Bit positions follow the order of the letters in the option string
 * passed to getopt32() in reformime_main().
 */
enum {
	/* The two options that are actually implemented */
	OPT_x = (1 << 0),	/* -x PREFIX: extract sections to files */
	OPT_X = (1 << 1),	/* -X PROG ARGS: pipe sections to a program */
#if ENABLE_FEATURE_REFORMIME_COMPAT
	/* Compatibility-only options: accepted, never tested by parse() */
	OPT_d = (1 << 2),
	OPT_e = (1 << 3),
	OPT_i = (1 << 4),
	OPT_s = (1 << 5),
	OPT_r = (1 << 6),
	OPT_c = (1 << 7),
	OPT_m = (1 << 8),
	OPT_h = (1 << 9),
	OPT_o = (1 << 10),
	OPT_O = (1 << 11),
#endif
};
73 | |
74 | static int parse(const char *boundary, char **argv) |
75 | { |
76 | int boundary_len = strlen(boundary); |
77 | char uniq[sizeof("%%llu.%u") + sizeof(int)*3]; |
78 | |
79 | dbg_error_msg("BOUNDARY[%s]", boundary); |
80 | |
81 | // prepare unique string pattern |
82 | sprintf(uniq, "%%llu.%u", (unsigned)getpid()); |
83 | dbg_error_msg("UNIQ[%s]", uniq); |
84 | |
85 | while (1) { |
86 | char *header; |
87 | const char *tokens[32]; /* 32 is enough */ |
88 | const char *type; |
89 | |
90 | /* Read the header (everything up to two \n) */ |
91 | { |
92 | unsigned header_idx = 0; |
93 | int last_ch = 0; |
94 | header = NULL; |
95 | while (1) { |
96 | int ch = fgetc(stdin); |
97 | if (ch == '\r') /* Support both line endings */ |
98 | continue; |
99 | if (ch == EOF) |
100 | break; |
101 | if (ch == '\n' && last_ch == ch) |
102 | break; |
103 | if (!(header_idx & 0xff)) |
104 | header = xrealloc(header, header_idx + 0x101); |
105 | header[header_idx++] = last_ch = ch; |
106 | } |
107 | if (!header) { |
108 | dbg_error_msg("EOF"); |
109 | break; |
110 | } |
111 | header[header_idx] = '\0'; |
112 | dbg_error_msg("H:'%s'", p); |
113 | } |
114 | |
115 | /* Split to tokens */ |
116 | { |
117 | char *s, *p; |
118 | unsigned ntokens; |
119 | const char *delims = ";=\" \t\n"; |
120 | |
121 | /* Skip to last Content-Type: */ |
122 | s = p = header; |
123 | while ((p = strchr(p, '\n')) != NULL) { |
124 | p++; |
125 | if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0) |
126 | s = p; |
127 | } |
128 | dbg_error_msg("L:'%s'", p); |
129 | ntokens = 0; |
130 | s = strtok(s, delims); |
131 | while (s) { |
132 | tokens[ntokens] = s; |
133 | if (ntokens < ARRAY_SIZE(tokens) - 1) |
134 | ntokens++; |
135 | dbg_error_msg("L[%d]='%s'", ntokens, s); |
136 | s = strtok(NULL, delims); |
137 | } |
138 | tokens[ntokens] = NULL; |
139 | dbg_error_msg("EMPTYLINE, ntokens:%d", ntokens); |
140 | if (ntokens == 0) |
141 | break; |
142 | } |
143 | |
144 | /* Is it multipart? */ |
145 | type = find_token(tokens, "Content-Type:", "text/plain"); |
146 | dbg_error_msg("TYPE:'%s'", type); |
147 | if (0 == strncasecmp(type, "multipart/", 10)) { |
148 | /* Yes, recurse */ |
149 | if (strcasecmp(type + 10, "mixed") != 0) |
150 | bb_error_msg_and_die("no support of content type '%s'", type); |
151 | parse(xfind_token(tokens, "boundary"), argv); |
152 | } else { |
153 | /* No, process one non-multipart section */ |
154 | char *end; |
155 | pid_t pid = pid; |
156 | FILE *fp; |
157 | |
158 | const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET); |
159 | const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit"); |
160 | |
161 | /* Compose target filename */ |
162 | char *filename = (char *)find_token(tokens, "filename", NULL); |
163 | if (!filename) |
164 | filename = xasprintf(uniq, monotonic_us()); |
165 | else |
166 | filename = bb_get_last_path_component_strip(xstrdup(filename)); |
167 | |
168 | if (opts & OPT_X) { |
169 | int fd[2]; |
170 | |
171 | /* start external helper */ |
172 | xpipe(fd); |
173 | pid = vfork(); |
174 | if (0 == pid) { |
175 | /* child reads from fd[0] */ |
176 | close(fd[1]); |
177 | xmove_fd(fd[0], STDIN_FILENO); |
178 | xsetenv("CONTENT_TYPE", type); |
179 | xsetenv("CHARSET", charset); |
180 | xsetenv("ENCODING", encoding); |
181 | xsetenv("FILENAME", filename); |
182 | BB_EXECVP_or_die(argv); |
183 | } |
184 | /* parent will write to fd[1] */ |
185 | close(fd[0]); |
186 | fp = xfdopen_for_write(fd[1]); |
187 | signal(SIGPIPE, SIG_IGN); |
188 | } else { |
189 | /* write to file */ |
190 | char *fname = xasprintf("%s%s", *argv, filename); |
191 | fp = xfopen_for_write(fname); |
192 | free(fname); |
193 | } |
194 | free(filename); |
195 | |
196 | /* write to fp */ |
197 | end = NULL; |
198 | if (0 == strcasecmp(encoding, "base64")) { |
199 | read_base64(stdin, fp, '-'); |
200 | } else |
201 | if (0 != strcasecmp(encoding, "7bit") |
202 | && 0 != strcasecmp(encoding, "8bit") |
203 | ) { |
204 | /* quoted-printable, binary, user-defined are unsupported so far */ |
205 | bb_error_msg_and_die("encoding '%s' not supported", encoding); |
206 | } else { |
207 | /* plain 7bit or 8bit */ |
208 | while ((end = xmalloc_fgets(stdin)) != NULL) { |
209 | if ('-' == end[0] |
210 | && '-' == end[1] |
211 | && strncmp(end + 2, boundary, boundary_len) == 0 |
212 | ) { |
213 | break; |
214 | } |
215 | fputs(end, fp); |
216 | } |
217 | } |
218 | fclose(fp); |
219 | |
220 | /* Wait for child */ |
221 | if (opts & OPT_X) { |
222 | int rc; |
223 | signal(SIGPIPE, SIG_DFL); |
224 | rc = (wait4pid(pid) & 0xff); |
225 | if (rc != 0) |
226 | return rc + 20; |
227 | } |
228 | |
229 | /* Multipart ended? */ |
230 | if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) { |
231 | dbg_error_msg("FINISHED MPART:'%s'", end); |
232 | break; |
233 | } |
234 | dbg_error_msg("FINISHED:'%s'", end); |
235 | free(end); |
236 | } /* end of "handle one non-multipart block" */ |
237 | |
238 | free(header); |
239 | } /* while (1) */ |
240 | |
241 | dbg_error_msg("ENDPARSE[%s]", boundary); |
242 | |
243 | return EXIT_SUCCESS; |
244 | } |
245 | |
246 | //usage:#define reformime_trivial_usage |
247 | //usage: "[OPTIONS]" |
248 | //usage:#define reformime_full_usage "\n\n" |
249 | //usage: "Parse MIME-encoded message on stdin\n" |
250 | //usage: "\n -x PREFIX Extract content of MIME sections to files" |
251 | //usage: "\n -X PROG ARGS Filter content of MIME sections through PROG" |
252 | //usage: "\n Must be the last option" |
253 | //usage: "\n" |
254 | //usage: "\nOther options are silently ignored" |
255 | |
256 | /* |
257 | Usage: reformime [options] |
258 | -d - parse a delivery status notification. |
259 | -e - extract contents of MIME section. |
260 | -x - extract MIME section to a file. |
261 | -X - pipe MIME section to a program. |
262 | -i - show MIME info. |
263 | -s n.n.n.n - specify MIME section. |
264 | -r - rewrite message, filling in missing MIME headers. |
265 | -r7 - also convert 8bit/raw encoding to quoted-printable, if possible. |
266 | -r8 - also convert quoted-printable encoding to 8bit, if possible. |
267 | -c charset - default charset for rewriting, -o, and -O. |
268 | -m [file] [file]... - create a MIME message digest. |
269 | -h "header" - decode RFC 2047-encoded header. |
270 | -o "header" - encode unstructured header using RFC 2047. |
271 | -O "header" - encode address list header using RFC 2047. |
272 | */ |
273 | |
274 | int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
275 | int reformime_main(int argc UNUSED_PARAM, char **argv) |
276 | { |
277 | const char *opt_prefix = ""; |
278 | |
279 | INIT_G(); |
280 | |
281 | // parse options |
282 | // N.B. only -x and -X are supported so far |
283 | opt_complementary = "x--X:X--x"; |
284 | opts = getopt32(argv, |
285 | "x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:*h:o:O:"), |
286 | &opt_prefix |
287 | IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL) |
288 | ); |
289 | argv += optind; |
290 | |
291 | return parse("", (opts & OPT_X) ? argv : (char **)&opt_prefix); |
292 | } |
293 |