platform/external/busybox.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* vi: set sw=4 ts=4: */
2 /*
3  * wc implementation for busybox
4  *
5  * Copyright (C) 2003  Manuel Novoa III  <mjn3@codepoet.org>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 /* BB_AUDIT SUSv3 compliant. */
11 /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
12
13 /* Mar 16, 2003      Manuel Novoa III   (mjn3@codepoet.org)
14  *
15  * Rewritten to fix a number of problems and do some size optimizations.
16  * Problems in the previous busybox implementation (besides bloat) included:
17  *  1) broken 'wc -c' optimization (read note below)
18  *  2) broken handling of '-' args
19  *  3) no checking of ferror on EOF returns
20  *  4) isprint() wasn't considered when word counting.
21  *
22  * NOTES:
23  *
24  * The previous busybox wc attempted an optimization using stat for the
25  * case of counting chars only.  I omitted that because it was broken.
26  * It didn't take into account the possibility of input coming from a
27  * pipe, or input from a file with file pointer not at the beginning.
28  *
29  * To implement such a speed optimization correctly, not only do you
30  * need the size, but also the file position.  Note also that the
31  * file position may be past the end of file.  Consider the example
32  * (adapted from example in gnu wc.c)
33  *
34  *      echo hello > /tmp/testfile &&
35  *      (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
36  *
37  * for which 'wc -c' should output '0'.
38  */
39 #include "libbb.h"
40 #include "unicode.h"
41
/* Fallback character classifiers, used only when ENABLE_LOCALE_SUPPORT is off.
 * isprint(c) is true exactly for 0x20..0x7e: the subtraction is done in
 * unsigned arithmetic, so values below 0x20 wrap to large numbers and fail
 * the single <= comparison.  isspace(c) is true only for the space character.
 */
42 #if !ENABLE_LOCALE_SUPPORT
43 # undef isprint
44 # undef isspace
45 # define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20))
46 # define isspace(c) ((c) == ' ')
47 #endif
48
/* Counter type and its matching printf conversion.
 * ENABLE_FEATURE_WC_LARGE selects unsigned long long ("llu"); otherwise the
 * counters are plain unsigned ("u").  COUNT_FMT is pasted into the format
 * strings by string-literal concatenation, so the two must always agree.
 */
49 #if ENABLE_FEATURE_WC_LARGE
50 # define COUNT_T unsigned long long
51 # define COUNT_FMT "llu"
52 #else
53 # define COUNT_T unsigned
54 # define COUNT_FMT "u"
55 #endif
56
57 /* We support -m even when UNICODE_SUPPORT is off,
58  * we just don't advertise it in help text,
59  * since it is the same as -c in this case.
60  */
61
62 //usage:#define wc_trivial_usage
63 //usage:       "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
64 //usage:
65 //usage:#define wc_full_usage "\n\n"
66 //usage:       "Count lines, words, and bytes for each FILE (or stdin)\n"
67 //usage:     "\n	-c	Count bytes"
68 //usage:	IF_UNICODE_SUPPORT(
69 //usage:     "\n	-m	Count characters"
70 //usage:	)
71 //usage:     "\n	-l	Count newlines"
72 //usage:     "\n	-w	Count words"
73 //usage:     "\n	-L	Print longest line length"
74 //usage:
75 //usage:#define wc_example_usage
76 //usage:       "$ wc /etc/passwd\n"
77 //usage:       "     31      46    1365 /etc/passwd\n"
78
79 /* Order is important if we want to be compatible with
80  * column order in "wc -cmlwL" output:
81  */
/* Indexes into the counts[] and totals[] arrays of wc_main().
 * Each value doubles as a bit position: wc_main() tests
 * print_type & (1 << index) to decide whether that column is printed.
 */
82 enum {
83 	WC_LINES    = 0, /* -l */
84 	WC_WORDS    = 1, /* -w */
85 	WC_UNICHARS = 2, /* -m */
86 	WC_BYTES    = 3, /* -c */
87 	WC_LENGTH   = 4, /* -L */
88 	NUM_WCS     = 5, /* number of counters (array size), not a counter */
89 };
90
/*
 * wc applet entry point.
 *
 * For every FILE argument (or standard input when there is none) this counts
 * newlines, words, characters, bytes and the longest line length, prints the
 * columns selected by the -l/-w/-m/-c/-L options, and prints a "total" line
 * when more than one file argument was processed.
 *
 * argc is unused; argv is walked as a NULL-terminated vector.
 * The function ends by calling fflush_stdout_and_exit(status); status becomes
 * EXIT_FAILURE if any file could not be opened or hit a read error.
 * NOTE(review): there is no return statement, so fflush_stdout_and_exit()
 * presumably never returns - confirm against libbb.
 */
91 int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
92 int wc_main(int argc UNUSED_PARAM, char **argv)
93 {
94 	const char *arg;
	/* "+ 1" steps over the leading space: the first column has no separator */
95 	const char *start_fmt = " %9"COUNT_FMT + 1;
96 	const char *fname_fmt = " %s\n";
	/* pcounts is what the OUTPUT code prints: counts[] per file, totals[] at the end */
97 	COUNT_T *pcounts;
98 	COUNT_T counts[NUM_WCS];
99 	COUNT_T totals[NUM_WCS];
100 	int num_files;
101 	smallint status = EXIT_SUCCESS;
102 	unsigned print_type;
103
104 	init_unicode();
105
	/* print_type is used below as a bit mask indexed by the WC_* values.
	 * NOTE(review): this relies on getopt32() assigning bit i to the i-th
	 * letter of "lwmcL" - confirm against libbb. */
106 	print_type = getopt32(argv, "lwmcL");
107
	/* No option given: default to lines, words and bytes */
108 	if (print_type == 0) {
109 		print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES);
110 	}
111
112 	argv += optind;
113 	if (!argv[0]) {
		/* No file operands: step back one slot and store the standard-input
		 * name there so the loop below runs once; no name is printed. */
114 		*--argv = (char *) bb_msg_standard_input;
115 		fname_fmt = "\n";
116 	}
117 	if (!argv[1]) { /* zero or one filename? */
		/* print_type is nonzero here, so this is a power-of-two test.
		 * A lone number is printed without the 9-column padding. */
118 		if (!((print_type-1) & print_type)) /* exactly one option? */
119 			start_fmt = "%"COUNT_FMT;
120 	}
121
122 	memset(totals, 0, sizeof(totals));
123
124 	pcounts = counts;
125
126 	num_files = 0;
127 	while ((arg = *argv++) != NULL) {
128 		FILE *fp;
129 		const char *s;
130 		unsigned u;
		/* Current column within the line, used for -L.
		 * NOTE(review): this is unsigned while the counters are COUNT_T;
		 * with the wider COUNT_T a single extremely long line could wrap
		 * this value - confirm whether that matters. */
131 		unsigned linepos;
132 		smallint in_word;
133
		/* Counted before the open attempt, so files that fail to open
		 * still count towards printing a "total" line. */
134 		++num_files;
135 		fp = fopen_or_warn_stdin(arg);
136 		if (!fp) {
137 			status = EXIT_FAILURE;
138 			continue;
139 		}
140
141 		memset(counts, 0, sizeof(counts));
142 		linepos = 0;
143 		in_word = 0;
144
145 		while (1) {
146 			int c;
147 			/* Our -w doesn't match GNU wc exactly... oh well */
148
149 			c = getc(fp);
150 			if (c == EOF) {
151 				if (ferror(fp)) {
152 					bb_simple_perror_msg(arg);
153 					status = EXIT_FAILURE;
154 				}
				/* Jump into the line-terminator branch below: it records
				 * the line length and the pending word.  c is still EOF,
				 * so no newline is counted and the loop then breaks. */
155 				goto DO_EOF;  /* Treat an EOF as '\r'. */
156 			}
157
158 			/* Cater for -c and -m */
159 			++counts[WC_BYTES];
			/* Bytes of the form 10xxxxxx are skipped for -m when unicode is on */
160 			if (unicode_status != UNICODE_ON /* every byte is a new char */
161 			 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
162 			) {
163 				++counts[WC_UNICHARS];
164 			}
165
			/* Printable byte: advances the column.  A non-space one starts or
			 * continues a word; a space falls through to the word-end code. */
166 			if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
167 				++linepos;
168 				if (!isspace(c)) {
169 					in_word = 1;
170 					continue;
171 				}
			/* Unsigned range check: true exactly for c in 9..13 */
172 			} else if ((unsigned)(c - 9) <= 4) {
173 				/* \t  9
174 				 * \n 10
175 				 * \v 11
176 				 * \f 12
177 				 * \r 13
178 				 */
179 				if (c == '\t') {
					/* Advance to the next multiple of 8 */
180 					linepos = (linepos | 7) + 1;
181 				} else {  /* '\n', '\r', '\f', or '\v' */
182  DO_EOF:
183 					if (linepos > counts[WC_LENGTH]) {
184 						counts[WC_LENGTH] = linepos;
185 					}
186 					if (c == '\n') {
187 						++counts[WC_LINES];
188 					}
					/* '\v' records the length but keeps the current column */
189 					if (c != '\v') {
190 						linepos = 0;
191 					}
192 				}
193 			} else {
				/* Any other byte neither moves the column nor ends a word */
194 				continue;
195 			}
196
			/* Reached only after a separator or EOF: close any open word */
197 			counts[WC_WORDS] += in_word;
198 			in_word = 0;
199 			if (c == EOF) {
200 				break;
201 			}
202 		}
203
204 		fclose_if_not_stdin(fp);
205
		/* The total for -L is a maximum, not a sum.  Raise the total to the
		 * new maximum, then subtract this file's value so that the generic
		 * "totals[u] += pcounts[u]" in the output loop restores the maximum. */
206 		if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
207 			totals[WC_LENGTH] = counts[WC_LENGTH];
208 		}
209 		totals[WC_LENGTH] -= counts[WC_LENGTH];
210
		/* Also entered by the goto after the loop, with pcounts == totals */
211  OUTPUT:
212 		/* coreutils wc tries hard to print pretty columns
213 		 * (saves results for all files, finds max col len etc...)
214 		 * we won't try that hard, it will bloat us too much */
215 		s = start_fmt;
216 		u = 0;
217 		do {
218 			if (print_type & (1 << u)) {
219 				printf(s, pcounts[u]);
220 				s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
221 			}
222 			totals[u] += pcounts[u];
223 		} while (++u < NUM_WCS);
224 		printf(fname_fmt, arg);
225 	}
226
227 	/* If more than one file was processed, we want the totals.  To save some
228 	 * space, we set the pcounts ptr to the totals array.  This has the side
229 	 * effect of trashing the totals array after outputting it, but that's
230 	 * irrelevant since we no longer need it. */
231 	if (num_files > 1) {
232 		num_files = 0;  /* Make sure we don't get here again. */
233 		arg = "total";
234 		pcounts = totals;
		/* argv is one past the NULL terminator; step back so the loop
		 * condition reads NULL again and exits after the totals line. */
235 		--argv;
236 		goto OUTPUT;
237 	}
238
239 	fflush_stdout_and_exit(status);
240 }
241
1	/* vi: set sw=4 ts=4: */
2	/*
3	* wc implementation for busybox
4	*
5	* Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org>
6	*
7	* Licensed under GPLv2 or later, see file LICENSE in this source tree.
8	*/
9
10	/* BB_AUDIT SUSv3 compliant. */
11	/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
12
13	/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
14	*
15	* Rewritten to fix a number of problems and do some size optimizations.
16	* Problems in the previous busybox implementation (besides bloat) included:
17	* 1) broken 'wc -c' optimization (read note below)
18	* 2) broken handling of '-' args
19	* 3) no checking of ferror on EOF returns
20	* 4) isprint() wasn't considered when word counting.
21	*
22	* NOTES:
23	*
24	* The previous busybox wc attempted an optimization using stat for the
25	* case of counting chars only. I omitted that because it was broken.
26	* It didn't take into account the possibility of input coming from a
27	* pipe, or input from a file with file pointer not at the beginning.
28	*
29	* To implement such a speed optimization correctly, not only do you
30	* need the size, but also the file position. Note also that the
31	* file position may be past the end of file. Consider the example
32	* (adapted from example in gnu wc.c)
33	*
34	* echo hello > /tmp/testfile &&
35	* (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile
36	*
37	* for which 'wc -c' should output '0'.
38	*/
39	#include "libbb.h"
40	#include "unicode.h"
41
/* Fallback character classifiers, used only when ENABLE_LOCALE_SUPPORT is off.
 * isprint(c) is true exactly for 0x20..0x7e: the subtraction is done in
 * unsigned arithmetic, so values below 0x20 wrap to large numbers and fail
 * the single <= comparison.  isspace(c) is true only for the space character.
 */
42	#if !ENABLE_LOCALE_SUPPORT
43	# undef isprint
44	# undef isspace
45	# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20))
46	# define isspace(c) ((c) == ' ')
47	#endif
48
/* Counter type and its matching printf conversion.
 * ENABLE_FEATURE_WC_LARGE selects unsigned long long ("llu"); otherwise the
 * counters are plain unsigned ("u").  COUNT_FMT is pasted into the format
 * strings by string-literal concatenation, so the two must always agree.
 */
49	#if ENABLE_FEATURE_WC_LARGE
50	# define COUNT_T unsigned long long
51	# define COUNT_FMT "llu"
52	#else
53	# define COUNT_T unsigned
54	# define COUNT_FMT "u"
55	#endif
56
57	/* We support -m even when UNICODE_SUPPORT is off,
58	* we just don't advertise it in help text,
59	* since it is the same as -c in this case.
60	*/
61
62	//usage:#define wc_trivial_usage
63	//usage:       "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
64	//usage:
65	//usage:#define wc_full_usage "\n\n"
66	//usage:       "Count lines, words, and bytes for each FILE (or stdin)\n"
67	//usage:     "\n	-c	Count bytes"
68	//usage:	IF_UNICODE_SUPPORT(
69	//usage:     "\n	-m	Count characters"
70	//usage:	)
71	//usage:     "\n	-l	Count newlines"
72	//usage:     "\n	-w	Count words"
73	//usage:     "\n	-L	Print longest line length"
74	//usage:
75	//usage:#define wc_example_usage
76	//usage:       "$ wc /etc/passwd\n"
77	//usage:       "     31      46    1365 /etc/passwd\n"
78
79	/* Order is important if we want to be compatible with
80	* column order in "wc -cmlwL" output:
81	*/
/* Indexes into the counts[] and totals[] arrays of wc_main().
 * Each value doubles as a bit position: wc_main() tests
 * print_type & (1 << index) to decide whether that column is printed.
 */
82	enum {
83	WC_LINES = 0, /* -l */
84	WC_WORDS = 1, /* -w */
85	WC_UNICHARS = 2, /* -m */
86	WC_BYTES = 3, /* -c */
87	WC_LENGTH = 4, /* -L */
88	NUM_WCS = 5, /* number of counters (array size), not a counter */
89	};
90
/*
 * wc applet entry point.
 *
 * For every FILE argument (or standard input when there is none) this counts
 * newlines, words, characters, bytes and the longest line length, prints the
 * columns selected by the -l/-w/-m/-c/-L options, and prints a "total" line
 * when more than one file argument was processed.
 *
 * argc is unused; argv is walked as a NULL-terminated vector.
 * The function ends by calling fflush_stdout_and_exit(status); status becomes
 * EXIT_FAILURE if any file could not be opened or hit a read error.
 * NOTE(review): there is no return statement, so fflush_stdout_and_exit()
 * presumably never returns - confirm against libbb.
 */
91	int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
92	int wc_main(int argc UNUSED_PARAM, char **argv)
93	{
94		const char *arg;
	/* "+ 1" steps over the leading space: the first column has no separator */
95		const char *start_fmt = " %9"COUNT_FMT + 1;
96		const char *fname_fmt = " %s\n";
	/* pcounts is what the OUTPUT code prints: counts[] per file, totals[] at the end */
97		COUNT_T *pcounts;
98		COUNT_T counts[NUM_WCS];
99		COUNT_T totals[NUM_WCS];
100		int num_files;
101		smallint status = EXIT_SUCCESS;
102		unsigned print_type;
103
104		init_unicode();
105
	/* print_type is used below as a bit mask indexed by the WC_* values.
	 * NOTE(review): this relies on getopt32() assigning bit i to the i-th
	 * letter of "lwmcL" - confirm against libbb. */
106		print_type = getopt32(argv, "lwmcL");
107
	/* No option given: default to lines, words and bytes */
108		if (print_type == 0) {
109			print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_BYTES);
110		}
111
112		argv += optind;
113		if (!argv[0]) {
		/* No file operands: step back one slot and store the standard-input
		 * name there so the loop below runs once; no name is printed. */
114			*--argv = (char *) bb_msg_standard_input;
115			fname_fmt = "\n";
116		}
117		if (!argv[1]) { /* zero or one filename? */
		/* print_type is nonzero here, so this is a power-of-two test.
		 * A lone number is printed without the 9-column padding. */
118			if (!((print_type-1) & print_type)) /* exactly one option? */
119				start_fmt = "%"COUNT_FMT;
120		}
121
122		memset(totals, 0, sizeof(totals));
123
124		pcounts = counts;
125
126		num_files = 0;
127		while ((arg = *argv++) != NULL) {
128			FILE *fp;
129			const char *s;
130			unsigned u;
		/* Current column within the line, used for -L.
		 * NOTE(review): this is unsigned while the counters are COUNT_T;
		 * with the wider COUNT_T a single extremely long line could wrap
		 * this value - confirm whether that matters. */
131			unsigned linepos;
132			smallint in_word;
133
		/* Counted before the open attempt, so files that fail to open
		 * still count towards printing a "total" line. */
134			++num_files;
135			fp = fopen_or_warn_stdin(arg);
136			if (!fp) {
137				status = EXIT_FAILURE;
138				continue;
139			}
140
141			memset(counts, 0, sizeof(counts));
142			linepos = 0;
143			in_word = 0;
144
145			while (1) {
146				int c;
147				/* Our -w doesn't match GNU wc exactly... oh well */
148
149				c = getc(fp);
150				if (c == EOF) {
151					if (ferror(fp)) {
152						bb_simple_perror_msg(arg);
153						status = EXIT_FAILURE;
154					}
				/* Jump into the line-terminator branch below: it records
				 * the line length and the pending word.  c is still EOF,
				 * so no newline is counted and the loop then breaks. */
155					goto DO_EOF;  /* Treat an EOF as '\r'. */
156				}
157
158				/* Cater for -c and -m */
159				++counts[WC_BYTES];
			/* Bytes of the form 10xxxxxx are skipped for -m when unicode is on */
160				if (unicode_status != UNICODE_ON /* every byte is a new char */
161				 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
162				) {
163					++counts[WC_UNICHARS];
164				}
165
			/* Printable byte: advances the column.  A non-space one starts or
			 * continues a word; a space falls through to the word-end code. */
166				if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
167					++linepos;
168					if (!isspace(c)) {
169						in_word = 1;
170						continue;
171					}
			/* Unsigned range check: true exactly for c in 9..13 */
172				} else if ((unsigned)(c - 9) <= 4) {
173					/* \t  9
174					 * \n 10
175					 * \v 11
176					 * \f 12
177					 * \r 13
178					 */
179					if (c == '\t') {
					/* Advance to the next multiple of 8 */
180						linepos = (linepos | 7) + 1;
181					} else {  /* '\n', '\r', '\f', or '\v' */
182	 DO_EOF:
183						if (linepos > counts[WC_LENGTH]) {
184							counts[WC_LENGTH] = linepos;
185						}
186						if (c == '\n') {
187							++counts[WC_LINES];
188						}
					/* '\v' records the length but keeps the current column */
189						if (c != '\v') {
190							linepos = 0;
191						}
192					}
193				} else {
				/* Any other byte neither moves the column nor ends a word */
194					continue;
195				}
196
			/* Reached only after a separator or EOF: close any open word */
197				counts[WC_WORDS] += in_word;
198				in_word = 0;
199				if (c == EOF) {
200					break;
201				}
202			}
203
204			fclose_if_not_stdin(fp);
205
		/* The total for -L is a maximum, not a sum.  Raise the total to the
		 * new maximum, then subtract this file's value so that the generic
		 * "totals[u] += pcounts[u]" in the output loop restores the maximum. */
206			if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
207				totals[WC_LENGTH] = counts[WC_LENGTH];
208			}
209			totals[WC_LENGTH] -= counts[WC_LENGTH];
210
		/* Also entered by the goto after the loop, with pcounts == totals */
211	 OUTPUT:
212			/* coreutils wc tries hard to print pretty columns
213			 * (saves results for all files, finds max col len etc...)
214			 * we won't try that hard, it will bloat us too much */
215			s = start_fmt;
216			u = 0;
217			do {
218				if (print_type & (1 << u)) {
219					printf(s, pcounts[u]);
220					s = " %9"COUNT_FMT; /* Ok... restore the leading space. */
221				}
222				totals[u] += pcounts[u];
223			} while (++u < NUM_WCS);
224			printf(fname_fmt, arg);
225		}
226
227		/* If more than one file was processed, we want the totals.  To save some
228		 * space, we set the pcounts ptr to the totals array.  This has the side
229		 * effect of trashing the totals array after outputting it, but that's
230		 * irrelevant since we no longer need it. */
231		if (num_files > 1) {
232			num_files = 0;  /* Make sure we don't get here again. */
233			arg = "total";
234			pcounts = totals;
		/* argv is one past the NULL terminator; step back so the loop
		 * condition reads NULL again and exits after the totals line. */
235			--argv;
236			goto OUTPUT;
237		}
238
239		fflush_stdout_and_exit(status);
240	}
241