platform/external/ntfs-3g.git - Unnamed repository; edit this file 'description' to name the repository.

1 /**
2  * unistr.c - Unicode string handling. Originated from the Linux-NTFS project.
3  *
4  * Copyright (c) 2000-2004 Anton Altaparmakov
5  * Copyright (c) 2002-2009 Szabolcs Szakacsits
6  * Copyright (c) 2008-2009 Jean-Pierre Andre
7  * Copyright (c) 2008      Bernhard Kaindl
8  *
9  * This program/include file is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as published
11  * by the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program/include file is distributed in the hope that it will be
15  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program (in the main directory of the NTFS-3G
21  * distribution in the file COPYING); if not, write to the Free Software
22  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28
29 #ifdef HAVE_STDIO_H
30 #include <stdio.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef HAVE_WCHAR_H
36 #include <wchar.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #endif
41 #ifdef HAVE_ERRNO_H
42 #include <errno.h>
43 #endif
44 #ifdef HAVE_LOCALE_H
45 #include <locale.h>
46 #endif
47
48 #if defined(__APPLE__) || defined(__DARWIN__)
49 #ifdef ENABLE_NFCONV
50 #include <CoreFoundation/CoreFoundation.h>
51 #endif /* ENABLE_NFCONV */
52 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
53
54 #include "compat.h"
55 #include "attrib.h"
56 #include "types.h"
57 #include "unistr.h"
58 #include "debug.h"
59 #include "logging.h"
60 #include "misc.h"
61
/*
 * NOREVBOM selects how the UTF-8 <-> UTF-16 helpers in this file treat
 * U+FFFE and U+FFFF: a non-zero value makes utf16_to_utf8_size() and
 * utf8_to_unicode() reject them as invalid, zero (the setting below) accepts
 * them like any other character in the U+E000..U+FFFF range.
 */
62 #define NOREVBOM 0  /* JPA rejecting U+FFFE and U+FFFF, open to debate */
63
64 /*
65  * IMPORTANT
66  * =========
67  *
68  * All these routines assume that the Unicode characters are in little endian
69  * encoding inside the strings!!!
70  */
71
/*
 * When non-zero, ntfs_ucstombs() and ntfs_mbstoucs() bypass the locale based
 * conversion and use the built-in UTF-16LE <-> UTF-8 converters instead.
 */
72 static int use_utf8 = 1; /* use UTF-8 encoding for file names */
73
74 #if defined(__APPLE__) || defined(__DARWIN__)
75 #ifdef ENABLE_NFCONV
76 /**
77  * This variable controls whether or not automatic normalization form conversion
78  * should be performed when translating NTFS unicode file names to UTF-8.
79  * Defaults to on, but can be controlled from the outside using the function
80  *   int ntfs_macosx_normalize_filenames(int normalize);
81  */
82 static int nfconvert_utf8 = 1;
83 #endif /* ENABLE_NFCONV */
84 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
85
86 /*
87  * This is used by the name collation functions to quickly determine what
88  * characters are (in)valid.
89  */
/*
 * NOTE(review): the table is compiled out by the "#if 0" below and nothing in
 * the visible part of this file refers to it.  It has 0x40 entries; they are
 * presumably per-character flag masks for the characters 0x00..0x3f - confirm
 * the meaning of the individual bits before re-enabling it.
 */
90 #if 0
91 static const u8 legal_ansi_char_array[0x40] = {
92 	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
93 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
94
95 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
96 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
97
98 	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
99 	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
100
101 	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
102 	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
103 };
104 #endif
105
106 /**
107  * ntfs_names_are_equal - compare two Unicode names for equality
108  * @s1:			name to compare to @s2
109  * @s1_len:		length in Unicode characters of @s1
110  * @s2:			name to compare to @s1
111  * @s2_len:		length in Unicode characters of @s2
112  * @ic:			ignore case bool
113  * @upcase:		upcase table (only if @ic == IGNORE_CASE)
114  * @upcase_size:	length in Unicode characters of @upcase (if present)
115  *
116  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
117  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
118  * the @upcase table is used to perform a case insensitive comparison.
119  */
120 BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
121 		const ntfschar *s2, size_t s2_len,
122 		const IGNORE_CASE_BOOL ic,
123 		const ntfschar *upcase, const u32 upcase_size)
124 {
125 	if (s1_len != s2_len)
126 		return FALSE;
127 	if (!s1_len)
128 		return TRUE;
129 	if (ic == CASE_SENSITIVE)
130 		return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;
131 	return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:
132 								       TRUE;
133 }
134
135 /*
136  * ntfs_names_full_collate() fully collate two Unicode names
137  *
138  * @name1:	first Unicode name to compare
139  * @name1_len:	length of first Unicode name to compare
140  * @name2:	second Unicode name to compare
141  * @name2_len:	length of second Unicode name to compare
142  * @ic:		either CASE_SENSITIVE or IGNORE_CASE
143  * @upcase:	upcase table (ignored if @ic is CASE_SENSITIVE)
144  * @upcase_len:	upcase table size (ignored if @ic is CASE_SENSITIVE)
145  *
146  *  -1 if the first name collates before the second one,
147  *   0 if the names match,
148  *   1 if the second name collates before the first one, or
149  *
150  */
151 int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
152 		const ntfschar *name2, const u32 name2_len,
153 		const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
154 		const u32 upcase_len)
155 {
156 	u32 cnt;
157 	u16 c1, c2;
158 	u16 u1, u2;
159
160 #ifdef DEBUG
161 	if (!name1 || !name2 || (ic && (!upcase || !upcase_len))) {
162 		ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");
163 		exit(1);
164 	}
165 #endif
166 	cnt = min(name1_len, name2_len);
167 	if (cnt > 0) {
168 		if (ic == CASE_SENSITIVE) {
169 			do {
170 				c1 = le16_to_cpu(*name1);
171 				name1++;
172 				c2 = le16_to_cpu(*name2);
173 				name2++;
174 			} while (--cnt && (c1 == c2));
175 			u1 = c1;
176 			u2 = c2;
177 			if (u1 < upcase_len)
178 				u1 = le16_to_cpu(upcase[u1]);
179 			if (u2 < upcase_len)
180 				u2 = le16_to_cpu(upcase[u2]);
181 			if ((u1 == u2) && cnt)
182 				do {
183 					u1 = le16_to_cpu(*name1);
184 					name1++;
185 					u2 = le16_to_cpu(*name2);
186 					name2++;
187 					if (u1 < upcase_len)
188 						u1 = le16_to_cpu(upcase[u1]);
189 					if (u2 < upcase_len)
190 						u2 = le16_to_cpu(upcase[u2]);
191 				} while ((u1 == u2) && --cnt);
192 			if (u1 < u2)
193 				return -1;
194 			if (u1 > u2)
195 				return 1;
196 			if (name1_len < name2_len)
197 				return -1;
198 			if (name1_len > name2_len)
199 				return 1;
200 			if (c1 < c2)
201 				return -1;
202 			if (c1 > c2)
203 				return 1;
204 		} else {
205 			do {
206 				u1 = c1 = le16_to_cpu(*name1);
207 				name1++;
208 				u2 = c2 = le16_to_cpu(*name2);
209 				name2++;
210 				if (u1 < upcase_len)
211 					u1 = le16_to_cpu(upcase[u1]);
212 				if (u2 < upcase_len)
213 					u2 = le16_to_cpu(upcase[u2]);
214 			} while ((u1 == u2) && --cnt);
215 			if (u1 < u2)
216 				return -1;
217 			if (u1 > u2)
218 				return 1;
219 			if (name1_len < name2_len)
220 				return -1;
221 			if (name1_len > name2_len)
222 				return 1;
223 		}
224 	} else {
225 		if (name1_len < name2_len)
226 			return -1;
227 		if (name1_len > name2_len)
228 			return 1;
229 	}
230 	return 0;
231 }
232
233 /**
234  * ntfs_ucsncmp - compare two little endian Unicode strings
235  * @s1:		first string
236  * @s2:		second string
237  * @n:		maximum unicode characters to compare
238  *
239  * Compare the first @n characters of the Unicode strings @s1 and @s2,
240  * The strings in little endian format and appropriate le16_to_cpu()
241  * conversion is performed on non-little endian machines.
242  *
243  * The function returns an integer less than, equal to, or greater than zero
244  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
245  * to be less than, to match, or be greater than @s2.
246  */
247 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
248 {
249 	ntfschar c1, c2;
250 	size_t i;
251
252 #ifdef DEBUG
253 	if (!s1 || !s2) {
254 		ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");
255 		exit(1);
256 	}
257 #endif
258 	for (i = 0; i < n; ++i) {
259 		c1 = le16_to_cpu(s1[i]);
260 		c2 = le16_to_cpu(s2[i]);
261 		if (c1 < c2)
262 			return -1;
263 		if (c1 > c2)
264 			return 1;
265 		if (!c1)
266 			break;
267 	}
268 	return 0;
269 }
270
271 /**
272  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
273  * @s1:			first string
274  * @s2:			second string
275  * @n:			maximum unicode characters to compare
276  * @upcase:		upcase table
277  * @upcase_size:	upcase table size in Unicode characters
278  *
279  * Compare the first @n characters of the Unicode strings @s1 and @s2,
280  * ignoring case. The strings in little endian format and appropriate
281  * le16_to_cpu() conversion is performed on non-little endian machines.
282  *
283  * Each character is uppercased using the @upcase table before the comparison.
284  *
285  * The function returns an integer less than, equal to, or greater than zero
286  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
287  * to be less than, to match, or be greater than @s2.
288  */
289 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
290 		const ntfschar *upcase, const u32 upcase_size)
291 {
292 	u16 c1, c2;
293 	size_t i;
294
295 #ifdef DEBUG
296 	if (!s1 || !s2 || !upcase) {
297 		ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");
298 		exit(1);
299 	}
300 #endif
301 	for (i = 0; i < n; ++i) {
302 		if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
303 			c1 = le16_to_cpu(upcase[c1]);
304 		if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
305 			c2 = le16_to_cpu(upcase[c2]);
306 		if (c1 < c2)
307 			return -1;
308 		if (c1 > c2)
309 			return 1;
310 		if (!c1)
311 			break;
312 	}
313 	return 0;
314 }
315
316 /**
317  * ntfs_ucsnlen - determine the length of a little endian Unicode string
318  * @s:		pointer to Unicode string
319  * @maxlen:	maximum length of string @s
320  *
321  * Return the number of Unicode characters in the little endian Unicode
322  * string @s up to a maximum of maxlen Unicode characters, not including
323  * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s
324  * and @s + @maxlen, @maxlen is returned.
325  *
326  * This function never looks beyond @s + @maxlen.
327  */
328 u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
329 {
330 	u32 i;
331
332 	for (i = 0; i < maxlen; i++) {
333 		if (!le16_to_cpu(s[i]))
334 			break;
335 	}
336 	return i;
337 }
338
339 /**
340  * ntfs_ucsndup - duplicate little endian Unicode string
341  * @s:		pointer to Unicode string
342  * @maxlen:	maximum length of string @s
343  *
344  * Return a pointer to a new little endian Unicode string which is a duplicate
345  * of the string s.  Memory for the new string is obtained with ntfs_malloc(3),
346  * and can be freed with free(3).
347  *
348  * A maximum of @maxlen Unicode characters are copied and a terminating
349  * (ntfschar)'\0' little endian Unicode character is added.
350  *
351  * This function never looks beyond @s + @maxlen.
352  *
353  * Return a pointer to the new little endian Unicode string on success and NULL
354  * on failure with errno set to the error code.
355  */
356 ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
357 {
358 	ntfschar *dst;
359 	u32 len;
360
361 	len = ntfs_ucsnlen(s, maxlen);
362 	dst = ntfs_malloc((len + 1) * sizeof(ntfschar));
363 	if (dst) {
364 		memcpy(dst, s, len * sizeof(ntfschar));
365 		dst[len] = cpu_to_le16(L'\0');
366 	}
367 	return dst;
368 }
369
370 /**
371  * ntfs_name_upcase - Map an Unicode name to its uppercase equivalent
372  * @name:
373  * @name_len:
374  * @upcase:
375  * @upcase_len:
376  *
377  * Description...
378  *
379  * Returns:
380  */
381 void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
382 		const u32 upcase_len)
383 {
384 	u32 i;
385 	u16 u;
386
387 	for (i = 0; i < name_len; i++)
388 		if ((u = le16_to_cpu(name[i])) < upcase_len)
389 			name[i] = upcase[u];
390 }
391
392 /**
393  * ntfs_file_value_upcase - Convert a filename to upper case
394  * @file_name_attr:
395  * @upcase:
396  * @upcase_len:
397  *
398  * Description...
399  *
400  * Returns:
401  */
402 void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
403 		const ntfschar *upcase, const u32 upcase_len)
404 {
405 	ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,
406 			file_name_attr->file_name_length, upcase, upcase_len);
407 }
408
409 /*
410    NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough
411    for now]) for path names, but the Unicode code points need to be
412    converted before a path can be accessed under NTFS. For 7 bit ASCII/ANSI,
413    glibc does this even without a locale in a hard-coded fashion as that
414    appears to be easy because the low 7-bit ASCII range appears to be
415    available in all charsets but it does not convert anything if
416    there was some error with the locale setup or none set up like
417    when mount is called during early boot where it (by policy) does
418    not use locales (and they may not be available if /usr is not yet mounted),
419    so this patch fixes the resulting issues for systems which use
420    UTF-8 and for others, specifying the locale in fstab brings them
421    the encoding which they want.
422
423    If no locale is defined or there was a problem with setting one
424    up and whenever nl_langinfo(CODESET) returns a string starting with
425    "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter to fix
426    the bug where NTFS-3G does not show any path names which include
427    international characters!!! (and also fails on creating them) as a result.
428
429    Author: Bernhard Kaindl <bk@suse.de>
430    Jean-Pierre Andre made it compliant with RFC3629/RFC2781.
431 */
432
433 /*
434  * Return the amount of 8-bit elements in UTF-8 needed (without the terminating
435  * null) to store a given UTF-16LE string.
436  *
437  * Return -1 with errno set if string has invalid byte sequence or too long.
438  */
439 static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
440 {
441 	int i, ret = -1;
442 	int count = 0;
443 	BOOL surrog;
444
445 	surrog = FALSE;
446 	for (i = 0; i < ins_len && ins[i]; i++) {
447 		unsigned short c = le16_to_cpu(ins[i]);
448 		if (surrog) {
449 			if ((c >= 0xdc00) && (c < 0xe000)) {
450 				surrog = FALSE;
451 				count += 4;
452 			} else
453 				goto fail;
454 		} else
455 			if (c < 0x80)
456 				count++;
457 			else if (c < 0x800)
458 				count += 2;
459 			else if (c < 0xd800)
460 				count += 3;
461 			else if (c < 0xdc00)
462 				surrog = TRUE;
463 #if NOREVBOM
464 			else if ((c >= 0xe000) && (c < 0xfffe))
465 #else
466 			else if (c >= 0xe000)
467 #endif
468 				count += 3;
469 			else
470 				goto fail;
471 		if (count > outs_len) {
472 			errno = ENAMETOOLONG;
473 			goto out;
474 		}
475 	}
476 	if (surrog)
477 		goto fail;
478
479 	ret = count;
480 out:
481 	return ret;
482 fail:
483 	errno = EILSEQ;
484 	goto out;
485 }
486
487 /*
488  * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string
489  * @ins:	input utf16 string buffer
490  * @ins_len:	length of input string in utf16 characters
491  * @outs:	on return contains the (allocated) output multibyte string
492  * @outs_len:	length of output buffer in bytes
493  *
494  * Return -1 with errno set if string has invalid byte sequence or too long.
495  */
496 static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
497 			      char **outs, int outs_len)
498 {
499 #if defined(__APPLE__) || defined(__DARWIN__)
500 #ifdef ENABLE_NFCONV
501 	char *original_outs_value = *outs;
502 	int original_outs_len = outs_len;
503 #endif /* ENABLE_NFCONV */
504 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
505
506 	char *t;
507 	int i, size, ret = -1;
508 	int halfpair;
509
510 	halfpair = 0;
511 	if (!*outs)
512 		outs_len = PATH_MAX;
513
514 	size = utf16_to_utf8_size(ins, ins_len, outs_len);
515
516 	if (size < 0)
517 		goto out;
518
519 	if (!*outs) {
520 		outs_len = size + 1;
521 		*outs = ntfs_malloc(outs_len);
522 		if (!*outs)
523 			goto out;
524 	}
525
526 	t = *outs;
527
528 	for (i = 0; i < ins_len && ins[i]; i++) {
529 	    unsigned short c = le16_to_cpu(ins[i]);
530 			/* size not double-checked */
531 		if (halfpair) {
532 			if ((c >= 0xdc00) && (c < 0xe000)) {
533 				*t++ = 0xf0 + (((halfpair + 64) >> 8) & 7);
534 				*t++ = 0x80 + (((halfpair + 64) >> 2) & 63);
535 				*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
536 				*t++ = 0x80 + (c & 63);
537 				halfpair = 0;
538 			} else
539 				goto fail;
540 		} else if (c < 0x80) {
541 			*t++ = c;
542 	    	} else {
543 			if (c < 0x800) {
544 			   	*t++ = (0xc0 | ((c >> 6) & 0x3f));
545 			        *t++ = 0x80 | (c & 0x3f);
546 			} else if (c < 0xd800) {
547 			   	*t++ = 0xe0 | (c >> 12);
548 			   	*t++ = 0x80 | ((c >> 6) & 0x3f);
549 		        	*t++ = 0x80 | (c & 0x3f);
550 			} else if (c < 0xdc00)
551 				halfpair = c;
552 			else if (c >= 0xe000) {
553 				*t++ = 0xe0 | (c >> 12);
554 				*t++ = 0x80 | ((c >> 6) & 0x3f);
555 			        *t++ = 0x80 | (c & 0x3f);
556 			} else
557 				goto fail;
558 	        }
559 	}
560 	*t = '\0';
561
562 #if defined(__APPLE__) || defined(__DARWIN__)
563 #ifdef ENABLE_NFCONV
564 	if(nfconvert_utf8 && (t - *outs) > 0) {
565 		char *new_outs = NULL;
566 		int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form
567 		if(new_outs_len >= 0 && new_outs != NULL) {
568 			if(original_outs_value != *outs) {
569 				// We have allocated outs ourselves.
570 				free(*outs);
571 				*outs = new_outs;
572 				t = *outs + new_outs_len;
573 			}
574 			else {
575 				// We need to copy new_outs into the fixed outs buffer.
576 				memset(*outs, 0, original_outs_len);
577 				strncpy(*outs, new_outs, original_outs_len-1);
578 				t = *outs + original_outs_len;
579 				free(new_outs);
580 			}
581 		}
582 		else {
583 			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
584 			ntfs_log_error("  new_outs=0x%p\n", new_outs);
585 			ntfs_log_error("  new_outs_len=%d\n", new_outs_len);
586 		}
587 	}
588 #endif /* ENABLE_NFCONV */
589 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
590
591 	ret = t - *outs;
592 out:
593 	return ret;
594 fail:
595 	errno = EILSEQ;
596 	goto out;
597 }
598
/*
 * Count the 16-bit units (without the terminating null) which the UTF-16LE
 * form of the UTF-8 string @s takes: one per sequence, two for a sequence
 * starting with a four-byte lead (0xF0..0xF4).
 *
 * Return the count, or -1 with errno set to ENAMETOOLONG if the count reaches
 * PATH_MAX, or to EILSEQ if a lead byte of 0xF5 or above is found.
 *
 * Note: This does not check whether the input sequence is a valid utf8 string,
 *	 and should be used only in context where such check is made!  The
 *	 bytes following a lead byte are merely skipped, and a string ending
 *	 inside a sequence just ends the count.
 */
static int utf8_to_utf16_size(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	size_t units = 0;
	unsigned int lead;

	while ((lead = *p++) != 0) {
		int trail;

		if (++units >= PATH_MAX)
			goto too_long;
		/* Plain ASCII or a stray continuation byte: one unit. */
		if (lead < 0xc0)
			continue;
		if (lead >= 0xF5) {
			errno = EILSEQ;
			return -1;
		}

		/* Step over the continuation bytes announced by the lead. */
		trail = (lead >= 0xF0) ? 3 : ((lead >= 0xE0) ? 2 : 1);
		while (trail--) {
			if (!*p)
				return units;
			p++;
		}

		/* Four-byte sequences need a surrogate pair. */
		if ((lead >= 0xF0) && (++units >= PATH_MAX))
			goto too_long;
	}
	return units;

too_long:
	errno = ENAMETOOLONG;
	return -1;
}
646 /*
647  * This converts one UTF-8 sequence to cpu-endian Unicode value
648  * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
649  *
650  * Return the number of used utf8 bytes or -1 with errno set
651  * if sequence is invalid.
652  */
653 static int utf8_to_unicode(u32 *wc, const char *s)
654 {
655     	unsigned int byte = *((const unsigned char *)s);
656
657 					/* single byte */
658 	if (byte == 0) {
659 		*wc = (u32) 0;
660 		return 0;
661 	} else if (byte < 0x80) {
662 		*wc = (u32) byte;
663 		return 1;
664 					/* double byte */
665 	} else if (byte < 0xc2) {
666 		goto fail;
667 	} else if (byte < 0xE0) {
668 		if ((s[1] & 0xC0) == 0x80) {
669 			*wc = ((u32)(byte & 0x1F) << 6)
670 			    | ((u32)(s[1] & 0x3F));
671 			return 2;
672 		} else
673 			goto fail;
674 					/* three-byte */
675 	} else if (byte < 0xF0) {
676 		if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) {
677 			*wc = ((u32)(byte & 0x0F) << 12)
678 			    | ((u32)(s[1] & 0x3F) << 6)
679 			    | ((u32)(s[2] & 0x3F));
680 			/* Check valid ranges */
681 #if NOREVBOM
682 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
683 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
684 				return 3;
685 #else
686 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
687 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
688 				return 3;
689 #endif
690 		}
691 		goto fail;
692 					/* four-byte */
693 	} else if (byte < 0xF5) {
694 		if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)
695 		  && ((s[3] & 0xC0) == 0x80)) {
696 			*wc = ((u32)(byte & 0x07) << 18)
697 			    | ((u32)(s[1] & 0x3F) << 12)
698 			    | ((u32)(s[2] & 0x3F) << 6)
699 			    | ((u32)(s[3] & 0x3F));
700 		/* Check valid ranges */
701 		if ((*wc <= 0x10ffff) && (*wc >= 0x10000))
702 			return 4;
703 		}
704 		goto fail;
705 	}
706 fail:
707 	errno = EILSEQ;
708 	return -1;
709 }
710
711 /**
712  * ntfs_utf8_to_utf16 - convert a UTF-8 string to a UTF-16LE string
713  * @ins:	input multibyte string buffer
714  * @outs:	on return contains the (allocated) output utf16 string
715  * @outs_len:	length of output buffer in utf16 characters
716  *
717  * Return -1 with errno set.
718  */
719 static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
720 {
721 #if defined(__APPLE__) || defined(__DARWIN__)
722 #ifdef ENABLE_NFCONV
723 	char *new_ins = NULL;
724 	if(nfconvert_utf8) {
725 		int new_ins_len;
726 		new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
727 		if(new_ins_len >= 0)
728 			ins = new_ins;
729 		else
730 			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
731 	}
732 #endif /* ENABLE_NFCONV */
733 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
734 	const char *t = ins;
735 	u32 wc;
736 	BOOL allocated;
737 	ntfschar *outpos;
738 	int shorts, ret = -1;
739
740 	shorts = utf8_to_utf16_size(ins);
741 	if (shorts < 0)
742 		goto fail;
743
744 	allocated = FALSE;
745 	if (!*outs) {
746 		*outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar));
747 		if (!*outs)
748 			goto fail;
749 		allocated = TRUE;
750 	}
751
752 	outpos = *outs;
753
754 	while(1) {
755 		int m  = utf8_to_unicode(&wc, t);
756 		if (m <= 0) {
757 			if (m < 0) {
758 				/* do not leave space allocated if failed */
759 				if (allocated) {
760 					free(*outs);
761 					*outs = (ntfschar*)NULL;
762 				}
763 				goto fail;
764 			}
765 			*outpos++ = const_cpu_to_le16(0);
766 			break;
767 		}
768 		if (wc < 0x10000)
769 			*outpos++ = cpu_to_le16(wc);
770 		else {
771 			wc -= 0x10000;
772 			*outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
773 			*outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
774 		}
775 		t += m;
776 	}
777
778 	ret = --outpos - *outs;
779 fail:
780 #if defined(__APPLE__) || defined(__DARWIN__)
781 #ifdef ENABLE_NFCONV
782 	if(new_ins != NULL)
783 		free(new_ins);
784 #endif /* ENABLE_NFCONV */
785 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
786 	return ret;
787 }
788
789 /**
790  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
791  * @ins:	input Unicode string buffer
792  * @ins_len:	length of input string in Unicode characters
793  * @outs:	on return contains the (allocated) output multibyte string
794  * @outs_len:	length of output buffer in bytes
795  *
796  * Convert the input little endian, 2-byte Unicode string @ins, of length
797  * @ins_len into the multibyte string format dictated by the current locale.
798  *
799  * If *@outs is NULL, the function allocates the string and the caller is
800  * responsible for calling free(*@outs); when finished with it.
801  *
802  * On success the function returns the number of bytes written to the output
803  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
804  * string buffer was allocated, *@outs is set to it.
805  *
806  * On error, -1 is returned, and errno is set to the error code. The following
807  * error codes can be expected:
808  *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
809  *	EILSEQ		The input string cannot be represented as a multibyte
810  *			sequence according to the current locale.
811  *	ENAMETOOLONG	Destination buffer is too small for input string.
812  *	ENOMEM		Not enough memory to allocate destination buffer.
813  */
814 int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
815 		int outs_len)
816 {
817 	char *mbs;
818 	wchar_t wc;
819 	int i, o, mbs_len;
820 	int cnt = 0;
821 #ifdef HAVE_MBSINIT
822 	mbstate_t mbstate;
823 #endif
824
825 	if (!ins || !outs) {
826 		errno = EINVAL;
827 		return -1;
828 	}
829 	mbs = *outs;
830 	mbs_len = outs_len;
831 	if (mbs && !mbs_len) {
832 		errno = ENAMETOOLONG;
833 		return -1;
834 	}
835 	if (use_utf8)
836 		return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
837 	if (!mbs) {
838 		mbs_len = (ins_len + 1) * MB_CUR_MAX;
839 		mbs = ntfs_malloc(mbs_len);
840 		if (!mbs)
841 			return -1;
842 	}
843 #ifdef HAVE_MBSINIT
844 	memset(&mbstate, 0, sizeof(mbstate));
845 #else
846 	wctomb(NULL, 0);
847 #endif
848 	for (i = o = 0; i < ins_len; i++) {
849 		/* Reallocate memory if necessary or abort. */
850 		if ((int)(o + MB_CUR_MAX) > mbs_len) {
851 			char *tc;
852 			if (mbs == *outs) {
853 				errno = ENAMETOOLONG;
854 				return -1;
855 			}
856 			tc = ntfs_malloc((mbs_len + 64) & ~63);
857 			if (!tc)
858 				goto err_out;
859 			memcpy(tc, mbs, mbs_len);
860 			mbs_len = (mbs_len + 64) & ~63;
861 			free(mbs);
862 			mbs = tc;
863 		}
864 		/* Convert the LE Unicode character to a CPU wide character. */
865 		wc = (wchar_t)le16_to_cpu(ins[i]);
866 		if (!wc)
867 			break;
868 		/* Convert the CPU endian wide character to multibyte. */
869 #ifdef HAVE_MBSINIT
870 		cnt = wcrtomb(mbs + o, wc, &mbstate);
871 #else
872 		cnt = wctomb(mbs + o, wc);
873 #endif
874 		if (cnt == -1)
875 			goto err_out;
876 		if (cnt <= 0) {
877 			ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
878 			errno = EINVAL;
879 			goto err_out;
880 		}
881 		o += cnt;
882 	}
883 #ifdef HAVE_MBSINIT
884 	/* Make sure we are back in the initial state. */
885 	if (!mbsinit(&mbstate)) {
886 		ntfs_log_debug("Eeek. mbstate not in initial state!\n");
887 		errno = EILSEQ;
888 		goto err_out;
889 	}
890 #endif
891 	/* Now write the NULL character. */
892 	mbs[o] = '\0';
893 	if (*outs != mbs)
894 		*outs = mbs;
895 	return o;
896 err_out:
897 	if (mbs != *outs) {
898 		int eo = errno;
899 		free(mbs);
900 		errno = eo;
901 	}
902 	return -1;
903 }
904
905 /**
906  * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
907  * @ins:	input multibyte string buffer
908  * @outs:	on return contains the (allocated) output Unicode string
909  *
910  * Convert the input multibyte string @ins, from the current locale into the
911  * corresponding little endian, 2-byte Unicode string.
912  *
913  * The function allocates the string and the caller is responsible for calling
914  * free(*@outs); when finished with it.
915  *
916  * On success the function returns the number of Unicode characters written to
917  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
918  * character.
919  *
920  * On error, -1 is returned, and errno is set to the error code. The following
921  * error codes can be expected:
922  *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
923  *	EILSEQ		The input string cannot be represented as a Unicode
924  *			string according to the current locale.
925  *	ENAMETOOLONG	Destination buffer is too small for input string.
926  *	ENOMEM		Not enough memory to allocate destination buffer.
927  */
928 int ntfs_mbstoucs(const char *ins, ntfschar **outs)
929 {
930 	ntfschar *ucs;
931 	const char *s;
932 	wchar_t wc;
933 	int i, o, cnt, ins_len, ucs_len, ins_size;
934 #ifdef HAVE_MBSINIT
935 	mbstate_t mbstate;
936 #endif
937
938 	if (!ins || !outs) {
939 		errno = EINVAL;
940 		return -1;
941 	}
942
943 	if (use_utf8)
944 		return ntfs_utf8_to_utf16(ins, outs);
945
946 	/* Determine the size of the multi-byte string in bytes. */
947 	ins_size = strlen(ins);
948 	/* Determine the length of the multi-byte string. */
949 	s = ins;
950 #if defined(HAVE_MBSINIT)
951 	memset(&mbstate, 0, sizeof(mbstate));
952 	ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
953 #ifdef __CYGWIN32__
954 	if (!ins_len && *ins) {
955 		/* Older Cygwin had broken mbsrtowcs() implementation. */
956 		ins_len = strlen(ins);
957 	}
958 #endif
959 #elif !defined(DJGPP)
960 	ins_len = mbstowcs(NULL, s, 0);
961 #else
962 	/* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
963 	ins_len = strlen(ins);
964 #endif
965 	if (ins_len == -1)
966 		return ins_len;
967 #ifdef HAVE_MBSINIT
968 	if ((s != ins) || !mbsinit(&mbstate)) {
969 #else
970 	if (s != ins) {
971 #endif
972 		errno = EILSEQ;
973 		return -1;
974 	}
975 	/* Add the NULL terminator. */
976 	ins_len++;
977 	ucs_len = ins_len;
978 	ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
979 	if (!ucs)
980 		return -1;
981 #ifdef HAVE_MBSINIT
982 	memset(&mbstate, 0, sizeof(mbstate));
983 #else
984 	mbtowc(NULL, NULL, 0);
985 #endif
986 	for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
987 		/* Reallocate memory if necessary. */
988 		if (o >= ucs_len) {
989 			ntfschar *tc;
990 			ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
991 			tc = realloc(ucs, ucs_len);
992 			if (!tc)
993 				goto err_out;
994 			ucs = tc;
995 			ucs_len /= sizeof(ntfschar);
996 		}
997 		/* Convert the multibyte character to a wide character. */
998 #ifdef HAVE_MBSINIT
999 		cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
1000 #else
1001 		cnt = mbtowc(&wc, ins + i, ins_size - i);
1002 #endif
1003 		if (!cnt)
1004 			break;
1005 		if (cnt == -1)
1006 			goto err_out;
1007 		if (cnt < -1) {
1008 			ntfs_log_trace("Eeek. cnt = %i\n", cnt);
1009 			errno = EINVAL;
1010 			goto err_out;
1011 		}
1012 		/* Make sure we are not overflowing the NTFS Unicode set. */
1013 		if ((unsigned long)wc >= (unsigned long)(1 <<
1014 				(8 * sizeof(ntfschar)))) {
1015 			errno = EILSEQ;
1016 			goto err_out;
1017 		}
1018 		/* Convert the CPU wide character to a LE Unicode character. */
1019 		ucs[o] = cpu_to_le16(wc);
1020 	}
1021 #ifdef HAVE_MBSINIT
1022 	/* Make sure we are back in the initial state. */
1023 	if (!mbsinit(&mbstate)) {
1024 		ntfs_log_trace("Eeek. mbstate not in initial state!\n");
1025 		errno = EILSEQ;
1026 		goto err_out;
1027 	}
1028 #endif
1029 	/* Now write the NULL character. */
1030 	ucs[o] = cpu_to_le16(L'\0');
1031 	*outs = ucs;
1032 	return o;
1033 err_out:
1034 	free(ucs);
1035 	return -1;
1036 }
1037
1038 /**
1039  * ntfs_upcase_table_build - build the default upcase table for NTFS
1040  * @uc:		destination buffer where to store the built table
1041  * @uc_len:	size of destination buffer in bytes
1042  *
1043  * ntfs_upcase_table_build() builds the default upcase table for NTFS and
1044  * stores it in the caller supplied buffer @uc of size @uc_len.
1045  *
1046  * Note, @uc_len must be at least 128kiB in size or bad things will happen!
1047  */
1048 void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
1049 {
1050 	static int uc_run_table[][3] = { /* Start, End, Add */
1051 	{0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
1052 	{0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
1053 	{0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
1054 	{0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
1055 	{0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
1056 	{0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
1057 	{0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
1058 	{0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
1059 	{0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
1060 	{0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
1061 	{0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
1062 	{0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
1063 	{0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
1064 	{0}
1065 	};
1066 	static int uc_dup_table[][2] = { /* Start, End */
1067 	{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
1068 	{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
1069 	{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
1070 	{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
1071 	{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
1072 	{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
1073 	{0}
1074 	};
1075 	static int uc_byte_table[][2] = { /* Offset, Value */
1076 	{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
1077 	{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
1078 	{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
1079 	{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
1080 	{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
1081 	{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
1082 	{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
1083 	{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
1084 	{0}
1085 	};
1086 	int i, r;
1087 	int k, off;
1088
1089 	memset((char*)uc, 0, uc_len);
1090 	uc_len >>= 1;
1091 	if (uc_len > 65536)
1092 		uc_len = 65536;
1093 	for (i = 0; (u32)i < uc_len; i++)
1094 		uc[i] = cpu_to_le16(i);
1095 	for (r = 0; uc_run_table[r][0]; r++) {
1096 		off = uc_run_table[r][2];
1097 		for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
1098 			uc[i] = cpu_to_le16(i + off);
1099 	}
1100 	for (r = 0; uc_dup_table[r][0]; r++)
1101 		for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
1102 			uc[i + 1] = cpu_to_le16(i);
1103 	for (r = 0; uc_byte_table[r][0]; r++) {
1104 		k = uc_byte_table[r][1];
1105 		uc[uc_byte_table[r][0]] = cpu_to_le16(k);
1106 	}
1107 }
1108
1109 /**
1110  * ntfs_str2ucs - convert a string to a valid NTFS file name
1111  * @s:		input string
1112  * @len:	length of output buffer in Unicode characters
1113  *
1114  * Convert the input @s string into the corresponding little endian,
1115  * 2-byte Unicode string. The length of the converted string is less
1116  * or equal to the maximum length allowed by the NTFS format (255).
1117  *
1118  * If @s is NULL then return AT_UNNAMED.
1119  *
1120  * On success the function returns the Unicode string in an allocated
1121  * buffer and the caller is responsible to free it when it's not needed
1122  * anymore.
1123  *
1124  * On error NULL is returned and errno is set to the error code.
1125  */
1126 ntfschar *ntfs_str2ucs(const char *s, int *len)
1127 {
1128 	ntfschar *ucs = NULL;
1129
1130 	if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) {
1131 		ntfs_log_perror("Couldn't convert '%s' to Unicode", s);
1132 		return NULL;
1133 	}
1134 	if (*len > NTFS_MAX_NAME_LEN) {
1135 		free(ucs);
1136 		errno = ENAMETOOLONG;
1137 		return NULL;
1138 	}
1139 	if (!ucs || !*len) {
1140 		ucs  = AT_UNNAMED;
1141 		*len = 0;
1142 	}
1143 	return ucs;
1144 }
1145
1146 /**
1147  * ntfs_ucsfree - free memory allocated by ntfs_str2ucs()
1148  * @ucs		input string to be freed
1149  *
1150  * Free memory at @ucs and which was allocated by ntfs_str2ucs.
1151  *
1152  * Return value: none.
1153  */
1154 void ntfs_ucsfree(ntfschar *ucs)
1155 {
1156 	if (ucs && (ucs != AT_UNNAMED))
1157 		free(ucs);
1158 }
1159
1160 /*
1161  *		Check whether a name contains no chars forbidden
1162  *	for DOS or Win32 use
1163  *
1164  *	If there is a bad char, errno is set to EINVAL
1165  */
1166
1167 BOOL ntfs_forbidden_chars(const ntfschar *name, int len)
1168 {
1169 	BOOL forbidden;
1170 	int ch;
1171 	int i;
1172 	u32 mainset =     (1L << ('\"' - 0x20))
1173 			| (1L << ('*' - 0x20))
1174 			| (1L << ('/' - 0x20))
1175 			| (1L << (':' - 0x20))
1176 			| (1L << ('<' - 0x20))
1177 			| (1L << ('>' - 0x20))
1178 			| (1L << ('?' - 0x20));
1179
1180 	forbidden = (len == 0) || (le16_to_cpu(name[len-1]) == ' ');
1181 	for (i=0; i<len; i++) {
1182 		ch = le16_to_cpu(name[i]);
1183 		if ((ch < 0x20)
1184 		    || ((ch < 0x40)
1185 			&& ((1L << (ch - 0x20)) & mainset))
1186 		    || (ch == '\\')
1187 		    || (ch == '|'))
1188 			forbidden = TRUE;
1189 	}
1190 	if (forbidden)
1191 		errno = EINVAL;
1192 	return (forbidden);
1193 }
1194
1195 /*
1196  *		Check whether the same name can be used as a DOS and
1197  *	a Win32 name
1198  *
1199  *	The names must be the same, or the short name the uppercase
1200  *	variant of the long name
1201  */
1202
1203 BOOL ntfs_collapsible_chars(ntfs_volume *vol,
1204 			const ntfschar *shortname, int shortlen,
1205 			const ntfschar *longname, int longlen)
1206 {
1207 	BOOL collapsible;
1208 	unsigned int ch;
1209 	int i;
1210
1211 	collapsible = shortlen == longlen;
1212 	if (collapsible)
1213 		for (i=0; i<shortlen; i++) {
1214 			ch = le16_to_cpu(longname[i]);
1215 			if ((ch >= vol->upcase_len)
1216 		   	 || ((shortname[i] != longname[i])
1217 				&& (shortname[i] != vol->upcase[ch])))
1218 					collapsible = FALSE;
1219 	}
1220 	return (collapsible);
1221 }
1222
1223 /*
1224  * Define the character encoding to be used.
1225  * Use UTF-8 unless specified otherwise.
1226  */
1227
1228 int ntfs_set_char_encoding(const char *locale)
1229 {
1230 	use_utf8 = 0;
1231 	if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8")
1232 	    || strstr(locale,"utf-8") || strstr(locale,"UTF-8"))
1233 		use_utf8 = 1;
1234 	else
1235 		if (setlocale(LC_ALL, locale))
1236 			use_utf8 = 0;
1237 		else {
1238 			ntfs_log_error("Invalid locale, encoding to UTF-8\n");
1239 			use_utf8 = 1;
1240 	 	}
1241 	return 0; /* always successful */
1242 }
1243
1244 #if defined(__APPLE__) || defined(__DARWIN__)
1245
/*
 * Enable or disable the normalization of file names.
 *
 * @normalize must be 0 or 1, it is recorded in nfconvert_utf8.
 *
 * Returns 0 if the setting was recorded, or -1 if @normalize is not
 * valid or if the support was not compiled in (ENABLE_NFCONV not defined).
 */
int ntfs_macosx_normalize_filenames(int normalize) {
#ifdef ENABLE_NFCONV
	if ((normalize != 0) && (normalize != 1))
		return -1;
	nfconvert_utf8 = normalize;
	return 0;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1258
/*
 * Normalize an UTF-8 string through CoreFoundation.
 *
 * @utf8_string	'\0'-terminated UTF-8 string to normalize
 * @target	where to return the newly allocated normalized string
 * @composed	non-zero to request normalization form C, zero for form D
 *
 * On success, *@target is set to a '\0'-terminated buffer which the
 * caller has to free, and the size of that buffer minus one is returned.
 *
 * Returns -2 if the CFString could not be created, -3 if it could not
 * be copied to a mutable string, and -1 for any other failure or when
 * the support was not compiled in (ENABLE_NFCONV not defined).
 * *@target is left untouched when failing.
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
 int composed) {
#ifdef ENABLE_NFCONV
	/* For this code to compile, the CoreFoundation framework must be fed to the linker. */
	CFStringRef source;
	CFMutableStringRef work;
	CFRange whole;
	CFIndex needed;
	CFIndex converted;
	char *buf = NULL;
	int buflen = -1;

	/* Wrap the UTF-8 input into a CFString. */
	source = CFStringCreateWithCString(kCFAllocatorDefault, utf8_string, kCFStringEncodingUTF8);
	if (source == NULL) {
		ntfs_log_error("CFStringCreateWithCString failed!\n");
		return -2;
	}

	/* Work on a mutable copy, the source is not needed any more. */
	work = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, source);
	CFRelease(source);
	if (work == NULL) {
		ntfs_log_error("CFStringCreateMutableCopy failed!\n");
		return -3;
	}

	/* Apply the requested normalization form in place. */
	CFStringNormalize(work, (composed != 0 ? kCFStringNormalizationFormC : kCFStringNormalizationFormD));

	/* First pass, without a buffer, to get the required size. */
	whole = CFRangeMake(0, CFStringGetLength(work));
	converted = CFStringGetBytes(work, whole, kCFStringEncodingUTF8, 0, false, NULL, 0, &needed);
	if (converted <= 0) {
		ntfs_log_error("Could not perform check for required length of UTF-8 conversion of normalized CFMutableString.\n");
		goto release;
	}

	/* One more byte than required, left zeroed as the terminator. */
	buflen = sizeof(char)*(needed + 1);
	buf = ntfs_calloc(buflen);
	if (buf == NULL) {
		ntfs_log_error("Could not perform a ntfs_calloc of %d bytes for char *result.\n", buflen);
		goto release;
	}

	/* Second pass, actually storing the UTF-8 bytes. */
	converted = CFStringGetBytes(work, whole, kCFStringEncodingUTF8,
			0, false, (UInt8*)buf, buflen-1, &needed);
	if (converted <= 0) {
		ntfs_log_error("Could not perform UTF-8 conversion of normalized CFMutableString.\n");
		free(buf);
		buf = NULL;
	}

release:
	CFRelease(work);

	if (buf == NULL)
		return -1;
	*target = buf;
	return buflen - 1;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1321 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
1322