blob: e8818eaaacaf86a9062bb131dfaa514341ba46e7
1 | /* |
2 | * linux/fs/open.c |
3 | * |
4 | * Copyright (C) 1991, 1992 Linus Torvalds |
5 | */ |
6 | |
7 | #include <linux/string.h> |
8 | #include <linux/mm.h> |
9 | #include <linux/file.h> |
10 | #include <linux/fdtable.h> |
11 | #include <linux/fsnotify.h> |
12 | #include <linux/module.h> |
13 | #include <linux/tty.h> |
14 | #include <linux/namei.h> |
15 | #include <linux/backing-dev.h> |
16 | #include <linux/capability.h> |
17 | #include <linux/securebits.h> |
18 | #include <linux/security.h> |
19 | #include <linux/mount.h> |
20 | #include <linux/fcntl.h> |
21 | #include <linux/slab.h> |
22 | #include <asm/uaccess.h> |
23 | #include <linux/fs.h> |
24 | #include <linux/personality.h> |
25 | #include <linux/pagemap.h> |
26 | #include <linux/syscalls.h> |
27 | #include <linux/rcupdate.h> |
28 | #include <linux/audit.h> |
29 | #include <linux/falloc.h> |
30 | #include <linux/fs_struct.h> |
31 | #include <linux/ima.h> |
32 | #include <linux/dnotify.h> |
33 | #include <linux/compat.h> |
34 | |
35 | #include "internal.h" |
36 | |
37 | int do_truncate2(struct vfsmount *mnt, struct dentry *dentry, loff_t length, |
38 | unsigned int time_attrs, struct file *filp) |
39 | { |
40 | int ret; |
41 | struct iattr newattrs; |
42 | |
43 | /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ |
44 | if (length < 0) |
45 | return -EINVAL; |
46 | |
47 | newattrs.ia_size = length; |
48 | newattrs.ia_valid = ATTR_SIZE | time_attrs; |
49 | if (filp) { |
50 | newattrs.ia_file = filp; |
51 | newattrs.ia_valid |= ATTR_FILE; |
52 | } |
53 | |
54 | /* Remove suid, sgid, and file capabilities on truncate too */ |
55 | ret = dentry_needs_remove_privs(dentry); |
56 | if (ret < 0) |
57 | return ret; |
58 | if (ret) |
59 | newattrs.ia_valid |= ret | ATTR_FORCE; |
60 | |
61 | inode_lock(dentry->d_inode); |
62 | /* Note any delegations or leases have already been broken: */ |
63 | ret = notify_change2(mnt, dentry, &newattrs, NULL); |
64 | inode_unlock(dentry->d_inode); |
65 | return ret; |
66 | } |
67 | int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, |
68 | struct file *filp) |
69 | { |
70 | return do_truncate2(NULL, dentry, length, time_attrs, filp); |
71 | } |
72 | |
73 | long vfs_truncate(const struct path *path, loff_t length) |
74 | { |
75 | struct inode *inode; |
76 | struct vfsmount *mnt; |
77 | struct dentry *upperdentry; |
78 | long error; |
79 | |
80 | inode = path->dentry->d_inode; |
81 | mnt = path->mnt; |
82 | |
83 | /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ |
84 | if (S_ISDIR(inode->i_mode)) |
85 | return -EISDIR; |
86 | if (!S_ISREG(inode->i_mode)) |
87 | return -EINVAL; |
88 | |
89 | error = mnt_want_write(path->mnt); |
90 | if (error) |
91 | goto out; |
92 | |
93 | error = inode_permission2(mnt, inode, MAY_WRITE); |
94 | if (error) |
95 | goto mnt_drop_write_and_out; |
96 | |
97 | error = -EPERM; |
98 | if (IS_APPEND(inode)) |
99 | goto mnt_drop_write_and_out; |
100 | |
101 | /* |
102 | * If this is an overlayfs then do as if opening the file so we get |
103 | * write access on the upper inode, not on the overlay inode. For |
104 | * non-overlay filesystems d_real() is an identity function. |
105 | */ |
106 | upperdentry = d_real(path->dentry, NULL, O_WRONLY); |
107 | error = PTR_ERR(upperdentry); |
108 | if (IS_ERR(upperdentry)) |
109 | goto mnt_drop_write_and_out; |
110 | |
111 | error = get_write_access(upperdentry->d_inode); |
112 | if (error) |
113 | goto mnt_drop_write_and_out; |
114 | |
115 | /* |
116 | * Make sure that there are no leases. get_write_access() protects |
117 | * against the truncate racing with a lease-granting setlease(). |
118 | */ |
119 | error = break_lease(inode, O_WRONLY); |
120 | if (error) |
121 | goto put_write_and_out; |
122 | |
123 | error = locks_verify_truncate(inode, NULL, length); |
124 | if (!error) |
125 | error = security_path_truncate(path); |
126 | if (!error) |
127 | error = do_truncate2(mnt, path->dentry, length, 0, NULL); |
128 | |
129 | put_write_and_out: |
130 | put_write_access(upperdentry->d_inode); |
131 | mnt_drop_write_and_out: |
132 | mnt_drop_write(path->mnt); |
133 | out: |
134 | return error; |
135 | } |
136 | EXPORT_SYMBOL_GPL(vfs_truncate); |
137 | |
138 | static long do_sys_truncate(const char __user *pathname, loff_t length) |
139 | { |
140 | unsigned int lookup_flags = LOOKUP_FOLLOW; |
141 | struct path path; |
142 | int error; |
143 | |
144 | if (length < 0) /* sorry, but loff_t says... */ |
145 | return -EINVAL; |
146 | |
147 | retry: |
148 | error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); |
149 | if (!error) { |
150 | error = vfs_truncate(&path, length); |
151 | path_put(&path); |
152 | } |
153 | if (retry_estale(error, lookup_flags)) { |
154 | lookup_flags |= LOOKUP_REVAL; |
155 | goto retry; |
156 | } |
157 | return error; |
158 | } |
159 | |
160 | SYSCALL_DEFINE2(truncate, const char __user *, path, long, length) |
161 | { |
162 | return do_sys_truncate(path, length); |
163 | } |
164 | |
#ifdef CONFIG_COMPAT
/*
 * truncate: compat entry point.  The length arrives as a compat_off_t and
 * is handed to do_sys_truncate() like the native call does.
 */
COMPAT_SYSCALL_DEFINE2(truncate, const char __user *, path, compat_off_t, length)
{
	return do_sys_truncate(path, length);
}
#endif
171 | |
172 | static long do_sys_ftruncate(unsigned int fd, loff_t length, int small) |
173 | { |
174 | struct inode *inode; |
175 | struct dentry *dentry; |
176 | struct vfsmount *mnt; |
177 | struct fd f; |
178 | int error; |
179 | |
180 | error = -EINVAL; |
181 | if (length < 0) |
182 | goto out; |
183 | error = -EBADF; |
184 | f = fdget(fd); |
185 | if (!f.file) |
186 | goto out; |
187 | |
188 | /* explicitly opened as large or we are on 64-bit box */ |
189 | if (f.file->f_flags & O_LARGEFILE) |
190 | small = 0; |
191 | |
192 | dentry = f.file->f_path.dentry; |
193 | mnt = f.file->f_path.mnt; |
194 | inode = dentry->d_inode; |
195 | error = -EINVAL; |
196 | if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE)) |
197 | goto out_putf; |
198 | |
199 | error = -EINVAL; |
200 | /* Cannot ftruncate over 2^31 bytes without large file support */ |
201 | if (small && length > MAX_NON_LFS) |
202 | goto out_putf; |
203 | |
204 | error = -EPERM; |
205 | if (IS_APPEND(inode)) |
206 | goto out_putf; |
207 | |
208 | sb_start_write(inode->i_sb); |
209 | error = locks_verify_truncate(inode, f.file, length); |
210 | if (!error) |
211 | error = security_path_truncate(&f.file->f_path); |
212 | if (!error) |
213 | error = do_truncate2(mnt, dentry, length, ATTR_MTIME|ATTR_CTIME, f.file); |
214 | sb_end_write(inode->i_sb); |
215 | out_putf: |
216 | fdput(f); |
217 | out: |
218 | return error; |
219 | } |
220 | |
221 | SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) |
222 | { |
223 | return do_sys_ftruncate(fd, length, 1); |
224 | } |
225 | |
#ifdef CONFIG_COMPAT
/*
 * ftruncate: compat entry point.  Like the native call it requests the
 * non-LFS size limit (small = 1) from do_sys_ftruncate().
 */
COMPAT_SYSCALL_DEFINE2(ftruncate, unsigned int, fd, compat_ulong_t, length)
{
	return do_sys_ftruncate(fd, length, 1);
}
#endif
232 | |
/* The LFS flavours of truncate are only required on 32 bit machines */
#if BITS_PER_LONG == 32
/* truncate64: path-based truncate taking a full loff_t length. */
SYSCALL_DEFINE2(truncate64, const char __user *, path, loff_t, length)
{
	return do_sys_truncate(path, length);
}

/* ftruncate64: descriptor-based truncate without the non-LFS limit. */
SYSCALL_DEFINE2(ftruncate64, unsigned int, fd, loff_t, length)
{
	return do_sys_ftruncate(fd, length, 0);
}
#endif /* BITS_PER_LONG == 32 */
245 | |
246 | |
247 | int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len) |
248 | { |
249 | struct inode *inode = file_inode(file); |
250 | long ret; |
251 | |
252 | if (offset < 0 || len <= 0) |
253 | return -EINVAL; |
254 | |
255 | /* Return error if mode is not supported */ |
256 | if (mode & ~FALLOC_FL_SUPPORTED_MASK) |
257 | return -EOPNOTSUPP; |
258 | |
259 | /* Punch hole and zero range are mutually exclusive */ |
260 | if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == |
261 | (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) |
262 | return -EOPNOTSUPP; |
263 | |
264 | /* Punch hole must have keep size set */ |
265 | if ((mode & FALLOC_FL_PUNCH_HOLE) && |
266 | !(mode & FALLOC_FL_KEEP_SIZE)) |
267 | return -EOPNOTSUPP; |
268 | |
269 | /* Collapse range should only be used exclusively. */ |
270 | if ((mode & FALLOC_FL_COLLAPSE_RANGE) && |
271 | (mode & ~FALLOC_FL_COLLAPSE_RANGE)) |
272 | return -EINVAL; |
273 | |
274 | /* Insert range should only be used exclusively. */ |
275 | if ((mode & FALLOC_FL_INSERT_RANGE) && |
276 | (mode & ~FALLOC_FL_INSERT_RANGE)) |
277 | return -EINVAL; |
278 | |
279 | /* Unshare range should only be used with allocate mode. */ |
280 | if ((mode & FALLOC_FL_UNSHARE_RANGE) && |
281 | (mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE))) |
282 | return -EINVAL; |
283 | |
284 | if (!(file->f_mode & FMODE_WRITE)) |
285 | return -EBADF; |
286 | |
287 | /* |
288 | * We can only allow pure fallocate on append only files |
289 | */ |
290 | if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode)) |
291 | return -EPERM; |
292 | |
293 | if (IS_IMMUTABLE(inode)) |
294 | return -EPERM; |
295 | |
296 | /* |
297 | * We cannot allow any fallocate operation on an active swapfile |
298 | */ |
299 | if (IS_SWAPFILE(inode)) |
300 | return -ETXTBSY; |
301 | |
302 | /* |
303 | * Revalidate the write permissions, in case security policy has |
304 | * changed since the files were opened. |
305 | */ |
306 | ret = security_file_permission(file, MAY_WRITE); |
307 | if (ret) |
308 | return ret; |
309 | |
310 | if (S_ISFIFO(inode->i_mode)) |
311 | return -ESPIPE; |
312 | |
313 | /* |
314 | * Let individual file system decide if it supports preallocation |
315 | * for directories or not. |
316 | */ |
317 | if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && |
318 | !S_ISBLK(inode->i_mode)) |
319 | return -ENODEV; |
320 | |
321 | /* Check for wrap through zero too */ |
322 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) |
323 | return -EFBIG; |
324 | |
325 | if (!file->f_op->fallocate) |
326 | return -EOPNOTSUPP; |
327 | |
328 | sb_start_write(inode->i_sb); |
329 | ret = file->f_op->fallocate(file, mode, offset, len); |
330 | |
331 | /* |
332 | * Create inotify and fanotify events. |
333 | * |
334 | * To keep the logic simple always create events if fallocate succeeds. |
335 | * This implies that events are even created if the file size remains |
336 | * unchanged, e.g. when using flag FALLOC_FL_KEEP_SIZE. |
337 | */ |
338 | if (ret == 0) |
339 | fsnotify_modify(file); |
340 | |
341 | sb_end_write(inode->i_sb); |
342 | return ret; |
343 | } |
344 | EXPORT_SYMBOL_GPL(vfs_fallocate); |
345 | |
346 | SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len) |
347 | { |
348 | struct fd f = fdget(fd); |
349 | int error = -EBADF; |
350 | |
351 | if (f.file) { |
352 | error = vfs_fallocate(f.file, mode, offset, len); |
353 | fdput(f); |
354 | } |
355 | return error; |
356 | } |
357 | |
358 | /* |
359 | * access() needs to use the real uid/gid, not the effective uid/gid. |
360 | * We do this by temporarily clearing all FS-related capabilities and |
361 | * switching the fsuid/fsgid around to the real ones. |
362 | */ |
363 | SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) |
364 | { |
365 | const struct cred *old_cred; |
366 | struct cred *override_cred; |
367 | struct path path; |
368 | struct inode *inode; |
369 | struct vfsmount *mnt; |
370 | int res; |
371 | unsigned int lookup_flags = LOOKUP_FOLLOW; |
372 | |
373 | if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ |
374 | return -EINVAL; |
375 | |
376 | override_cred = prepare_creds(); |
377 | if (!override_cred) |
378 | return -ENOMEM; |
379 | |
380 | override_cred->fsuid = override_cred->uid; |
381 | override_cred->fsgid = override_cred->gid; |
382 | |
383 | if (!issecure(SECURE_NO_SETUID_FIXUP)) { |
384 | /* Clear the capabilities if we switch to a non-root user */ |
385 | kuid_t root_uid = make_kuid(override_cred->user_ns, 0); |
386 | if (!uid_eq(override_cred->uid, root_uid)) |
387 | cap_clear(override_cred->cap_effective); |
388 | else |
389 | override_cred->cap_effective = |
390 | override_cred->cap_permitted; |
391 | } |
392 | |
393 | old_cred = override_creds(override_cred); |
394 | retry: |
395 | res = user_path_at(dfd, filename, lookup_flags, &path); |
396 | if (res) |
397 | goto out; |
398 | |
399 | inode = d_backing_inode(path.dentry); |
400 | mnt = path.mnt; |
401 | |
402 | if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { |
403 | /* |
404 | * MAY_EXEC on regular files is denied if the fs is mounted |
405 | * with the "noexec" flag. |
406 | */ |
407 | res = -EACCES; |
408 | if (path_noexec(&path)) |
409 | goto out_path_release; |
410 | } |
411 | |
412 | res = inode_permission2(mnt, inode, mode | MAY_ACCESS); |
413 | /* SuS v2 requires we report a read only fs too */ |
414 | if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) |
415 | goto out_path_release; |
416 | /* |
417 | * This is a rare case where using __mnt_is_readonly() |
418 | * is OK without a mnt_want/drop_write() pair. Since |
419 | * no actual write to the fs is performed here, we do |
420 | * not need to telegraph to that to anyone. |
421 | * |
422 | * By doing this, we accept that this access is |
423 | * inherently racy and know that the fs may change |
424 | * state before we even see this result. |
425 | */ |
426 | if (__mnt_is_readonly(path.mnt)) |
427 | res = -EROFS; |
428 | |
429 | out_path_release: |
430 | path_put(&path); |
431 | if (retry_estale(res, lookup_flags)) { |
432 | lookup_flags |= LOOKUP_REVAL; |
433 | goto retry; |
434 | } |
435 | out: |
436 | revert_creds(old_cred); |
437 | put_cred(override_cred); |
438 | return res; |
439 | } |
440 | |
441 | SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) |
442 | { |
443 | return sys_faccessat(AT_FDCWD, filename, mode); |
444 | } |
445 | |
446 | SYSCALL_DEFINE1(chdir, const char __user *, filename) |
447 | { |
448 | struct path path; |
449 | int error; |
450 | unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; |
451 | retry: |
452 | error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); |
453 | if (error) |
454 | goto out; |
455 | |
456 | error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); |
457 | if (error) |
458 | goto dput_and_out; |
459 | |
460 | set_fs_pwd(current->fs, &path); |
461 | |
462 | dput_and_out: |
463 | path_put(&path); |
464 | if (retry_estale(error, lookup_flags)) { |
465 | lookup_flags |= LOOKUP_REVAL; |
466 | goto retry; |
467 | } |
468 | out: |
469 | return error; |
470 | } |
471 | |
472 | SYSCALL_DEFINE1(fchdir, unsigned int, fd) |
473 | { |
474 | struct fd f = fdget_raw(fd); |
475 | struct inode *inode; |
476 | struct vfsmount *mnt; |
477 | int error = -EBADF; |
478 | |
479 | error = -EBADF; |
480 | if (!f.file) |
481 | goto out; |
482 | |
483 | inode = file_inode(f.file); |
484 | mnt = f.file->f_path.mnt; |
485 | |
486 | error = -ENOTDIR; |
487 | if (!S_ISDIR(inode->i_mode)) |
488 | goto out_putf; |
489 | |
490 | error = inode_permission2(mnt, inode, MAY_EXEC | MAY_CHDIR); |
491 | if (!error) |
492 | set_fs_pwd(current->fs, &f.file->f_path); |
493 | out_putf: |
494 | fdput(f); |
495 | out: |
496 | return error; |
497 | } |
498 | |
499 | SYSCALL_DEFINE1(chroot, const char __user *, filename) |
500 | { |
501 | struct path path; |
502 | int error; |
503 | unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY; |
504 | retry: |
505 | error = user_path_at(AT_FDCWD, filename, lookup_flags, &path); |
506 | if (error) |
507 | goto out; |
508 | |
509 | error = inode_permission2(path.mnt, path.dentry->d_inode, MAY_EXEC | MAY_CHDIR); |
510 | if (error) |
511 | goto dput_and_out; |
512 | |
513 | error = -EPERM; |
514 | if (!ns_capable(current_user_ns(), CAP_SYS_CHROOT)) |
515 | goto dput_and_out; |
516 | error = security_path_chroot(&path); |
517 | if (error) |
518 | goto dput_and_out; |
519 | |
520 | set_fs_root(current->fs, &path); |
521 | error = 0; |
522 | dput_and_out: |
523 | path_put(&path); |
524 | if (retry_estale(error, lookup_flags)) { |
525 | lookup_flags |= LOOKUP_REVAL; |
526 | goto retry; |
527 | } |
528 | out: |
529 | return error; |
530 | } |
531 | |
532 | static int chmod_common(const struct path *path, umode_t mode) |
533 | { |
534 | struct inode *inode = path->dentry->d_inode; |
535 | struct inode *delegated_inode = NULL; |
536 | struct iattr newattrs; |
537 | int error; |
538 | |
539 | error = mnt_want_write(path->mnt); |
540 | if (error) |
541 | return error; |
542 | retry_deleg: |
543 | inode_lock(inode); |
544 | error = security_path_chmod(path, mode); |
545 | if (error) |
546 | goto out_unlock; |
547 | newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); |
548 | newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; |
549 | error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); |
550 | out_unlock: |
551 | inode_unlock(inode); |
552 | if (delegated_inode) { |
553 | error = break_deleg_wait(&delegated_inode); |
554 | if (!error) |
555 | goto retry_deleg; |
556 | } |
557 | mnt_drop_write(path->mnt); |
558 | return error; |
559 | } |
560 | |
561 | SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode) |
562 | { |
563 | struct fd f = fdget(fd); |
564 | int err = -EBADF; |
565 | |
566 | if (f.file) { |
567 | audit_file(f.file); |
568 | err = chmod_common(&f.file->f_path, mode); |
569 | fdput(f); |
570 | } |
571 | return err; |
572 | } |
573 | |
574 | SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode) |
575 | { |
576 | struct path path; |
577 | int error; |
578 | unsigned int lookup_flags = LOOKUP_FOLLOW; |
579 | retry: |
580 | error = user_path_at(dfd, filename, lookup_flags, &path); |
581 | if (!error) { |
582 | error = chmod_common(&path, mode); |
583 | path_put(&path); |
584 | if (retry_estale(error, lookup_flags)) { |
585 | lookup_flags |= LOOKUP_REVAL; |
586 | goto retry; |
587 | } |
588 | } |
589 | return error; |
590 | } |
591 | |
592 | SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode) |
593 | { |
594 | return sys_fchmodat(AT_FDCWD, filename, mode); |
595 | } |
596 | |
597 | static int chown_common(const struct path *path, uid_t user, gid_t group) |
598 | { |
599 | struct inode *inode = path->dentry->d_inode; |
600 | struct inode *delegated_inode = NULL; |
601 | int error; |
602 | struct iattr newattrs; |
603 | kuid_t uid; |
604 | kgid_t gid; |
605 | |
606 | uid = make_kuid(current_user_ns(), user); |
607 | gid = make_kgid(current_user_ns(), group); |
608 | |
609 | retry_deleg: |
610 | newattrs.ia_valid = ATTR_CTIME; |
611 | if (user != (uid_t) -1) { |
612 | if (!uid_valid(uid)) |
613 | return -EINVAL; |
614 | newattrs.ia_valid |= ATTR_UID; |
615 | newattrs.ia_uid = uid; |
616 | } |
617 | if (group != (gid_t) -1) { |
618 | if (!gid_valid(gid)) |
619 | return -EINVAL; |
620 | newattrs.ia_valid |= ATTR_GID; |
621 | newattrs.ia_gid = gid; |
622 | } |
623 | if (!S_ISDIR(inode->i_mode)) |
624 | newattrs.ia_valid |= |
625 | ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV; |
626 | inode_lock(inode); |
627 | error = security_path_chown(path, uid, gid); |
628 | if (!error) |
629 | error = notify_change2(path->mnt, path->dentry, &newattrs, &delegated_inode); |
630 | inode_unlock(inode); |
631 | if (delegated_inode) { |
632 | error = break_deleg_wait(&delegated_inode); |
633 | if (!error) |
634 | goto retry_deleg; |
635 | } |
636 | return error; |
637 | } |
638 | |
639 | SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, |
640 | gid_t, group, int, flag) |
641 | { |
642 | struct path path; |
643 | int error = -EINVAL; |
644 | int lookup_flags; |
645 | |
646 | if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) |
647 | goto out; |
648 | |
649 | lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; |
650 | if (flag & AT_EMPTY_PATH) |
651 | lookup_flags |= LOOKUP_EMPTY; |
652 | retry: |
653 | error = user_path_at(dfd, filename, lookup_flags, &path); |
654 | if (error) |
655 | goto out; |
656 | error = mnt_want_write(path.mnt); |
657 | if (error) |
658 | goto out_release; |
659 | error = chown_common(&path, user, group); |
660 | mnt_drop_write(path.mnt); |
661 | out_release: |
662 | path_put(&path); |
663 | if (retry_estale(error, lookup_flags)) { |
664 | lookup_flags |= LOOKUP_REVAL; |
665 | goto retry; |
666 | } |
667 | out: |
668 | return error; |
669 | } |
670 | |
671 | SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) |
672 | { |
673 | return sys_fchownat(AT_FDCWD, filename, user, group, 0); |
674 | } |
675 | |
676 | SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) |
677 | { |
678 | return sys_fchownat(AT_FDCWD, filename, user, group, |
679 | AT_SYMLINK_NOFOLLOW); |
680 | } |
681 | |
682 | SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) |
683 | { |
684 | struct fd f = fdget(fd); |
685 | int error = -EBADF; |
686 | |
687 | if (!f.file) |
688 | goto out; |
689 | |
690 | error = mnt_want_write_file(f.file); |
691 | if (error) |
692 | goto out_fput; |
693 | audit_file(f.file); |
694 | error = chown_common(&f.file->f_path, user, group); |
695 | mnt_drop_write_file(f.file); |
696 | out_fput: |
697 | fdput(f); |
698 | out: |
699 | return error; |
700 | } |
701 | |
702 | int open_check_o_direct(struct file *f) |
703 | { |
704 | /* NB: we're sure to have correct a_ops only after f_op->open */ |
705 | if (f->f_flags & O_DIRECT) { |
706 | if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) |
707 | return -EINVAL; |
708 | } |
709 | return 0; |
710 | } |
711 | |
712 | static int do_dentry_open(struct file *f, |
713 | struct inode *inode, |
714 | int (*open)(struct inode *, struct file *), |
715 | const struct cred *cred) |
716 | { |
717 | static const struct file_operations empty_fops = {}; |
718 | int error; |
719 | |
720 | f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | |
721 | FMODE_PREAD | FMODE_PWRITE; |
722 | |
723 | path_get(&f->f_path); |
724 | f->f_inode = inode; |
725 | f->f_mapping = inode->i_mapping; |
726 | |
727 | if (unlikely(f->f_flags & O_PATH)) { |
728 | f->f_mode = FMODE_PATH; |
729 | f->f_op = &empty_fops; |
730 | return 0; |
731 | } |
732 | |
733 | /* Any file opened for execve()/uselib() has to be a regular file. */ |
734 | if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { |
735 | error = -EACCES; |
736 | goto cleanup_file; |
737 | } |
738 | |
739 | if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { |
740 | error = get_write_access(inode); |
741 | if (unlikely(error)) |
742 | goto cleanup_file; |
743 | error = __mnt_want_write(f->f_path.mnt); |
744 | if (unlikely(error)) { |
745 | put_write_access(inode); |
746 | goto cleanup_file; |
747 | } |
748 | f->f_mode |= FMODE_WRITER; |
749 | } |
750 | |
751 | /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ |
752 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) |
753 | f->f_mode |= FMODE_ATOMIC_POS; |
754 | |
755 | f->f_op = fops_get(inode->i_fop); |
756 | if (unlikely(WARN_ON(!f->f_op))) { |
757 | error = -ENODEV; |
758 | goto cleanup_all; |
759 | } |
760 | |
761 | error = security_file_open(f, cred); |
762 | if (error) |
763 | goto cleanup_all; |
764 | |
765 | error = break_lease(locks_inode(f), f->f_flags); |
766 | if (error) |
767 | goto cleanup_all; |
768 | |
769 | if (!open) |
770 | open = f->f_op->open; |
771 | if (open) { |
772 | error = open(inode, f); |
773 | if (error) |
774 | goto cleanup_all; |
775 | } |
776 | if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) |
777 | i_readcount_inc(inode); |
778 | if ((f->f_mode & FMODE_READ) && |
779 | likely(f->f_op->read || f->f_op->read_iter)) |
780 | f->f_mode |= FMODE_CAN_READ; |
781 | if ((f->f_mode & FMODE_WRITE) && |
782 | likely(f->f_op->write || f->f_op->write_iter)) |
783 | f->f_mode |= FMODE_CAN_WRITE; |
784 | |
785 | f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); |
786 | |
787 | file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); |
788 | |
789 | return 0; |
790 | |
791 | cleanup_all: |
792 | fops_put(f->f_op); |
793 | if (f->f_mode & FMODE_WRITER) { |
794 | put_write_access(inode); |
795 | __mnt_drop_write(f->f_path.mnt); |
796 | } |
797 | cleanup_file: |
798 | path_put(&f->f_path); |
799 | f->f_path.mnt = NULL; |
800 | f->f_path.dentry = NULL; |
801 | f->f_inode = NULL; |
802 | return error; |
803 | } |
804 | |
805 | /** |
806 | * finish_open - finish opening a file |
807 | * @file: file pointer |
808 | * @dentry: pointer to dentry |
809 | * @open: open callback |
810 | * @opened: state of open |
811 | * |
812 | * This can be used to finish opening a file passed to i_op->atomic_open(). |
813 | * |
814 | * If the open callback is set to NULL, then the standard f_op->open() |
815 | * filesystem callback is substituted. |
816 | * |
817 | * NB: the dentry reference is _not_ consumed. If, for example, the dentry is |
818 | * the return value of d_splice_alias(), then the caller needs to perform dput() |
819 | * on it after finish_open(). |
820 | * |
821 | * On successful return @file is a fully instantiated open file. After this, if |
822 | * an error occurs in ->atomic_open(), it needs to clean up with fput(). |
823 | * |
824 | * Returns zero on success or -errno if the open failed. |
825 | */ |
826 | int finish_open(struct file *file, struct dentry *dentry, |
827 | int (*open)(struct inode *, struct file *), |
828 | int *opened) |
829 | { |
830 | int error; |
831 | BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ |
832 | |
833 | file->f_path.dentry = dentry; |
834 | error = do_dentry_open(file, d_backing_inode(dentry), open, |
835 | current_cred()); |
836 | if (!error) |
837 | *opened |= FILE_OPENED; |
838 | |
839 | return error; |
840 | } |
841 | EXPORT_SYMBOL(finish_open); |
842 | |
843 | /** |
844 | * finish_no_open - finish ->atomic_open() without opening the file |
845 | * |
846 | * @file: file pointer |
847 | * @dentry: dentry or NULL (as returned from ->lookup()) |
848 | * |
849 | * This can be used to set the result of a successful lookup in ->atomic_open(). |
850 | * |
851 | * NB: unlike finish_open() this function does consume the dentry reference and |
852 | * the caller need not dput() it. |
853 | * |
854 | * Returns "1" which must be the return value of ->atomic_open() after having |
855 | * called this function. |
856 | */ |
857 | int finish_no_open(struct file *file, struct dentry *dentry) |
858 | { |
859 | file->f_path.dentry = dentry; |
860 | return 1; |
861 | } |
862 | EXPORT_SYMBOL(finish_no_open); |
863 | |
864 | char *file_path(struct file *filp, char *buf, int buflen) |
865 | { |
866 | return d_path(&filp->f_path, buf, buflen); |
867 | } |
868 | EXPORT_SYMBOL(file_path); |
869 | |
870 | /** |
871 | * vfs_open - open the file at the given path |
872 | * @path: path to open |
873 | * @file: newly allocated file with f_flag initialized |
874 | * @cred: credentials to use |
875 | */ |
876 | int vfs_open(const struct path *path, struct file *file, |
877 | const struct cred *cred) |
878 | { |
879 | struct dentry *dentry = d_real(path->dentry, NULL, file->f_flags); |
880 | |
881 | if (IS_ERR(dentry)) |
882 | return PTR_ERR(dentry); |
883 | |
884 | file->f_path = *path; |
885 | return do_dentry_open(file, d_backing_inode(dentry), NULL, cred); |
886 | } |
887 | |
888 | struct file *dentry_open(const struct path *path, int flags, |
889 | const struct cred *cred) |
890 | { |
891 | int error; |
892 | struct file *f; |
893 | |
894 | validate_creds(cred); |
895 | |
896 | /* We must always pass in a valid mount pointer. */ |
897 | BUG_ON(!path->mnt); |
898 | |
899 | f = get_empty_filp(); |
900 | if (!IS_ERR(f)) { |
901 | f->f_flags = flags; |
902 | error = vfs_open(path, f, cred); |
903 | if (!error) { |
904 | /* from now on we need fput() to dispose of f */ |
905 | error = open_check_o_direct(f); |
906 | if (error) { |
907 | fput(f); |
908 | f = ERR_PTR(error); |
909 | } |
910 | } else { |
911 | put_filp(f); |
912 | f = ERR_PTR(error); |
913 | } |
914 | } |
915 | return f; |
916 | } |
917 | EXPORT_SYMBOL(dentry_open); |
918 | |
919 | static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) |
920 | { |
921 | int lookup_flags = 0; |
922 | int acc_mode = ACC_MODE(flags); |
923 | |
924 | /* |
925 | * Clear out all open flags we don't know about so that we don't report |
926 | * them in fcntl(F_GETFD) or similar interfaces. |
927 | */ |
928 | flags &= VALID_OPEN_FLAGS; |
929 | |
930 | if (flags & (O_CREAT | __O_TMPFILE)) |
931 | op->mode = (mode & S_IALLUGO) | S_IFREG; |
932 | else |
933 | op->mode = 0; |
934 | |
935 | /* Must never be set by userspace */ |
936 | flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC; |
937 | |
938 | /* |
939 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only |
940 | * check for O_DSYNC if the need any syncing at all we enforce it's |
941 | * always set instead of having to deal with possibly weird behaviour |
942 | * for malicious applications setting only __O_SYNC. |
943 | */ |
944 | if (flags & __O_SYNC) |
945 | flags |= O_DSYNC; |
946 | |
947 | if (flags & __O_TMPFILE) { |
948 | if ((flags & O_TMPFILE_MASK) != O_TMPFILE) |
949 | return -EINVAL; |
950 | if (!(acc_mode & MAY_WRITE)) |
951 | return -EINVAL; |
952 | } else if (flags & O_PATH) { |
953 | /* |
954 | * If we have O_PATH in the open flag. Then we |
955 | * cannot have anything other than the below set of flags |
956 | */ |
957 | flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; |
958 | acc_mode = 0; |
959 | } |
960 | |
961 | op->open_flag = flags; |
962 | |
963 | /* O_TRUNC implies we need access checks for write permissions */ |
964 | if (flags & O_TRUNC) |
965 | acc_mode |= MAY_WRITE; |
966 | |
967 | /* Allow the LSM permission hook to distinguish append |
968 | access from general write access. */ |
969 | if (flags & O_APPEND) |
970 | acc_mode |= MAY_APPEND; |
971 | |
972 | op->acc_mode = acc_mode; |
973 | |
974 | op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN; |
975 | |
976 | if (flags & O_CREAT) { |
977 | op->intent |= LOOKUP_CREATE; |
978 | if (flags & O_EXCL) |
979 | op->intent |= LOOKUP_EXCL; |
980 | } |
981 | |
982 | if (flags & O_DIRECTORY) |
983 | lookup_flags |= LOOKUP_DIRECTORY; |
984 | if (!(flags & O_NOFOLLOW)) |
985 | lookup_flags |= LOOKUP_FOLLOW; |
986 | op->lookup_flags = lookup_flags; |
987 | return 0; |
988 | } |
989 | |
990 | /** |
991 | * file_open_name - open file and return file pointer |
992 | * |
993 | * @name: struct filename containing path to open |
994 | * @flags: open flags as per the open(2) second argument |
995 | * @mode: mode for the new file if O_CREAT is set, else ignored |
996 | * |
997 | * This is the helper to open a file from kernelspace if you really |
998 | * have to. But in generally you should not do this, so please move |
999 | * along, nothing to see here.. |
1000 | */ |
1001 | struct file *file_open_name(struct filename *name, int flags, umode_t mode) |
1002 | { |
1003 | struct open_flags op; |
1004 | int err = build_open_flags(flags, mode, &op); |
1005 | return err ? ERR_PTR(err) : do_filp_open(AT_FDCWD, name, &op); |
1006 | } |
1007 | |
1008 | /** |
1009 | * filp_open - open file and return file pointer |
1010 | * |
1011 | * @filename: path to open |
1012 | * @flags: open flags as per the open(2) second argument |
1013 | * @mode: mode for the new file if O_CREAT is set, else ignored |
1014 | * |
1015 | * This is the helper to open a file from kernelspace if you really |
1016 | * have to. But in generally you should not do this, so please move |
1017 | * along, nothing to see here.. |
1018 | */ |
1019 | struct file *filp_open(const char *filename, int flags, umode_t mode) |
1020 | { |
1021 | struct filename *name = getname_kernel(filename); |
1022 | struct file *file = ERR_CAST(name); |
1023 | |
1024 | if (!IS_ERR(name)) { |
1025 | file = file_open_name(name, flags, mode); |
1026 | putname(name); |
1027 | } |
1028 | return file; |
1029 | } |
1030 | EXPORT_SYMBOL(filp_open); |
1031 | |
1032 | struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, |
1033 | const char *filename, int flags, umode_t mode) |
1034 | { |
1035 | struct open_flags op; |
1036 | int err = build_open_flags(flags, mode, &op); |
1037 | if (err) |
1038 | return ERR_PTR(err); |
1039 | return do_file_open_root(dentry, mnt, filename, &op); |
1040 | } |
1041 | EXPORT_SYMBOL(file_open_root); |
1042 | |
1043 | struct file *filp_clone_open(struct file *oldfile) |
1044 | { |
1045 | struct file *file; |
1046 | int retval; |
1047 | |
1048 | file = get_empty_filp(); |
1049 | if (IS_ERR(file)) |
1050 | return file; |
1051 | |
1052 | file->f_flags = oldfile->f_flags; |
1053 | retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred); |
1054 | if (retval) { |
1055 | put_filp(file); |
1056 | return ERR_PTR(retval); |
1057 | } |
1058 | |
1059 | return file; |
1060 | } |
1061 | EXPORT_SYMBOL(filp_clone_open); |
1062 | |
1063 | long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode) |
1064 | { |
1065 | struct open_flags op; |
1066 | int fd = build_open_flags(flags, mode, &op); |
1067 | struct filename *tmp; |
1068 | |
1069 | if (fd) |
1070 | return fd; |
1071 | |
1072 | tmp = getname(filename); |
1073 | if (IS_ERR(tmp)) |
1074 | return PTR_ERR(tmp); |
1075 | |
1076 | fd = get_unused_fd_flags(flags); |
1077 | if (fd >= 0) { |
1078 | struct file *f = do_filp_open(dfd, tmp, &op); |
1079 | if (IS_ERR(f)) { |
1080 | put_unused_fd(fd); |
1081 | fd = PTR_ERR(f); |
1082 | } else { |
1083 | fsnotify_open(f); |
1084 | fd_install(fd, f); |
1085 | } |
1086 | } |
1087 | putname(tmp); |
1088 | return fd; |
1089 | } |
1090 | |
1091 | SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode) |
1092 | { |
1093 | if (force_o_largefile()) |
1094 | flags |= O_LARGEFILE; |
1095 | |
1096 | return do_sys_open(AT_FDCWD, filename, flags, mode); |
1097 | } |
1098 | |
1099 | SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, |
1100 | umode_t, mode) |
1101 | { |
1102 | if (force_o_largefile()) |
1103 | flags |= O_LARGEFILE; |
1104 | |
1105 | return do_sys_open(dfd, filename, flags, mode); |
1106 | } |
1107 | |
1108 | #ifndef __alpha__ |
1109 | |
1110 | /* |
1111 | * For backward compatibility? Maybe this should be moved |
1112 | * into arch/i386 instead? |
1113 | */ |
1114 | SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode) |
1115 | { |
1116 | return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); |
1117 | } |
1118 | |
1119 | #endif |
1120 | |
1121 | /* |
1122 | * "id" is the POSIX thread ID. We use the |
1123 | * files pointer for this.. |
1124 | */ |
1125 | int filp_close(struct file *filp, fl_owner_t id) |
1126 | { |
1127 | int retval = 0; |
1128 | |
1129 | if (!file_count(filp)) { |
1130 | printk(KERN_ERR "VFS: Close: file count is 0\n"); |
1131 | return 0; |
1132 | } |
1133 | |
1134 | if (filp->f_op->flush) |
1135 | retval = filp->f_op->flush(filp, id); |
1136 | |
1137 | if (likely(!(filp->f_mode & FMODE_PATH))) { |
1138 | dnotify_flush(filp, id); |
1139 | locks_remove_posix(filp, id); |
1140 | } |
1141 | fput(filp); |
1142 | return retval; |
1143 | } |
1144 | |
1145 | EXPORT_SYMBOL(filp_close); |
1146 | |
1147 | /* |
1148 | * Careful here! We test whether the file pointer is NULL before |
1149 | * releasing the fd. This ensures that one clone task can't release |
1150 | * an fd while another clone is opening it. |
1151 | */ |
1152 | SYSCALL_DEFINE1(close, unsigned int, fd) |
1153 | { |
1154 | int retval = __close_fd(current->files, fd); |
1155 | |
1156 | /* can't restart close syscall because file table entry was cleared */ |
1157 | if (unlikely(retval == -ERESTARTSYS || |
1158 | retval == -ERESTARTNOINTR || |
1159 | retval == -ERESTARTNOHAND || |
1160 | retval == -ERESTART_RESTARTBLOCK)) |
1161 | retval = -EINTR; |
1162 | |
1163 | return retval; |
1164 | } |
1165 | EXPORT_SYMBOL(sys_close); |
1166 | |
1167 | /* |
1168 | * This routine simulates a hangup on the tty, to arrange that users |
1169 | * are given clean terminals at login time. |
1170 | */ |
1171 | SYSCALL_DEFINE0(vhangup) |
1172 | { |
1173 | if (capable(CAP_SYS_TTY_CONFIG)) { |
1174 | tty_vhangup_self(); |
1175 | return 0; |
1176 | } |
1177 | return -EPERM; |
1178 | } |
1179 | |
1180 | /* |
1181 | * Called when an inode is about to be open. |
1182 | * We use this to disallow opening large files on 32bit systems if |
1183 | * the caller didn't specify O_LARGEFILE. On 64bit systems we force |
1184 | * on this flag in sys_open. |
1185 | */ |
1186 | int generic_file_open(struct inode * inode, struct file * filp) |
1187 | { |
1188 | if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) |
1189 | return -EOVERFLOW; |
1190 | return 0; |
1191 | } |
1192 | |
1193 | EXPORT_SYMBOL(generic_file_open); |
1194 | |
1195 | /* |
1196 | * This is used by subsystems that don't want seekable |
1197 | * file descriptors. The function is not supposed to ever fail, the only |
1198 | * reason it returns an 'int' and not 'void' is so that it can be plugged |
1199 | * directly into file_operations structure. |
1200 | */ |
1201 | int nonseekable_open(struct inode *inode, struct file *filp) |
1202 | { |
1203 | filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); |
1204 | return 0; |
1205 | } |
1206 | |
1207 | EXPORT_SYMBOL(nonseekable_open); |
1208 |