summaryrefslogtreecommitdiff
path: root/security/commoncap.c (plain)
blob: 1076608ff38f8383a063b32618ce6ca40b4a951b
1/* Common capabilities, needed by capability.o.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 */
9
10#include <linux/capability.h>
11#include <linux/audit.h>
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/kernel.h>
15#include <linux/lsm_hooks.h>
16#include <linux/file.h>
17#include <linux/mm.h>
18#include <linux/mman.h>
19#include <linux/pagemap.h>
20#include <linux/swap.h>
21#include <linux/skbuff.h>
22#include <linux/netlink.h>
23#include <linux/ptrace.h>
24#include <linux/xattr.h>
25#include <linux/hugetlb.h>
26#include <linux/mount.h>
27#include <linux/sched.h>
28#include <linux/prctl.h>
29#include <linux/securebits.h>
30#include <linux/user_namespace.h>
31#include <linux/binfmts.h>
32#include <linux/personality.h>
33
34#ifdef CONFIG_ANDROID_PARANOID_NETWORK
35#include <linux/android_aid.h>
36#endif
37
38/*
39 * If a non-root user executes a setuid-root binary in
40 * !secure(SECURE_NOROOT) mode, then we raise capabilities.
41 * However if fE is also set, then the intent is for only
42 * the file capabilities to be applied, and the setuid-root
43 * bit is left on either to change the uid (plausible) or
44 * to get full privilege on a kernel without file capabilities
45 * support. So in that case we do not raise capabilities.
46 *
47 * Warn if that happens, once per boot.
48 */
49static void warn_setuid_and_fcaps_mixed(const char *fname)
50{
51 static int warned;
52 if (!warned) {
53 printk(KERN_INFO "warning: `%s' has both setuid-root and"
54 " effective capabilities. Therefore not raising all"
55 " capabilities.\n", fname);
56 warned = 1;
57 }
58}
59
60/**
61 * __cap_capable - Determine whether a task has a particular effective capability
62 * @cred: The credentials to use
63 * @ns: The user namespace in which we need the capability
64 * @cap: The capability to check for
65 * @audit: Whether to write an audit message or not
66 *
67 * Determine whether the nominated task has the specified capability amongst
68 * its effective set, returning 0 if it does, -ve if it does not.
69 *
70 * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable()
71 * and has_capability() functions. That is, it has the reverse semantics:
72 * cap_has_capability() returns 0 when a task has a capability, but the
73 * kernel's capable() and has_capability() returns 1 for this case.
74 */
75int __cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
76 int cap, int audit)
77{
78 struct user_namespace *ns = targ_ns;
79
80 /* See if cred has the capability in the target user namespace
81 * by examining the target user namespace and all of the target
82 * user namespace's parents.
83 */
84 for (;;) {
85 /* Do we have the necessary capabilities? */
86 if (ns == cred->user_ns)
87 return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
88
89 /* Have we tried all of the parent namespaces? */
90 if (ns == &init_user_ns)
91 return -EPERM;
92
93 /*
94 * The owner of the user namespace in the parent of the
95 * user namespace has all caps.
96 */
97 if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
98 return 0;
99
100 /*
101 * If you have a capability in a parent user ns, then you have
102 * it over all children user namespaces as well.
103 */
104 ns = ns->parent;
105 }
106
107 /* We never get here */
108}
109
/**
 * cap_capable - Determine whether a task has a particular effective capability
 * @cred: The credentials to use
 * @targ_ns: The user namespace in which we need the capability
 * @cap: The capability to check for
 * @audit: Whether to write an audit message or not (passed to __cap_capable())
 *
 * Thin wrapper around __cap_capable().  When
 * CONFIG_ANDROID_PARANOID_NETWORK is enabled, a request for CAP_NET_RAW or
 * CAP_NET_ADMIN that __cap_capable() denied is still granted if
 * in_egroup_p() reports the current task as a member of AID_NET_RAW or
 * AID_NET_ADMIN respectively; a deprecation notice is logged each time
 * such an implicit grant happens.
 *
 * Returns 0 if the capability is granted, otherwise the (negative) result
 * of __cap_capable().
 */
int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
		int cap, int audit)
{
	int ret = __cap_capable(cred, targ_ns, cap, audit);

#ifdef CONFIG_ANDROID_PARANOID_NETWORK
	/* Legacy Android behaviour: group membership implies the capability. */
	if (ret != 0 && cap == CAP_NET_RAW && in_egroup_p(AID_NET_RAW)) {
		printk("Process %s granted CAP_NET_RAW from Android group net_raw.\n", current->comm);
		printk(" Please update the .rc file to explicitly set 'capabilities NET_RAW'\n");
		printk(" Implicit grants are deprecated and will be removed in the future.\n");
		return 0;
	}
	if (ret != 0 && cap == CAP_NET_ADMIN && in_egroup_p(AID_NET_ADMIN)) {
		printk("Process %s granted CAP_NET_ADMIN from Android group net_admin.\n", current->comm);
		printk(" Please update the .rc file to explicitly set 'capabilities NET_ADMIN'\n");
		printk(" Implicit grants are deprecated and will be removed in the future.\n");
		return 0;
	}
#endif
	return ret;
}
131/**
132 * cap_settime - Determine whether the current process may set the system clock
133 * @ts: The time to set
134 * @tz: The timezone to set
135 *
136 * Determine whether the current process may set the system clock and timezone
137 * information, returning 0 if permission granted, -ve if denied.
138 */
139int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
140{
141 if (!capable(CAP_SYS_TIME))
142 return -EPERM;
143 return 0;
144}
145
146/**
147 * cap_ptrace_access_check - Determine whether the current process may access
148 * another
149 * @child: The process to be accessed
150 * @mode: The mode of attachment.
151 *
152 * If we are in the same or an ancestor user_ns and have all the target
153 * task's capabilities, then ptrace access is allowed.
154 * If we have the ptrace capability to the target user_ns, then ptrace
155 * access is allowed.
156 * Else denied.
157 *
158 * Determine whether a process may access another, returning 0 if permission
159 * granted, -ve if denied.
160 */
161int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
162{
163 int ret = 0;
164 const struct cred *cred, *child_cred;
165 const kernel_cap_t *caller_caps;
166
167 rcu_read_lock();
168 cred = current_cred();
169 child_cred = __task_cred(child);
170 if (mode & PTRACE_MODE_FSCREDS)
171 caller_caps = &cred->cap_effective;
172 else
173 caller_caps = &cred->cap_permitted;
174 if (cred->user_ns == child_cred->user_ns &&
175 cap_issubset(child_cred->cap_permitted, *caller_caps))
176 goto out;
177 if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
178 goto out;
179 ret = -EPERM;
180out:
181 rcu_read_unlock();
182 return ret;
183}
184
185/**
186 * cap_ptrace_traceme - Determine whether another process may trace the current
187 * @parent: The task proposed to be the tracer
188 *
189 * If parent is in the same or an ancestor user_ns and has all current's
190 * capabilities, then ptrace access is allowed.
191 * If parent has the ptrace capability to current's user_ns, then ptrace
192 * access is allowed.
193 * Else denied.
194 *
195 * Determine whether the nominated task is permitted to trace the current
196 * process, returning 0 if permission is granted, -ve if denied.
197 */
198int cap_ptrace_traceme(struct task_struct *parent)
199{
200 int ret = 0;
201 const struct cred *cred, *child_cred;
202
203 rcu_read_lock();
204 cred = __task_cred(parent);
205 child_cred = current_cred();
206 if (cred->user_ns == child_cred->user_ns &&
207 cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
208 goto out;
209 if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
210 goto out;
211 ret = -EPERM;
212out:
213 rcu_read_unlock();
214 return ret;
215}
216
217/**
218 * cap_capget - Retrieve a task's capability sets
219 * @target: The task from which to retrieve the capability sets
220 * @effective: The place to record the effective set
221 * @inheritable: The place to record the inheritable set
222 * @permitted: The place to record the permitted set
223 *
224 * This function retrieves the capabilities of the nominated task and returns
225 * them to the caller.
226 */
227int cap_capget(struct task_struct *target, kernel_cap_t *effective,
228 kernel_cap_t *inheritable, kernel_cap_t *permitted)
229{
230 const struct cred *cred;
231
232 /* Derived from kernel/capability.c:sys_capget. */
233 rcu_read_lock();
234 cred = __task_cred(target);
235 *effective = cred->cap_effective;
236 *inheritable = cred->cap_inheritable;
237 *permitted = cred->cap_permitted;
238 rcu_read_unlock();
239 return 0;
240}
241
242/*
243 * Determine whether the inheritable capabilities are limited to the old
244 * permitted set. Returns 1 if they are limited, 0 if they are not.
245 */
246static inline int cap_inh_is_capped(void)
247{
248
249 /* they are so limited unless the current task has the CAP_SETPCAP
250 * capability
251 */
252 if (cap_capable(current_cred(), current_cred()->user_ns,
253 CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
254 return 0;
255 return 1;
256}
257
258/**
259 * cap_capset - Validate and apply proposed changes to current's capabilities
260 * @new: The proposed new credentials; alterations should be made here
261 * @old: The current task's current credentials
262 * @effective: A pointer to the proposed new effective capabilities set
263 * @inheritable: A pointer to the proposed new inheritable capabilities set
264 * @permitted: A pointer to the proposed new permitted capabilities set
265 *
266 * This function validates and applies a proposed mass change to the current
267 * process's capability sets. The changes are made to the proposed new
268 * credentials, and assuming no error, will be committed by the caller of LSM.
269 */
270int cap_capset(struct cred *new,
271 const struct cred *old,
272 const kernel_cap_t *effective,
273 const kernel_cap_t *inheritable,
274 const kernel_cap_t *permitted)
275{
276 if (cap_inh_is_capped() &&
277 !cap_issubset(*inheritable,
278 cap_combine(old->cap_inheritable,
279 old->cap_permitted)))
280 /* incapable of using this inheritable set */
281 return -EPERM;
282
283 if (!cap_issubset(*inheritable,
284 cap_combine(old->cap_inheritable,
285 old->cap_bset)))
286 /* no new pI capabilities outside bounding set */
287 return -EPERM;
288
289 /* verify restrictions on target's new Permitted set */
290 if (!cap_issubset(*permitted, old->cap_permitted))
291 return -EPERM;
292
293 /* verify the _new_Effective_ is a subset of the _new_Permitted_ */
294 if (!cap_issubset(*effective, *permitted))
295 return -EPERM;
296
297 new->cap_effective = *effective;
298 new->cap_inheritable = *inheritable;
299 new->cap_permitted = *permitted;
300
301 /*
302 * Mask off ambient bits that are no longer both permitted and
303 * inheritable.
304 */
305 new->cap_ambient = cap_intersect(new->cap_ambient,
306 cap_intersect(*permitted,
307 *inheritable));
308 if (WARN_ON(!cap_ambient_invariant_ok(new)))
309 return -EINVAL;
310 return 0;
311}
312
313/*
314 * Clear proposed capability sets for execve().
315 */
316static inline void bprm_clear_caps(struct linux_binprm *bprm)
317{
318 cap_clear(bprm->cred->cap_permitted);
319 bprm->cap_effective = false;
320}
321
322/**
323 * cap_inode_need_killpriv - Determine if inode change affects privileges
324 * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
325 *
326 * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
327 * affects the security markings on that inode, and if it is, should
328 * inode_killpriv() be invoked or the change rejected?
329 *
330 * Returns 0 if granted; +ve if granted, but inode_killpriv() is required; and
331 * -ve to deny the change.
332 */
333int cap_inode_need_killpriv(struct dentry *dentry)
334{
335 struct inode *inode = d_backing_inode(dentry);
336 int error;
337
338 error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
339 return error > 0;
340}
341
342/**
343 * cap_inode_killpriv - Erase the security markings on an inode
344 * @dentry: The inode/dentry to alter
345 *
346 * Erase the privilege-enhancing security markings on an inode.
347 *
348 * Returns 0 if successful, -ve on error.
349 */
350int cap_inode_killpriv(struct dentry *dentry)
351{
352 int error;
353
354 error = __vfs_removexattr(dentry, XATTR_NAME_CAPS);
355 if (error == -EOPNOTSUPP)
356 error = 0;
357 return error;
358}
359
360/*
361 * Calculate the new process capability sets from the capability sets attached
362 * to a file.
363 */
364static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
365 struct linux_binprm *bprm,
366 bool *effective,
367 bool *has_cap)
368{
369 struct cred *new = bprm->cred;
370 unsigned i;
371 int ret = 0;
372
373 if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
374 *effective = true;
375
376 if (caps->magic_etc & VFS_CAP_REVISION_MASK)
377 *has_cap = true;
378
379 CAP_FOR_EACH_U32(i) {
380 __u32 permitted = caps->permitted.cap[i];
381 __u32 inheritable = caps->inheritable.cap[i];
382
383 /*
384 * pP' = (X & fP) | (pI & fI)
385 * The addition of pA' is handled later.
386 */
387 new->cap_permitted.cap[i] =
388 (new->cap_bset.cap[i] & permitted) |
389 (new->cap_inheritable.cap[i] & inheritable);
390
391 if (permitted & ~new->cap_permitted.cap[i])
392 /* insufficient to execute correctly */
393 ret = -EPERM;
394 }
395
396 /*
397 * For legacy apps, with no internal support for recognizing they
398 * do not have enough capabilities, we return an error if they are
399 * missing some "forced" (aka file-permitted) capabilities.
400 */
401 return *effective ? ret : 0;
402}
403
404/*
405 * Extract the on-exec-apply capability sets for an executable file.
406 */
407int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
408{
409 struct inode *inode = d_backing_inode(dentry);
410 __u32 magic_etc;
411 unsigned tocopy, i;
412 int size;
413 struct vfs_cap_data caps;
414
415 memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
416
417 if (!inode)
418 return -ENODATA;
419
420 size = __vfs_getxattr((struct dentry *)dentry, inode,
421 XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ);
422 if (size == -ENODATA || size == -EOPNOTSUPP)
423 /* no data, that's ok */
424 return -ENODATA;
425 if (size < 0)
426 return size;
427
428 if (size < sizeof(magic_etc))
429 return -EINVAL;
430
431 cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps.magic_etc);
432
433 switch (magic_etc & VFS_CAP_REVISION_MASK) {
434 case VFS_CAP_REVISION_1:
435 if (size != XATTR_CAPS_SZ_1)
436 return -EINVAL;
437 tocopy = VFS_CAP_U32_1;
438 break;
439 case VFS_CAP_REVISION_2:
440 if (size != XATTR_CAPS_SZ_2)
441 return -EINVAL;
442 tocopy = VFS_CAP_U32_2;
443 break;
444 default:
445 return -EINVAL;
446 }
447
448 CAP_FOR_EACH_U32(i) {
449 if (i >= tocopy)
450 break;
451 cpu_caps->permitted.cap[i] = le32_to_cpu(caps.data[i].permitted);
452 cpu_caps->inheritable.cap[i] = le32_to_cpu(caps.data[i].inheritable);
453 }
454
455 cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
456 cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK;
457
458 return 0;
459}
460
461/*
462 * Attempt to get the on-exec apply capability sets for an executable file from
463 * its xattrs and, if present, apply them to the proposed credentials being
464 * constructed by execve().
465 */
466static int get_file_caps(struct linux_binprm *bprm, bool *effective, bool *has_cap)
467{
468 int rc = 0;
469 struct cpu_vfs_cap_data vcaps;
470
471 bprm_clear_caps(bprm);
472
473 if (!file_caps_enabled)
474 return 0;
475
476 if (!mnt_may_suid(bprm->file->f_path.mnt))
477 return 0;
478
479 /*
480 * This check is redundant with mnt_may_suid() but is kept to make
481 * explicit that capability bits are limited to s_user_ns and its
482 * descendants.
483 */
484 if (!current_in_userns(bprm->file->f_path.mnt->mnt_sb->s_user_ns))
485 return 0;
486
487 rc = get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
488 if (rc < 0) {
489 if (rc == -EINVAL)
490 printk(KERN_NOTICE "%s: get_vfs_caps_from_disk returned %d for %s\n",
491 __func__, rc, bprm->filename);
492 else if (rc == -ENODATA)
493 rc = 0;
494 goto out;
495 }
496
497 rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_cap);
498 if (rc == -EINVAL)
499 printk(KERN_NOTICE "%s: cap_from_disk returned %d for %s\n",
500 __func__, rc, bprm->filename);
501
502out:
503 if (rc)
504 bprm_clear_caps(bprm);
505
506 return rc;
507}
508
509/**
510 * cap_bprm_set_creds - Set up the proposed credentials for execve().
511 * @bprm: The execution parameters, including the proposed creds
512 *
513 * Set up the proposed credentials for a new execution context being
514 * constructed by execve(). The proposed creds in @bprm->cred is altered,
515 * which won't take effect immediately. Returns 0 if successful, -ve on error.
516 */
517int cap_bprm_set_creds(struct linux_binprm *bprm)
518{
519 const struct cred *old = current_cred();
520 struct cred *new = bprm->cred;
521 bool effective, has_cap = false, is_setid;
522 int ret;
523 kuid_t root_uid;
524
525 if (WARN_ON(!cap_ambient_invariant_ok(old)))
526 return -EPERM;
527
528 effective = false;
529 ret = get_file_caps(bprm, &effective, &has_cap);
530 if (ret < 0)
531 return ret;
532
533 root_uid = make_kuid(new->user_ns, 0);
534
535 if (!issecure(SECURE_NOROOT)) {
536 /*
537 * If the legacy file capability is set, then don't set privs
538 * for a setuid root binary run by a non-root user. Do set it
539 * for a root user just to cause least surprise to an admin.
540 */
541 if (has_cap && !uid_eq(new->uid, root_uid) && uid_eq(new->euid, root_uid)) {
542 warn_setuid_and_fcaps_mixed(bprm->filename);
543 goto skip;
544 }
545 /*
546 * To support inheritance of root-permissions and suid-root
547 * executables under compatibility mode, we override the
548 * capability sets for the file.
549 *
550 * If only the real uid is 0, we do not set the effective bit.
551 */
552 if (uid_eq(new->euid, root_uid) || uid_eq(new->uid, root_uid)) {
553 /* pP' = (cap_bset & ~0) | (pI & ~0) */
554 new->cap_permitted = cap_combine(old->cap_bset,
555 old->cap_inheritable);
556 }
557 if (uid_eq(new->euid, root_uid))
558 effective = true;
559 }
560skip:
561
562 /* if we have fs caps, clear dangerous personality flags */
563 if (!cap_issubset(new->cap_permitted, old->cap_permitted))
564 bprm->per_clear |= PER_CLEAR_ON_SETID;
565
566
567 /* Don't let someone trace a set[ug]id/setpcap binary with the revised
568 * credentials unless they have the appropriate permit.
569 *
570 * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
571 */
572 is_setid = !uid_eq(new->euid, old->uid) || !gid_eq(new->egid, old->gid);
573
574 if ((is_setid ||
575 !cap_issubset(new->cap_permitted, old->cap_permitted)) &&
576 bprm->unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
577 /* downgrade; they get no more than they had, and maybe less */
578 if (!capable(CAP_SETUID) ||
579 (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
580 new->euid = new->uid;
581 new->egid = new->gid;
582 }
583 new->cap_permitted = cap_intersect(new->cap_permitted,
584 old->cap_permitted);
585 }
586
587 new->suid = new->fsuid = new->euid;
588 new->sgid = new->fsgid = new->egid;
589
590 /* File caps or setid cancels ambient. */
591 if (has_cap || is_setid)
592 cap_clear(new->cap_ambient);
593
594 /*
595 * Now that we've computed pA', update pP' to give:
596 * pP' = (X & fP) | (pI & fI) | pA'
597 */
598 new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
599
600 /*
601 * Set pE' = (fE ? pP' : pA'). Because pA' is zero if fE is set,
602 * this is the same as pE' = (fE ? pP' : 0) | pA'.
603 */
604 if (effective)
605 new->cap_effective = new->cap_permitted;
606 else
607 new->cap_effective = new->cap_ambient;
608
609 if (WARN_ON(!cap_ambient_invariant_ok(new)))
610 return -EPERM;
611
612 bprm->cap_effective = effective;
613
614 /*
615 * Audit candidate if current->cap_effective is set
616 *
617 * We do not bother to audit if 3 things are true:
618 * 1) cap_effective has all caps
619 * 2) we are root
620 * 3) root is supposed to have all caps (SECURE_NOROOT)
621 * Since this is just a normal root execing a process.
622 *
623 * Number 1 above might fail if you don't have a full bset, but I think
624 * that is interesting information to audit.
625 */
626 if (!cap_issubset(new->cap_effective, new->cap_ambient)) {
627 if (!cap_issubset(CAP_FULL_SET, new->cap_effective) ||
628 !uid_eq(new->euid, root_uid) || !uid_eq(new->uid, root_uid) ||
629 issecure(SECURE_NOROOT)) {
630 ret = audit_log_bprm_fcaps(bprm, new, old);
631 if (ret < 0)
632 return ret;
633 }
634 }
635
636 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
637
638 if (WARN_ON(!cap_ambient_invariant_ok(new)))
639 return -EPERM;
640
641 return 0;
642}
643
644/**
645 * cap_bprm_secureexec - Determine whether a secure execution is required
646 * @bprm: The execution parameters
647 *
648 * Determine whether a secure execution is required, return 1 if it is, and 0
649 * if it is not.
650 *
651 * The credentials have been committed by this point, and so are no longer
652 * available through @bprm->cred.
653 */
654int cap_bprm_secureexec(struct linux_binprm *bprm)
655{
656 const struct cred *cred = current_cred();
657 kuid_t root_uid = make_kuid(cred->user_ns, 0);
658
659 if (!uid_eq(cred->uid, root_uid)) {
660 if (bprm->cap_effective)
661 return 1;
662 if (!cap_issubset(cred->cap_permitted, cred->cap_ambient))
663 return 1;
664 }
665
666 return (!uid_eq(cred->euid, cred->uid) ||
667 !gid_eq(cred->egid, cred->gid));
668}
669
670/**
671 * cap_inode_setxattr - Determine whether an xattr may be altered
672 * @dentry: The inode/dentry being altered
673 * @name: The name of the xattr to be changed
674 * @value: The value that the xattr will be changed to
675 * @size: The size of value
676 * @flags: The replacement flag
677 *
678 * Determine whether an xattr may be altered or set on an inode, returning 0 if
679 * permission is granted, -ve if denied.
680 *
681 * This is used to make sure security xattrs don't get updated or set by those
682 * who aren't privileged to do so.
683 */
684int cap_inode_setxattr(struct dentry *dentry, const char *name,
685 const void *value, size_t size, int flags)
686{
687 if (!strcmp(name, XATTR_NAME_CAPS)) {
688 if (!capable(CAP_SETFCAP))
689 return -EPERM;
690 return 0;
691 }
692
693 if (!strncmp(name, XATTR_SECURITY_PREFIX,
694 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
695 !capable(CAP_SYS_ADMIN))
696 return -EPERM;
697 return 0;
698}
699
700/**
701 * cap_inode_removexattr - Determine whether an xattr may be removed
702 * @dentry: The inode/dentry being altered
703 * @name: The name of the xattr to be changed
704 *
705 * Determine whether an xattr may be removed from an inode, returning 0 if
706 * permission is granted, -ve if denied.
707 *
708 * This is used to make sure security xattrs don't get removed by those who
709 * aren't privileged to remove them.
710 */
711int cap_inode_removexattr(struct dentry *dentry, const char *name)
712{
713 if (!strcmp(name, XATTR_NAME_CAPS)) {
714 if (!capable(CAP_SETFCAP))
715 return -EPERM;
716 return 0;
717 }
718
719 if (!strncmp(name, XATTR_SECURITY_PREFIX,
720 sizeof(XATTR_SECURITY_PREFIX) - 1) &&
721 !capable(CAP_SYS_ADMIN))
722 return -EPERM;
723 return 0;
724}
725
726/*
727 * cap_emulate_setxuid() fixes the effective / permitted capabilities of
728 * a process after a call to setuid, setreuid, or setresuid.
729 *
730 * 1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
731 * {r,e,s}uid != 0, the permitted and effective capabilities are
732 * cleared.
733 *
734 * 2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
735 * capabilities of the process are cleared.
736 *
737 * 3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
738 * capabilities are set to the permitted capabilities.
739 *
740 * fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
741 * never happen.
742 *
743 * -astor
744 *
745 * cevans - New behaviour, Oct '99
746 * A process may, via prctl(), elect to keep its capabilities when it
747 * calls setuid() and switches away from uid==0. Both permitted and
748 * effective sets will be retained.
749 * Without this change, it was impossible for a daemon to drop only some
750 * of its privilege. The call to setuid(!=0) would drop all privileges!
751 * Keeping uid 0 is not an option because uid 0 owns too many vital
752 * files..
753 * Thanks to Olaf Kirch and Peter Benie for spotting this.
754 */
755static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
756{
757 kuid_t root_uid = make_kuid(old->user_ns, 0);
758
759 if ((uid_eq(old->uid, root_uid) ||
760 uid_eq(old->euid, root_uid) ||
761 uid_eq(old->suid, root_uid)) &&
762 (!uid_eq(new->uid, root_uid) &&
763 !uid_eq(new->euid, root_uid) &&
764 !uid_eq(new->suid, root_uid))) {
765 if (!issecure(SECURE_KEEP_CAPS)) {
766 cap_clear(new->cap_permitted);
767 cap_clear(new->cap_effective);
768 }
769
770 /*
771 * Pre-ambient programs expect setresuid to nonroot followed
772 * by exec to drop capabilities. We should make sure that
773 * this remains the case.
774 */
775 cap_clear(new->cap_ambient);
776 }
777 if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
778 cap_clear(new->cap_effective);
779 if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
780 new->cap_effective = new->cap_permitted;
781}
782
783/**
784 * cap_task_fix_setuid - Fix up the results of setuid() call
785 * @new: The proposed credentials
786 * @old: The current task's current credentials
787 * @flags: Indications of what has changed
788 *
789 * Fix up the results of setuid() call before the credential changes are
790 * actually applied, returning 0 to grant the changes, -ve to deny them.
791 */
792int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
793{
794 switch (flags) {
795 case LSM_SETID_RE:
796 case LSM_SETID_ID:
797 case LSM_SETID_RES:
798 /* juggle the capabilities to follow [RES]UID changes unless
799 * otherwise suppressed */
800 if (!issecure(SECURE_NO_SETUID_FIXUP))
801 cap_emulate_setxuid(new, old);
802 break;
803
804 case LSM_SETID_FS:
805 /* juggle the capabilties to follow FSUID changes, unless
806 * otherwise suppressed
807 *
808 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
809 * if not, we might be a bit too harsh here.
810 */
811 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
812 kuid_t root_uid = make_kuid(old->user_ns, 0);
813 if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
814 new->cap_effective =
815 cap_drop_fs_set(new->cap_effective);
816
817 if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
818 new->cap_effective =
819 cap_raise_fs_set(new->cap_effective,
820 new->cap_permitted);
821 }
822 break;
823
824 default:
825 return -EINVAL;
826 }
827
828 return 0;
829}
830
831/*
832 * Rationale: code calling task_setscheduler, task_setioprio, and
833 * task_setnice, assumes that
834 * . if capable(cap_sys_nice), then those actions should be allowed
835 * . if not capable(cap_sys_nice), but acting on your own processes,
836 * then those actions should be allowed
837 * This is insufficient now since you can call code without suid, but
838 * yet with increased caps.
839 * So we check for increased caps on the target process.
840 */
841static int cap_safe_nice(struct task_struct *p)
842{
843 int is_subset, ret = 0;
844
845 rcu_read_lock();
846 is_subset = cap_issubset(__task_cred(p)->cap_permitted,
847 current_cred()->cap_permitted);
848 if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
849 ret = -EPERM;
850 rcu_read_unlock();
851
852 return ret;
853}
854
/**
 * cap_task_setscheduler - Determine if scheduler policy change is permitted
 * @p: The task to affect
 *
 * Delegates to cap_safe_nice().  Returns 0 if permission is granted,
 * -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	return cap_safe_nice(p);
}
866
/**
 * cap_task_setioprio - Determine if I/O priority change is permitted
 * @p: The task to affect
 * @ioprio: The I/O priority to set (not inspected)
 *
 * Delegates to cap_safe_nice().  Returns 0 if permission is granted,
 * -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	return cap_safe_nice(p);
}
879
/**
 * cap_task_setnice - Determine if task priority change is permitted
 * @p: The task to affect
 * @nice: The nice value to set (not inspected)
 *
 * Delegates to cap_safe_nice().  Returns 0 if permission is granted,
 * -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	return cap_safe_nice(p);
}
892
893/*
894 * Implement PR_CAPBSET_DROP. Attempt to remove the specified capability from
895 * the current task's bounding set. Returns 0 on success, -ve on error.
896 */
897static int cap_prctl_drop(unsigned long cap)
898{
899 struct cred *new;
900
901 if (!ns_capable(current_user_ns(), CAP_SETPCAP))
902 return -EPERM;
903 if (!cap_valid(cap))
904 return -EINVAL;
905
906 new = prepare_creds();
907 if (!new)
908 return -ENOMEM;
909 cap_lower(new->cap_bset, cap);
910 return commit_creds(new);
911}
912
913/**
914 * cap_task_prctl - Implement process control functions for this security module
915 * @option: The process control function requested
916 * @arg2, @arg3, @arg4, @arg5: The argument data for this function
917 *
918 * Allow process control functions (sys_prctl()) to alter capabilities; may
919 * also deny access to other functions not otherwise implemented here.
920 *
921 * Returns 0 or +ve on success, -ENOSYS if this function is not implemented
922 * here, other -ve on error. If -ENOSYS is returned, sys_prctl() and other LSM
923 * modules will consider performing the function.
924 */
925int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
926 unsigned long arg4, unsigned long arg5)
927{
928 const struct cred *old = current_cred();
929 struct cred *new;
930
931 switch (option) {
932 case PR_CAPBSET_READ:
933 if (!cap_valid(arg2))
934 return -EINVAL;
935 return !!cap_raised(old->cap_bset, arg2);
936
937 case PR_CAPBSET_DROP:
938 return cap_prctl_drop(arg2);
939
940 /*
941 * The next four prctl's remain to assist with transitioning a
942 * system from legacy UID=0 based privilege (when filesystem
943 * capabilities are not in use) to a system using filesystem
944 * capabilities only - as the POSIX.1e draft intended.
945 *
946 * Note:
947 *
948 * PR_SET_SECUREBITS =
949 * issecure_mask(SECURE_KEEP_CAPS_LOCKED)
950 * | issecure_mask(SECURE_NOROOT)
951 * | issecure_mask(SECURE_NOROOT_LOCKED)
952 * | issecure_mask(SECURE_NO_SETUID_FIXUP)
953 * | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
954 *
955 * will ensure that the current process and all of its
956 * children will be locked into a pure
957 * capability-based-privilege environment.
958 */
959 case PR_SET_SECUREBITS:
960 if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
961 & (old->securebits ^ arg2)) /*[1]*/
962 || ((old->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/
963 || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/
964 || (cap_capable(current_cred(),
965 current_cred()->user_ns, CAP_SETPCAP,
966 SECURITY_CAP_AUDIT) != 0) /*[4]*/
967 /*
968 * [1] no changing of bits that are locked
969 * [2] no unlocking of locks
970 * [3] no setting of unsupported bits
971 * [4] doing anything requires privilege (go read about
972 * the "sendmail capabilities bug")
973 */
974 )
975 /* cannot change a locked bit */
976 return -EPERM;
977
978 new = prepare_creds();
979 if (!new)
980 return -ENOMEM;
981 new->securebits = arg2;
982 return commit_creds(new);
983
984 case PR_GET_SECUREBITS:
985 return old->securebits;
986
987 case PR_GET_KEEPCAPS:
988 return !!issecure(SECURE_KEEP_CAPS);
989
990 case PR_SET_KEEPCAPS:
991 if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
992 return -EINVAL;
993 if (issecure(SECURE_KEEP_CAPS_LOCKED))
994 return -EPERM;
995
996 new = prepare_creds();
997 if (!new)
998 return -ENOMEM;
999 if (arg2)
1000 new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
1001 else
1002 new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
1003 return commit_creds(new);
1004
1005 case PR_CAP_AMBIENT:
1006 if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
1007 if (arg3 | arg4 | arg5)
1008 return -EINVAL;
1009
1010 new = prepare_creds();
1011 if (!new)
1012 return -ENOMEM;
1013 cap_clear(new->cap_ambient);
1014 return commit_creds(new);
1015 }
1016
1017 if (((!cap_valid(arg3)) | arg4 | arg5))
1018 return -EINVAL;
1019
1020 if (arg2 == PR_CAP_AMBIENT_IS_SET) {
1021 return !!cap_raised(current_cred()->cap_ambient, arg3);
1022 } else if (arg2 != PR_CAP_AMBIENT_RAISE &&
1023 arg2 != PR_CAP_AMBIENT_LOWER) {
1024 return -EINVAL;
1025 } else {
1026 if (arg2 == PR_CAP_AMBIENT_RAISE &&
1027 (!cap_raised(current_cred()->cap_permitted, arg3) ||
1028 !cap_raised(current_cred()->cap_inheritable,
1029 arg3) ||
1030 issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
1031 return -EPERM;
1032
1033 new = prepare_creds();
1034 if (!new)
1035 return -ENOMEM;
1036 if (arg2 == PR_CAP_AMBIENT_RAISE)
1037 cap_raise(new->cap_ambient, arg3);
1038 else
1039 cap_lower(new->cap_ambient, arg3);
1040 return commit_creds(new);
1041 }
1042
1043 default:
1044 /* No functionality available - continue with default */
1045 return -ENOSYS;
1046 }
1047}
1048
1049/**
1050 * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
1051 * @mm: The VM space in which the new mapping is to be made
1052 * @pages: The size of the mapping
1053 *
1054 * Determine whether the allocation of a new virtual mapping by the current
1055 * task is permitted, returning 1 if permission is granted, 0 if not.
1056 */
1057int cap_vm_enough_memory(struct mm_struct *mm, long pages)
1058{
1059 int cap_sys_admin = 0;
1060
1061 if (cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
1062 SECURITY_CAP_NOAUDIT) == 0)
1063 cap_sys_admin = 1;
1064 return cap_sys_admin;
1065}
1066
1067/*
1068 * cap_mmap_addr - check if able to map given addr
1069 * @addr: address attempting to be mapped
1070 *
1071 * If the process is attempting to map memory below dac_mmap_min_addr they need
1072 * CAP_SYS_RAWIO. The other parameters to this function are unused by the
1073 * capability security module. Returns 0 if this mapping should be allowed
1074 * -EPERM if not.
1075 */
1076int cap_mmap_addr(unsigned long addr)
1077{
1078 int ret = 0;
1079
1080 if (addr < dac_mmap_min_addr) {
1081 ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1082 SECURITY_CAP_AUDIT);
1083 /* set PF_SUPERPRIV if it turns out we allow the low mmap */
1084 if (ret == 0)
1085 current->flags |= PF_SUPERPRIV;
1086 }
1087 return ret;
1088}
1089
/*
 * cap_mmap_file - mmap_file hook for the capability module
 *
 * No argument is inspected; every file mapping is allowed (returns 0).
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	return 0;
}
1095
1096#ifdef CONFIG_SECURITY
1097
1098struct security_hook_list capability_hooks[] = {
1099 LSM_HOOK_INIT(capable, cap_capable),
1100 LSM_HOOK_INIT(settime, cap_settime),
1101 LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1102 LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1103 LSM_HOOK_INIT(capget, cap_capget),
1104 LSM_HOOK_INIT(capset, cap_capset),
1105 LSM_HOOK_INIT(bprm_set_creds, cap_bprm_set_creds),
1106 LSM_HOOK_INIT(bprm_secureexec, cap_bprm_secureexec),
1107 LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1108 LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
1109 LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1110 LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1111 LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1112 LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1113 LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1114 LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1115 LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1116 LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1117};
1118
1119void __init capability_add_hooks(void)
1120{
1121 security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks));
1122}
1123
1124#endif /* CONFIG_SECURITY */
1125