blob: f789bbba9b8eea645a2b4997f901c963c178af9c
1 | /* |
2 | * This program is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU General Public License as |
4 | * published by the Free Software Foundation, version 2 of the |
5 | * License. |
6 | */ |
7 | |
8 | #include <linux/export.h> |
9 | #include <linux/nsproxy.h> |
10 | #include <linux/slab.h> |
11 | #include <linux/user_namespace.h> |
12 | #include <linux/proc_ns.h> |
13 | #include <linux/highuid.h> |
14 | #include <linux/cred.h> |
15 | #include <linux/securebits.h> |
16 | #include <linux/keyctl.h> |
17 | #include <linux/key-type.h> |
18 | #include <keys/user-type.h> |
19 | #include <linux/seq_file.h> |
20 | #include <linux/fs.h> |
21 | #include <linux/uaccess.h> |
22 | #include <linux/ctype.h> |
23 | #include <linux/projid.h> |
24 | #include <linux/fs_struct.h> |
25 | |
26 | static struct kmem_cache *user_ns_cachep __read_mostly; |
27 | static DEFINE_MUTEX(userns_state_mutex); |
28 | |
29 | static bool new_idmap_permitted(const struct file *file, |
30 | struct user_namespace *ns, int cap_setid, |
31 | struct uid_gid_map *map); |
32 | static void free_user_ns(struct work_struct *work); |
33 | |
/* Charge one user namespace against @uid's UCOUNT_USER_NAMESPACES limit in
 * @ns.  Returns the ucounts to later uncharge, or NULL if the limit is hit.
 */
static struct ucounts *inc_user_namespaces(struct user_namespace *ns, kuid_t uid)
{
	return inc_ucount(ns, uid, UCOUNT_USER_NAMESPACES);
}
38 | |
39 | static void dec_user_namespaces(struct ucounts *ucounts) |
40 | { |
41 | return dec_ucount(ucounts, UCOUNT_USER_NAMESPACES); |
42 | } |
43 | |
/* Switch @cred into @user_ns, resetting securebits and the capability sets
 * to their defaults.  The permitted/effective/bounding sets are full, but
 * they are only meaningful relative to the new user namespace.  Consumes
 * the caller's reference on @user_ns.
 */
static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
{
	/* Start with the same capabilities as init but useless for doing
	 * anything as the capabilities are bound to the new user namespace.
	 */
	cred->securebits = SECUREBITS_DEFAULT;
	cred->cap_inheritable = CAP_EMPTY_SET;
	cred->cap_permitted = CAP_FULL_SET;
	cred->cap_effective = CAP_FULL_SET;
	cred->cap_ambient = CAP_EMPTY_SET;
	cred->cap_bset = CAP_FULL_SET;
#ifdef CONFIG_KEYS
	/* Drop the request_key authorisation key; it belongs to the old
	 * credentials and must not leak into the new namespace.
	 */
	key_put(cred->request_key_auth);
	cred->request_key_auth = NULL;
#endif
	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
	cred->user_ns = user_ns;
}
62 | |
/*
 * Create a new user namespace, deriving the creator from the user in the
 * passed credentials, and replacing that user with the new root user for the
 * new namespace.
 *
 * This is called by copy_creds(), which will finish setting the target task's
 * credentials.
 *
 * Returns 0 on success or a negative errno; on failure @new is untouched.
 */
int create_user_ns(struct cred *new)
{
	struct user_namespace *ns, *parent_ns = new->user_ns;
	kuid_t owner = new->euid;
	kgid_t group = new->egid;
	struct ucounts *ucounts;
	int ret, i;

	/* Bound the depth of user namespace nesting. */
	ret = -ENOSPC;
	if (parent_ns->level > 32)
		goto fail;

	/* Charge the new namespace against the creator's limit. */
	ucounts = inc_user_namespaces(parent_ns, owner);
	if (!ucounts)
		goto fail;

	/*
	 * Verify that we can not violate the policy of which files
	 * may be accessed that is specified by the root directory,
	 * by verifying that the root directory is at the root of the
	 * mount namespace which allows all files to be accessed.
	 */
	ret = -EPERM;
	if (current_chrooted())
		goto fail_dec;

	/* The creator needs a mapping in the parent user namespace
	 * or else we won't be able to reasonably tell userspace who
	 * created a user_namespace.
	 */
	ret = -EPERM;
	if (!kuid_has_mapping(parent_ns, owner) ||
	    !kgid_has_mapping(parent_ns, group))
		goto fail_dec;

	ret = -ENOMEM;
	ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
	if (!ns)
		goto fail_dec;

	ret = ns_alloc_inum(&ns->ns);
	if (ret)
		goto fail_free;
	ns->ns.ops = &userns_operations;

	atomic_set(&ns->count, 1);
	/* Leave the new->user_ns reference with the new user namespace. */
	ns->parent = parent_ns;
	ns->level = parent_ns->level + 1;
	ns->owner = owner;
	ns->group = group;
	INIT_WORK(&ns->work, free_user_ns);
	/* Fresh namespaces start with effectively unlimited ucount limits. */
	for (i = 0; i < UCOUNT_COUNTS; i++) {
		ns->ucount_max[i] = INT_MAX;
	}
	ns->ucounts = ucounts;

	/* Inherit USERNS_SETGROUPS_ALLOWED from our parent */
	mutex_lock(&userns_state_mutex);
	ns->flags = parent_ns->flags;
	mutex_unlock(&userns_state_mutex);

#ifdef CONFIG_PERSISTENT_KEYRINGS
	init_rwsem(&ns->persistent_keyring_register_sem);
#endif
	ret = -ENOMEM;
	if (!setup_userns_sysctls(ns))
		goto fail_keyring;

	set_cred_user_ns(new, ns);
	return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
	key_put(ns->persistent_keyring_register);
#endif
	ns_free_inum(&ns->ns);
fail_free:
	kmem_cache_free(user_ns_cachep, ns);
fail_dec:
	dec_user_namespaces(ucounts);
fail:
	return ret;
}
154 | |
155 | int unshare_userns(unsigned long unshare_flags, struct cred **new_cred) |
156 | { |
157 | struct cred *cred; |
158 | int err = -ENOMEM; |
159 | |
160 | if (!(unshare_flags & CLONE_NEWUSER)) |
161 | return 0; |
162 | |
163 | cred = prepare_creds(); |
164 | if (cred) { |
165 | err = create_user_ns(cred); |
166 | if (err) |
167 | put_cred(cred); |
168 | else |
169 | *new_cred = cred; |
170 | } |
171 | |
172 | return err; |
173 | } |
174 | |
/* Workqueue handler that tears down a dead user namespace.
 *
 * Freeing a namespace drops the reference it held on its parent; if that
 * was the last reference (atomic_dec_and_test), the loop continues and
 * frees the parent too, so a whole chain of otherwise-unreferenced
 * ancestors is released iteratively without recursion.
 */
static void free_user_ns(struct work_struct *work)
{
	struct user_namespace *parent, *ns =
		container_of(work, struct user_namespace, work);

	do {
		/* Grab everything we need from ns before freeing it. */
		struct ucounts *ucounts = ns->ucounts;
		parent = ns->parent;
		retire_userns_sysctls(ns);
#ifdef CONFIG_PERSISTENT_KEYRINGS
		key_put(ns->persistent_keyring_register);
#endif
		ns_free_inum(&ns->ns);
		kmem_cache_free(user_ns_cachep, ns);
		dec_user_namespaces(ucounts);
		ns = parent;
	} while (atomic_dec_and_test(&parent->count));
}
193 | |
/* Called when the last reference to @ns is dropped.  The actual teardown
 * (free_user_ns) is deferred to a workqueue so it may run in process
 * context and take sleeping locks.
 */
void __put_user_ns(struct user_namespace *ns)
{
	schedule_work(&ns->work);
}
EXPORT_SYMBOL(__put_user_ns);
199 | |
200 | static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) |
201 | { |
202 | unsigned idx, extents; |
203 | u32 first, last, id2; |
204 | |
205 | id2 = id + count - 1; |
206 | |
207 | /* Find the matching extent */ |
208 | extents = map->nr_extents; |
209 | smp_rmb(); |
210 | for (idx = 0; idx < extents; idx++) { |
211 | first = map->extent[idx].first; |
212 | last = first + map->extent[idx].count - 1; |
213 | if (id >= first && id <= last && |
214 | (id2 >= first && id2 <= last)) |
215 | break; |
216 | } |
217 | /* Map the id or note failure */ |
218 | if (idx < extents) |
219 | id = (id - first) + map->extent[idx].lower_first; |
220 | else |
221 | id = (u32) -1; |
222 | |
223 | return id; |
224 | } |
225 | |
226 | static u32 map_id_down(struct uid_gid_map *map, u32 id) |
227 | { |
228 | unsigned idx, extents; |
229 | u32 first, last; |
230 | |
231 | /* Find the matching extent */ |
232 | extents = map->nr_extents; |
233 | smp_rmb(); |
234 | for (idx = 0; idx < extents; idx++) { |
235 | first = map->extent[idx].first; |
236 | last = first + map->extent[idx].count - 1; |
237 | if (id >= first && id <= last) |
238 | break; |
239 | } |
240 | /* Map the id or note failure */ |
241 | if (idx < extents) |
242 | id = (id - first) + map->extent[idx].lower_first; |
243 | else |
244 | id = (u32) -1; |
245 | |
246 | return id; |
247 | } |
248 | |
249 | static u32 map_id_up(struct uid_gid_map *map, u32 id) |
250 | { |
251 | unsigned idx, extents; |
252 | u32 first, last; |
253 | |
254 | /* Find the matching extent */ |
255 | extents = map->nr_extents; |
256 | smp_rmb(); |
257 | for (idx = 0; idx < extents; idx++) { |
258 | first = map->extent[idx].lower_first; |
259 | last = first + map->extent[idx].count - 1; |
260 | if (id >= first && id <= last) |
261 | break; |
262 | } |
263 | /* Map the id or note failure */ |
264 | if (idx < extents) |
265 | id = (id - first) + map->extent[idx].first; |
266 | else |
267 | id = (u32) -1; |
268 | |
269 | return id; |
270 | } |
271 | |
/**
 * make_kuid - Map a user-namespace uid pair into a kuid.
 * @ns:  User namespace that the uid is in
 * @uid: User identifier
 *
 * Maps a user-namespace uid pair into a kernel internal kuid,
 * and returns that kuid.
 *
 * When there is no mapping defined for the user-namespace uid
 * pair INVALID_UID is returned.  Callers are expected to test
 * for and handle INVALID_UID being returned.  INVALID_UID
 * may be tested for using uid_valid().
 */
kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
{
	/* Map the uid to a global kernel uid */
	return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
}
EXPORT_SYMBOL(make_kuid);
291 | |
/**
 * from_kuid - Create a uid from a kuid user-namespace pair.
 * @targ: The user namespace we want a uid in.
 * @kuid: The kernel internal uid to start with.
 *
 * Map @kuid into the user-namespace specified by @targ and
 * return the resulting uid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kuid has no mapping in @targ (uid_t)-1 is returned.
 */
uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
{
	/* Map the uid from a global kernel uid */
	return map_id_up(&targ->uid_map, __kuid_val(kuid));
}
EXPORT_SYMBOL(from_kuid);
310 | |
311 | /** |
312 | * from_kuid_munged - Create a uid from a kuid user-namespace pair. |
313 | * @targ: The user namespace we want a uid in. |
314 | * @kuid: The kernel internal uid to start with. |
315 | * |
316 | * Map @kuid into the user-namespace specified by @targ and |
317 | * return the resulting uid. |
318 | * |
319 | * There is always a mapping into the initial user_namespace. |
320 | * |
321 | * Unlike from_kuid from_kuid_munged never fails and always |
322 | * returns a valid uid. This makes from_kuid_munged appropriate |
323 | * for use in syscalls like stat and getuid where failing the |
324 | * system call and failing to provide a valid uid are not an |
325 | * options. |
326 | * |
327 | * If @kuid has no mapping in @targ overflowuid is returned. |
328 | */ |
329 | uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid) |
330 | { |
331 | uid_t uid; |
332 | uid = from_kuid(targ, kuid); |
333 | |
334 | if (uid == (uid_t) -1) |
335 | uid = overflowuid; |
336 | return uid; |
337 | } |
338 | EXPORT_SYMBOL(from_kuid_munged); |
339 | |
/**
 * make_kgid - Map a user-namespace gid pair into a kgid.
 * @ns:  User namespace that the gid is in
 * @gid: group identifier
 *
 * Maps a user-namespace gid pair into a kernel internal kgid,
 * and returns that kgid.
 *
 * When there is no mapping defined for the user-namespace gid
 * pair INVALID_GID is returned.  Callers are expected to test
 * for and handle INVALID_GID being returned.  INVALID_GID may be
 * tested for using gid_valid().
 */
kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
{
	/* Map the gid to a global kernel gid */
	return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
}
EXPORT_SYMBOL(make_kgid);
359 | |
/**
 * from_kgid - Create a gid from a kgid user-namespace pair.
 * @targ: The user namespace we want a gid in.
 * @kgid: The kernel internal gid to start with.
 *
 * Map @kgid into the user-namespace specified by @targ and
 * return the resulting gid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kgid has no mapping in @targ (gid_t)-1 is returned.
 */
gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
{
	/* Map the gid from a global kernel gid */
	return map_id_up(&targ->gid_map, __kgid_val(kgid));
}
EXPORT_SYMBOL(from_kgid);
378 | |
379 | /** |
380 | * from_kgid_munged - Create a gid from a kgid user-namespace pair. |
381 | * @targ: The user namespace we want a gid in. |
382 | * @kgid: The kernel internal gid to start with. |
383 | * |
384 | * Map @kgid into the user-namespace specified by @targ and |
385 | * return the resulting gid. |
386 | * |
387 | * There is always a mapping into the initial user_namespace. |
388 | * |
389 | * Unlike from_kgid from_kgid_munged never fails and always |
390 | * returns a valid gid. This makes from_kgid_munged appropriate |
391 | * for use in syscalls like stat and getgid where failing the |
392 | * system call and failing to provide a valid gid are not options. |
393 | * |
394 | * If @kgid has no mapping in @targ overflowgid is returned. |
395 | */ |
396 | gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid) |
397 | { |
398 | gid_t gid; |
399 | gid = from_kgid(targ, kgid); |
400 | |
401 | if (gid == (gid_t) -1) |
402 | gid = overflowgid; |
403 | return gid; |
404 | } |
405 | EXPORT_SYMBOL(from_kgid_munged); |
406 | |
/**
 * make_kprojid - Map a user-namespace projid pair into a kprojid.
 * @ns:     User namespace that the projid is in
 * @projid: Project identifier
 *
 * Maps a user-namespace projid pair into a kernel internal kprojid,
 * and returns that kprojid.
 *
 * When there is no mapping defined for the user-namespace projid
 * pair INVALID_PROJID is returned.  Callers are expected to test
 * for and handle INVALID_PROJID being returned.  INVALID_PROJID
 * may be tested for using projid_valid().
 */
kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)
{
	/* Map the projid to a global kernel projid */
	return KPROJIDT_INIT(map_id_down(&ns->projid_map, projid));
}
EXPORT_SYMBOL(make_kprojid);
426 | |
/**
 * from_kprojid - Create a projid from a kprojid user-namespace pair.
 * @targ:    The user namespace we want a projid in.
 * @kprojid: The kernel internal project identifier to start with.
 *
 * Map @kprojid into the user-namespace specified by @targ and
 * return the resulting projid.
 *
 * There is always a mapping into the initial user_namespace.
 *
 * If @kprojid has no mapping in @targ (projid_t)-1 is returned.
 */
projid_t from_kprojid(struct user_namespace *targ, kprojid_t kprojid)
{
	/* Map the projid from a global kernel projid */
	return map_id_up(&targ->projid_map, __kprojid_val(kprojid));
}
EXPORT_SYMBOL(from_kprojid);
445 | |
446 | /** |
447 | * from_kprojid_munged - Create a projiid from a kprojid user-namespace pair. |
448 | * @targ: The user namespace we want a projid in. |
449 | * @kprojid: The kernel internal projid to start with. |
450 | * |
451 | * Map @kprojid into the user-namespace specified by @targ and |
452 | * return the resulting projid. |
453 | * |
454 | * There is always a mapping into the initial user_namespace. |
455 | * |
456 | * Unlike from_kprojid from_kprojid_munged never fails and always |
457 | * returns a valid projid. This makes from_kprojid_munged |
458 | * appropriate for use in syscalls like stat and where |
459 | * failing the system call and failing to provide a valid projid are |
460 | * not an options. |
461 | * |
462 | * If @kprojid has no mapping in @targ OVERFLOW_PROJID is returned. |
463 | */ |
464 | projid_t from_kprojid_munged(struct user_namespace *targ, kprojid_t kprojid) |
465 | { |
466 | projid_t projid; |
467 | projid = from_kprojid(targ, kprojid); |
468 | |
469 | if (projid == (projid_t) -1) |
470 | projid = OVERFLOW_PROJID; |
471 | return projid; |
472 | } |
473 | EXPORT_SYMBOL(from_kprojid_munged); |
474 | |
475 | |
476 | static int uid_m_show(struct seq_file *seq, void *v) |
477 | { |
478 | struct user_namespace *ns = seq->private; |
479 | struct uid_gid_extent *extent = v; |
480 | struct user_namespace *lower_ns; |
481 | uid_t lower; |
482 | |
483 | lower_ns = seq_user_ns(seq); |
484 | if ((lower_ns == ns) && lower_ns->parent) |
485 | lower_ns = lower_ns->parent; |
486 | |
487 | lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first)); |
488 | |
489 | seq_printf(seq, "%10u %10u %10u\n", |
490 | extent->first, |
491 | lower, |
492 | extent->count); |
493 | |
494 | return 0; |
495 | } |
496 | |
497 | static int gid_m_show(struct seq_file *seq, void *v) |
498 | { |
499 | struct user_namespace *ns = seq->private; |
500 | struct uid_gid_extent *extent = v; |
501 | struct user_namespace *lower_ns; |
502 | gid_t lower; |
503 | |
504 | lower_ns = seq_user_ns(seq); |
505 | if ((lower_ns == ns) && lower_ns->parent) |
506 | lower_ns = lower_ns->parent; |
507 | |
508 | lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first)); |
509 | |
510 | seq_printf(seq, "%10u %10u %10u\n", |
511 | extent->first, |
512 | lower, |
513 | extent->count); |
514 | |
515 | return 0; |
516 | } |
517 | |
518 | static int projid_m_show(struct seq_file *seq, void *v) |
519 | { |
520 | struct user_namespace *ns = seq->private; |
521 | struct uid_gid_extent *extent = v; |
522 | struct user_namespace *lower_ns; |
523 | projid_t lower; |
524 | |
525 | lower_ns = seq_user_ns(seq); |
526 | if ((lower_ns == ns) && lower_ns->parent) |
527 | lower_ns = lower_ns->parent; |
528 | |
529 | lower = from_kprojid(lower_ns, KPROJIDT_INIT(extent->lower_first)); |
530 | |
531 | seq_printf(seq, "%10u %10u %10u\n", |
532 | extent->first, |
533 | lower, |
534 | extent->count); |
535 | |
536 | return 0; |
537 | } |
538 | |
539 | static void *m_start(struct seq_file *seq, loff_t *ppos, |
540 | struct uid_gid_map *map) |
541 | { |
542 | struct uid_gid_extent *extent = NULL; |
543 | loff_t pos = *ppos; |
544 | |
545 | if (pos < map->nr_extents) |
546 | extent = &map->extent[pos]; |
547 | |
548 | return extent; |
549 | } |
550 | |
/* seq_file ->start for /proc/<pid>/uid_map. */
static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->uid_map);
}
557 | |
/* seq_file ->start for /proc/<pid>/gid_map. */
static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->gid_map);
}
564 | |
/* seq_file ->start for /proc/<pid>/projid_map. */
static void *projid_m_start(struct seq_file *seq, loff_t *ppos)
{
	struct user_namespace *ns = seq->private;

	return m_start(seq, ppos, &ns->projid_map);
}
571 | |
/* Shared seq_file ->next: advance the position and re-dispatch through
 * the table's own ->start so the right map is consulted.
 */
static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return seq->op->start(seq, pos);
}
577 | |
/* Shared seq_file ->stop: nothing was pinned in ->start, so there is
 * nothing to release.
 */
static void m_stop(struct seq_file *seq, void *v)
{
}
582 | |
/* seq_file iterators backing /proc/<pid>/{uid,gid,projid}_map reads. */
const struct seq_operations proc_uid_seq_operations = {
	.start = uid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = uid_m_show,
};

const struct seq_operations proc_gid_seq_operations = {
	.start = gid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = gid_m_show,
};

const struct seq_operations proc_projid_seq_operations = {
	.start = projid_m_start,
	.stop = m_stop,
	.next = m_next,
	.show = projid_m_show,
};
603 | |
604 | static bool mappings_overlap(struct uid_gid_map *new_map, |
605 | struct uid_gid_extent *extent) |
606 | { |
607 | u32 upper_first, lower_first, upper_last, lower_last; |
608 | unsigned idx; |
609 | |
610 | upper_first = extent->first; |
611 | lower_first = extent->lower_first; |
612 | upper_last = upper_first + extent->count - 1; |
613 | lower_last = lower_first + extent->count - 1; |
614 | |
615 | for (idx = 0; idx < new_map->nr_extents; idx++) { |
616 | u32 prev_upper_first, prev_lower_first; |
617 | u32 prev_upper_last, prev_lower_last; |
618 | struct uid_gid_extent *prev; |
619 | |
620 | prev = &new_map->extent[idx]; |
621 | |
622 | prev_upper_first = prev->first; |
623 | prev_lower_first = prev->lower_first; |
624 | prev_upper_last = prev_upper_first + prev->count - 1; |
625 | prev_lower_last = prev_lower_first + prev->count - 1; |
626 | |
627 | /* Does the upper range intersect a previous extent? */ |
628 | if ((prev_upper_first <= upper_last) && |
629 | (prev_upper_last >= upper_first)) |
630 | return true; |
631 | |
632 | /* Does the lower range intersect a previous extent? */ |
633 | if ((prev_lower_first <= lower_last) && |
634 | (prev_lower_last >= lower_first)) |
635 | return true; |
636 | } |
637 | return false; |
638 | } |
639 | |
/* Parse and install a uid/gid/projid map written to a /proc map file.
 *
 * Each input line is "<first> <lower_first> <count>".  The map may only
 * be written once, must fit in UID_GID_MAP_MAX_EXTENTS extents, must not
 * contain overlapping or wrapping ranges, and every lower id must itself
 * be mapped in @parent_map.  Returns the byte count on success or a
 * negative errno.
 */
static ssize_t map_write(struct file *file, const char __user *buf,
			 size_t count, loff_t *ppos,
			 int cap_setid,
			 struct uid_gid_map *map,
			 struct uid_gid_map *parent_map)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	struct uid_gid_map new_map;
	unsigned idx;
	struct uid_gid_extent *extent = NULL;
	char *kbuf = NULL, *pos, *next_line;
	ssize_t ret;

	/* Only allow < page size writes at the beginning of the file */
	if ((*ppos != 0) || (count >= PAGE_SIZE))
		return -EINVAL;

	/* Slurp in the user data */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	/*
	 * The userns_state_mutex serializes all writes to any given map.
	 *
	 * Any map is only ever written once.
	 *
	 * An id map fits within 1 cache line on most architectures.
	 *
	 * On read nothing needs to be done unless you are on an
	 * architecture with a crazy cache coherency model like alpha.
	 *
	 * There is a one time data dependency between reading the
	 * count of the extents and the values of the extents.  The
	 * desired behavior is to see the values of the extents that
	 * were written before the count of the extents.
	 *
	 * To achieve this smp_wmb() is used to guarantee the write
	 * order and smp_rmb() guarantees that we don't have crazy
	 * architectures returning stale data.
	 */
	mutex_lock(&userns_state_mutex);

	ret = -EPERM;
	/* Only allow one successful write to the map */
	if (map->nr_extents != 0)
		goto out;

	/*
	 * Adjusting namespace settings requires capabilities on the target.
	 */
	if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
		goto out;

	/* Parse the user data */
	ret = -EINVAL;
	pos = kbuf;
	new_map.nr_extents = 0;
	for (; pos; pos = next_line) {
		extent = &new_map.extent[new_map.nr_extents];

		/* Find the end of line and ensure I don't look past it */
		next_line = strchr(pos, '\n');
		if (next_line) {
			*next_line = '\0';
			next_line++;
			/* A trailing newline terminates the input. */
			if (*next_line == '\0')
				next_line = NULL;
		}

		pos = skip_spaces(pos);
		extent->first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->lower_first = simple_strtoul(pos, &pos, 10);
		if (!isspace(*pos))
			goto out;

		pos = skip_spaces(pos);
		extent->count = simple_strtoul(pos, &pos, 10);
		if (*pos && !isspace(*pos))
			goto out;

		/* Verify there is not trailing junk on the line */
		pos = skip_spaces(pos);
		if (*pos != '\0')
			goto out;

		/* Verify we have been given valid starting values;
		 * (u32)-1 is reserved as the "no mapping" sentinel.
		 */
		if ((extent->first == (u32) -1) ||
		    (extent->lower_first == (u32) -1))
			goto out;

		/* Verify count is not zero and does not cause the
		 * extent to wrap
		 */
		if ((extent->first + extent->count) <= extent->first)
			goto out;
		if ((extent->lower_first + extent->count) <=
		     extent->lower_first)
			goto out;

		/* Do the ranges in extent overlap any previous extents? */
		if (mappings_overlap(&new_map, extent))
			goto out;

		new_map.nr_extents++;

		/* Fail if the file contains too many extents */
		if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
		    (next_line != NULL))
			goto out;
	}
	/* Be very certain the new map actually exists */
	if (new_map.nr_extents == 0)
		goto out;

	ret = -EPERM;
	/* Validate the user is allowed to use user id's mapped to. */
	if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
		goto out;

	/* Map the lower ids from the parent user namespace to the
	 * kernel global id space.
	 */
	for (idx = 0; idx < new_map.nr_extents; idx++) {
		u32 lower_first;
		extent = &new_map.extent[idx];

		lower_first = map_id_range_down(parent_map,
						extent->lower_first,
						extent->count);

		/* Fail if we can not map the specified extent to
		 * the kernel global id space.
		 */
		if (lower_first == (u32) -1)
			goto out;

		extent->lower_first = lower_first;
	}

	/* Install the map: write the extents before publishing the count
	 * (smp_wmb() pairs with the smp_rmb() in the map_id_* readers).
	 */
	memcpy(map->extent, new_map.extent,
	       new_map.nr_extents*sizeof(new_map.extent[0]));
	smp_wmb();
	map->nr_extents = new_map.nr_extents;

	*ppos = count;
	ret = count;
out:
	mutex_unlock(&userns_state_mutex);
	kfree(kbuf);
	return ret;
}
798 | |
799 | ssize_t proc_uid_map_write(struct file *file, const char __user *buf, |
800 | size_t size, loff_t *ppos) |
801 | { |
802 | struct seq_file *seq = file->private_data; |
803 | struct user_namespace *ns = seq->private; |
804 | struct user_namespace *seq_ns = seq_user_ns(seq); |
805 | |
806 | if (!ns->parent) |
807 | return -EPERM; |
808 | |
809 | if ((seq_ns != ns) && (seq_ns != ns->parent)) |
810 | return -EPERM; |
811 | |
812 | return map_write(file, buf, size, ppos, CAP_SETUID, |
813 | &ns->uid_map, &ns->parent->uid_map); |
814 | } |
815 | |
816 | ssize_t proc_gid_map_write(struct file *file, const char __user *buf, |
817 | size_t size, loff_t *ppos) |
818 | { |
819 | struct seq_file *seq = file->private_data; |
820 | struct user_namespace *ns = seq->private; |
821 | struct user_namespace *seq_ns = seq_user_ns(seq); |
822 | |
823 | if (!ns->parent) |
824 | return -EPERM; |
825 | |
826 | if ((seq_ns != ns) && (seq_ns != ns->parent)) |
827 | return -EPERM; |
828 | |
829 | return map_write(file, buf, size, ppos, CAP_SETGID, |
830 | &ns->gid_map, &ns->parent->gid_map); |
831 | } |
832 | |
833 | ssize_t proc_projid_map_write(struct file *file, const char __user *buf, |
834 | size_t size, loff_t *ppos) |
835 | { |
836 | struct seq_file *seq = file->private_data; |
837 | struct user_namespace *ns = seq->private; |
838 | struct user_namespace *seq_ns = seq_user_ns(seq); |
839 | |
840 | if (!ns->parent) |
841 | return -EPERM; |
842 | |
843 | if ((seq_ns != ns) && (seq_ns != ns->parent)) |
844 | return -EPERM; |
845 | |
846 | /* Anyone can set any valid project id no capability needed */ |
847 | return map_write(file, buf, size, ppos, -1, |
848 | &ns->projid_map, &ns->parent->projid_map); |
849 | } |
850 | |
/* Decide whether the opener of the map file may install @new_map on @ns.
 *
 * Unprivileged single-entry maps are allowed when they map the namespace
 * owner's own euid (or, with setgroups disabled, egid); anything else
 * requires cap_setid in the parent namespace both now and at open time.
 */
static bool new_idmap_permitted(const struct file *file,
				struct user_namespace *ns, int cap_setid,
				struct uid_gid_map *new_map)
{
	const struct cred *cred = file->f_cred;
	/* Don't allow mappings that would allow anything that wouldn't
	 * be allowed without the establishment of unprivileged mappings.
	 */
	if ((new_map->nr_extents == 1) && (new_map->extent[0].count == 1) &&
	    uid_eq(ns->owner, cred->euid)) {
		u32 id = new_map->extent[0].lower_first;
		if (cap_setid == CAP_SETUID) {
			kuid_t uid = make_kuid(ns->parent, id);
			if (uid_eq(uid, cred->euid))
				return true;
		} else if (cap_setid == CAP_SETGID) {
			kgid_t gid = make_kgid(ns->parent, id);
			/* An unprivileged gid self-map is only safe once
			 * setgroups has been disabled in this namespace.
			 */
			if (!(ns->flags & USERNS_SETGROUPS_ALLOWED) &&
			    gid_eq(gid, cred->egid))
				return true;
		}
	}

	/* Allow anyone to set a mapping that doesn't require privilege */
	if (!cap_valid(cap_setid))
		return true;

	/* Allow the specified ids if we have the appropriate capability
	 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
	 * And the opener of the id file also had the appropriate capability.
	 */
	if (ns_capable(ns->parent, cap_setid) &&
	    file_ns_capable(file, ns->parent, cap_setid))
		return true;

	return false;
}
888 | |
889 | int proc_setgroups_show(struct seq_file *seq, void *v) |
890 | { |
891 | struct user_namespace *ns = seq->private; |
892 | unsigned long userns_flags = ACCESS_ONCE(ns->flags); |
893 | |
894 | seq_printf(seq, "%s\n", |
895 | (userns_flags & USERNS_SETGROUPS_ALLOWED) ? |
896 | "allow" : "deny"); |
897 | return 0; |
898 | } |
899 | |
/* Write handler for /proc/<pid>/setgroups: accept exactly "allow" or
 * "deny" (optionally followed by whitespace).
 *
 * The flag is a one-way latch: "allow" after a "deny" is rejected, and
 * "deny" is rejected once a gid map has been installed (writing the gid
 * map commits the namespace to setgroups being available).
 */
ssize_t proc_setgroups_write(struct file *file, const char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	struct user_namespace *ns = seq->private;
	char kbuf[8], *pos;
	bool setgroups_allowed;
	ssize_t ret;

	/* Only allow a very narrow range of strings to be written */
	ret = -EINVAL;
	if ((*ppos != 0) || (count >= sizeof(kbuf)))
		goto out;

	/* What was written? */
	ret = -EFAULT;
	if (copy_from_user(kbuf, buf, count))
		goto out;
	kbuf[count] = '\0';
	pos = kbuf;

	/* What is being requested? */
	ret = -EINVAL;
	if (strncmp(pos, "allow", 5) == 0) {
		pos += 5;
		setgroups_allowed = true;
	}
	else if (strncmp(pos, "deny", 4) == 0) {
		pos += 4;
		setgroups_allowed = false;
	}
	else
		goto out;

	/* Verify there is not trailing junk on the line */
	pos = skip_spaces(pos);
	if (*pos != '\0')
		goto out;

	ret = -EPERM;
	mutex_lock(&userns_state_mutex);
	if (setgroups_allowed) {
		/* Enabling setgroups after setgroups has been disabled
		 * is not allowed.
		 */
		if (!(ns->flags & USERNS_SETGROUPS_ALLOWED))
			goto out_unlock;
	} else {
		/* Permanently disabling setgroups after setgroups has
		 * been enabled by writing the gid_map is not allowed.
		 */
		if (ns->gid_map.nr_extents != 0)
			goto out_unlock;
		ns->flags &= ~USERNS_SETGROUPS_ALLOWED;
	}
	mutex_unlock(&userns_state_mutex);

	/* Report a successful write */
	*ppos = count;
	ret = count;
out:
	return ret;
out_unlock:
	mutex_unlock(&userns_state_mutex);
	goto out;
}
966 | |
967 | bool userns_may_setgroups(const struct user_namespace *ns) |
968 | { |
969 | bool allowed; |
970 | |
971 | mutex_lock(&userns_state_mutex); |
972 | /* It is not safe to use setgroups until a gid mapping in |
973 | * the user namespace has been established. |
974 | */ |
975 | allowed = ns->gid_map.nr_extents != 0; |
976 | /* Is setgroups allowed? */ |
977 | allowed = allowed && (ns->flags & USERNS_SETGROUPS_ALLOWED); |
978 | mutex_unlock(&userns_state_mutex); |
979 | |
980 | return allowed; |
981 | } |
982 | |
983 | /* |
984 | * Returns true if @ns is the same namespace as or a descendant of |
985 | * @target_ns. |
986 | */ |
987 | bool current_in_userns(const struct user_namespace *target_ns) |
988 | { |
989 | struct user_namespace *ns; |
990 | for (ns = current_user_ns(); ns; ns = ns->parent) { |
991 | if (ns == target_ns) |
992 | return true; |
993 | } |
994 | return false; |
995 | } |
996 | |
/* Convert a generic ns_common pointer back to its user_namespace. */
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
{
	return container_of(ns, struct user_namespace, ns);
}
1001 | |
/* proc_ns ->get: take a reference on @task's user namespace, reading the
 * task's credentials under RCU.
 */
static struct ns_common *userns_get(struct task_struct *task)
{
	struct user_namespace *user_ns;

	rcu_read_lock();
	user_ns = get_user_ns(__task_cred(task)->user_ns);
	rcu_read_unlock();

	return user_ns ? &user_ns->ns : NULL;
}
1012 | |
/* proc_ns ->put: drop the reference taken by userns_get(). */
static void userns_put(struct ns_common *ns)
{
	put_user_ns(to_user_ns(ns));
}
1017 | |
/* proc_ns ->install for setns(2): move the calling task's credentials
 * into @ns.  Requires CAP_SYS_ADMIN in the target namespace, a
 * single-threaded caller, and an unshared fs_struct.
 */
static int userns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct user_namespace *user_ns = to_user_ns(ns);
	struct cred *cred;

	/* Don't allow gaining capabilities by reentering
	 * the same user namespace.
	 */
	if (user_ns == current_user_ns())
		return -EINVAL;

	/* Tasks that share a thread group must share a user namespace */
	if (!thread_group_empty(current))
		return -EINVAL;

	/* An fs_struct shared with another task could be changed under us. */
	if (current->fs->users != 1)
		return -EINVAL;

	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	cred = prepare_creds();
	if (!cred)
		return -ENOMEM;

	/* Swap the old namespace reference for one on @user_ns. */
	put_user_ns(cred->user_ns);
	set_cred_user_ns(cred, get_user_ns(user_ns));

	return commit_creds(cred);
}
1048 | |
1049 | struct ns_common *ns_get_owner(struct ns_common *ns) |
1050 | { |
1051 | struct user_namespace *my_user_ns = current_user_ns(); |
1052 | struct user_namespace *owner, *p; |
1053 | |
1054 | /* See if the owner is in the current user namespace */ |
1055 | owner = p = ns->ops->owner(ns); |
1056 | for (;;) { |
1057 | if (!p) |
1058 | return ERR_PTR(-EPERM); |
1059 | if (p == my_user_ns) |
1060 | break; |
1061 | p = p->parent; |
1062 | } |
1063 | |
1064 | return &get_user_ns(owner)->ns; |
1065 | } |
1066 | |
/* A user namespace is owned by its parent user namespace. */
static struct user_namespace *userns_owner(struct ns_common *ns)
{
	return to_user_ns(ns)->parent;
}
1071 | |
/* Operations backing /proc/<pid>/ns/user and the ns ioctls. */
const struct proc_ns_operations userns_operations = {
	.name		= "user",
	.type		= CLONE_NEWUSER,
	.get		= userns_get,
	.put		= userns_put,
	.install	= userns_install,
	.owner		= userns_owner,
	.get_parent	= ns_get_owner,
};
1081 | |
/* Create the slab cache for user_namespace objects at boot. */
static __init int user_namespaces_init(void)
{
	user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
	return 0;
}
subsys_initcall(user_namespaces_init);
1088 |