blob: 755119c3c1b9129890eedc6abbe670ac4b88ecdf
1 | #include <linux/capability.h> |
2 | #include <linux/blkdev.h> |
3 | #include <linux/export.h> |
4 | #include <linux/gfp.h> |
5 | #include <linux/blkpg.h> |
6 | #include <linux/hdreg.h> |
7 | #include <linux/backing-dev.h> |
8 | #include <linux/fs.h> |
9 | #include <linux/blktrace_api.h> |
10 | #include <linux/pr.h> |
11 | #include <asm/uaccess.h> |
12 | |
13 | static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user *arg) |
14 | { |
15 | struct block_device *bdevp; |
16 | struct gendisk *disk; |
17 | struct hd_struct *part, *lpart; |
18 | struct blkpg_ioctl_arg a; |
19 | struct blkpg_partition p; |
20 | struct disk_part_iter piter; |
21 | long long start, length; |
22 | int partno; |
23 | |
24 | if (!capable(CAP_SYS_ADMIN)) |
25 | return -EACCES; |
26 | if (copy_from_user(&a, arg, sizeof(struct blkpg_ioctl_arg))) |
27 | return -EFAULT; |
28 | if (copy_from_user(&p, a.data, sizeof(struct blkpg_partition))) |
29 | return -EFAULT; |
30 | disk = bdev->bd_disk; |
31 | if (bdev != bdev->bd_contains) |
32 | return -EINVAL; |
33 | partno = p.pno; |
34 | if (partno <= 0) |
35 | return -EINVAL; |
36 | switch (a.op) { |
37 | case BLKPG_ADD_PARTITION: |
38 | start = p.start >> 9; |
39 | length = p.length >> 9; |
40 | /* check for fit in a hd_struct */ |
41 | if (sizeof(sector_t) == sizeof(long) && |
42 | sizeof(long long) > sizeof(long)) { |
43 | long pstart = start, plength = length; |
44 | if (pstart != start || plength != length |
45 | || pstart < 0 || plength < 0 || partno > 65535) |
46 | return -EINVAL; |
47 | } |
48 | |
49 | mutex_lock(&bdev->bd_mutex); |
50 | |
51 | /* overlap? */ |
52 | disk_part_iter_init(&piter, disk, |
53 | DISK_PITER_INCL_EMPTY); |
54 | while ((part = disk_part_iter_next(&piter))) { |
55 | if (!(start + length <= part->start_sect || |
56 | start >= part->start_sect + part->nr_sects)) { |
57 | disk_part_iter_exit(&piter); |
58 | mutex_unlock(&bdev->bd_mutex); |
59 | return -EBUSY; |
60 | } |
61 | } |
62 | disk_part_iter_exit(&piter); |
63 | |
64 | /* all seems OK */ |
65 | part = add_partition(disk, partno, start, length, |
66 | ADDPART_FLAG_NONE, NULL); |
67 | mutex_unlock(&bdev->bd_mutex); |
68 | return PTR_ERR_OR_ZERO(part); |
69 | case BLKPG_DEL_PARTITION: |
70 | part = disk_get_part(disk, partno); |
71 | if (!part) |
72 | return -ENXIO; |
73 | |
74 | bdevp = bdget(part_devt(part)); |
75 | disk_put_part(part); |
76 | if (!bdevp) |
77 | return -ENOMEM; |
78 | |
79 | mutex_lock(&bdevp->bd_mutex); |
80 | if (bdevp->bd_openers) { |
81 | mutex_unlock(&bdevp->bd_mutex); |
82 | bdput(bdevp); |
83 | return -EBUSY; |
84 | } |
85 | /* all seems OK */ |
86 | fsync_bdev(bdevp); |
87 | invalidate_bdev(bdevp); |
88 | |
89 | mutex_lock_nested(&bdev->bd_mutex, 1); |
90 | delete_partition(disk, partno); |
91 | mutex_unlock(&bdev->bd_mutex); |
92 | mutex_unlock(&bdevp->bd_mutex); |
93 | bdput(bdevp); |
94 | |
95 | return 0; |
96 | case BLKPG_RESIZE_PARTITION: |
97 | start = p.start >> 9; |
98 | /* new length of partition in bytes */ |
99 | length = p.length >> 9; |
100 | /* check for fit in a hd_struct */ |
101 | if (sizeof(sector_t) == sizeof(long) && |
102 | sizeof(long long) > sizeof(long)) { |
103 | long pstart = start, plength = length; |
104 | if (pstart != start || plength != length |
105 | || pstart < 0 || plength < 0) |
106 | return -EINVAL; |
107 | } |
108 | part = disk_get_part(disk, partno); |
109 | if (!part) |
110 | return -ENXIO; |
111 | bdevp = bdget(part_devt(part)); |
112 | if (!bdevp) { |
113 | disk_put_part(part); |
114 | return -ENOMEM; |
115 | } |
116 | mutex_lock(&bdevp->bd_mutex); |
117 | mutex_lock_nested(&bdev->bd_mutex, 1); |
118 | if (start != part->start_sect) { |
119 | mutex_unlock(&bdevp->bd_mutex); |
120 | mutex_unlock(&bdev->bd_mutex); |
121 | bdput(bdevp); |
122 | disk_put_part(part); |
123 | return -EINVAL; |
124 | } |
125 | /* overlap? */ |
126 | disk_part_iter_init(&piter, disk, |
127 | DISK_PITER_INCL_EMPTY); |
128 | while ((lpart = disk_part_iter_next(&piter))) { |
129 | if (lpart->partno != partno && |
130 | !(start + length <= lpart->start_sect || |
131 | start >= lpart->start_sect + lpart->nr_sects) |
132 | ) { |
133 | disk_part_iter_exit(&piter); |
134 | mutex_unlock(&bdevp->bd_mutex); |
135 | mutex_unlock(&bdev->bd_mutex); |
136 | bdput(bdevp); |
137 | disk_put_part(part); |
138 | return -EBUSY; |
139 | } |
140 | } |
141 | disk_part_iter_exit(&piter); |
142 | part_nr_sects_write(part, (sector_t)length); |
143 | i_size_write(bdevp->bd_inode, p.length); |
144 | mutex_unlock(&bdevp->bd_mutex); |
145 | mutex_unlock(&bdev->bd_mutex); |
146 | bdput(bdevp); |
147 | disk_put_part(part); |
148 | return 0; |
149 | default: |
150 | return -EINVAL; |
151 | } |
152 | } |
153 | |
154 | /* |
155 | * This is an exported API for the block driver, and will not |
156 | * acquire bd_mutex. This API should be used in case that |
157 | * caller has held bd_mutex already. |
158 | */ |
159 | int __blkdev_reread_part(struct block_device *bdev) |
160 | { |
161 | struct gendisk *disk = bdev->bd_disk; |
162 | |
163 | if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) |
164 | return -EINVAL; |
165 | if (!capable(CAP_SYS_ADMIN)) |
166 | return -EACCES; |
167 | |
168 | lockdep_assert_held(&bdev->bd_mutex); |
169 | |
170 | return rescan_partitions(disk, bdev); |
171 | } |
172 | EXPORT_SYMBOL(__blkdev_reread_part); |
173 | |
174 | /* |
175 | * This is an exported API for the block driver, and will |
176 | * try to acquire bd_mutex. If bd_mutex has been held already |
177 | * in current context, please call __blkdev_reread_part(). |
178 | * |
179 | * Make sure the held locks in current context aren't required |
180 | * in open()/close() handler and I/O path for avoiding ABBA deadlock: |
181 | * - bd_mutex is held before calling block driver's open/close |
182 | * handler |
183 | * - reading partition table may submit I/O to the block device |
184 | */ |
185 | int blkdev_reread_part(struct block_device *bdev) |
186 | { |
187 | int res; |
188 | |
189 | mutex_lock(&bdev->bd_mutex); |
190 | res = __blkdev_reread_part(bdev); |
191 | mutex_unlock(&bdev->bd_mutex); |
192 | |
193 | return res; |
194 | } |
195 | EXPORT_SYMBOL(blkdev_reread_part); |
196 | |
197 | static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, |
198 | unsigned long arg, unsigned long flags) |
199 | { |
200 | uint64_t range[2]; |
201 | uint64_t start, len; |
202 | |
203 | if (!(mode & FMODE_WRITE)) |
204 | return -EBADF; |
205 | |
206 | if (copy_from_user(range, (void __user *)arg, sizeof(range))) |
207 | return -EFAULT; |
208 | |
209 | start = range[0]; |
210 | len = range[1]; |
211 | |
212 | if (start & 511) |
213 | return -EINVAL; |
214 | if (len & 511) |
215 | return -EINVAL; |
216 | start >>= 9; |
217 | len >>= 9; |
218 | |
219 | if (start + len > (i_size_read(bdev->bd_inode) >> 9)) |
220 | return -EINVAL; |
221 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); |
222 | } |
223 | |
224 | static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, |
225 | unsigned long arg) |
226 | { |
227 | uint64_t range[2]; |
228 | struct address_space *mapping; |
229 | uint64_t start, end, len; |
230 | |
231 | if (!(mode & FMODE_WRITE)) |
232 | return -EBADF; |
233 | |
234 | if (copy_from_user(range, (void __user *)arg, sizeof(range))) |
235 | return -EFAULT; |
236 | |
237 | start = range[0]; |
238 | len = range[1]; |
239 | end = start + len - 1; |
240 | |
241 | if (start & 511) |
242 | return -EINVAL; |
243 | if (len & 511) |
244 | return -EINVAL; |
245 | if (end >= (uint64_t)i_size_read(bdev->bd_inode)) |
246 | return -EINVAL; |
247 | if (end < start) |
248 | return -EINVAL; |
249 | |
250 | /* Invalidate the page cache, including dirty pages */ |
251 | mapping = bdev->bd_inode->i_mapping; |
252 | truncate_inode_pages_range(mapping, start, end); |
253 | |
254 | return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, |
255 | false); |
256 | } |
257 | |
258 | static int put_ushort(unsigned long arg, unsigned short val) |
259 | { |
260 | return put_user(val, (unsigned short __user *)arg); |
261 | } |
262 | |
263 | static int put_int(unsigned long arg, int val) |
264 | { |
265 | return put_user(val, (int __user *)arg); |
266 | } |
267 | |
268 | static int put_uint(unsigned long arg, unsigned int val) |
269 | { |
270 | return put_user(val, (unsigned int __user *)arg); |
271 | } |
272 | |
273 | static int put_long(unsigned long arg, long val) |
274 | { |
275 | return put_user(val, (long __user *)arg); |
276 | } |
277 | |
278 | static int put_ulong(unsigned long arg, unsigned long val) |
279 | { |
280 | return put_user(val, (unsigned long __user *)arg); |
281 | } |
282 | |
283 | static int put_u64(unsigned long arg, u64 val) |
284 | { |
285 | return put_user(val, (u64 __user *)arg); |
286 | } |
287 | |
288 | int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, |
289 | unsigned cmd, unsigned long arg) |
290 | { |
291 | struct gendisk *disk = bdev->bd_disk; |
292 | |
293 | if (disk->fops->ioctl) |
294 | return disk->fops->ioctl(bdev, mode, cmd, arg); |
295 | |
296 | return -ENOTTY; |
297 | } |
298 | /* |
299 | * For the record: _GPL here is only because somebody decided to slap it |
300 | * on the previous export. Sheer idiocy, since it wasn't copyrightable |
301 | * at all and could be open-coded without any exports by anybody who cares. |
302 | */ |
303 | EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); |
304 | |
305 | static int blkdev_pr_register(struct block_device *bdev, |
306 | struct pr_registration __user *arg) |
307 | { |
308 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
309 | struct pr_registration reg; |
310 | |
311 | if (!capable(CAP_SYS_ADMIN)) |
312 | return -EPERM; |
313 | if (!ops || !ops->pr_register) |
314 | return -EOPNOTSUPP; |
315 | if (copy_from_user(®, arg, sizeof(reg))) |
316 | return -EFAULT; |
317 | |
318 | if (reg.flags & ~PR_FL_IGNORE_KEY) |
319 | return -EOPNOTSUPP; |
320 | return ops->pr_register(bdev, reg.old_key, reg.new_key, reg.flags); |
321 | } |
322 | |
323 | static int blkdev_pr_reserve(struct block_device *bdev, |
324 | struct pr_reservation __user *arg) |
325 | { |
326 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
327 | struct pr_reservation rsv; |
328 | |
329 | if (!capable(CAP_SYS_ADMIN)) |
330 | return -EPERM; |
331 | if (!ops || !ops->pr_reserve) |
332 | return -EOPNOTSUPP; |
333 | if (copy_from_user(&rsv, arg, sizeof(rsv))) |
334 | return -EFAULT; |
335 | |
336 | if (rsv.flags & ~PR_FL_IGNORE_KEY) |
337 | return -EOPNOTSUPP; |
338 | return ops->pr_reserve(bdev, rsv.key, rsv.type, rsv.flags); |
339 | } |
340 | |
341 | static int blkdev_pr_release(struct block_device *bdev, |
342 | struct pr_reservation __user *arg) |
343 | { |
344 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
345 | struct pr_reservation rsv; |
346 | |
347 | if (!capable(CAP_SYS_ADMIN)) |
348 | return -EPERM; |
349 | if (!ops || !ops->pr_release) |
350 | return -EOPNOTSUPP; |
351 | if (copy_from_user(&rsv, arg, sizeof(rsv))) |
352 | return -EFAULT; |
353 | |
354 | if (rsv.flags) |
355 | return -EOPNOTSUPP; |
356 | return ops->pr_release(bdev, rsv.key, rsv.type); |
357 | } |
358 | |
359 | static int blkdev_pr_preempt(struct block_device *bdev, |
360 | struct pr_preempt __user *arg, bool abort) |
361 | { |
362 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
363 | struct pr_preempt p; |
364 | |
365 | if (!capable(CAP_SYS_ADMIN)) |
366 | return -EPERM; |
367 | if (!ops || !ops->pr_preempt) |
368 | return -EOPNOTSUPP; |
369 | if (copy_from_user(&p, arg, sizeof(p))) |
370 | return -EFAULT; |
371 | |
372 | if (p.flags) |
373 | return -EOPNOTSUPP; |
374 | return ops->pr_preempt(bdev, p.old_key, p.new_key, p.type, abort); |
375 | } |
376 | |
377 | static int blkdev_pr_clear(struct block_device *bdev, |
378 | struct pr_clear __user *arg) |
379 | { |
380 | const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; |
381 | struct pr_clear c; |
382 | |
383 | if (!capable(CAP_SYS_ADMIN)) |
384 | return -EPERM; |
385 | if (!ops || !ops->pr_clear) |
386 | return -EOPNOTSUPP; |
387 | if (copy_from_user(&c, arg, sizeof(c))) |
388 | return -EFAULT; |
389 | |
390 | if (c.flags) |
391 | return -EOPNOTSUPP; |
392 | return ops->pr_clear(bdev, c.key); |
393 | } |
394 | |
/*
 * Does @ret mean "the driver did not recognize this ioctl"?
 *
 * The proper answers from a driver are -ENOTTY (final) or -ENOIOCTLCMD
 * ("I don't know this one, try a fallback"); the ioctl code turns
 * -ENOIOCTLCMD into -ENOTTY before returning to user space.
 *
 * Confused drivers sometimes return -EINVAL instead.  That is wrong, since
 * -EINVAL means "I understood the command, but its parameters were bad".
 * It is still accepted here; the aim is to fix the broken drivers so that
 * the -EINVAL case can go away.
 *
 * Returns 1 for -EINVAL, -ENOTTY and -ENOIOCTLCMD, 0 for anything else.
 */
static inline int is_unrecognized_ioctl(int ret)
{
	switch (ret) {
	case -EINVAL:
	case -ENOTTY:
	case -ENOIOCTLCMD:
		return 1;
	default:
		return 0;
	}
}
414 | |
415 | static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, |
416 | unsigned cmd, unsigned long arg) |
417 | { |
418 | int ret; |
419 | |
420 | if (!capable(CAP_SYS_ADMIN)) |
421 | return -EACCES; |
422 | |
423 | ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); |
424 | if (!is_unrecognized_ioctl(ret)) |
425 | return ret; |
426 | |
427 | fsync_bdev(bdev); |
428 | invalidate_bdev(bdev); |
429 | return 0; |
430 | } |
431 | |
432 | static int blkdev_roset(struct block_device *bdev, fmode_t mode, |
433 | unsigned cmd, unsigned long arg) |
434 | { |
435 | int ret, n; |
436 | |
437 | ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); |
438 | if (!is_unrecognized_ioctl(ret)) |
439 | return ret; |
440 | if (!capable(CAP_SYS_ADMIN)) |
441 | return -EACCES; |
442 | if (get_user(n, (int __user *)arg)) |
443 | return -EFAULT; |
444 | set_device_ro(bdev, n); |
445 | return 0; |
446 | } |
447 | |
448 | static int blkdev_getgeo(struct block_device *bdev, |
449 | struct hd_geometry __user *argp) |
450 | { |
451 | struct gendisk *disk = bdev->bd_disk; |
452 | struct hd_geometry geo; |
453 | int ret; |
454 | |
455 | if (!argp) |
456 | return -EINVAL; |
457 | if (!disk->fops->getgeo) |
458 | return -ENOTTY; |
459 | |
460 | /* |
461 | * We need to set the startsect first, the driver may |
462 | * want to override it. |
463 | */ |
464 | memset(&geo, 0, sizeof(geo)); |
465 | geo.start = get_start_sect(bdev); |
466 | ret = disk->fops->getgeo(bdev, &geo); |
467 | if (ret) |
468 | return ret; |
469 | if (copy_to_user(argp, &geo, sizeof(geo))) |
470 | return -EFAULT; |
471 | return 0; |
472 | } |
473 | |
474 | /* set the logical block size */ |
475 | static int blkdev_bszset(struct block_device *bdev, fmode_t mode, |
476 | int __user *argp) |
477 | { |
478 | int ret, n; |
479 | |
480 | if (!capable(CAP_SYS_ADMIN)) |
481 | return -EACCES; |
482 | if (!argp) |
483 | return -EINVAL; |
484 | if (get_user(n, argp)) |
485 | return -EFAULT; |
486 | |
487 | if (!(mode & FMODE_EXCL)) { |
488 | bdgrab(bdev); |
489 | if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) |
490 | return -EBUSY; |
491 | } |
492 | |
493 | ret = set_blocksize(bdev, n); |
494 | if (!(mode & FMODE_EXCL)) |
495 | blkdev_put(bdev, mode | FMODE_EXCL); |
496 | return ret; |
497 | } |
498 | |
499 | /* |
500 | * always keep this in sync with compat_blkdev_ioctl() |
501 | */ |
502 | int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, |
503 | unsigned long arg) |
504 | { |
505 | struct backing_dev_info *bdi; |
506 | void __user *argp = (void __user *)arg; |
507 | loff_t size; |
508 | unsigned int max_sectors; |
509 | |
510 | switch (cmd) { |
511 | case BLKFLSBUF: |
512 | return blkdev_flushbuf(bdev, mode, cmd, arg); |
513 | case BLKROSET: |
514 | return blkdev_roset(bdev, mode, cmd, arg); |
515 | case BLKDISCARD: |
516 | return blk_ioctl_discard(bdev, mode, arg, 0); |
517 | case BLKSECDISCARD: |
518 | return blk_ioctl_discard(bdev, mode, arg, |
519 | BLKDEV_DISCARD_SECURE); |
520 | case BLKZEROOUT: |
521 | return blk_ioctl_zeroout(bdev, mode, arg); |
522 | case HDIO_GETGEO: |
523 | return blkdev_getgeo(bdev, argp); |
524 | case BLKRAGET: |
525 | case BLKFRAGET: |
526 | if (!arg) |
527 | return -EINVAL; |
528 | bdi = blk_get_backing_dev_info(bdev); |
529 | return put_long(arg, (bdi->ra_pages * PAGE_SIZE) / 512); |
530 | case BLKROGET: |
531 | return put_int(arg, bdev_read_only(bdev) != 0); |
532 | case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ |
533 | return put_int(arg, block_size(bdev)); |
534 | case BLKSSZGET: /* get block device logical block size */ |
535 | return put_int(arg, bdev_logical_block_size(bdev)); |
536 | case BLKPBSZGET: /* get block device physical block size */ |
537 | return put_uint(arg, bdev_physical_block_size(bdev)); |
538 | case BLKIOMIN: |
539 | return put_uint(arg, bdev_io_min(bdev)); |
540 | case BLKIOOPT: |
541 | return put_uint(arg, bdev_io_opt(bdev)); |
542 | case BLKALIGNOFF: |
543 | return put_int(arg, bdev_alignment_offset(bdev)); |
544 | case BLKDISCARDZEROES: |
545 | return put_uint(arg, bdev_discard_zeroes_data(bdev)); |
546 | case BLKSECTGET: |
547 | max_sectors = min_t(unsigned int, USHRT_MAX, |
548 | queue_max_sectors(bdev_get_queue(bdev))); |
549 | return put_ushort(arg, max_sectors); |
550 | case BLKROTATIONAL: |
551 | return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); |
552 | case BLKRASET: |
553 | case BLKFRASET: |
554 | if(!capable(CAP_SYS_ADMIN)) |
555 | return -EACCES; |
556 | bdi = blk_get_backing_dev_info(bdev); |
557 | bdi->ra_pages = (arg * 512) / PAGE_SIZE; |
558 | return 0; |
559 | case BLKBSZSET: |
560 | return blkdev_bszset(bdev, mode, argp); |
561 | case BLKPG: |
562 | return blkpg_ioctl(bdev, argp); |
563 | case BLKRRPART: |
564 | return blkdev_reread_part(bdev); |
565 | case BLKGETSIZE: |
566 | size = i_size_read(bdev->bd_inode); |
567 | if ((size >> 9) > ~0UL) |
568 | return -EFBIG; |
569 | return put_ulong(arg, size >> 9); |
570 | case BLKGETSIZE64: |
571 | return put_u64(arg, i_size_read(bdev->bd_inode)); |
572 | case BLKTRACESTART: |
573 | case BLKTRACESTOP: |
574 | case BLKTRACESETUP: |
575 | case BLKTRACETEARDOWN: |
576 | return blk_trace_ioctl(bdev, cmd, argp); |
577 | case IOC_PR_REGISTER: |
578 | return blkdev_pr_register(bdev, argp); |
579 | case IOC_PR_RESERVE: |
580 | return blkdev_pr_reserve(bdev, argp); |
581 | case IOC_PR_RELEASE: |
582 | return blkdev_pr_release(bdev, argp); |
583 | case IOC_PR_PREEMPT: |
584 | return blkdev_pr_preempt(bdev, argp, false); |
585 | case IOC_PR_PREEMPT_ABORT: |
586 | return blkdev_pr_preempt(bdev, argp, true); |
587 | case IOC_PR_CLEAR: |
588 | return blkdev_pr_clear(bdev, argp); |
589 | default: |
590 | return __blkdev_driver_ioctl(bdev, mode, cmd, arg); |
591 | } |
592 | } |
593 | EXPORT_SYMBOL_GPL(blkdev_ioctl); |
594 |