blob: ab8dd153838174f3d2fbe1b7a31e98401429fd00
1 | /* |
2 | * fs/timerfd.c |
3 | * |
4 | * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> |
5 | * |
6 | * |
7 | * Thanks to Thomas Gleixner for code reviews and useful comments. |
8 | * |
9 | */ |
10 | |
11 | #include <linux/alarmtimer.h> |
12 | #include <linux/file.h> |
13 | #include <linux/poll.h> |
14 | #include <linux/init.h> |
15 | #include <linux/fs.h> |
16 | #include <linux/sched.h> |
17 | #include <linux/kernel.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/list.h> |
20 | #include <linux/spinlock.h> |
21 | #include <linux/time.h> |
22 | #include <linux/hrtimer.h> |
23 | #include <linux/anon_inodes.h> |
24 | #include <linux/timerfd.h> |
25 | #include <linux/syscalls.h> |
26 | #include <linux/compat.h> |
27 | #include <linux/rcupdate.h> |
28 | |
/*
 * Per-file state of one timerfd instance; stored in file->private_data.
 */
struct timerfd_ctx {
	/* Backing timer; isalarm() tells which member is in use. */
	union {
		struct hrtimer tmr;
		struct alarm alarm;
	} t;
	ktime_t tintv;			/* interval; non-zero means periodic */
	ktime_t moffs;			/* mono->real offset snapshot; KTIME_MAX flags a pending cancel */
	wait_queue_head_t wqh;		/* readers/pollers; wqh.lock is held around ticks/expired updates */
	u64 ticks;			/* expirations accumulated since the last read */
	int clockid;
	short unsigned expired;		/* set by the timer callback, cleared when the timer is re-armed */
	short unsigned settime_flags;	/* to show in fdinfo */
	struct rcu_head rcu;		/* used by kfree_rcu() in timerfd_release() */
	struct list_head clist;		/* link in the global cancel_list */
	spinlock_t cancel_lock;		/* serializes might_cancel / clist changes for this context */
	bool might_cancel;		/* true while the context is on cancel_list */
};
46 | |
/*
 * Contexts armed with TFD_TIMER_CANCEL_ON_SET.  Writers add/remove entries
 * under the global cancel_lock; timerfd_clock_was_set() walks the list
 * under rcu_read_lock().
 */
static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);
49 | |
50 | static inline bool isalarm(struct timerfd_ctx *ctx) |
51 | { |
52 | return ctx->clockid == CLOCK_REALTIME_ALARM || |
53 | ctx->clockid == CLOCK_BOOTTIME_ALARM; |
54 | } |
55 | |
56 | /* |
57 | * This gets called when the timer event triggers. We set the "expired" |
58 | * flag, but we do not re-arm the timer (in case it's necessary, |
59 | * tintv.tv64 != 0) until the timer is accessed. |
60 | */ |
61 | static void timerfd_triggered(struct timerfd_ctx *ctx) |
62 | { |
63 | unsigned long flags; |
64 | |
65 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
66 | ctx->expired = 1; |
67 | ctx->ticks++; |
68 | wake_up_locked(&ctx->wqh); |
69 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
70 | } |
71 | |
72 | static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) |
73 | { |
74 | struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, |
75 | t.tmr); |
76 | timerfd_triggered(ctx); |
77 | return HRTIMER_NORESTART; |
78 | } |
79 | |
80 | static enum alarmtimer_restart timerfd_alarmproc(struct alarm *alarm, |
81 | ktime_t now) |
82 | { |
83 | struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, |
84 | t.alarm); |
85 | timerfd_triggered(ctx); |
86 | return ALARMTIMER_NORESTART; |
87 | } |
88 | |
89 | /* |
90 | * Called when the clock was set to cancel the timers in the cancel |
91 | * list. This will wake up processes waiting on these timers. The |
92 | * wake-up requires ctx->ticks to be non zero, therefore we increment |
93 | * it before calling wake_up_locked(). |
94 | */ |
95 | void timerfd_clock_was_set(void) |
96 | { |
97 | ktime_t moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
98 | struct timerfd_ctx *ctx; |
99 | unsigned long flags; |
100 | |
101 | rcu_read_lock(); |
102 | list_for_each_entry_rcu(ctx, &cancel_list, clist) { |
103 | if (!ctx->might_cancel) |
104 | continue; |
105 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
106 | if (ctx->moffs.tv64 != moffs.tv64) { |
107 | ctx->moffs.tv64 = KTIME_MAX; |
108 | ctx->ticks++; |
109 | wake_up_locked(&ctx->wqh); |
110 | } |
111 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
112 | } |
113 | rcu_read_unlock(); |
114 | } |
115 | |
116 | static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) |
117 | { |
118 | if (ctx->might_cancel) { |
119 | ctx->might_cancel = false; |
120 | spin_lock(&cancel_lock); |
121 | list_del_rcu(&ctx->clist); |
122 | spin_unlock(&cancel_lock); |
123 | } |
124 | } |
125 | |
126 | static void timerfd_remove_cancel(struct timerfd_ctx *ctx) |
127 | { |
128 | spin_lock(&ctx->cancel_lock); |
129 | __timerfd_remove_cancel(ctx); |
130 | spin_unlock(&ctx->cancel_lock); |
131 | } |
132 | |
133 | static bool timerfd_canceled(struct timerfd_ctx *ctx) |
134 | { |
135 | if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) |
136 | return false; |
137 | ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
138 | return true; |
139 | } |
140 | |
141 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) |
142 | { |
143 | spin_lock(&ctx->cancel_lock); |
144 | if ((ctx->clockid == CLOCK_REALTIME || |
145 | ctx->clockid == CLOCK_REALTIME_ALARM) && |
146 | (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { |
147 | if (!ctx->might_cancel) { |
148 | ctx->might_cancel = true; |
149 | spin_lock(&cancel_lock); |
150 | list_add_rcu(&ctx->clist, &cancel_list); |
151 | spin_unlock(&cancel_lock); |
152 | } |
153 | } else { |
154 | __timerfd_remove_cancel(ctx); |
155 | } |
156 | spin_unlock(&ctx->cancel_lock); |
157 | } |
158 | |
159 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) |
160 | { |
161 | ktime_t remaining; |
162 | |
163 | if (isalarm(ctx)) |
164 | remaining = alarm_expires_remaining(&ctx->t.alarm); |
165 | else |
166 | remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); |
167 | |
168 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
169 | } |
170 | |
171 | static int timerfd_setup(struct timerfd_ctx *ctx, int flags, |
172 | const struct itimerspec *ktmr) |
173 | { |
174 | enum hrtimer_mode htmode; |
175 | ktime_t texp; |
176 | int clockid = ctx->clockid; |
177 | |
178 | htmode = (flags & TFD_TIMER_ABSTIME) ? |
179 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; |
180 | |
181 | texp = timespec_to_ktime(ktmr->it_value); |
182 | ctx->expired = 0; |
183 | ctx->ticks = 0; |
184 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
185 | |
186 | if (isalarm(ctx)) { |
187 | alarm_init(&ctx->t.alarm, |
188 | ctx->clockid == CLOCK_REALTIME_ALARM ? |
189 | ALARM_REALTIME : ALARM_BOOTTIME, |
190 | timerfd_alarmproc); |
191 | } else { |
192 | hrtimer_init(&ctx->t.tmr, clockid, htmode); |
193 | hrtimer_set_expires(&ctx->t.tmr, texp); |
194 | ctx->t.tmr.function = timerfd_tmrproc; |
195 | } |
196 | |
197 | if (texp.tv64 != 0) { |
198 | if (isalarm(ctx)) { |
199 | if (flags & TFD_TIMER_ABSTIME) |
200 | alarm_start(&ctx->t.alarm, texp); |
201 | else |
202 | alarm_start_relative(&ctx->t.alarm, texp); |
203 | } else { |
204 | hrtimer_start(&ctx->t.tmr, texp, htmode); |
205 | } |
206 | |
207 | if (timerfd_canceled(ctx)) |
208 | return -ECANCELED; |
209 | } |
210 | |
211 | ctx->settime_flags = flags & TFD_SETTIME_FLAGS; |
212 | return 0; |
213 | } |
214 | |
215 | static int timerfd_release(struct inode *inode, struct file *file) |
216 | { |
217 | struct timerfd_ctx *ctx = file->private_data; |
218 | |
219 | timerfd_remove_cancel(ctx); |
220 | |
221 | if (isalarm(ctx)) |
222 | alarm_cancel(&ctx->t.alarm); |
223 | else |
224 | hrtimer_cancel(&ctx->t.tmr); |
225 | kfree_rcu(ctx, rcu); |
226 | return 0; |
227 | } |
228 | |
229 | static unsigned int timerfd_poll(struct file *file, poll_table *wait) |
230 | { |
231 | struct timerfd_ctx *ctx = file->private_data; |
232 | unsigned int events = 0; |
233 | unsigned long flags; |
234 | |
235 | poll_wait(file, &ctx->wqh, wait); |
236 | |
237 | spin_lock_irqsave(&ctx->wqh.lock, flags); |
238 | if (ctx->ticks) |
239 | events |= POLLIN; |
240 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); |
241 | |
242 | return events; |
243 | } |
244 | |
/*
 * ->read() handler: hand the accumulated tick count to userspace as a u64
 * and reset it, re-arming a periodic timer on the way.
 *
 * Returns sizeof(u64) on success, -EINVAL if @count is smaller than a u64,
 * -EAGAIN for a non-blocking file with no pending ticks, -ECANCELED if a
 * cancellation was pending, -EFAULT if the copy to @buf fails, or the
 * result of the interruptible wait when no ticks were collected.
 */
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct timerfd_ctx *ctx = file->private_data;
	ssize_t res;
	u64 ticks = 0;

	if (count < sizeof(ticks))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	/*
	 * -EAGAIN is only provisional: it is overwritten below when ticks
	 * are pending or the timer was canceled.
	 */
	if (file->f_flags & O_NONBLOCK)
		res = -EAGAIN;
	else
		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);

	/*
	 * If clock has changed, we do not care about the
	 * ticks and we do not rearm the timer. Userspace must
	 * reevaluate anyway.
	 */
	if (timerfd_canceled(ctx)) {
		ctx->ticks = 0;
		ctx->expired = 0;
		res = -ECANCELED;
	}

	if (ctx->ticks) {
		ticks = ctx->ticks;

		if (ctx->expired && ctx->tintv.tv64) {
			/*
			 * If tintv.tv64 != 0, this is a periodic timer that
			 * needs to be re-armed. We avoid doing it in the timer
			 * callback to avoid DoS attacks specifying a very
			 * short timer period.
			 */
			if (isalarm(ctx)) {
				ticks += alarm_forward_now(
					&ctx->t.alarm, ctx->tintv) - 1;
				alarm_restart(&ctx->t.alarm);
			} else {
				ticks += hrtimer_forward_now(&ctx->t.tmr,
							     ctx->tintv) - 1;
				hrtimer_restart(&ctx->t.tmr);
			}
		}
		ctx->expired = 0;
		ctx->ticks = 0;
	}
	spin_unlock_irq(&ctx->wqh.lock);
	/* Copy to userspace only after the spinlock has been dropped. */
	if (ticks)
		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
	return res;
}
299 | |
300 | #ifdef CONFIG_PROC_FS |
301 | static void timerfd_show(struct seq_file *m, struct file *file) |
302 | { |
303 | struct timerfd_ctx *ctx = file->private_data; |
304 | struct itimerspec t; |
305 | |
306 | spin_lock_irq(&ctx->wqh.lock); |
307 | t.it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
308 | t.it_interval = ktime_to_timespec(ctx->tintv); |
309 | spin_unlock_irq(&ctx->wqh.lock); |
310 | |
311 | seq_printf(m, |
312 | "clockid: %d\n" |
313 | "ticks: %llu\n" |
314 | "settime flags: 0%o\n" |
315 | "it_value: (%llu, %llu)\n" |
316 | "it_interval: (%llu, %llu)\n", |
317 | ctx->clockid, |
318 | (unsigned long long)ctx->ticks, |
319 | ctx->settime_flags, |
320 | (unsigned long long)t.it_value.tv_sec, |
321 | (unsigned long long)t.it_value.tv_nsec, |
322 | (unsigned long long)t.it_interval.tv_sec, |
323 | (unsigned long long)t.it_interval.tv_nsec); |
324 | } |
325 | #else |
326 | #define timerfd_show NULL |
327 | #endif |
328 | |
329 | #ifdef CONFIG_CHECKPOINT_RESTORE |
330 | static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) |
331 | { |
332 | struct timerfd_ctx *ctx = file->private_data; |
333 | int ret = 0; |
334 | |
335 | switch (cmd) { |
336 | case TFD_IOC_SET_TICKS: { |
337 | u64 ticks; |
338 | |
339 | if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks))) |
340 | return -EFAULT; |
341 | if (!ticks) |
342 | return -EINVAL; |
343 | |
344 | spin_lock_irq(&ctx->wqh.lock); |
345 | if (!timerfd_canceled(ctx)) { |
346 | ctx->ticks = ticks; |
347 | wake_up_locked(&ctx->wqh); |
348 | } else |
349 | ret = -ECANCELED; |
350 | spin_unlock_irq(&ctx->wqh.lock); |
351 | break; |
352 | } |
353 | default: |
354 | ret = -ENOTTY; |
355 | break; |
356 | } |
357 | |
358 | return ret; |
359 | } |
360 | #else |
361 | #define timerfd_ioctl NULL |
362 | #endif |
363 | |
364 | static const struct file_operations timerfd_fops = { |
365 | .release = timerfd_release, |
366 | .poll = timerfd_poll, |
367 | .read = timerfd_read, |
368 | .llseek = noop_llseek, |
369 | .show_fdinfo = timerfd_show, |
370 | .unlocked_ioctl = timerfd_ioctl, |
371 | }; |
372 | |
373 | static int timerfd_fget(int fd, struct fd *p) |
374 | { |
375 | struct fd f = fdget(fd); |
376 | if (!f.file) |
377 | return -EBADF; |
378 | if (f.file->f_op != &timerfd_fops) { |
379 | fdput(f); |
380 | return -EINVAL; |
381 | } |
382 | *p = f; |
383 | return 0; |
384 | } |
385 | |
386 | SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) |
387 | { |
388 | int ufd; |
389 | struct timerfd_ctx *ctx; |
390 | |
391 | /* Check the TFD_* constants for consistency. */ |
392 | BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); |
393 | BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); |
394 | |
395 | if ((flags & ~TFD_CREATE_FLAGS) || |
396 | (clockid != CLOCK_MONOTONIC && |
397 | clockid != CLOCK_REALTIME && |
398 | clockid != CLOCK_REALTIME_ALARM && |
399 | clockid != CLOCK_BOOTTIME && |
400 | clockid != CLOCK_BOOTTIME_ALARM)) |
401 | return -EINVAL; |
402 | |
403 | if (!capable(CAP_WAKE_ALARM) && |
404 | (clockid == CLOCK_REALTIME_ALARM || |
405 | clockid == CLOCK_BOOTTIME_ALARM)) |
406 | return -EPERM; |
407 | |
408 | ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); |
409 | if (!ctx) |
410 | return -ENOMEM; |
411 | |
412 | init_waitqueue_head(&ctx->wqh); |
413 | spin_lock_init(&ctx->cancel_lock); |
414 | ctx->clockid = clockid; |
415 | |
416 | if (isalarm(ctx)) |
417 | alarm_init(&ctx->t.alarm, |
418 | ctx->clockid == CLOCK_REALTIME_ALARM ? |
419 | ALARM_REALTIME : ALARM_BOOTTIME, |
420 | timerfd_alarmproc); |
421 | else |
422 | hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS); |
423 | |
424 | ctx->moffs = ktime_mono_to_real((ktime_t){ .tv64 = 0 }); |
425 | |
426 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, |
427 | O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); |
428 | if (ufd < 0) |
429 | kfree(ctx); |
430 | |
431 | return ufd; |
432 | } |
433 | |
434 | static int do_timerfd_settime(int ufd, int flags, |
435 | const struct itimerspec *new, |
436 | struct itimerspec *old) |
437 | { |
438 | struct fd f; |
439 | struct timerfd_ctx *ctx; |
440 | int ret; |
441 | |
442 | if ((flags & ~TFD_SETTIME_FLAGS) || |
443 | !timespec_valid(&new->it_value) || |
444 | !timespec_valid(&new->it_interval)) |
445 | return -EINVAL; |
446 | |
447 | ret = timerfd_fget(ufd, &f); |
448 | if (ret) |
449 | return ret; |
450 | ctx = f.file->private_data; |
451 | |
452 | if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { |
453 | fdput(f); |
454 | return -EPERM; |
455 | } |
456 | |
457 | timerfd_setup_cancel(ctx, flags); |
458 | |
459 | /* |
460 | * We need to stop the existing timer before reprogramming |
461 | * it to the new values. |
462 | */ |
463 | for (;;) { |
464 | spin_lock_irq(&ctx->wqh.lock); |
465 | |
466 | if (isalarm(ctx)) { |
467 | if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) |
468 | break; |
469 | } else { |
470 | if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) |
471 | break; |
472 | } |
473 | spin_unlock_irq(&ctx->wqh.lock); |
474 | cpu_relax(); |
475 | } |
476 | |
477 | /* |
478 | * If the timer is expired and it's periodic, we need to advance it |
479 | * because the caller may want to know the previous expiration time. |
480 | * We do not update "ticks" and "expired" since the timer will be |
481 | * re-programmed again in the following timerfd_setup() call. |
482 | */ |
483 | if (ctx->expired && ctx->tintv.tv64) { |
484 | if (isalarm(ctx)) |
485 | alarm_forward_now(&ctx->t.alarm, ctx->tintv); |
486 | else |
487 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); |
488 | } |
489 | |
490 | old->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
491 | old->it_interval = ktime_to_timespec(ctx->tintv); |
492 | |
493 | /* |
494 | * Re-program the timer to the new value ... |
495 | */ |
496 | ret = timerfd_setup(ctx, flags, new); |
497 | |
498 | spin_unlock_irq(&ctx->wqh.lock); |
499 | fdput(f); |
500 | return ret; |
501 | } |
502 | |
503 | static int do_timerfd_gettime(int ufd, struct itimerspec *t) |
504 | { |
505 | struct fd f; |
506 | struct timerfd_ctx *ctx; |
507 | int ret = timerfd_fget(ufd, &f); |
508 | if (ret) |
509 | return ret; |
510 | ctx = f.file->private_data; |
511 | |
512 | spin_lock_irq(&ctx->wqh.lock); |
513 | if (ctx->expired && ctx->tintv.tv64) { |
514 | ctx->expired = 0; |
515 | |
516 | if (isalarm(ctx)) { |
517 | ctx->ticks += |
518 | alarm_forward_now( |
519 | &ctx->t.alarm, ctx->tintv) - 1; |
520 | alarm_restart(&ctx->t.alarm); |
521 | } else { |
522 | ctx->ticks += |
523 | hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) |
524 | - 1; |
525 | hrtimer_restart(&ctx->t.tmr); |
526 | } |
527 | } |
528 | t->it_value = ktime_to_timespec(timerfd_get_remaining(ctx)); |
529 | t->it_interval = ktime_to_timespec(ctx->tintv); |
530 | spin_unlock_irq(&ctx->wqh.lock); |
531 | fdput(f); |
532 | return 0; |
533 | } |
534 | |
535 | SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, |
536 | const struct itimerspec __user *, utmr, |
537 | struct itimerspec __user *, otmr) |
538 | { |
539 | struct itimerspec new, old; |
540 | int ret; |
541 | |
542 | if (copy_from_user(&new, utmr, sizeof(new))) |
543 | return -EFAULT; |
544 | ret = do_timerfd_settime(ufd, flags, &new, &old); |
545 | if (ret) |
546 | return ret; |
547 | if (otmr && copy_to_user(otmr, &old, sizeof(old))) |
548 | return -EFAULT; |
549 | |
550 | return ret; |
551 | } |
552 | |
553 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) |
554 | { |
555 | struct itimerspec kotmr; |
556 | int ret = do_timerfd_gettime(ufd, &kotmr); |
557 | if (ret) |
558 | return ret; |
559 | return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0; |
560 | } |
561 | |
562 | #ifdef CONFIG_COMPAT |
563 | COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, |
564 | const struct compat_itimerspec __user *, utmr, |
565 | struct compat_itimerspec __user *, otmr) |
566 | { |
567 | struct itimerspec new, old; |
568 | int ret; |
569 | |
570 | if (get_compat_itimerspec(&new, utmr)) |
571 | return -EFAULT; |
572 | ret = do_timerfd_settime(ufd, flags, &new, &old); |
573 | if (ret) |
574 | return ret; |
575 | if (otmr && put_compat_itimerspec(otmr, &old)) |
576 | return -EFAULT; |
577 | return ret; |
578 | } |
579 | |
580 | COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd, |
581 | struct compat_itimerspec __user *, otmr) |
582 | { |
583 | struct itimerspec kotmr; |
584 | int ret = do_timerfd_gettime(ufd, &kotmr); |
585 | if (ret) |
586 | return ret; |
587 | return put_compat_itimerspec(otmr, &kotmr) ? -EFAULT: 0; |
588 | } |
589 | #endif |
590 |