blob: 9588780ad43e175c795580275f6ddc62be4f670a
1 | /* |
2 | * fs/libfs.c |
3 | * Library for filesystems writers. |
4 | */ |
5 | |
6 | #include <linux/blkdev.h> |
7 | #include <linux/export.h> |
8 | #include <linux/pagemap.h> |
9 | #include <linux/slab.h> |
10 | #include <linux/mount.h> |
11 | #include <linux/vfs.h> |
12 | #include <linux/quotaops.h> |
13 | #include <linux/mutex.h> |
14 | #include <linux/namei.h> |
15 | #include <linux/exportfs.h> |
16 | #include <linux/writeback.h> |
17 | #include <linux/buffer_head.h> /* sync_mapping_buffers */ |
18 | |
19 | #include <asm/uaccess.h> |
20 | |
21 | #include "internal.h" |
22 | |
23 | int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, |
24 | struct kstat *stat) |
25 | { |
26 | struct inode *inode = d_inode(dentry); |
27 | generic_fillattr(inode, stat); |
28 | stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); |
29 | return 0; |
30 | } |
31 | EXPORT_SYMBOL(simple_getattr); |
32 | |
33 | int simple_statfs(struct dentry *dentry, struct kstatfs *buf) |
34 | { |
35 | buf->f_type = dentry->d_sb->s_magic; |
36 | buf->f_bsize = PAGE_SIZE; |
37 | buf->f_namelen = NAME_MAX; |
38 | return 0; |
39 | } |
40 | EXPORT_SYMBOL(simple_statfs); |
41 | |
/*
 * ->d_delete callback that answers 1 unconditionally, whatever @dentry is.
 *
 * Retaining negative dentries for an in-memory filesystem just wastes
 * memory and lookup time: arrange for them to be deleted immediately.
 */
int always_delete_dentry(const struct dentry *dentry)
{
	return 1;
}
EXPORT_SYMBOL(always_delete_dentry);
51 | |
/*
 * Dentry operations whose only hook is ->d_delete, wired to
 * always_delete_dentry().  simple_lookup() installs this table on dentries
 * of superblocks that have no s_d_op of their own.
 */
const struct dentry_operations simple_dentry_operations = {
	.d_delete = always_delete_dentry,
};
EXPORT_SYMBOL(simple_dentry_operations);
56 | |
57 | /* |
58 | * Lookup the data. This is trivial - if the dentry didn't already |
59 | * exist, we know it is negative. Set d_op to delete negative dentries. |
60 | */ |
61 | struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
62 | { |
63 | if (dentry->d_name.len > NAME_MAX) |
64 | return ERR_PTR(-ENAMETOOLONG); |
65 | if (!dentry->d_sb->s_d_op) |
66 | d_set_d_op(dentry, &simple_dentry_operations); |
67 | d_add(dentry, NULL); |
68 | return NULL; |
69 | } |
70 | EXPORT_SYMBOL(simple_lookup); |
71 | |
72 | int dcache_dir_open(struct inode *inode, struct file *file) |
73 | { |
74 | file->private_data = d_alloc_cursor(file->f_path.dentry); |
75 | |
76 | return file->private_data ? 0 : -ENOMEM; |
77 | } |
78 | EXPORT_SYMBOL(dcache_dir_open); |
79 | |
80 | int dcache_dir_close(struct inode *inode, struct file *file) |
81 | { |
82 | dput(file->private_data); |
83 | return 0; |
84 | } |
85 | EXPORT_SYMBOL(dcache_dir_close); |
86 | |
/*
 * Return the @count-th child of @parent, counting only entries for which
 * simple_positive() is true, starting with the list element that follows
 * @from.  Returns NULL if the child list ends before that many positive
 * entries were seen.
 *
 * The walk runs under rcu_read_lock() only.  If it stepped over at least
 * one non-positive entry and the directory's i_dir_seq no longer equals the
 * value sampled before the walk, the whole walk is repeated.
 *
 * parent is locked at least shared
 */
static struct dentry *next_positive(struct dentry *parent,
				    struct list_head *from,
				    int count)
{
	unsigned *seq = &parent->d_inode->i_dir_seq, n;
	struct dentry *res;
	struct list_head *p;
	bool skipped;
	int i;

retry:
	/* every attempt restarts the countdown and the result from scratch */
	i = count;
	skipped = false;
	/* sample the sequence counter with its low bit cleared */
	n = smp_load_acquire(seq) & ~1;
	res = NULL;
	rcu_read_lock();
	for (p = from->next; p != &parent->d_subdirs; p = p->next) {
		struct dentry *d = list_entry(p, struct dentry, d_child);
		if (!simple_positive(d)) {
			/* remember that a non-positive entry was stepped over */
			skipped = true;
		} else if (!--i) {
			/* countdown hit zero: this is the entry asked for */
			res = d;
			break;
		}
	}
	rcu_read_unlock();
	if (skipped) {
		/*
		 * Re-read the counter; any difference from the even value
		 * sampled above sends us back for another attempt.
		 */
		smp_rmb();
		if (unlikely(*seq != n))
			goto retry;
	}
	return res;
}
121 | |
/*
 * Relink @cursor within its parent's d_subdirs list: right after @after
 * when @after is non-NULL, otherwise at the tail of the list.
 *
 * The move is done under the parent's d_lock and is bracketed by updates
 * of the parent inode's i_dir_seq: the counter is first bumped from an even
 * value n to n + 1, and n + 2 is stored once the list has been updated.
 * next_positive() compares against this counter to decide whether to retry.
 */
static void move_cursor(struct dentry *cursor, struct list_head *after)
{
	struct dentry *parent = cursor->d_parent;
	unsigned n, *seq = &parent->d_inode->i_dir_seq;
	spin_lock(&parent->d_lock);
	/* spin until the counter is even and we are the one who made it odd */
	for (;;) {
		n = *seq;
		if (!(n & 1) && cmpxchg(seq, n, n + 1) == n)
			break;
		cpu_relax();
	}
	/* unlink the cursor from wherever it currently sits */
	__list_del(cursor->d_child.prev, cursor->d_child.next);
	if (after)
		list_add(&cursor->d_child, after);
	else
		list_add_tail(&cursor->d_child, &parent->d_subdirs);
	/* publish the new list state; the counter is even again */
	smp_store_release(seq, n + 2);
	spin_unlock(&parent->d_lock);
}
141 | |
142 | loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) |
143 | { |
144 | struct dentry *dentry = file->f_path.dentry; |
145 | switch (whence) { |
146 | case 1: |
147 | offset += file->f_pos; |
148 | case 0: |
149 | if (offset >= 0) |
150 | break; |
151 | default: |
152 | return -EINVAL; |
153 | } |
154 | if (offset != file->f_pos) { |
155 | file->f_pos = offset; |
156 | if (file->f_pos >= 2) { |
157 | struct dentry *cursor = file->private_data; |
158 | struct dentry *to; |
159 | loff_t n = file->f_pos - 2; |
160 | |
161 | inode_lock_shared(dentry->d_inode); |
162 | to = next_positive(dentry, &dentry->d_subdirs, n); |
163 | move_cursor(cursor, to ? &to->d_child : NULL); |
164 | inode_unlock_shared(dentry->d_inode); |
165 | } |
166 | } |
167 | return offset; |
168 | } |
169 | EXPORT_SYMBOL(dcache_dir_lseek); |
170 | |
171 | /* Relationship between i_mode and the DT_xxx types */ |
172 | static inline unsigned char dt_type(struct inode *inode) |
173 | { |
174 | return (inode->i_mode >> 12) & 15; |
175 | } |
176 | |
177 | /* |
178 | * Directory is locked and all positive dentries in it are safe, since |
179 | * for ramfs-type trees they can't go away without unlink() or rmdir(), |
180 | * both impossible due to the lock on directory. |
181 | */ |
182 | |
183 | int dcache_readdir(struct file *file, struct dir_context *ctx) |
184 | { |
185 | struct dentry *dentry = file->f_path.dentry; |
186 | struct dentry *cursor = file->private_data; |
187 | struct list_head *p = &cursor->d_child; |
188 | struct dentry *next; |
189 | bool moved = false; |
190 | |
191 | if (!dir_emit_dots(file, ctx)) |
192 | return 0; |
193 | |
194 | if (ctx->pos == 2) |
195 | p = &dentry->d_subdirs; |
196 | while ((next = next_positive(dentry, p, 1)) != NULL) { |
197 | if (!dir_emit(ctx, next->d_name.name, next->d_name.len, |
198 | d_inode(next)->i_ino, dt_type(d_inode(next)))) |
199 | break; |
200 | moved = true; |
201 | p = &next->d_child; |
202 | ctx->pos++; |
203 | } |
204 | if (moved) |
205 | move_cursor(cursor, p); |
206 | return 0; |
207 | } |
208 | EXPORT_SYMBOL(dcache_readdir); |
209 | |
/*
 * ->read for directories: never transfers anything and fails with -EISDIR
 * regardless of its arguments.
 */
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
	return -EISDIR;
}
EXPORT_SYMBOL(generic_read_dir);
215 | |
/*
 * File operations for dcache-backed directories, assembled from the
 * helpers above: open/release manage the per-open cursor, llseek and
 * iterate_shared walk the children, and read always fails with -EISDIR.
 */
const struct file_operations simple_dir_operations = {
	.open		= dcache_dir_open,
	.release	= dcache_dir_close,
	.llseek		= dcache_dir_lseek,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir,
	.fsync		= noop_fsync,
};
EXPORT_SYMBOL(simple_dir_operations);
225 | |
/*
 * Inode operations for a directory that only supports lookup (through
 * simple_lookup()); no other inode operation is provided by this table.
 */
const struct inode_operations simple_dir_inode_operations = {
	.lookup		= simple_lookup,
};
EXPORT_SYMBOL(simple_dir_inode_operations);
230 | |
/*
 * Default super operations, providing statfs only.  Used by
 * simple_fill_super(), and by mount_pseudo_xattr() when its caller passes
 * no operations of its own.
 */
static const struct super_operations simple_super_operations = {
	.statfs		= simple_statfs,
};
234 | |
235 | /* |
236 | * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that |
237 | * will never be mountable) |
238 | */ |
239 | struct dentry *mount_pseudo_xattr(struct file_system_type *fs_type, char *name, |
240 | const struct super_operations *ops, const struct xattr_handler **xattr, |
241 | const struct dentry_operations *dops, unsigned long magic) |
242 | { |
243 | struct super_block *s; |
244 | struct dentry *dentry; |
245 | struct inode *root; |
246 | struct qstr d_name = QSTR_INIT(name, strlen(name)); |
247 | |
248 | s = sget_userns(fs_type, NULL, set_anon_super, MS_KERNMOUNT|MS_NOUSER, |
249 | &init_user_ns, NULL); |
250 | if (IS_ERR(s)) |
251 | return ERR_CAST(s); |
252 | |
253 | s->s_maxbytes = MAX_LFS_FILESIZE; |
254 | s->s_blocksize = PAGE_SIZE; |
255 | s->s_blocksize_bits = PAGE_SHIFT; |
256 | s->s_magic = magic; |
257 | s->s_op = ops ? ops : &simple_super_operations; |
258 | s->s_xattr = xattr; |
259 | s->s_time_gran = 1; |
260 | root = new_inode(s); |
261 | if (!root) |
262 | goto Enomem; |
263 | /* |
264 | * since this is the first inode, make it number 1. New inodes created |
265 | * after this must take care not to collide with it (by passing |
266 | * max_reserved of 1 to iunique). |
267 | */ |
268 | root->i_ino = 1; |
269 | root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; |
270 | root->i_atime = root->i_mtime = root->i_ctime = current_time(root); |
271 | dentry = __d_alloc(s, &d_name); |
272 | if (!dentry) { |
273 | iput(root); |
274 | goto Enomem; |
275 | } |
276 | d_instantiate(dentry, root); |
277 | s->s_root = dentry; |
278 | s->s_d_op = dops; |
279 | s->s_flags |= MS_ACTIVE; |
280 | return dget(s->s_root); |
281 | |
282 | Enomem: |
283 | deactivate_locked_super(s); |
284 | return ERR_PTR(-ENOMEM); |
285 | } |
286 | EXPORT_SYMBOL(mount_pseudo_xattr); |
287 | |
288 | int simple_open(struct inode *inode, struct file *file) |
289 | { |
290 | if (inode->i_private) |
291 | file->private_data = inode->i_private; |
292 | return 0; |
293 | } |
294 | EXPORT_SYMBOL(simple_open); |
295 | |
296 | int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) |
297 | { |
298 | struct inode *inode = d_inode(old_dentry); |
299 | |
300 | inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); |
301 | inc_nlink(inode); |
302 | ihold(inode); |
303 | dget(dentry); |
304 | d_instantiate(dentry, inode); |
305 | return 0; |
306 | } |
307 | EXPORT_SYMBOL(simple_link); |
308 | |
309 | int simple_empty(struct dentry *dentry) |
310 | { |
311 | struct dentry *child; |
312 | int ret = 0; |
313 | |
314 | spin_lock(&dentry->d_lock); |
315 | list_for_each_entry(child, &dentry->d_subdirs, d_child) { |
316 | spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); |
317 | if (simple_positive(child)) { |
318 | spin_unlock(&child->d_lock); |
319 | goto out; |
320 | } |
321 | spin_unlock(&child->d_lock); |
322 | } |
323 | ret = 1; |
324 | out: |
325 | spin_unlock(&dentry->d_lock); |
326 | return ret; |
327 | } |
328 | EXPORT_SYMBOL(simple_empty); |
329 | |
330 | int simple_unlink(struct inode *dir, struct dentry *dentry) |
331 | { |
332 | struct inode *inode = d_inode(dentry); |
333 | |
334 | inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode); |
335 | drop_nlink(inode); |
336 | dput(dentry); |
337 | return 0; |
338 | } |
339 | EXPORT_SYMBOL(simple_unlink); |
340 | |
341 | int simple_rmdir(struct inode *dir, struct dentry *dentry) |
342 | { |
343 | if (!simple_empty(dentry)) |
344 | return -ENOTEMPTY; |
345 | |
346 | drop_nlink(d_inode(dentry)); |
347 | simple_unlink(dir, dentry); |
348 | drop_nlink(dir); |
349 | return 0; |
350 | } |
351 | EXPORT_SYMBOL(simple_rmdir); |
352 | |
353 | int simple_rename(struct inode *old_dir, struct dentry *old_dentry, |
354 | struct inode *new_dir, struct dentry *new_dentry, |
355 | unsigned int flags) |
356 | { |
357 | struct inode *inode = d_inode(old_dentry); |
358 | int they_are_dirs = d_is_dir(old_dentry); |
359 | |
360 | if (flags & ~RENAME_NOREPLACE) |
361 | return -EINVAL; |
362 | |
363 | if (!simple_empty(new_dentry)) |
364 | return -ENOTEMPTY; |
365 | |
366 | if (d_really_is_positive(new_dentry)) { |
367 | simple_unlink(new_dir, new_dentry); |
368 | if (they_are_dirs) { |
369 | drop_nlink(d_inode(new_dentry)); |
370 | drop_nlink(old_dir); |
371 | } |
372 | } else if (they_are_dirs) { |
373 | drop_nlink(old_dir); |
374 | inc_nlink(new_dir); |
375 | } |
376 | |
377 | old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime = |
378 | new_dir->i_mtime = inode->i_ctime = current_time(old_dir); |
379 | |
380 | return 0; |
381 | } |
382 | EXPORT_SYMBOL(simple_rename); |
383 | |
384 | /** |
385 | * simple_setattr - setattr for simple filesystem |
386 | * @dentry: dentry |
387 | * @iattr: iattr structure |
388 | * |
389 | * Returns 0 on success, -error on failure. |
390 | * |
391 | * simple_setattr is a simple ->setattr implementation without a proper |
392 | * implementation of size changes. |
393 | * |
394 | * It can either be used for in-memory filesystems or special files |
395 | * on simple regular filesystems. Anything that needs to change on-disk |
396 | * or wire state on size changes needs its own setattr method. |
397 | */ |
398 | int simple_setattr(struct dentry *dentry, struct iattr *iattr) |
399 | { |
400 | struct inode *inode = d_inode(dentry); |
401 | int error; |
402 | |
403 | error = setattr_prepare(dentry, iattr); |
404 | if (error) |
405 | return error; |
406 | |
407 | if (iattr->ia_valid & ATTR_SIZE) |
408 | truncate_setsize(inode, iattr->ia_size); |
409 | setattr_copy(inode, iattr); |
410 | mark_inode_dirty(inode); |
411 | return 0; |
412 | } |
413 | EXPORT_SYMBOL(simple_setattr); |
414 | |
/*
 * ->readpage helper for filesystems with no backing store: the page is
 * filled with zeroes, flushed from the data cache, flagged up to date and
 * unlocked.  @file is not used.  Always returns 0.
 */
int simple_readpage(struct file *file, struct page *page)
{
	/* there is nothing to read from: the content is all zeroes */
	clear_highpage(page);
	flush_dcache_page(page);

	SetPageUptodate(page);
	unlock_page(page);

	return 0;
}
EXPORT_SYMBOL(simple_readpage);
424 | |
425 | int simple_write_begin(struct file *file, struct address_space *mapping, |
426 | loff_t pos, unsigned len, unsigned flags, |
427 | struct page **pagep, void **fsdata) |
428 | { |
429 | struct page *page; |
430 | pgoff_t index; |
431 | |
432 | index = pos >> PAGE_SHIFT; |
433 | |
434 | page = grab_cache_page_write_begin(mapping, index, flags); |
435 | if (!page) |
436 | return -ENOMEM; |
437 | |
438 | *pagep = page; |
439 | |
440 | if (!PageUptodate(page) && (len != PAGE_SIZE)) { |
441 | unsigned from = pos & (PAGE_SIZE - 1); |
442 | |
443 | zero_user_segments(page, 0, from, from + len, PAGE_SIZE); |
444 | } |
445 | return 0; |
446 | } |
447 | EXPORT_SYMBOL(simple_write_begin); |
448 | |
449 | /** |
450 | * simple_write_end - .write_end helper for non-block-device FSes |
451 | * @available: See .write_end of address_space_operations |
452 | * @file: " |
453 | * @mapping: " |
454 | * @pos: " |
455 | * @len: " |
456 | * @copied: " |
457 | * @page: " |
458 | * @fsdata: " |
459 | * |
460 | * simple_write_end does the minimum needed for updating a page after writing is |
461 | * done. It has the same API signature as the .write_end of |
462 | * address_space_operations vector. So it can just be set onto .write_end for |
463 | * FSes that don't need any other processing. i_mutex is assumed to be held. |
464 | * Block based filesystems should use generic_write_end(). |
465 | * NOTE: Even though i_size might get updated by this function, mark_inode_dirty |
466 | * is not called, so a filesystem that actually does store data in .write_inode |
467 | * should extend on what's done here with a call to mark_inode_dirty() in the |
468 | * case that i_size has changed. |
469 | */ |
470 | int simple_write_end(struct file *file, struct address_space *mapping, |
471 | loff_t pos, unsigned len, unsigned copied, |
472 | struct page *page, void *fsdata) |
473 | { |
474 | struct inode *inode = page->mapping->host; |
475 | loff_t last_pos = pos + copied; |
476 | |
477 | /* zero the stale part of the page if we did a short copy */ |
478 | if (copied < len) { |
479 | unsigned from = pos & (PAGE_SIZE - 1); |
480 | |
481 | zero_user(page, from + copied, len - copied); |
482 | } |
483 | |
484 | if (!PageUptodate(page)) |
485 | SetPageUptodate(page); |
486 | /* |
487 | * No need to use i_size_read() here, the i_size |
488 | * cannot change under us because we hold the i_mutex. |
489 | */ |
490 | if (last_pos > inode->i_size) |
491 | i_size_write(inode, last_pos); |
492 | |
493 | set_page_dirty(page); |
494 | unlock_page(page); |
495 | put_page(page); |
496 | |
497 | return copied; |
498 | } |
499 | EXPORT_SYMBOL(simple_write_end); |
500 | |
501 | /* |
502 | * the inodes created here are not hashed. If you use iunique to generate |
503 | * unique inode values later for this filesystem, then you must take care |
504 | * to pass it an appropriate max_reserved value to avoid collisions. |
505 | */ |
506 | int simple_fill_super(struct super_block *s, unsigned long magic, |
507 | struct tree_descr *files) |
508 | { |
509 | struct inode *inode; |
510 | struct dentry *root; |
511 | struct dentry *dentry; |
512 | int i; |
513 | |
514 | s->s_blocksize = PAGE_SIZE; |
515 | s->s_blocksize_bits = PAGE_SHIFT; |
516 | s->s_magic = magic; |
517 | s->s_op = &simple_super_operations; |
518 | s->s_time_gran = 1; |
519 | |
520 | inode = new_inode(s); |
521 | if (!inode) |
522 | return -ENOMEM; |
523 | /* |
524 | * because the root inode is 1, the files array must not contain an |
525 | * entry at index 1 |
526 | */ |
527 | inode->i_ino = 1; |
528 | inode->i_mode = S_IFDIR | 0755; |
529 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); |
530 | inode->i_op = &simple_dir_inode_operations; |
531 | inode->i_fop = &simple_dir_operations; |
532 | set_nlink(inode, 2); |
533 | root = d_make_root(inode); |
534 | if (!root) |
535 | return -ENOMEM; |
536 | for (i = 0; !files->name || files->name[0]; i++, files++) { |
537 | if (!files->name) |
538 | continue; |
539 | |
540 | /* warn if it tries to conflict with the root inode */ |
541 | if (unlikely(i == 1)) |
542 | printk(KERN_WARNING "%s: %s passed in a files array" |
543 | "with an index of 1!\n", __func__, |
544 | s->s_type->name); |
545 | |
546 | dentry = d_alloc_name(root, files->name); |
547 | if (!dentry) |
548 | goto out; |
549 | inode = new_inode(s); |
550 | if (!inode) { |
551 | dput(dentry); |
552 | goto out; |
553 | } |
554 | inode->i_mode = S_IFREG | files->mode; |
555 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); |
556 | inode->i_fop = files->ops; |
557 | inode->i_ino = i; |
558 | d_add(dentry, inode); |
559 | } |
560 | s->s_root = root; |
561 | return 0; |
562 | out: |
563 | d_genocide(root); |
564 | shrink_dcache_parent(root); |
565 | dput(root); |
566 | return -ENOMEM; |
567 | } |
568 | EXPORT_SYMBOL(simple_fill_super); |
569 | |
/*
 * Taken by simple_pin_fs() and simple_release_fs() around their accesses to
 * the caller-supplied mount pointer and pin count.
 */
static DEFINE_SPINLOCK(pin_fs_lock);
571 | |
572 | int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) |
573 | { |
574 | struct vfsmount *mnt = NULL; |
575 | spin_lock(&pin_fs_lock); |
576 | if (unlikely(!*mount)) { |
577 | spin_unlock(&pin_fs_lock); |
578 | mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, NULL); |
579 | if (IS_ERR(mnt)) |
580 | return PTR_ERR(mnt); |
581 | spin_lock(&pin_fs_lock); |
582 | if (!*mount) |
583 | *mount = mnt; |
584 | } |
585 | mntget(*mount); |
586 | ++*count; |
587 | spin_unlock(&pin_fs_lock); |
588 | mntput(mnt); |
589 | return 0; |
590 | } |
591 | EXPORT_SYMBOL(simple_pin_fs); |
592 | |
593 | void simple_release_fs(struct vfsmount **mount, int *count) |
594 | { |
595 | struct vfsmount *mnt; |
596 | spin_lock(&pin_fs_lock); |
597 | mnt = *mount; |
598 | if (!--*count) |
599 | *mount = NULL; |
600 | spin_unlock(&pin_fs_lock); |
601 | mntput(mnt); |
602 | } |
603 | EXPORT_SYMBOL(simple_release_fs); |
604 | |
605 | /** |
606 | * simple_read_from_buffer - copy data from the buffer to user space |
607 | * @to: the user space buffer to read to |
608 | * @count: the maximum number of bytes to read |
609 | * @ppos: the current position in the buffer |
610 | * @from: the buffer to read from |
611 | * @available: the size of the buffer |
612 | * |
613 | * The simple_read_from_buffer() function reads up to @count bytes from the |
614 | * buffer @from at offset @ppos into the user space address starting at @to. |
615 | * |
616 | * On success, the number of bytes read is returned and the offset @ppos is |
617 | * advanced by this number, or negative value is returned on error. |
618 | **/ |
619 | ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, |
620 | const void *from, size_t available) |
621 | { |
622 | loff_t pos = *ppos; |
623 | size_t ret; |
624 | |
625 | if (pos < 0) |
626 | return -EINVAL; |
627 | if (pos >= available || !count) |
628 | return 0; |
629 | if (count > available - pos) |
630 | count = available - pos; |
631 | ret = copy_to_user(to, from + pos, count); |
632 | if (ret == count) |
633 | return -EFAULT; |
634 | count -= ret; |
635 | *ppos = pos + count; |
636 | return count; |
637 | } |
638 | EXPORT_SYMBOL(simple_read_from_buffer); |
639 | |
640 | /** |
641 | * simple_write_to_buffer - copy data from user space to the buffer |
642 | * @to: the buffer to write to |
643 | * @available: the size of the buffer |
644 | * @ppos: the current position in the buffer |
645 | * @from: the user space buffer to read from |
646 | * @count: the maximum number of bytes to read |
647 | * |
648 | * The simple_write_to_buffer() function reads up to @count bytes from the user |
649 | * space address starting at @from into the buffer @to at offset @ppos. |
650 | * |
651 | * On success, the number of bytes written is returned and the offset @ppos is |
652 | * advanced by this number, or negative value is returned on error. |
653 | **/ |
654 | ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, |
655 | const void __user *from, size_t count) |
656 | { |
657 | loff_t pos = *ppos; |
658 | size_t res; |
659 | |
660 | if (pos < 0) |
661 | return -EINVAL; |
662 | if (pos >= available || !count) |
663 | return 0; |
664 | if (count > available - pos) |
665 | count = available - pos; |
666 | res = copy_from_user(to + pos, from, count); |
667 | if (res == count) |
668 | return -EFAULT; |
669 | count -= res; |
670 | *ppos = pos + count; |
671 | return count; |
672 | } |
673 | EXPORT_SYMBOL(simple_write_to_buffer); |
674 | |
675 | /** |
676 | * memory_read_from_buffer - copy data from the buffer |
677 | * @to: the kernel space buffer to read to |
678 | * @count: the maximum number of bytes to read |
679 | * @ppos: the current position in the buffer |
680 | * @from: the buffer to read from |
681 | * @available: the size of the buffer |
682 | * |
683 | * The memory_read_from_buffer() function reads up to @count bytes from the |
684 | * buffer @from at offset @ppos into the kernel space address starting at @to. |
685 | * |
686 | * On success, the number of bytes read is returned and the offset @ppos is |
687 | * advanced by this number, or negative value is returned on error. |
688 | **/ |
689 | ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, |
690 | const void *from, size_t available) |
691 | { |
692 | loff_t pos = *ppos; |
693 | |
694 | if (pos < 0) |
695 | return -EINVAL; |
696 | if (pos >= available) |
697 | return 0; |
698 | if (count > available - pos) |
699 | count = available - pos; |
700 | memcpy(to, from + pos, count); |
701 | *ppos = pos + count; |
702 | |
703 | return count; |
704 | } |
705 | EXPORT_SYMBOL(memory_read_from_buffer); |
706 | |
707 | /* |
708 | * Transaction based IO. |
709 | * The file expects a single write which triggers the transaction, and then |
710 | * possibly a read which collects the result - which is stored in a |
711 | * file-local buffer. |
712 | */ |
713 | |
714 | void simple_transaction_set(struct file *file, size_t n) |
715 | { |
716 | struct simple_transaction_argresp *ar = file->private_data; |
717 | |
718 | BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); |
719 | |
720 | /* |
721 | * The barrier ensures that ar->size will really remain zero until |
722 | * ar->data is ready for reading. |
723 | */ |
724 | smp_mb(); |
725 | ar->size = n; |
726 | } |
727 | EXPORT_SYMBOL(simple_transaction_set); |
728 | |
729 | char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) |
730 | { |
731 | struct simple_transaction_argresp *ar; |
732 | static DEFINE_SPINLOCK(simple_transaction_lock); |
733 | |
734 | if (size > SIMPLE_TRANSACTION_LIMIT - 1) |
735 | return ERR_PTR(-EFBIG); |
736 | |
737 | ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); |
738 | if (!ar) |
739 | return ERR_PTR(-ENOMEM); |
740 | |
741 | spin_lock(&simple_transaction_lock); |
742 | |
743 | /* only one write allowed per open */ |
744 | if (file->private_data) { |
745 | spin_unlock(&simple_transaction_lock); |
746 | free_page((unsigned long)ar); |
747 | return ERR_PTR(-EBUSY); |
748 | } |
749 | |
750 | file->private_data = ar; |
751 | |
752 | spin_unlock(&simple_transaction_lock); |
753 | |
754 | if (copy_from_user(ar->data, buf, size)) |
755 | return ERR_PTR(-EFAULT); |
756 | |
757 | return ar->data; |
758 | } |
759 | EXPORT_SYMBOL(simple_transaction_get); |
760 | |
761 | ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) |
762 | { |
763 | struct simple_transaction_argresp *ar = file->private_data; |
764 | |
765 | if (!ar) |
766 | return 0; |
767 | return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); |
768 | } |
769 | EXPORT_SYMBOL(simple_transaction_read); |
770 | |
771 | int simple_transaction_release(struct inode *inode, struct file *file) |
772 | { |
773 | free_page((unsigned long)file->private_data); |
774 | return 0; |
775 | } |
776 | EXPORT_SYMBOL(simple_transaction_release); |
777 | |
/* Simple attribute files */

/*
 * Per-open state of a simple attribute file; allocated by
 * simple_attr_open() and freed by simple_attr_release().
 */
struct simple_attr {
	int (*get)(void *, u64 *);	/* value getter; reads fail with -EACCES if NULL */
	int (*set)(void *, u64);	/* value setter; writes fail with -EACCES if NULL */
	char get_buf[24];	/* enough to store a u64 and "\n\0" */
	char set_buf[24];	/* NUL-terminated copy of the text written by the user */
	void *data;		/* inode->i_private at open time, passed to get/set */
	const char *fmt;	/* format for read operation */
	struct mutex mutex;	/* protects access to these buffers */
};
789 | |
790 | /* simple_attr_open is called by an actual attribute open file operation |
791 | * to set the attribute specific access operations. */ |
792 | int simple_attr_open(struct inode *inode, struct file *file, |
793 | int (*get)(void *, u64 *), int (*set)(void *, u64), |
794 | const char *fmt) |
795 | { |
796 | struct simple_attr *attr; |
797 | |
798 | attr = kmalloc(sizeof(*attr), GFP_KERNEL); |
799 | if (!attr) |
800 | return -ENOMEM; |
801 | |
802 | attr->get = get; |
803 | attr->set = set; |
804 | attr->data = inode->i_private; |
805 | attr->fmt = fmt; |
806 | mutex_init(&attr->mutex); |
807 | |
808 | file->private_data = attr; |
809 | |
810 | return nonseekable_open(inode, file); |
811 | } |
812 | EXPORT_SYMBOL_GPL(simple_attr_open); |
813 | |
814 | int simple_attr_release(struct inode *inode, struct file *file) |
815 | { |
816 | kfree(file->private_data); |
817 | return 0; |
818 | } |
819 | EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ |
820 | |
821 | /* read from the buffer that is filled with the get function */ |
822 | ssize_t simple_attr_read(struct file *file, char __user *buf, |
823 | size_t len, loff_t *ppos) |
824 | { |
825 | struct simple_attr *attr; |
826 | size_t size; |
827 | ssize_t ret; |
828 | |
829 | attr = file->private_data; |
830 | |
831 | if (!attr->get) |
832 | return -EACCES; |
833 | |
834 | ret = mutex_lock_interruptible(&attr->mutex); |
835 | if (ret) |
836 | return ret; |
837 | |
838 | if (*ppos) { /* continued read */ |
839 | size = strlen(attr->get_buf); |
840 | } else { /* first read */ |
841 | u64 val; |
842 | ret = attr->get(attr->data, &val); |
843 | if (ret) |
844 | goto out; |
845 | |
846 | size = scnprintf(attr->get_buf, sizeof(attr->get_buf), |
847 | attr->fmt, (unsigned long long)val); |
848 | } |
849 | |
850 | ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); |
851 | out: |
852 | mutex_unlock(&attr->mutex); |
853 | return ret; |
854 | } |
855 | EXPORT_SYMBOL_GPL(simple_attr_read); |
856 | |
857 | /* interpret the buffer as a number to call the set function with */ |
858 | ssize_t simple_attr_write(struct file *file, const char __user *buf, |
859 | size_t len, loff_t *ppos) |
860 | { |
861 | struct simple_attr *attr; |
862 | u64 val; |
863 | size_t size; |
864 | ssize_t ret; |
865 | |
866 | attr = file->private_data; |
867 | if (!attr->set) |
868 | return -EACCES; |
869 | |
870 | ret = mutex_lock_interruptible(&attr->mutex); |
871 | if (ret) |
872 | return ret; |
873 | |
874 | ret = -EFAULT; |
875 | size = min(sizeof(attr->set_buf) - 1, len); |
876 | if (copy_from_user(attr->set_buf, buf, size)) |
877 | goto out; |
878 | |
879 | attr->set_buf[size] = '\0'; |
880 | val = simple_strtoll(attr->set_buf, NULL, 0); |
881 | ret = attr->set(attr->data, val); |
882 | if (ret == 0) |
883 | ret = len; /* on success, claim we got the whole input */ |
884 | out: |
885 | mutex_unlock(&attr->mutex); |
886 | return ret; |
887 | } |
888 | EXPORT_SYMBOL_GPL(simple_attr_write); |
889 | |
890 | /** |
891 | * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation |
892 | * @sb: filesystem to do the file handle conversion on |
893 | * @fid: file handle to convert |
894 | * @fh_len: length of the file handle in bytes |
895 | * @fh_type: type of file handle |
896 | * @get_inode: filesystem callback to retrieve inode |
897 | * |
898 | * This function decodes @fid as long as it has one of the well-known |
899 | * Linux filehandle types and calls @get_inode on it to retrieve the |
900 | * inode for the object specified in the file handle. |
901 | */ |
902 | struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, |
903 | int fh_len, int fh_type, struct inode *(*get_inode) |
904 | (struct super_block *sb, u64 ino, u32 gen)) |
905 | { |
906 | struct inode *inode = NULL; |
907 | |
908 | if (fh_len < 2) |
909 | return NULL; |
910 | |
911 | switch (fh_type) { |
912 | case FILEID_INO32_GEN: |
913 | case FILEID_INO32_GEN_PARENT: |
914 | inode = get_inode(sb, fid->i32.ino, fid->i32.gen); |
915 | break; |
916 | } |
917 | |
918 | return d_obtain_alias(inode); |
919 | } |
920 | EXPORT_SYMBOL_GPL(generic_fh_to_dentry); |
921 | |
922 | /** |
923 | * generic_fh_to_parent - generic helper for the fh_to_parent export operation |
924 | * @sb: filesystem to do the file handle conversion on |
925 | * @fid: file handle to convert |
926 | * @fh_len: length of the file handle in bytes |
927 | * @fh_type: type of file handle |
928 | * @get_inode: filesystem callback to retrieve inode |
929 | * |
930 | * This function decodes @fid as long as it has one of the well-known |
931 | * Linux filehandle types and calls @get_inode on it to retrieve the |
932 | * inode for the _parent_ object specified in the file handle if it |
933 | * is specified in the file handle, or NULL otherwise. |
934 | */ |
935 | struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, |
936 | int fh_len, int fh_type, struct inode *(*get_inode) |
937 | (struct super_block *sb, u64 ino, u32 gen)) |
938 | { |
939 | struct inode *inode = NULL; |
940 | |
941 | if (fh_len <= 2) |
942 | return NULL; |
943 | |
944 | switch (fh_type) { |
945 | case FILEID_INO32_GEN_PARENT: |
946 | inode = get_inode(sb, fid->i32.parent_ino, |
947 | (fh_len > 3 ? fid->i32.parent_gen : 0)); |
948 | break; |
949 | } |
950 | |
951 | return d_obtain_alias(inode); |
952 | } |
953 | EXPORT_SYMBOL_GPL(generic_fh_to_parent); |
954 | |
955 | /** |
956 | * __generic_file_fsync - generic fsync implementation for simple filesystems |
957 | * |
958 | * @file: file to synchronize |
959 | * @start: start offset in bytes |
960 | * @end: end offset in bytes (inclusive) |
961 | * @datasync: only synchronize essential metadata if true |
962 | * |
963 | * This is a generic implementation of the fsync method for simple |
964 | * filesystems which track all non-inode metadata in the buffers list |
965 | * hanging off the address_space structure. |
966 | */ |
967 | int __generic_file_fsync(struct file *file, loff_t start, loff_t end, |
968 | int datasync) |
969 | { |
970 | struct inode *inode = file->f_mapping->host; |
971 | int err; |
972 | int ret; |
973 | |
974 | err = filemap_write_and_wait_range(inode->i_mapping, start, end); |
975 | if (err) |
976 | return err; |
977 | |
978 | inode_lock(inode); |
979 | ret = sync_mapping_buffers(inode->i_mapping); |
980 | if (!(inode->i_state & I_DIRTY_ALL)) |
981 | goto out; |
982 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
983 | goto out; |
984 | |
985 | err = sync_inode_metadata(inode, 1); |
986 | if (ret == 0) |
987 | ret = err; |
988 | |
989 | out: |
990 | inode_unlock(inode); |
991 | return ret; |
992 | } |
993 | EXPORT_SYMBOL(__generic_file_fsync); |
994 | |
995 | /** |
996 | * generic_file_fsync - generic fsync implementation for simple filesystems |
997 | * with flush |
998 | * @file: file to synchronize |
999 | * @start: start offset in bytes |
1000 | * @end: end offset in bytes (inclusive) |
1001 | * @datasync: only synchronize essential metadata if true |
1002 | * |
1003 | */ |
1004 | |
1005 | int generic_file_fsync(struct file *file, loff_t start, loff_t end, |
1006 | int datasync) |
1007 | { |
1008 | struct inode *inode = file->f_mapping->host; |
1009 | int err; |
1010 | |
1011 | err = __generic_file_fsync(file, start, end, datasync); |
1012 | if (err) |
1013 | return err; |
1014 | return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
1015 | } |
1016 | EXPORT_SYMBOL(generic_file_fsync); |
1017 | |
1018 | /** |
1019 | * generic_check_addressable - Check addressability of file system |
1020 | * @blocksize_bits: log of file system block size |
1021 | * @num_blocks: number of blocks in file system |
1022 | * |
1023 | * Determine whether a file system with @num_blocks blocks (and a |
1024 | * block size of 2**@blocksize_bits) is addressable by the sector_t |
1025 | * and page cache of the system. Return 0 if so and -EFBIG otherwise. |
1026 | */ |
1027 | int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) |
1028 | { |
1029 | u64 last_fs_block = num_blocks - 1; |
1030 | u64 last_fs_page = |
1031 | last_fs_block >> (PAGE_SHIFT - blocksize_bits); |
1032 | |
1033 | if (unlikely(num_blocks == 0)) |
1034 | return 0; |
1035 | |
1036 | if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) |
1037 | return -EINVAL; |
1038 | |
1039 | if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || |
1040 | (last_fs_page > (pgoff_t)(~0ULL))) { |
1041 | return -EFBIG; |
1042 | } |
1043 | return 0; |
1044 | } |
1045 | EXPORT_SYMBOL(generic_check_addressable); |
1046 | |
1047 | /* |
1048 | * No-op implementation of ->fsync for in-memory filesystems. |
1049 | */ |
1050 | int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) |
1051 | { |
1052 | return 0; |
1053 | } |
1054 | EXPORT_SYMBOL(noop_fsync); |
1055 | |
/*
 * Thin wrapper with the exact signature void (*)(void *), needed because
 * kfree itself is not assignment-compatible with that function type.
 * It simply passes @p on to kfree().
 */
void kfree_link(void *p)
{
	kfree(p);
}
1061 | EXPORT_SYMBOL(kfree_link); |
1062 | |
/*
 * nop .set_page_dirty method so that people can use .page_mkwrite on
 * anon inodes.  The page is left untouched and 0 is always returned.
 */
static int anon_set_page_dirty(struct page *page)
{
	return 0;
}
1071 | |
1072 | /* |
1073 | * A single inode exists for all anon_inode files. Contrary to pipes, |
1074 | * anon_inode inodes have no associated per-instance data, so we need |
1075 | * only allocate one of them. |
1076 | */ |
1077 | struct inode *alloc_anon_inode(struct super_block *s) |
1078 | { |
1079 | static const struct address_space_operations anon_aops = { |
1080 | .set_page_dirty = anon_set_page_dirty, |
1081 | }; |
1082 | struct inode *inode = new_inode_pseudo(s); |
1083 | |
1084 | if (!inode) |
1085 | return ERR_PTR(-ENOMEM); |
1086 | |
1087 | inode->i_ino = get_next_ino(); |
1088 | inode->i_mapping->a_ops = &anon_aops; |
1089 | |
1090 | /* |
1091 | * Mark the inode dirty from the very beginning, |
1092 | * that way it will never be moved to the dirty |
1093 | * list because mark_inode_dirty() will think |
1094 | * that it already _is_ on the dirty list. |
1095 | */ |
1096 | inode->i_state = I_DIRTY; |
1097 | inode->i_mode = S_IRUSR | S_IWUSR; |
1098 | inode->i_uid = current_fsuid(); |
1099 | inode->i_gid = current_fsgid(); |
1100 | inode->i_flags |= S_PRIVATE; |
1101 | inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); |
1102 | return inode; |
1103 | } |
1104 | EXPORT_SYMBOL(alloc_anon_inode); |
1105 | |
/**
 * simple_nosetlease - generic helper for prohibiting leases
 * @filp: file pointer
 * @arg: type of lease to obtain
 * @flp: new lease supplied for insertion
 * @priv: private data for lm_setup operation
 *
 * For filesystems that do not want leases to be set on their files.
 * None of the arguments is looked at.
 *
 * Return: always -EINVAL.
 */
int simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
		      void **priv)
{
	return -EINVAL;
}
1122 | EXPORT_SYMBOL(simple_nosetlease); |
1123 | |
1124 | const char *simple_get_link(struct dentry *dentry, struct inode *inode, |
1125 | struct delayed_call *done) |
1126 | { |
1127 | return inode->i_link; |
1128 | } |
1129 | EXPORT_SYMBOL(simple_get_link); |
1130 | |
1131 | const struct inode_operations simple_symlink_inode_operations = { |
1132 | .get_link = simple_get_link, |
1133 | .readlink = generic_readlink |
1134 | }; |
1135 | EXPORT_SYMBOL(simple_symlink_inode_operations); |
1136 | |
1137 | /* |
1138 | * Operations for a permanently empty directory. |
1139 | */ |
1140 | static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
1141 | { |
1142 | return ERR_PTR(-ENOENT); |
1143 | } |
1144 | |
/*
 * Fill @stat from the inode behind @dentry via generic_fillattr().
 * @mnt is unused; always returns 0.
 */
static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
			     struct kstat *stat)
{
	generic_fillattr(d_inode(dentry), stat);

	return 0;
}
1152 | |
/* Reject every attribute change on the empty directory with -EPERM. */
static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
{
	return -EPERM;
}
1157 | |
/* Listing extended attributes is refused: always returns -EOPNOTSUPP. */
static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list,
				   size_t size)
{
	return -EOPNOTSUPP;
}
1162 | |
1163 | static const struct inode_operations empty_dir_inode_operations = { |
1164 | .lookup = empty_dir_lookup, |
1165 | .permission = generic_permission, |
1166 | .setattr = empty_dir_setattr, |
1167 | .getattr = empty_dir_getattr, |
1168 | .listxattr = empty_dir_listxattr, |
1169 | }; |
1170 | |
1171 | static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) |
1172 | { |
1173 | /* An empty directory has two entries . and .. at offsets 0 and 1 */ |
1174 | return generic_file_llseek_size(file, offset, whence, 2, 2); |
1175 | } |
1176 | |
/*
 * Directory iteration for the empty directory: emit the dot entries and
 * nothing else.  The result of dir_emit_dots() is not used; 0 is returned
 * in all cases.
 */
static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
{
	(void)dir_emit_dots(file, ctx);

	return 0;
}
1182 | |
1183 | static const struct file_operations empty_dir_operations = { |
1184 | .llseek = empty_dir_llseek, |
1185 | .read = generic_read_dir, |
1186 | .iterate_shared = empty_dir_readdir, |
1187 | .fsync = noop_fsync, |
1188 | }; |
1189 | |
1190 | |
1191 | void make_empty_dir_inode(struct inode *inode) |
1192 | { |
1193 | set_nlink(inode, 2); |
1194 | inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; |
1195 | inode->i_uid = GLOBAL_ROOT_UID; |
1196 | inode->i_gid = GLOBAL_ROOT_GID; |
1197 | inode->i_rdev = 0; |
1198 | inode->i_size = 0; |
1199 | inode->i_blkbits = PAGE_SHIFT; |
1200 | inode->i_blocks = 0; |
1201 | |
1202 | inode->i_op = &empty_dir_inode_operations; |
1203 | inode->i_opflags &= ~IOP_XATTR; |
1204 | inode->i_fop = &empty_dir_operations; |
1205 | } |
1206 | |
1207 | bool is_empty_dir_inode(struct inode *inode) |
1208 | { |
1209 | return (inode->i_fop == &empty_dir_operations) && |
1210 | (inode->i_op == &empty_dir_inode_operations); |
1211 | } |
1212 |