blob: 7c6a63d2c27ff4737afc2f5915edfcffcc356a62
1 | /* |
2 | * Lockless hierarchical page accounting & limiting |
3 | * |
4 | * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner |
5 | */ |
6 | |
7 | #include <linux/page_counter.h> |
8 | #include <linux/atomic.h> |
9 | #include <linux/kernel.h> |
10 | #include <linux/string.h> |
11 | #include <linux/sched.h> |
12 | #include <linux/bug.h> |
13 | #include <asm/page.h> |
14 | |
15 | /** |
16 | * page_counter_cancel - take pages out of the local counter |
17 | * @counter: counter |
18 | * @nr_pages: number of pages to cancel |
19 | */ |
20 | void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) |
21 | { |
22 | long new; |
23 | |
24 | new = atomic_long_sub_return(nr_pages, &counter->count); |
25 | /* More uncharges than charges? */ |
26 | WARN_ON_ONCE(new < 0); |
27 | } |
28 | |
29 | /** |
30 | * page_counter_charge - hierarchically charge pages |
31 | * @counter: counter |
32 | * @nr_pages: number of pages to charge |
33 | * |
34 | * NOTE: This does not consider any configured counter limits. |
35 | */ |
36 | void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) |
37 | { |
38 | struct page_counter *c; |
39 | |
40 | for (c = counter; c; c = c->parent) { |
41 | long new; |
42 | |
43 | new = atomic_long_add_return(nr_pages, &c->count); |
44 | /* |
45 | * This is indeed racy, but we can live with some |
46 | * inaccuracy in the watermark. |
47 | */ |
48 | if (new > c->watermark) |
49 | c->watermark = new; |
50 | } |
51 | } |
52 | |
53 | /** |
54 | * page_counter_try_charge - try to hierarchically charge pages |
55 | * @counter: counter |
56 | * @nr_pages: number of pages to charge |
57 | * @fail: points first counter to hit its limit, if any |
58 | * |
59 | * Returns %true on success, or %false and @fail if the counter or one |
60 | * of its ancestors has hit its configured limit. |
61 | */ |
62 | bool page_counter_try_charge(struct page_counter *counter, |
63 | unsigned long nr_pages, |
64 | struct page_counter **fail) |
65 | { |
66 | struct page_counter *c; |
67 | |
68 | for (c = counter; c; c = c->parent) { |
69 | long new; |
70 | /* |
71 | * Charge speculatively to avoid an expensive CAS. If |
72 | * a bigger charge fails, it might falsely lock out a |
73 | * racing smaller charge and send it into reclaim |
74 | * early, but the error is limited to the difference |
75 | * between the two sizes, which is less than 2M/4M in |
76 | * case of a THP locking out a regular page charge. |
77 | * |
78 | * The atomic_long_add_return() implies a full memory |
79 | * barrier between incrementing the count and reading |
80 | * the limit. When racing with page_counter_limit(), |
81 | * we either see the new limit or the setter sees the |
82 | * counter has changed and retries. |
83 | */ |
84 | new = atomic_long_add_return(nr_pages, &c->count); |
85 | if (new > c->limit) { |
86 | atomic_long_sub(nr_pages, &c->count); |
87 | /* |
88 | * This is racy, but we can live with some |
89 | * inaccuracy in the failcnt. |
90 | */ |
91 | c->failcnt++; |
92 | *fail = c; |
93 | goto failed; |
94 | } |
95 | /* |
96 | * Just like with failcnt, we can live with some |
97 | * inaccuracy in the watermark. |
98 | */ |
99 | if (new > c->watermark) |
100 | c->watermark = new; |
101 | } |
102 | return true; |
103 | |
104 | failed: |
105 | for (c = counter; c != *fail; c = c->parent) |
106 | page_counter_cancel(c, nr_pages); |
107 | |
108 | return false; |
109 | } |
110 | |
111 | /** |
112 | * page_counter_uncharge - hierarchically uncharge pages |
113 | * @counter: counter |
114 | * @nr_pages: number of pages to uncharge |
115 | */ |
116 | void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages) |
117 | { |
118 | struct page_counter *c; |
119 | |
120 | for (c = counter; c; c = c->parent) |
121 | page_counter_cancel(c, nr_pages); |
122 | } |
123 | |
124 | /** |
125 | * page_counter_limit - limit the number of pages allowed |
126 | * @counter: counter |
127 | * @limit: limit to set |
128 | * |
129 | * Returns 0 on success, -EBUSY if the current number of pages on the |
130 | * counter already exceeds the specified limit. |
131 | * |
132 | * The caller must serialize invocations on the same counter. |
133 | */ |
134 | int page_counter_limit(struct page_counter *counter, unsigned long limit) |
135 | { |
136 | for (;;) { |
137 | unsigned long old; |
138 | long count; |
139 | |
140 | /* |
141 | * Update the limit while making sure that it's not |
142 | * below the concurrently-changing counter value. |
143 | * |
144 | * The xchg implies two full memory barriers before |
145 | * and after, so the read-swap-read is ordered and |
146 | * ensures coherency with page_counter_try_charge(): |
147 | * that function modifies the count before checking |
148 | * the limit, so if it sees the old limit, we see the |
149 | * modified counter and retry. |
150 | */ |
151 | count = atomic_long_read(&counter->count); |
152 | |
153 | if (count > limit) |
154 | return -EBUSY; |
155 | |
156 | old = xchg(&counter->limit, limit); |
157 | |
158 | if (atomic_long_read(&counter->count) <= count) |
159 | return 0; |
160 | |
161 | counter->limit = old; |
162 | cond_resched(); |
163 | } |
164 | } |
165 | |
166 | /** |
167 | * page_counter_memparse - memparse() for page counter limits |
168 | * @buf: string to parse |
169 | * @max: string meaning maximum possible value |
170 | * @nr_pages: returns the result in number of pages |
171 | * |
172 | * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be |
173 | * limited to %PAGE_COUNTER_MAX. |
174 | */ |
175 | int page_counter_memparse(const char *buf, const char *max, |
176 | unsigned long *nr_pages) |
177 | { |
178 | char *end; |
179 | u64 bytes; |
180 | |
181 | if (!strcmp(buf, max)) { |
182 | *nr_pages = PAGE_COUNTER_MAX; |
183 | return 0; |
184 | } |
185 | |
186 | bytes = memparse(buf, &end); |
187 | if (*end != '\0') |
188 | return -EINVAL; |
189 | |
190 | *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); |
191 | |
192 | return 0; |
193 | } |
194 |