/*
 * mm/percpu-vm.c - vmalloc area based chunk allocation
 *
 * Copyright (C) 2010 SUSE Linux Products GmbH
 * Copyright (C) 2010 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 *
 * Chunks are mapped into vmalloc areas and populated page by page.
 * This is the default chunk allocator.
 */

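/*
 * Rough call flow (both entry points below are invoked by the percpu
 * core with pcpu_alloc_mutex held, per their CONTEXT blocks):
 *
 *   pcpu_populate_chunk()
 *     pcpu_get_pages()       - shared temp page-pointer array
 *     pcpu_alloc_pages()     - allocate backing pages for each cpu
 *     pcpu_map_pages()       - map them into the chunk's vmalloc area
 *     pcpu_post_map_flush()  - flush cache over the mapped region
 *
 *   pcpu_depopulate_chunk()
 *     pcpu_pre_unmap_flush()
 *     pcpu_unmap_pages()     - TLB flush is left to vmalloc's lazy path
 *     pcpu_free_pages()
 */
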
static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk,
				    unsigned int cpu, int page_idx)
{
	/* must not be used on pre-mapped chunk */
	WARN_ON(chunk->immutable);

	return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx));
}
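
/*
 * For reference, pcpu_chunk_addr() is defined in mm/percpu.c; at the
 * time of writing it boils down to the sketch below, reproduced here
 * only to make the address arithmetic in this file easier to follow:
 *
 *	(unsigned long)chunk->base_addr + pcpu_unit_offsets[cpu] +
 *		((unsigned long)page_idx << PAGE_SHIFT)
 */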

/**
 * pcpu_get_pages - get temp pages array
 *
 * Returns pointer to array of pointers to struct page which can be indexed
 * with pcpu_page_idx(). Note that there is only one array and accesses
 * should be serialized by pcpu_alloc_mutex.
 *
 * RETURNS:
 * Pointer to temp pages array on success.
 */
static struct page **pcpu_get_pages(void)
{
	static struct page **pages;
	size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]);

	lockdep_assert_held(&pcpu_alloc_mutex);

	if (!pages)
		pages = pcpu_mem_zalloc(pages_size);
	return pages;
}
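
/*
 * For reference, pcpu_page_idx() is also defined in mm/percpu.c; at the
 * time of writing it is essentially:
 *
 *	pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx
 *
 * i.e. unit-major, page-minor, which is why the temp array above is
 * sized pcpu_nr_units * pcpu_unit_pages.
 */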

/**
 * pcpu_free_pages - free pages which were allocated for @chunk
 * @chunk: chunk pages were allocated for
 * @pages: array of pages to be freed, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be freed
 * @page_end: page index of the last page to be freed + 1
 *
 * Free pages [@page_start, @page_end) in @pages for all units.
 * The pages were allocated for @chunk.
 */
static void pcpu_free_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page = pages[pcpu_page_idx(cpu, i)];

			if (page)
				__free_page(page);
		}
	}
}

/**
 * pcpu_alloc_pages - allocates pages for @chunk
 * @chunk: target chunk
 * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
 * @page_start: page index of the first page to be allocated
 * @page_end: page index of the last page to be allocated + 1
 *
 * Allocate pages [@page_start,@page_end) into @pages for all units.
 * The allocation is for @chunk. Percpu core doesn't care about the
 * content of @pages and will pass it verbatim to pcpu_map_pages().
 */
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
			    struct page **pages, int page_start, int page_end)
{
	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
	unsigned int cpu, tcpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];

			*pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0);
			if (!*pagep)
				goto err;
		}
	}
	return 0;

err:
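	/*
	 * Roll back: first free the pages already allocated for the
	 * failing cpu, then everything allocated for the cpus before it.
	 */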
	while (--i >= page_start)
		__free_page(pages[pcpu_page_idx(cpu, i)]);

	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		for (i = page_start; i < page_end; i++)
			__free_page(pages[pcpu_page_idx(tcpu, i)]);
	}
	return -ENOMEM;
}

/**
 * pcpu_pre_unmap_flush - flush cache prior to unmapping
 * @chunk: chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages in [@page_start,@page_end) of @chunk are about to be
 * unmapped. Flush the cache. As each flush can be very expensive,
 * flush the whole region at once rather than once per cpu. This may
 * be overkill but is more scalable.
 */
static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
{
	flush_cache_vunmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}
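
/*
 * Note that the flush spans from the lowest unit's start address to the
 * highest unit's end address, so with multiple groups it may also cover
 * vmalloc ranges in between that don't belong to this chunk; that is
 * the "overkill" traded for a single call above.
 */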

static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
{
	unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT);
}

/**
 * pcpu_unmap_pages - unmap pages out of a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array which can be used to pass information to free
 * @page_start: page index of the first page to unmap
 * @page_end: page index of the last page to unmap + 1
 *
 * For each cpu, unmap pages [@page_start,@page_end) out of @chunk.
 * The unmapped pages are stored into the corresponding elements of
 * @pages and carried to pcpu_free_pages(), which is called after all
 * unmaps are finished. The caller should call proper pre/post flush
 * functions.
 */
static void pcpu_unmap_pages(struct pcpu_chunk *chunk,
			     struct page **pages, int page_start, int page_end)
{
	unsigned int cpu;
	int i;

	for_each_possible_cpu(cpu) {
		for (i = page_start; i < page_end; i++) {
			struct page *page;

			page = pcpu_chunk_page(chunk, cpu, i);
			WARN_ON(!page);
			pages[pcpu_page_idx(cpu, i)] = page;
		}
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				   page_end - page_start);
	}
}

/**
 * pcpu_post_unmap_tlb_flush - flush TLB after unmapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush
 * TLB for the regions. This can be skipped if the area is to be
 * returned to vmalloc as vmalloc will handle TLB flushing lazily.
 *
 * As with pcpu_pre_unmap_flush(), TLB flushing is also done at once
 * for the whole region.
 */
static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
{
	flush_tlb_kernel_range(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

static int __pcpu_map_pages(unsigned long addr, struct page **pages,
			    int nr_pages)
{
	return map_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT,
					PAGE_KERNEL, pages);
}
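
/*
 * Note: at the time of writing, map_kernel_range_noflush() returns the
 * number of pages mapped on success and -errno on failure, which is why
 * pcpu_map_pages() below checks for err < 0 rather than err != 0.
 */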

/**
 * pcpu_map_pages - map pages into a pcpu_chunk
 * @chunk: chunk of interest
 * @pages: pages array containing pages to be mapped
 * @page_start: page index of the first page to map
 * @page_end: page index of the last page to map + 1
 *
 * For each cpu, map pages [@page_start,@page_end) into @chunk. The
 * caller is responsible for calling pcpu_post_map_flush() after all
 * mappings are complete.
 *
 * This function is responsible for setting up whatever is necessary for
 * reverse lookup (addr -> chunk).
 */
static int pcpu_map_pages(struct pcpu_chunk *chunk,
			  struct page **pages, int page_start, int page_end)
{
	unsigned int cpu, tcpu;
	int i, err;

	for_each_possible_cpu(cpu) {
		err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start),
				       &pages[pcpu_page_idx(cpu, page_start)],
				       page_end - page_start);
		if (err < 0)
			goto err;

		for (i = page_start; i < page_end; i++)
			pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)],
					    chunk);
	}
	return 0;
err:
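	/*
	 * Unmap the ranges already mapped for the preceding cpus, then
	 * flush the TLB over the whole region before reporting failure.
	 */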
	for_each_possible_cpu(tcpu) {
		if (tcpu == cpu)
			break;
		__pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start),
				   page_end - page_start);
	}
	pcpu_post_unmap_tlb_flush(chunk, page_start, page_end);
	return err;
}
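
/*
 * For reference, the reverse-lookup helpers used above are defined in
 * mm/percpu.c; at the time of writing they simply stash the chunk
 * pointer in the page's otherwise-unused ->index field:
 *
 *	page->index = (unsigned long)pcpu;		(pcpu_set_page_chunk)
 *	return (struct pcpu_chunk *)page->index;	(pcpu_get_page_chunk)
 */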

/**
 * pcpu_post_map_flush - flush cache after mapping
 * @chunk: pcpu_chunk the regions to be flushed belong to
 * @page_start: page index of the first page to be flushed
 * @page_end: page index of the last page to be flushed + 1
 *
 * Pages [@page_start,@page_end) of @chunk have been mapped. Flush
 * cache.
 *
 * As with pcpu_pre_unmap_flush(), the cache flush is done at once
 * for the whole region.
 */
static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
{
	flush_cache_vmap(
		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
}

/**
 * pcpu_populate_chunk - populate and map an area of a pcpu_chunk
 * @chunk: chunk of interest
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
			       int page_start, int page_end)
{
	struct page **pages;

	pages = pcpu_get_pages();
	if (!pages)
		return -ENOMEM;

	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
		return -ENOMEM;

	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
		pcpu_free_pages(chunk, pages, page_start, page_end);
		return -ENOMEM;
	}
	pcpu_post_map_flush(chunk, page_start, page_end);

	return 0;
}
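
/*
 * A minimal usage sketch, assuming the CONTEXT rules above (the real
 * caller is the percpu core in mm/percpu.c; "rs" and "re" stand in for
 * a hypothetical page range):
 *
 *	mutex_lock(&pcpu_alloc_mutex);
 *	rc = pcpu_populate_chunk(chunk, rs, re);
 *	mutex_unlock(&pcpu_alloc_mutex);
 */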

/**
 * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk
 * @chunk: chunk to depopulate
 * @page_start: the start page
 * @page_end: the end page
 *
 * For each cpu, depopulate and unmap pages [@page_start,@page_end)
 * from @chunk.
 *
 * CONTEXT:
 * pcpu_alloc_mutex.
 */
static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
				  int page_start, int page_end)
{
	struct page **pages;

	/*
	 * If control reaches here, there must have been at least one
	 * successful population attempt so the temp pages array must
	 * be available now.
	 */
	pages = pcpu_get_pages();
	BUG_ON(!pages);

	/* unmap and free */
	pcpu_pre_unmap_flush(chunk, page_start, page_end);

	pcpu_unmap_pages(chunk, pages, page_start, page_end);

	/* no need to flush tlb, vmalloc will handle it lazily */

	pcpu_free_pages(chunk, pages, page_start, page_end);
}

static struct pcpu_chunk *pcpu_create_chunk(void)
{
	struct pcpu_chunk *chunk;
	struct vm_struct **vms;

	chunk = pcpu_alloc_chunk();
	if (!chunk)
		return NULL;

	vms = pcpu_get_vm_areas(pcpu_group_offsets, pcpu_group_sizes,
				pcpu_nr_groups, pcpu_atom_size);
	if (!vms) {
		pcpu_free_chunk(chunk);
		return NULL;
	}

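	/*
	 * vms[0] covers the first group; subtracting that group's offset
	 * recovers the chunk-wide base address to which per-unit offsets
	 * are later added (see pcpu_chunk_addr()).
	 */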
	chunk->data = vms;
	chunk->base_addr = vms[0]->addr - pcpu_group_offsets[0];
	return chunk;
}

static void pcpu_destroy_chunk(struct pcpu_chunk *chunk)
{
	if (chunk && chunk->data)
		pcpu_free_vm_areas(chunk->data, pcpu_nr_groups);
	pcpu_free_chunk(chunk);
}

static struct page *pcpu_addr_to_page(void *addr)
{
	return vmalloc_to_page(addr);
}

static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai)
{
	/* no extra restriction */
	return 0;
}