/*
 * linux/mm/page_isolation.c
 */

#include <linux/mm.h>
#include <linux/page-isolation.h>
#include <linux/pageblock-flags.h>
#include <linux/memory.h>
#include <linux/hugetlb.h>
#include <linux/page_owner.h>
#include "internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>

static int set_migratetype_isolate(struct page *page,
				bool skip_hwpoisoned_pages)
{
	struct zone *zone;
	unsigned long flags, pfn;
	struct memory_isolate_notify arg;
	int notifier_ret;
	int ret = -EBUSY;

	zone = page_zone(page);

	spin_lock_irqsave(&zone->lock, flags);

	pfn = page_to_pfn(page);
	arg.start_pfn = pfn;
	arg.nr_pages = pageblock_nr_pages;
	arg.pages_found = 0;

	/*
	 * It may be possible to isolate a pageblock even if the
	 * migratetype is not MIGRATE_MOVABLE. The memory isolation
	 * notifier chain is used by balloon drivers to return the
	 * number of pages in a range that are held by the balloon
	 * driver to shrink memory. If all the pages are accounted for
	 * by balloons, are free, or are on the LRU, isolation can continue.
	 * Later, for example, when the memory hotplug notifier runs, the
	 * pages reported as "can be isolated" should be isolated (freed)
	 * by the balloon driver through the memory notifier chain.
	 */
	notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
	notifier_ret = notifier_to_errno(notifier_ret);
	if (notifier_ret)
		goto out;
	/*
	 * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself.
	 * We just check MOVABLE pages.
	 */
	if (!has_unmovable_pages(zone, page, arg.pages_found,
				 skip_hwpoisoned_pages))
		ret = 0;

	/*
	 * Here, "immobile" means pages that are not on the LRU. If there
	 * are more immobile pages than removable-by-driver pages reported
	 * by the notifier, isolation fails.
	 */

out:
	if (!ret) {
		unsigned long nr_pages;
		int migratetype = get_pageblock_migratetype(page);

		set_pageblock_migratetype(page, MIGRATE_ISOLATE);
		zone->nr_isolate_pageblock++;
		nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE);

		__mod_zone_freepage_state(zone, -nr_pages, migratetype);
	}

	spin_unlock_irqrestore(&zone->lock, flags);
	if (!ret)
		/* drain per-cpu lists so their pages reach the isolate freelist */
		drain_all_pages(zone);
	return ret;
}

static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
{
	struct zone *zone;
	unsigned long flags, nr_pages;
	bool isolated_page = false;
	unsigned int order;
	unsigned long page_idx, buddy_idx;
	struct page *buddy;

	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
		goto out;

	/*
	 * Because a free page larger than pageblock_order on an isolated
	 * pageblock is restricted from merging due to the freepage counting
	 * problem, it is possible that an unmerged free buddy page exists
	 * here. move_freepages_block() does not handle merging, so we need
	 * another approach: isolating the free page and freeing it again
	 * lets the buddy allocator merge such pages.
	 */
	if (PageBuddy(page)) {
		order = page_order(page);
		if (order >= pageblock_order) {
			page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
			buddy_idx = __find_buddy_index(page_idx, order);
			buddy = page + (buddy_idx - page_idx);

			if (pfn_valid_within(page_to_pfn(buddy)) &&
			    !is_migrate_isolate_page(buddy)) {
				__isolate_free_page(page, order);
				isolated_page = true;
			}
		}
	}

	/*
	 * If we isolated a free page larger than pageblock_order, there
	 * can be no other free page in the range, so we can skip the costly
	 * pageblock scan that move_freepages_block() would otherwise do.
	 */
	if (!isolated_page) {
		nr_pages = move_freepages_block(zone, page, migratetype);
		__mod_zone_freepage_state(zone, nr_pages, migratetype);
	}
	set_pageblock_migratetype(page, migratetype);
	zone->nr_isolate_pageblock--;
out:
	spin_unlock_irqrestore(&zone->lock, flags);
	if (isolated_page) {
		post_alloc_hook(page, order, __GFP_MOVABLE);
		__free_pages(page, order);
	}
}

static inline struct page *
__first_valid_page(unsigned long pfn, unsigned long nr_pages)
{
	int i;

	for (i = 0; i < nr_pages; i++)
		if (pfn_valid_within(pfn + i))
			break;
	if (unlikely(i == nr_pages))
		return NULL;
	return pfn_to_page(pfn + i);
}

/*
 * start_isolate_page_range() -- make the page-allocation-type of a range
 * of pages MIGRATE_ISOLATE.
 * @start_pfn: The lower PFN of the range to be isolated.
 * @end_pfn: The upper PFN of the range to be isolated.
 * @migratetype: migrate type to set in error recovery.
 * @skip_hwpoisoned_pages: treat HWPoisoned pages as isolatable when
 * checking for unmovable pages.
 *
 * Making the page-allocation-type MIGRATE_ISOLATE means free pages in
 * the range will never be allocated. Any free pages and pages freed in the
 * future will not be allocated again.
 *
 * start_pfn/end_pfn must be aligned to pageblock_nr_pages.
 * Returns 0 on success and -EBUSY if any part of the range cannot be
 * isolated. See the illustrative usage sketch after test_pages_isolated()
 * below.
 */
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			     unsigned migratetype, bool skip_hwpoisoned_pages)
{
	unsigned long pfn;
	unsigned long undo_pfn;
#ifdef CONFIG_AMLOGIC_CMA
	struct page *page = NULL; /* avoid compile error */
#else
	struct page *page;
#endif /* CONFIG_AMLOGIC_CMA */

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page &&
		    set_migratetype_isolate(page, skip_hwpoisoned_pages)) {
			undo_pfn = pfn;
			goto undo;
		}
	}
#ifdef CONFIG_AMLOGIC_CMA
	/* account the whole range as isolated CMA pages */
	if (migratetype == MIGRATE_CMA && page)
		mod_zone_page_state(page_zone(page), NR_CMA_ISOLATED,
				    end_pfn - start_pfn);
#endif /* CONFIG_AMLOGIC_CMA */
	return 0;
undo:
	for (pfn = start_pfn;
	     pfn < undo_pfn;
	     pfn += pageblock_nr_pages)
		unset_migratetype_isolate(pfn_to_page(pfn), migratetype);

	return -EBUSY;
}

/*
 * Make isolated pages available again.
 */
int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
			    unsigned migratetype)
{
	unsigned long pfn;
#ifdef CONFIG_AMLOGIC_CMA
	struct page *page = NULL; /* avoid compile error */
#else
	struct page *page;
#endif

	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));

	for (pfn = start_pfn;
	     pfn < end_pfn;
	     pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
			continue;
		unset_migratetype_isolate(page, migratetype);
	}
#ifdef CONFIG_AMLOGIC_CMA
	/* negative delta: undo the NR_CMA_ISOLATED accounting for the range */
	if (migratetype == MIGRATE_CMA && page)
		mod_zone_page_state(page_zone(page), NR_CMA_ISOLATED,
				    start_pfn - end_pfn);
#endif /* CONFIG_AMLOGIC_CMA */
	return 0;
}

/*
 * Test whether all pages in the range are free (i.e. isolated) or not.
 * All pages in [start_pfn...end_pfn) must be in the same zone.
 * zone->lock must be held before calling this.
 *
 * Returns the last tested pfn.
 */
static unsigned long
__test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn,
				  bool skip_hwpoisoned_pages)
{
	struct page *page;

	while (pfn < end_pfn) {
		if (!pfn_valid_within(pfn)) {
			pfn++;
			continue;
		}
		page = pfn_to_page(pfn);
		if (PageBuddy(page))
			/*
			 * If the page is on a free list, it has to be on
			 * the correct MIGRATE_ISOLATE freelist. There is no
			 * simple way to verify that as VM_BUG_ON(), though.
			 */
			pfn += 1 << page_order(page);
		else if (skip_hwpoisoned_pages && PageHWPoison(page))
			/* A HWPoisoned page cannot also be PageBuddy */
			pfn++;
		else
			break;
	}

	return pfn;
}

/* Caller should ensure that the requested range is in a single zone */
int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
			bool skip_hwpoisoned_pages)
{
	unsigned long pfn, flags;
	struct page *page;
	struct zone *zone;

	/*
	 * Note: pageblock_nr_pages != MAX_ORDER, so chunks of free pages
	 * are not necessarily aligned to pageblock_nr_pages.
	 * Check the migratetype of each pageblock first.
	 */
	for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
		page = __first_valid_page(pfn, pageblock_nr_pages);
		if (page && get_pageblock_migratetype(page) != MIGRATE_ISOLATE)
			break;
	}
	page = __first_valid_page(start_pfn, end_pfn - start_pfn);
	if ((pfn < end_pfn) || !page)
		return -EBUSY;
	/* Check that all pages are free or marked as ISOLATED */
	zone = page_zone(page);
	spin_lock_irqsave(&zone->lock, flags);
	pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn,
						skip_hwpoisoned_pages);
	spin_unlock_irqrestore(&zone->lock, flags);

	trace_test_pages_isolated(start_pfn, end_pfn, pfn);

	return pfn < end_pfn ? -EBUSY : 0;
}
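
/*
 * Illustrative sketch only, not part of the upstream file: it shows how a
 * caller such as alloc_contig_range() pairs the three entry points above.
 * The name example_isolate_and_check() and the MIGRATE_MOVABLE /
 * skip-hwpoisoned choices are hypothetical; both PFNs are assumed to be
 * pageblock-aligned, as the BUG_ON()s in start_isolate_page_range() require.
 */
static int __maybe_unused example_isolate_and_check(unsigned long start_pfn,
						    unsigned long end_pfn)
{
	int ret;

	/* Mark every pageblock in [start_pfn, end_pfn) MIGRATE_ISOLATE. */
	ret = start_isolate_page_range(start_pfn, end_pfn,
				       MIGRATE_MOVABLE, true);
	if (ret)
		return ret;	/* -EBUSY: some pageblock was unmovable */

	/*
	 * A real caller would migrate or reclaim the in-use pages here
	 * (e.g. via migrate_pages()) before testing the range.
	 */
	ret = test_pages_isolated(start_pfn, end_pfn, true);

	/* Always undo, making the pageblocks allocatable again. */
	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
	return ret;
}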

struct page *alloc_migrate_target(struct page *page, unsigned long private,
				  int **resultp)
{
	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;

	/*
	 * TODO: allocate a destination hugepage from the nearest neighbor
	 * node, in accordance with the memory policy of the user process,
	 * if possible. For now, as a simple workaround, we use the next
	 * node as the destination.
	 */
	if (PageHuge(page))
		return alloc_huge_page_node(page_hstate(compound_head(page)),
					    next_node_in(page_to_nid(page),
							 node_online_map));

	if (PageHighMem(page))
		gfp_mask |= __GFP_HIGHMEM;

	return alloc_page(gfp_mask);
}
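
/*
 * Illustrative sketch only, not part of the upstream file:
 * alloc_migrate_target() matches the new_page_t allocation-callback
 * signature expected by migrate_pages(), which is how callers like
 * __alloc_contig_migrate_range() consume it. example_migrate_away() is a
 * hypothetical helper and assumes <linux/migrate.h> has been included for
 * migrate_pages(), MIGRATE_SYNC and MR_CMA.
 */
static int __maybe_unused example_migrate_away(struct list_head *pagelist)
{
	/* Move each isolated page on @pagelist to a freshly allocated target. */
	return migrate_pages(pagelist, alloc_migrate_target, NULL, 0,
			     MIGRATE_SYNC, MR_CMA);
}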