/* internal.h: mm/ internal definitions
 *
 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#ifndef __MM_INTERNAL_H
#define __MM_INTERNAL_H

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/tracepoint-defs.h>

/*
 * The set of flags that only affect watermark checking and reclaim
 * behaviour. This is used by the MM to obey the caller constraints
 * about IO, FS and watermark checking while ignoring placement
 * hints such as HIGHMEM usage.
 */
#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\
			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC|\
			__GFP_ATOMIC)
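
/*
 * Illustrative sketch (not part of the original header): code that
 * allocates on another context's behalf typically keeps only the
 * reclaim-related bits of that context's mask, dropping placement
 * hints such as __GFP_HIGHMEM, roughly:
 *
 *	gfp_t reclaim_gfp = caller_gfp & GFP_RECLAIM_MASK;
 *
 * "caller_gfp" is a hypothetical variable used purely for illustration.
 */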

/* The GFP flags allowed during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS))

/* Control allocation cpuset and node placement constraints */
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)

/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
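
/*
 * Illustrative note (not part of the original header): slab objects are
 * backed by directly addressable lowmem pages, so a request such as
 * kmalloc(size, GFP_KERNEL | __GFP_HIGHMEM) falls into GFP_SLAB_BUG_MASK
 * and is treated as a bug by the slab allocators.
 */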

void page_writeback_init(void);

int do_swap_page(struct fault_env *fe, pte_t orig_pte);

void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
		unsigned long floor, unsigned long ceiling);

void unmap_page_range(struct mmu_gather *tlb,
			     struct vm_area_struct *vma,
			     unsigned long addr, unsigned long end,
			     struct zap_details *details);

extern int __do_page_cache_readahead(struct address_space *mapping,
		struct file *filp, pgoff_t offset, unsigned long nr_to_read,
		unsigned long lookahead_size);

/*
 * Submit IO for the read-ahead request in file_ra_state.
 */
static inline unsigned long ra_submit(struct file_ra_state *ra,
		struct address_space *mapping, struct file *filp)
{
	return __do_page_cache_readahead(mapping, filp,
					ra->start, ra->size, ra->async_size);
}

/*
 * Turn a non-refcounted page (->_refcount == 0) into refcounted with
 * a count of one.
 */
static inline void set_page_refcounted(struct page *page)
{
	VM_BUG_ON_PAGE(PageTail(page), page);
	VM_BUG_ON_PAGE(page_ref_count(page), page);
	set_page_count(page, 1);
}

extern unsigned long highest_memmap_pfn;

/*
 * Maximum number of reclaim retries without progress before the OOM
 * killer is considered the only way forward.
 */
#define MAX_RECLAIM_RETRIES 16

/*
 * in mm/vmscan.c:
 */
extern int isolate_lru_page(struct page *page);
extern void putback_lru_page(struct page *page);
extern bool pgdat_reclaimable(struct pglist_data *pgdat);

/*
 * in mm/rmap.c:
 */
extern pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address);

/*
 * in mm/page_alloc.c
 */

/*
 * Structure for holding the mostly immutable allocation parameters passed
 * between functions involved in allocations, including the alloc_pages*
 * family of functions.
 *
 * nodemask, migratetype and high_zoneidx are initialized only once in
 * __alloc_pages_nodemask() and then never change.
 *
 * zonelist, preferred_zone and classzone_idx are set first in
 * __alloc_pages_nodemask() for the fast path, and might be later changed
 * in __alloc_pages_slowpath(). All other functions pass the whole structure
 * by a const pointer.
 */
struct alloc_context {
	struct zonelist *zonelist;
	nodemask_t *nodemask;
	struct zoneref *preferred_zoneref;
	int migratetype;
	enum zone_type high_zoneidx;
	bool spread_dirty_pages;
};

#define ac_classzone_idx(ac) zonelist_zone_idx(ac->preferred_zoneref)
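
/*
 * Illustrative sketch (not part of the original header) of how the
 * context is typically filled in by __alloc_pages_nodemask() before it
 * is handed to the fast and slow paths; the exact sequence lives in
 * mm/page_alloc.c and may differ in detail:
 *
 *	struct alloc_context ac = {
 *		.high_zoneidx	= gfp_zone(gfp_mask),
 *		.zonelist	= zonelist,
 *		.nodemask	= nodemask,
 *		.migratetype	= gfpflags_to_migratetype(gfp_mask),
 *	};
 *	ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
 *	ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
 *				ac.high_zoneidx, ac.nodemask);
 */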

/*
 * Locate the struct page for both the matching buddy in our
 * pair (buddy1) and the combined O(n+1) page they form (page).
 *
 * 1) Any buddy B1 will have an order O twin B2 which satisfies
 * the following equation:
 *     B2 = B1 ^ (1 << O)
 * For example, if the starting buddy (buddy2) is #8 its order
 * 1 buddy is #10:
 *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
 *
 * 2) Any buddy B will have an order O+1 parent P which
 * satisfies the following equation:
 *     P = B & ~(1 << O)
 *
 * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
 */
static inline unsigned long
__find_buddy_index(unsigned long page_idx, unsigned int order)
{
	return page_idx ^ (1 << order);
}
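
/*
 * Illustrative sketch (not part of the original header): the buddy
 * merging loop in mm/page_alloc.c typically turns the returned index
 * back into a struct page relative to the current one, roughly:
 *
 *	buddy_idx = __find_buddy_index(page_idx, order);
 *	buddy = page + (buddy_idx - page_idx);
 */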

extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone);

static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
				unsigned long end_pfn, struct zone *zone)
{
	if (zone->contiguous)
		return pfn_to_page(start_pfn);

	return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
}

extern int __isolate_free_page(struct page *page, unsigned int order);
extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
					unsigned int order);
extern void prep_compound_page(struct page *page, unsigned int order);
extern void post_alloc_hook(struct page *page, unsigned int order,
					gfp_t gfp_flags);
extern int user_min_free_kbytes;

#if defined CONFIG_COMPACTION || defined CONFIG_CMA

#ifdef CONFIG_AMLOGIC_CMA
#include <linux/amlogic/aml_cma.h>
#else
/*
 * in mm/compaction.c
 */
/*
 * compact_control is used to track pages being migrated and the free pages
 * they are being migrated to during memory compaction. The free_pfn starts
 * at the end of a zone and migrate_pfn begins at the start. Movable pages
 * are moved to the end of a zone during a compaction run and the run
 * completes when free_pfn <= migrate_pfn
 */
struct compact_control {
	struct list_head freepages;	/* List of free pages to migrate to */
	struct list_head migratepages;	/* List of pages being migrated */
	unsigned long nr_freepages;	/* Number of isolated free pages */
	unsigned long nr_migratepages;	/* Number of pages to migrate */
	unsigned long free_pfn;		/* isolate_freepages search base */
	unsigned long migrate_pfn;	/* isolate_migratepages search base */
	unsigned long last_migrated_pfn;/* Not yet flushed page being freed */
	enum migrate_mode mode;		/* Async or sync migration mode */
	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
	bool ignore_block_suitable;	/* Scan blocks considered unsuitable */
	bool direct_compaction;		/* False from kcompactd or /proc/... */
	bool whole_zone;		/* Whole zone should/has been scanned */
	int order;			/* order a direct compactor needs */
	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
	const unsigned int alloc_flags;	/* alloc flags of a direct compactor */
	const int classzone_idx;	/* zone index of a direct compactor */
	struct zone *zone;
	bool contended;			/* Signal lock or sched contention */
};

unsigned long
isolate_freepages_range(struct compact_control *cc,
			unsigned long start_pfn, unsigned long end_pfn);
unsigned long
isolate_migratepages_range(struct compact_control *cc,
			   unsigned long low_pfn, unsigned long end_pfn);
#endif /* CONFIG_AMLOGIC_CMA */
int find_suitable_fallback(struct free_area *area, unsigned int order,
			int migratetype, bool only_stealable, bool *can_steal);

#endif

/*
 * This function returns the order of a free page in the buddy system. In
 * general, page_zone(page)->lock must be held by the caller to prevent the
 * page from being allocated in parallel and returning garbage as the order.
 * If a caller does not hold page_zone(page)->lock, it must guarantee that the
 * page cannot be allocated or merged in parallel. Alternatively, it must
 * handle invalid values gracefully, and use page_order_unsafe() below.
 */
static inline unsigned int page_order(struct page *page)
{
	/* PageBuddy() must be checked by the caller */
	return page_private(page);
}

/*
 * Like page_order(), but for callers who cannot afford to hold the zone lock.
 * PageBuddy() should be checked first by the caller to minimize race window,
 * and invalid values must be handled gracefully.
 *
 * READ_ONCE is used so that if the caller assigns the result into a local
 * variable and e.g. tests it for valid range before using, the compiler cannot
 * decide to remove the variable and inline the page_private(page) multiple
 * times, potentially observing different values in the tests and the actual
 * use of the result.
 */
#define page_order_unsafe(page)		READ_ONCE(page_private(page))
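
/*
 * Illustrative sketch (not part of the original header) of the intended
 * lockless pattern, as used by e.g. the compaction scanners: read once
 * into a local variable, range-check it, and only then trust it:
 *
 *	if (PageBuddy(page)) {
 *		unsigned long freepage_order = page_order_unsafe(page);
 *
 *		if (freepage_order > 0 && freepage_order < MAX_ORDER)
 *			pfn += (1UL << freepage_order) - 1;
 *	}
 */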

/*
 * A mapping is copy-on-write if it is private (VM_SHARED clear) but
 * could be made writable (VM_MAYWRITE set).
 */
static inline bool is_cow_mapping(vm_flags_t flags)
{
	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}

/*
 * These three helpers classify VMAs for virtual memory accounting.
 */

/*
 * Executable code area - executable, not writable, not stack
 */
static inline bool is_exec_mapping(vm_flags_t flags)
{
	return (flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC;
}

/*
 * Stack area - automatically grows in one direction
 *
 * VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
 * do_mmap() forbids all other combinations.
 */
static inline bool is_stack_mapping(vm_flags_t flags)
{
	return (flags & VM_STACK) == VM_STACK;
}

/*
 * Data area - private, writable, not stack
 */
static inline bool is_data_mapping(vm_flags_t flags)
{
	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
}
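
/*
 * Illustrative examples (not part of the original header) of how typical
 * flag combinations are classified by the helpers above:
 *
 *	mmap(PROT_READ|PROT_EXEC, MAP_PRIVATE) of a library's text
 *		-> VM_EXEC set, VM_WRITE and VM_STACK clear
 *		-> is_exec_mapping() == true
 *	mmap(PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS) data area
 *		-> VM_WRITE set, VM_SHARED and VM_STACK clear
 *		-> is_data_mapping() == true
 */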

/* mm/util.c */
void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
		struct vm_area_struct *prev, struct rb_node *rb_parent);

#ifdef CONFIG_MMU
extern long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *nonblocking);
extern void munlock_vma_pages_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long end);
static inline void munlock_vma_pages_all(struct vm_area_struct *vma)
{
	munlock_vma_pages_range(vma, vma->vm_start, vma->vm_end);
}

/*
 * must be called with vma's mmap_sem held for read or write, and page locked.
 */
extern void mlock_vma_page(struct page *page);
extern unsigned int munlock_vma_page(struct page *page);

/*
 * Clear the page's PageMlocked(). This can be useful in a situation where
 * we want to unconditionally remove a page from the pagecache -- e.g.,
 * on truncation or freeing.
 *
 * It is legal to call this function for any page, mlocked or not.
 * If called for a page that is still mapped by mlocked vmas, all we do
 * is revert to lazy LRU behaviour -- semantics are not broken.
 */
extern void clear_page_mlock(struct page *page);

/*
 * mlock_migrate_page - called only from migrate_misplaced_transhuge_page()
 * (because that does not go through the full procedure of migration ptes):
 * to migrate the Mlocked page flag; update statistics.
 */
static inline void mlock_migrate_page(struct page *newpage, struct page *page)
{
	if (TestClearPageMlocked(page)) {
		int nr_pages = hpage_nr_pages(page);

		/* Holding pmd lock, no change in irq context: __mod is safe */
		__mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
		SetPageMlocked(newpage);
		__mod_zone_page_state(page_zone(newpage), NR_MLOCK, nr_pages);
	}
}

extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);

/*
 * At what user virtual address is page expected in @vma?
 */
static inline unsigned long
__vma_address(struct page *page, struct vm_area_struct *vma)
{
	pgoff_t pgoff = page_to_pgoff(page);
	return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
}

static inline unsigned long
vma_address(struct page *page, struct vm_area_struct *vma)
{
	unsigned long address = __vma_address(page, vma);

	/* page should be within @vma mapping range */
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);

	return address;
}

#else /* !CONFIG_MMU */
static inline void clear_page_mlock(struct page *page) { }
static inline void mlock_vma_page(struct page *page) { }
static inline void mlock_migrate_page(struct page *new, struct page *old) { }

#endif /* !CONFIG_MMU */

/*
 * Return the mem_map entry representing the 'offset' subpage within
 * the maximally aligned gigantic page 'base'. Handle any discontiguity
 * in the mem_map at MAX_ORDER_NR_PAGES boundaries.
 */
static inline struct page *mem_map_offset(struct page *base, int offset)
{
	if (unlikely(offset >= MAX_ORDER_NR_PAGES))
		return nth_page(base, offset);
	return base + offset;
}

/*
 * Iterator over all subpages within the maximally aligned gigantic
 * page 'base'. Handle any discontiguity in the mem_map.
 */
static inline struct page *mem_map_next(struct page *iter,
						struct page *base, int offset)
{
	if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
		unsigned long pfn = page_to_pfn(base) + offset;
		if (!pfn_valid(pfn))
			return NULL;
		return pfn_to_page(pfn);
	}
	return iter + 1;
}
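
/*
 * Illustrative sketch (not part of the original header): a caller that
 * walks every subpage of a gigantic page typically advances with
 * mem_map_next() so that MAX_ORDER_NR_PAGES boundaries are handled, e.g.:
 *
 *	struct page *p = page;
 *
 *	for (i = 0; i < pages_per_huge_page;
 *	     i++, p = mem_map_next(p, page, i)) {
 *		clear_user_highpage(p, addr + i * PAGE_SIZE);
 *	}
 *
 * "pages_per_huge_page" and "addr" stand in for the caller's own loop
 * bound and destination address.
 */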

/*
 * FLATMEM and DISCONTIGMEM configurations use alloc_bootmem_node,
 * so all functions starting at paging_init should be marked __init
 * in those cases. SPARSEMEM, however, allows for memory hotplug,
 * and alloc_bootmem_node is not used.
 */
#ifdef CONFIG_SPARSEMEM
#define __paginginit __meminit
#else
#define __paginginit __init
#endif

/* Memory initialisation debug and verification */
enum mminit_level {
	MMINIT_WARNING,
	MMINIT_VERIFY,
	MMINIT_TRACE
};

#ifdef CONFIG_DEBUG_MEMORY_INIT

extern int mminit_loglevel;

#define mminit_dprintk(level, prefix, fmt, arg...) \
do { \
	if (level < mminit_loglevel) { \
		if (level <= MMINIT_WARNING) \
			pr_warn("mminit::" prefix " " fmt, ##arg); \
		else \
			printk(KERN_DEBUG "mminit::" prefix " " fmt, ##arg); \
	} \
} while (0)
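
/*
 * Illustrative sketch (not part of the original header): the message is
 * emitted only when "level" is below the current mminit_loglevel, and the
 * prefix is pasted into the format string at compile time, e.g.:
 *
 *	mminit_dprintk(MMINIT_TRACE, "memmap_init",
 *		       "initialised %lu pages\n", nr_pages);
 *
 * would print "mminit::memmap_init initialised ... pages" at KERN_DEBUG
 * level; "nr_pages" is just an example argument.
 */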

extern void mminit_verify_pageflags_layout(void);
extern void mminit_verify_zonelist(void);
#else

static inline void mminit_dprintk(enum mminit_level level,
				const char *prefix, const char *fmt, ...)
{
}

static inline void mminit_verify_pageflags_layout(void)
{
}

static inline void mminit_verify_zonelist(void)
{
}
#endif /* CONFIG_DEBUG_MEMORY_INIT */

/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
#if defined(CONFIG_SPARSEMEM)
extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
				unsigned long *end_pfn);
#else
static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn,
				unsigned long *end_pfn)
{
}
#endif /* CONFIG_SPARSEMEM */

#define NODE_RECLAIM_NOSCAN	-2
#define NODE_RECLAIM_FULL	-1
#define NODE_RECLAIM_SOME	0
#define NODE_RECLAIM_SUCCESS	1

extern int hwpoison_filter(struct page *p);

extern u32 hwpoison_filter_dev_major;
extern u32 hwpoison_filter_dev_minor;
extern u64 hwpoison_filter_flags_mask;
extern u64 hwpoison_filter_flags_value;
extern u64 hwpoison_filter_memcg;
extern u32 hwpoison_filter_enable;

extern unsigned long __must_check vm_mmap_pgoff(struct file *, unsigned long,
        unsigned long, unsigned long,
        unsigned long, unsigned long);

extern void set_pageblock_order(void);
unsigned long reclaim_clean_pages_from_list(struct zone *zone,
					    struct list_head *page_list);
/* The ALLOC_WMARK bits are used as an index to zone->watermark */
#define ALLOC_WMARK_MIN		WMARK_MIN
#define ALLOC_WMARK_LOW		WMARK_LOW
#define ALLOC_WMARK_HIGH	WMARK_HIGH
#define ALLOC_NO_WATERMARKS	0x04 /* don't check watermarks at all */

/* Mask to get the watermark bits */
#define ALLOC_WMARK_MASK	(ALLOC_NO_WATERMARKS-1)
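
/*
 * Illustrative example (not part of the original header): with
 * ALLOC_NO_WATERMARKS equal to 0x04 the mask is 0x03, so
 *
 *	alloc_flags & ALLOC_WMARK_MASK
 *
 * yields WMARK_MIN, WMARK_LOW or WMARK_HIGH and can be used directly as
 * an index into zone->watermark[].
 */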

#define ALLOC_HARDER		0x10 /* try to alloc harder */
#define ALLOC_HIGH		0x20 /* __GFP_HIGH set */
#define ALLOC_CPUSET		0x40 /* check for correct cpuset */
#define ALLOC_CMA		0x80 /* allow allocations from CMA areas */

enum ttu_flags;
struct tlbflush_unmap_batch;

#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
void try_to_unmap_flush(void);
void try_to_unmap_flush_dirty(void);
void flush_tlb_batched_pending(struct mm_struct *mm);
#else
static inline void try_to_unmap_flush(void)
{
}
static inline void try_to_unmap_flush_dirty(void)
{
}
static inline void flush_tlb_batched_pending(struct mm_struct *mm)
{
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */

extern const struct trace_print_flags pageflag_names[];
extern const struct trace_print_flags vmaflag_names[];
extern const struct trace_print_flags gfpflag_names[];

#endif /* __MM_INTERNAL_H */