summaryrefslogtreecommitdiff
path: root/mm/madvise.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/madvise.c')
-rw-r--r--mm/madvise.c80
1 files changed, 48 insertions, 32 deletions
diff --git a/mm/madvise.c b/mm/madvise.c
index 9ff51650f4f0..d03e149ffe6e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -59,6 +59,7 @@ static int madvise_need_mmap_write(int behavior)
case MADV_FREE:
case MADV_POPULATE_READ:
case MADV_POPULATE_WRITE:
+ case MADV_COLLAPSE:
return 0;
default:
/* be safe, default to 1. list exceptions explicitly */
@@ -600,6 +601,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
struct vm_area_struct *vma = walk->vma;
spinlock_t *ptl;
pte_t *orig_pte, *pte, ptent;
+ struct folio *folio;
struct page *page;
int nr_swap = 0;
unsigned long next;
@@ -644,56 +646,56 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
page = vm_normal_page(vma, addr, ptent);
if (!page || is_zone_device_page(page))
continue;
+ folio = page_folio(page);
/*
- * If pmd isn't transhuge but the page is THP and
+ * If pmd isn't transhuge but the folio is large and
* is owned by only this process, split it and
* deactivate all pages.
*/
- if (PageTransCompound(page)) {
- if (page_mapcount(page) != 1)
+ if (folio_test_large(folio)) {
+ if (folio_mapcount(folio) != 1)
goto out;
- get_page(page);
- if (!trylock_page(page)) {
- put_page(page);
+ folio_get(folio);
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
goto out;
}
pte_unmap_unlock(orig_pte, ptl);
- if (split_huge_page(page)) {
- unlock_page(page);
- put_page(page);
+ if (split_folio(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
orig_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
goto out;
}
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
pte--;
addr -= PAGE_SIZE;
continue;
}
- VM_BUG_ON_PAGE(PageTransCompound(page), page);
-
- if (PageSwapCache(page) || PageDirty(page)) {
- if (!trylock_page(page))
+ if (folio_test_swapcache(folio) || folio_test_dirty(folio)) {
+ if (!folio_trylock(folio))
continue;
/*
- * If page is shared with others, we couldn't clear
- * PG_dirty of the page.
+ * If folio is shared with others, we mustn't clear
+ * the folio's dirty flag.
*/
- if (page_mapcount(page) != 1) {
- unlock_page(page);
+ if (folio_mapcount(folio) != 1) {
+ folio_unlock(folio);
continue;
}
- if (PageSwapCache(page) && !try_to_free_swap(page)) {
- unlock_page(page);
+ if (folio_test_swapcache(folio) &&
+ !folio_free_swap(folio)) {
+ folio_unlock(folio);
continue;
}
- ClearPageDirty(page);
- unlock_page(page);
+ folio_clear_dirty(folio);
+ folio_unlock(folio);
}
if (pte_young(ptent) || pte_dirty(ptent)) {
@@ -711,7 +713,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
set_pte_at(mm, addr, pte, ptent);
tlb_remove_tlb_entry(tlb, pte, addr);
}
- mark_page_lazyfree(page);
+ mark_page_lazyfree(&folio->page);
}
out:
if (nr_swap) {
@@ -770,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
* Application no longer needs these pages. If the pages are dirty,
* it's OK to just throw them away. The app will be more careful about
* data it wants to keep. Be sure to free swap resources too. The
- * zap_page_range call sets things up for shrink_active_list to actually free
- * these pages later if no one else has touched them in the meantime,
+ * zap_page_range_single call sets things up for shrink_active_list to actually
+ * free these pages later if no one else has touched them in the meantime,
* although we could add these pages to a global reuse list for
* shrink_active_list to pick up before reclaiming other pages.
*
@@ -788,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
- zap_page_range(vma, start, end - start);
+ zap_page_range_single(vma, start, end - start, NULL);
return 0;
}
@@ -811,7 +813,14 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
if (start & ~huge_page_mask(hstate_vma(vma)))
return false;
- *end = ALIGN(*end, huge_page_size(hstate_vma(vma)));
+ /*
+ * Madvise callers expect the length to be rounded up to PAGE_SIZE
+ * boundaries, and may be unaware that this VMA uses huge pages.
+ * Avoid unexpected data loss by rounding down the number of
+ * huge pages freed.
+ */
+ *end = ALIGN_DOWN(*end, huge_page_size(hstate_vma(vma)));
+
return true;
}
@@ -826,6 +835,9 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior))
return -EINVAL;
+ if (start == end)
+ return 0;
+
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_lock has been dropped, prev is stale */
@@ -1060,6 +1072,8 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
if (error)
goto out;
break;
+ case MADV_COLLAPSE:
+ return madvise_collapse(vma, prev, start, end);
}
anon_name = anon_vma_name(vma);
@@ -1153,6 +1167,7 @@ madvise_behavior_valid(int behavior)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE:
+ case MADV_COLLAPSE:
#endif
case MADV_DONTDUMP:
case MADV_DODUMP:
@@ -1169,13 +1184,13 @@ madvise_behavior_valid(int behavior)
}
}
-static bool
-process_madvise_behavior_valid(int behavior)
+static bool process_madvise_behavior_valid(int behavior)
{
switch (behavior) {
case MADV_COLD:
case MADV_PAGEOUT:
case MADV_WILLNEED:
+ case MADV_COLLAPSE:
return true;
default:
return false;
@@ -1241,7 +1256,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
if (start >= end)
break;
if (prev)
- vma = prev->vm_next;
+ vma = find_vma(mm, prev->vm_end);
else /* madvise_remove dropped mmap_lock */
vma = find_vma(mm, start);
}
@@ -1342,6 +1357,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
* MADV_NOHUGEPAGE - mark the given range as not worth being backed by
* transparent huge pages so the existing pages will not be
* coalesced into THP and new pages will not be allocated as THP.
+ * MADV_COLLAPSE - synchronously coalesce pages into new THP.
* MADV_DONTDUMP - the application wants to prevent pages in the given range
* from being included in its core dump.
* MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
@@ -1443,7 +1459,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
goto out;
}
- ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
+ ret = import_iovec(ITER_DEST, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
if (ret < 0)
goto out;