author     Stephen Rothwell <sfr@canb.auug.org.au>   2010-09-27 15:51:55 +1000
committer  Stephen Rothwell <sfr@canb.auug.org.au>   2010-09-27 15:51:55 +1000
commit     e0742ccf517f272c4617f4f871db52008e7ae359 (patch)
tree       618fd4cc833248c4a8cf6e4e90d7deb256e111d5
parent     a4dbdfeb44002da48f505611e48b5a8799100931 (diff)
Revert "Merge remote branch 'hwpoison/hwpoison'"
This reverts commit 4d55136a4ba430a172d9ae2c8520021976950d46, reversing changes made to 7a574be37ed9b93f5cba4583095bf88359c54c5e.
-rw-r--r--  Documentation/vm/page-types.c |   2
-rw-r--r--  fs/hugetlbfs/inode.c          |  15
-rw-r--r--  include/linux/hugetlb.h       |  11
-rw-r--r--  include/linux/migrate.h       |  16
-rw-r--r--  mm/hugetlb.c                  | 228
-rw-r--r--  mm/memory-failure.c           | 102
-rw-r--r--  mm/migrate.c                  | 232
-rw-r--r--  mm/rmap.c                     |  25
8 files changed, 116 insertions(+), 515 deletions(-)
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index cc96ee2666f2..ccd951fa94ee 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -478,7 +478,7 @@ static void prepare_hwpoison_fd(void)
}
if (opt_unpoison && !hwpoison_forget_fd) {
- sprintf(buf, "%s/unpoison-pfn", hwpoison_debug_fs);
+ sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs);
hwpoison_forget_fd = checked_open(buf, O_WRONLY);
}
}
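
For context: page-types.c is the userspace tool that drives the hwpoison debugfs files, and the hunk above restores the interface's older "renew-pfn" name. A minimal sketch of the write side, assuming the conventional /sys/kernel/debug mount point and a decimal PFN format (both assumptions, not taken from the patch; the real tool resolves the debugfs mount at runtime):

	/* Hypothetical helper, for illustration only: hand a PFN back to the
	 * hwpoison "forget" file restored above. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	static void forget_pfn(unsigned long pfn)
	{
		char buf[64];
		int fd = open("/sys/kernel/debug/hwpoison/renew-pfn", O_WRONLY);

		if (fd < 0)
			return;
		snprintf(buf, sizeof(buf), "%lu\n", pfn);
		write(fd, buf, strlen(buf));	/* kernel parses the PFN and unpoisons it */
		close(fd);
	}
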
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a14328d270e8..113eba3d3c38 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,7 +31,6 @@
#include <linux/statfs.h>
#include <linux/security.h>
#include <linux/magic.h>
-#include <linux/migrate.h>
#include <asm/uaccess.h>
@@ -574,19 +573,6 @@ static int hugetlbfs_set_page_dirty(struct page *page)
return 0;
}
-static int hugetlbfs_migrate_page(struct address_space *mapping,
- struct page *newpage, struct page *page)
-{
- int rc;
-
- rc = migrate_huge_page_move_mapping(mapping, newpage, page);
- if (rc)
- return rc;
- migrate_page_copy(newpage, page);
-
- return 0;
-}
-
static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -673,7 +659,6 @@ static const struct address_space_operations hugetlbfs_aops = {
.write_begin = hugetlbfs_write_begin,
.write_end = hugetlbfs_write_end,
.set_page_dirty = hugetlbfs_set_page_dirty,
- .migratepage = hugetlbfs_migrate_page,
};
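
The hunk above unhooks hugetlbfs from the migration core by dropping its migratepage callback. For orientation, the dispatch that callback plugged into looks roughly like the following (a paraphrase of move_to_new_page() in 2.6.36-era mm/migrate.c, reconstructed from memory rather than quoted from this diff):

	/* Sketch of the a_ops dispatch in move_to_new_page(); with the
	 * .migratepage member removed above, hugetlbfs no longer opts in. */
	if (!mapping)
		rc = migrate_page(mapping, newpage, page);
	else if (mapping->a_ops->migratepage)
		/* where .migratepage = hugetlbfs_migrate_page used to land */
		rc = mapping->a_ops->migratepage(mapping, newpage, page);
	else
		rc = fallback_migrate_page(mapping, newpage, page);
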
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 796f30e00806..f479700df61b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -43,8 +43,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
int acctflags);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
-int dequeue_hwpoisoned_huge_page(struct page *page);
-void copy_huge_page(struct page *dst, struct page *src);
+void __isolate_hwpoisoned_huge_page(struct page *page);
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -102,10 +101,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
-#define dequeue_hwpoisoned_huge_page(page) 0
-static inline void copy_huge_page(struct page *dst, struct page *src)
-{
-}
+#define __isolate_hwpoisoned_huge_page(page) 0
#define hugetlb_change_protection(vma, address, end, newprot)
@@ -232,8 +228,6 @@ struct huge_bootmem_page {
struct hstate *hstate;
};
-struct page *alloc_huge_page_node(struct hstate *h, int nid);
-
/* arch callback */
int __init alloc_bootmem_huge_page(struct hstate *h);
@@ -309,7 +303,6 @@ static inline struct hstate *page_hstate(struct page *page)
#else
struct hstate {};
-#define alloc_huge_page_node(h, nid) NULL
#define alloc_bootmem_huge_page(h) NULL
#define hstate_file(f) NULL
#define hstate_vma(v) NULL
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3c1941e40e61..7238231b8dd4 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -14,8 +14,6 @@ extern int migrate_page(struct address_space *,
struct page *, struct page *);
extern int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, int offlining);
-extern int migrate_huge_pages(struct list_head *l, new_page_t x,
- unsigned long private, int offlining);
extern int fail_migrate_page(struct address_space *,
struct page *, struct page *);
@@ -25,17 +23,12 @@ extern int migrate_prep_local(void);
extern int migrate_vmas(struct mm_struct *mm,
const nodemask_t *from, const nodemask_t *to,
unsigned long flags);
-extern void migrate_page_copy(struct page *newpage, struct page *page);
-extern int migrate_huge_page_move_mapping(struct address_space *mapping,
- struct page *newpage, struct page *page);
#else
#define PAGE_MIGRATION 0
static inline void putback_lru_pages(struct list_head *l) {}
static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, int offlining) { return -ENOSYS; }
-static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
- unsigned long private, int offlining) { return -ENOSYS; }
static inline int migrate_prep(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
@@ -47,15 +40,6 @@ static inline int migrate_vmas(struct mm_struct *mm,
return -ENOSYS;
}
-static inline void migrate_page_copy(struct page *newpage,
- struct page *page) {}
-
-extern int migrate_huge_page_move_mapping(struct address_space *mapping,
- struct page *newpage, struct page *page)
-{
- return -ENOSYS;
-}
-
/* Possible settings for the migrate_page() method in address_operations */
#define migrate_page NULL
#define fail_migrate_page NULL
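
After this hunk the migration API is back to its pre-hwpoison shape: callers isolate pages onto a list and pass a new_page_t allocation callback to migrate_pages(). A hedged sketch of the calling convention, modeled on the new_page() helper visible in the mm/memory-failure.c hunk further down (alloc_dst() and migrate_my_list() are made-up names):

	/* Illustrative caller wiring; only migrate_pages() and
	 * alloc_pages_exact_node() are taken from the surrounding diff. */
	static struct page *alloc_dst(struct page *p, unsigned long private, int **x)
	{
		/* allocate the destination page on the source page's node */
		return alloc_pages_exact_node(page_to_nid(p),
					      GFP_HIGHUSER_MOVABLE, 0);
	}

	static int migrate_my_list(struct list_head *pagelist)
	{
		/* 0 on success, negative errno, or the number of pages that failed */
		return migrate_pages(pagelist, alloc_dst, 0, 0);
	}
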
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7123270bfb38..c03273807182 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -423,14 +423,14 @@ static void clear_huge_page(struct page *page,
}
}
-static void copy_user_gigantic_page(struct page *dst, struct page *src,
+static void copy_gigantic_page(struct page *dst, struct page *src,
unsigned long addr, struct vm_area_struct *vma)
{
int i;
struct hstate *h = hstate_vma(vma);
struct page *dst_base = dst;
struct page *src_base = src;
-
+ might_sleep();
for (i = 0; i < pages_per_huge_page(h); ) {
cond_resched();
copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
@@ -440,15 +440,14 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
src = mem_map_next(src, src_base, i);
}
}
-
-static void copy_user_huge_page(struct page *dst, struct page *src,
+static void copy_huge_page(struct page *dst, struct page *src,
unsigned long addr, struct vm_area_struct *vma)
{
int i;
struct hstate *h = hstate_vma(vma);
if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
- copy_user_gigantic_page(dst, src, addr, vma);
+ copy_gigantic_page(dst, src, addr, vma);
return;
}
@@ -459,40 +458,6 @@ static void copy_user_huge_page(struct page *dst, struct page *src,
}
}
-static void copy_gigantic_page(struct page *dst, struct page *src)
-{
- int i;
- struct hstate *h = page_hstate(src);
- struct page *dst_base = dst;
- struct page *src_base = src;
-
- for (i = 0; i < pages_per_huge_page(h); ) {
- cond_resched();
- copy_highpage(dst, src);
-
- i++;
- dst = mem_map_next(dst, dst_base, i);
- src = mem_map_next(src, src_base, i);
- }
-}
-
-void copy_huge_page(struct page *dst, struct page *src)
-{
- int i;
- struct hstate *h = page_hstate(src);
-
- if (unlikely(pages_per_huge_page(h) > MAX_ORDER_NR_PAGES)) {
- copy_gigantic_page(dst, src);
- return;
- }
-
- might_sleep();
- for (i = 0; i < pages_per_huge_page(h); i++) {
- cond_resched();
- copy_highpage(dst + i, src + i);
- }
-}
-
static void enqueue_huge_page(struct hstate *h, struct page *page)
{
int nid = page_to_nid(page);
@@ -501,24 +466,11 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
h->free_huge_pages_node[nid]++;
}
-static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
-{
- struct page *page;
-
- if (list_empty(&h->hugepage_freelists[nid]))
- return NULL;
- page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
- list_del(&page->lru);
- set_page_refcounted(page);
- h->free_huge_pages--;
- h->free_huge_pages_node[nid]--;
- return page;
-}
-
static struct page *dequeue_huge_page_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address, int avoid_reserve)
{
+ int nid;
struct page *page = NULL;
struct mempolicy *mpol;
nodemask_t *nodemask;
@@ -544,13 +496,19 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) {
- if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
- page = dequeue_huge_page_node(h, zone_to_nid(zone));
- if (page) {
- if (!avoid_reserve)
- decrement_hugepage_resv_vma(h, vma);
- break;
- }
+ nid = zone_to_nid(zone);
+ if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
+ !list_empty(&h->hugepage_freelists[nid])) {
+ page = list_entry(h->hugepage_freelists[nid].next,
+ struct page, lru);
+ list_del(&page->lru);
+ h->free_huge_pages--;
+ h->free_huge_pages_node[nid]--;
+
+ if (!avoid_reserve)
+ decrement_hugepage_resv_vma(h, vma);
+
+ break;
}
}
err:
@@ -812,10 +770,11 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
return ret;
}
-static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
+static struct page *alloc_buddy_huge_page(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long address)
{
struct page *page;
- unsigned int r_nid;
+ unsigned int nid;
if (h->order >= MAX_ORDER)
return NULL;
@@ -853,14 +812,9 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
}
spin_unlock(&hugetlb_lock);
- if (nid == NUMA_NO_NODE)
- page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
- __GFP_REPEAT|__GFP_NOWARN,
- huge_page_order(h));
- else
- page = alloc_pages_exact_node(nid,
- htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
- __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
+ page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+ __GFP_REPEAT|__GFP_NOWARN,
+ huge_page_order(h));
if (page && arch_prepare_hugepage(page)) {
__free_pages(page, huge_page_order(h));
@@ -869,13 +823,19 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
spin_lock(&hugetlb_lock);
if (page) {
- r_nid = page_to_nid(page);
+ /*
+ * This page is now managed by the hugetlb allocator and has
+ * no users -- drop the buddy allocator's reference.
+ */
+ put_page_testzero(page);
+ VM_BUG_ON(page_count(page));
+ nid = page_to_nid(page);
set_compound_page_dtor(page, free_huge_page);
/*
* We incremented the global counters already
*/
- h->nr_huge_pages_node[r_nid]++;
- h->surplus_huge_pages_node[r_nid]++;
+ h->nr_huge_pages_node[nid]++;
+ h->surplus_huge_pages_node[nid]++;
__count_vm_event(HTLB_BUDDY_PGALLOC);
} else {
h->nr_huge_pages--;
@@ -888,25 +848,6 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
}
/*
- * This allocation function is useful in the context where vma is irrelevant.
- * E.g. soft-offlining uses this function because it only cares physical
- * address of error page.
- */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
-{
- struct page *page;
-
- spin_lock(&hugetlb_lock);
- page = dequeue_huge_page_node(h, nid);
- spin_unlock(&hugetlb_lock);
-
- if (!page)
- page = alloc_buddy_huge_page(h, nid);
-
- return page;
-}
-
-/*
 * Increase the hugetlb pool such that it can accommodate a reservation
* of size 'delta'.
*/
@@ -930,14 +871,17 @@ static int gather_surplus_pages(struct hstate *h, int delta)
retry:
spin_unlock(&hugetlb_lock);
for (i = 0; i < needed; i++) {
- page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
- if (!page)
+ page = alloc_buddy_huge_page(h, NULL, 0);
+ if (!page) {
/*
* We were not able to allocate enough pages to
* satisfy the entire reservation so we free what
* we've allocated so far.
*/
+ spin_lock(&hugetlb_lock);
+ needed = 0;
goto free;
+ }
list_add(&page->lru, &surplus_list);
}
@@ -964,31 +908,31 @@ retry:
needed += allocated;
h->resv_huge_pages += delta;
ret = 0;
-
- spin_unlock(&hugetlb_lock);
+free:
/* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
if ((--needed) < 0)
break;
list_del(&page->lru);
- /*
- * This page is now managed by the hugetlb allocator and has
- * no users -- drop the buddy allocator's reference.
- */
- put_page_testzero(page);
- VM_BUG_ON(page_count(page));
enqueue_huge_page(h, page);
}
/* Free unnecessary surplus pages to the buddy allocator */
-free:
if (!list_empty(&surplus_list)) {
+ spin_unlock(&hugetlb_lock);
list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
list_del(&page->lru);
- put_page(page);
+ /*
+ * The page has a reference count of zero already, so
+ * call free_huge_page directly instead of using
+ * put_page. This must be done with hugetlb_lock
+ * unlocked, which is safe because free_huge_page takes
+ * hugetlb_lock before deciding how to free the page.
+ */
+ free_huge_page(page);
}
+ spin_lock(&hugetlb_lock);
}
- spin_lock(&hugetlb_lock);
return ret;
}
@@ -1108,13 +1052,14 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
spin_unlock(&hugetlb_lock);
if (!page) {
- page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+ page = alloc_buddy_huge_page(h, vma, addr);
if (!page) {
hugetlb_put_quota(inode->i_mapping, chg);
return ERR_PTR(-VM_FAULT_SIGBUS);
}
}
+ set_page_refcounted(page);
set_page_private(page, (unsigned long) mapping);
vma_commit_reservation(h, vma, addr);
@@ -2208,19 +2153,6 @@ nomem:
return -ENOMEM;
}
-static int is_hugetlb_entry_migration(pte_t pte)
-{
- swp_entry_t swp;
-
- if (huge_pte_none(pte) || pte_present(pte))
- return 0;
- swp = pte_to_swp_entry(pte);
- if (non_swap_entry(swp) && is_migration_entry(swp)) {
- return 1;
- } else
- return 0;
-}
-
static int is_hugetlb_entry_hwpoisoned(pte_t pte)
{
swp_entry_t swp;
@@ -2451,7 +2383,7 @@ retry_avoidcopy:
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
- copy_user_huge_page(new_page, old_page, address, vma);
+ copy_huge_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);
/*
@@ -2583,19 +2515,22 @@ retry:
hugepage_add_new_anon_rmap(page, vma, address);
}
} else {
- /*
- * If memory error occurs between mmap() and fault, some process
- * don't have hwpoisoned swap entry for errored virtual address.
- * So we need to block hugepage fault by PG_hwpoison bit check.
- */
- if (unlikely(PageHWPoison(page))) {
- ret = VM_FAULT_HWPOISON;
- goto backout_unlocked;
- }
page_dup_rmap(page);
}
/*
+ * Since the memory error handler replaces the pte with a hwpoison swap
+ * entry at error-handling time, a process which has reserved but not yet
+ * mapped the errored hugepage will have no hwpoison swap entry for it.
+ * So we need to block accesses from such a process by checking the
+ * PG_hwpoison bit here.
+ */
+ if (unlikely(PageHWPoison(page))) {
+ ret = VM_FAULT_HWPOISON;
+ goto backout_unlocked;
+ }
+
+ /*
* If we are going to COW a private mapping later, we examine the
* pending reservations for this page now. This will ensure that
* any allocations necessary to record that reservation occur outside
@@ -2652,10 +2587,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
ptep = huge_pte_offset(mm, address);
if (ptep) {
entry = huge_ptep_get(ptep);
- if (unlikely(is_hugetlb_entry_migration(entry))) {
- migration_entry_wait(mm, (pmd_t *)ptep, address);
- return 0;
- } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+ if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON;
}
@@ -2946,40 +2878,18 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
hugetlb_acct_memory(h, -(chg - freed));
}
-/* Should be called in hugetlb_lock */
-static int is_hugepage_on_freelist(struct page *hpage)
-{
- struct page *page;
- struct page *tmp;
- struct hstate *h = page_hstate(hpage);
- int nid = page_to_nid(hpage);
-
- list_for_each_entry_safe(page, tmp, &h->hugepage_freelists[nid], lru)
- if (page == hpage)
- return 1;
- return 0;
-}
-
-#ifdef CONFIG_MEMORY_FAILURE
/*
* This function is called from memory failure code.
* Assume the caller holds page lock of the head page.
*/
-int dequeue_hwpoisoned_huge_page(struct page *hpage)
+void __isolate_hwpoisoned_huge_page(struct page *hpage)
{
struct hstate *h = page_hstate(hpage);
int nid = page_to_nid(hpage);
- int ret = -EBUSY;
spin_lock(&hugetlb_lock);
- if (is_hugepage_on_freelist(hpage)) {
- list_del(&hpage->lru);
- set_page_refcounted(hpage);
- h->free_huge_pages--;
- h->free_huge_pages_node[nid]--;
- ret = 0;
- }
+ list_del(&hpage->lru);
+ h->free_huge_pages--;
+ h->free_huge_pages_node[nid]--;
spin_unlock(&hugetlb_lock);
- return ret;
}
-#endif
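
One subtlety in the gather_surplus_pages() hunk above deserves a note: the restored comment depends on free_huge_page() acquiring hugetlb_lock itself, which is why it must be called with the lock dropped. A simplified sketch of that function as it stood around 2.6.36 (quota and mapping handling omitted; reconstructed from memory, not part of this patch):

	static void free_huge_page(struct page *page)
	{
		struct hstate *h = page_hstate(page);
		int nid = page_to_nid(page);

		spin_lock(&hugetlb_lock);	/* would self-deadlock if the caller held it */
		if (h->surplus_huge_pages_node[nid]) {
			/* surplus page: return it to the buddy allocator */
			update_and_free_page(h, page);
			h->surplus_huge_pages--;
			h->surplus_huge_pages_node[nid]--;
		} else {
			/* pooled page: put it back on the per-node freelist */
			enqueue_huge_page(h, page);
		}
		spin_unlock(&hugetlb_lock);
	}
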
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 483a59fbff25..9c26eeca1342 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -693,10 +693,11 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
* Issues:
* - Error on hugepage is contained in hugepage unit (not in raw page unit.)
* To narrow down kill region to one page, we need to break up pmd.
+ * - To support soft-offlining for hugepage, we need to support hugepage
+ * migration.
*/
static int me_huge_page(struct page *p, unsigned long pfn)
{
- int res = 0;
struct page *hpage = compound_head(p);
/*
* We can safely recover from error on free or reserved (i.e.
@@ -709,9 +710,8 @@ static int me_huge_page(struct page *p, unsigned long pfn)
* so there is no race between isolation and mapping/unmapping.
*/
if (!(page_mapping(hpage) || PageAnon(hpage))) {
- res = dequeue_hwpoisoned_huge_page(hpage);
- if (!res)
- return RECOVERED;
+ __isolate_hwpoisoned_huge_page(hpage);
+ return RECOVERED;
}
return DELAYED;
}
@@ -981,10 +981,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
* prep_new_page() will be the gate keeper.
- * 2) it's a free hugepage, which is also safe:
- * an affected hugepage will be dequeued from hugepage freelist,
- * so there's no concern about reusing it ever after.
- * 3) it's part of a non-compound high order page.
+ * 2) it's part of a non-compound high order page.
* Implies some kernel user: cannot stop them from
* R/W the page; let's pray that the page has been
* used and will be freed some time later.
@@ -996,24 +993,6 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
if (is_free_buddy_page(p)) {
action_result(pfn, "free buddy", DELAYED);
return 0;
- } else if (PageHuge(hpage)) {
- /*
- * Check "just unpoisoned", "filter hit", and
- * "race with other subpage."
- */
- lock_page_nosync(hpage);
- if (!PageHWPoison(hpage)
- || (hwpoison_filter(p) && TestClearPageHWPoison(p))
- || (p != hpage && TestSetPageHWPoison(hpage))) {
- atomic_long_sub(nr_pages, &mce_bad_pages);
- return 0;
- }
- set_page_hwpoison_huge_page(hpage);
- res = dequeue_hwpoisoned_huge_page(hpage);
- action_result(pfn, "free huge",
- res ? IGNORED : DELAYED);
- unlock_page(hpage);
- return res;
} else {
action_result(pfn, "high order kernel", IGNORED);
return -EBUSY;
@@ -1175,16 +1154,6 @@ int unpoison_memory(unsigned long pfn)
nr_pages = 1 << compound_order(page);
if (!get_page_unless_zero(page)) {
- /*
- * Since HWPoisoned hugepage should have non-zero refcount,
- * race between memory failure and unpoison seems to happen.
- * In such case unpoison fails and memory failure runs
- * to the end.
- */
- if (PageHuge(page)) {
- pr_debug("MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
- return 0;
- }
if (TestClearPageHWPoison(p))
atomic_long_sub(nr_pages, &mce_bad_pages);
pr_debug("MCE: Software-unpoisoned free page %#lx\n", pfn);
@@ -1202,9 +1171,9 @@ int unpoison_memory(unsigned long pfn)
pr_debug("MCE: Software-unpoisoned page %#lx\n", pfn);
atomic_long_sub(nr_pages, &mce_bad_pages);
freeit = 1;
- if (PageHuge(page))
- clear_page_hwpoison_huge_page(page);
}
+ if (PageHuge(p))
+ clear_page_hwpoison_huge_page(page);
unlock_page(page);
put_page(page);
@@ -1218,11 +1187,7 @@ EXPORT_SYMBOL(unpoison_memory);
static struct page *new_page(struct page *p, unsigned long private, int **x)
{
int nid = page_to_nid(p);
- if (PageHuge(p))
- return alloc_huge_page_node(page_hstate(compound_head(p)),
- nid);
- else
- return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
+ return alloc_pages_exact_node(nid, GFP_HIGHUSER_MOVABLE, 0);
}
/*
@@ -1250,15 +1215,8 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
* was free.
*/
set_migratetype_isolate(p);
- /*
- * When the target page is a free hugepage, just remove it
- * from free hugepage list.
- */
if (!get_page_unless_zero(compound_head(p))) {
- if (PageHuge(p)) {
- pr_debug("get_any_page: %#lx free huge page\n", pfn);
- ret = dequeue_hwpoisoned_huge_page(compound_head(p));
- } else if (is_free_buddy_page(p)) {
+ if (is_free_buddy_page(p)) {
pr_debug("get_any_page: %#lx free buddy page\n", pfn);
/* Set hwpoison bit while page is still isolated */
SetPageHWPoison(p);
@@ -1277,45 +1235,6 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
return ret;
}
-static int soft_offline_huge_page(struct page *page, int flags)
-{
- int ret;
- unsigned long pfn = page_to_pfn(page);
- struct page *hpage = compound_head(page);
- LIST_HEAD(pagelist);
-
- ret = get_any_page(page, pfn, flags);
- if (ret < 0)
- return ret;
- if (ret == 0)
- goto done;
-
- if (PageHWPoison(hpage)) {
- put_page(hpage);
- pr_debug("soft offline: %#lx hugepage already poisoned\n", pfn);
- return -EBUSY;
- }
-
- /* Keep page count to indicate a given hugepage is isolated. */
-
- list_add(&hpage->lru, &pagelist);
- ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0);
- if (ret) {
- pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
- pfn, ret, page->flags);
- if (ret > 0)
- ret = -EIO;
- return ret;
- }
-done:
- if (!PageHWPoison(hpage))
- atomic_long_add(1 << compound_order(hpage), &mce_bad_pages);
- set_page_hwpoison_huge_page(hpage);
- dequeue_hwpoisoned_huge_page(hpage);
- /* keep elevated page count for bad page */
- return ret;
-}
-
/**
* soft_offline_page - Soft offline a page.
* @page: page to offline
@@ -1343,9 +1262,6 @@ int soft_offline_page(struct page *page, int flags)
int ret;
unsigned long pfn = page_to_pfn(page);
- if (PageHuge(page))
- return soft_offline_huge_page(page, flags);
-
ret = get_any_page(page, pfn, flags);
if (ret < 0)
return ret;
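
With soft_offline_huge_page() removed, soft_offline_page() once again handles base pages only. That path is still reachable from userspace; one hedged example uses madvise with MADV_SOFT_OFFLINE (available since 2.6.33; whether your libc headers define the constant is an assumption):

	/* Illustrative trigger for soft_offline_page(); requires
	 * CONFIG_MEMORY_FAILURE and CAP_SYS_ADMIN. */
	#include <sys/mman.h>
	#include <unistd.h>

	static int soft_offline_my_page(void *addr)
	{
		long pagesize = sysconf(_SC_PAGESIZE);
		void *aligned = (void *)((unsigned long)addr & ~(pagesize - 1));

		/* ask the kernel to migrate the backing page, then retire it */
		return madvise(aligned, pagesize, MADV_SOFT_OFFLINE);
	}
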
diff --git a/mm/migrate.c b/mm/migrate.c
index 55dbc45880c6..38e7cad782f4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,7 +32,6 @@
#include <linux/security.h>
#include <linux/memcontrol.h>
#include <linux/syscalls.h>
-#include <linux/hugetlb.h>
#include <linux/gfp.h>
#include "internal.h"
@@ -96,34 +95,26 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte_t *ptep, pte;
spinlock_t *ptl;
- if (unlikely(PageHuge(new))) {
- ptep = huge_pte_offset(mm, addr);
- if (!ptep)
- goto out;
- ptl = &mm->page_table_lock;
- } else {
- pgd = pgd_offset(mm, addr);
- if (!pgd_present(*pgd))
- goto out;
+ pgd = pgd_offset(mm, addr);
+ if (!pgd_present(*pgd))
+ goto out;
- pud = pud_offset(pgd, addr);
- if (!pud_present(*pud))
- goto out;
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud))
+ goto out;
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd))
- goto out;
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ goto out;
- ptep = pte_offset_map(pmd, addr);
+ ptep = pte_offset_map(pmd, addr);
- if (!is_swap_pte(*ptep)) {
- pte_unmap(ptep);
- goto out;
- }
-
- ptl = pte_lockptr(mm, pmd);
- }
+ if (!is_swap_pte(*ptep)) {
+ pte_unmap(ptep);
+ goto out;
+ }
+ ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
pte = *ptep;
if (!is_swap_pte(pte))
@@ -139,17 +130,10 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
if (is_write_migration_entry(entry))
pte = pte_mkwrite(pte);
- if (PageHuge(new))
- pte = pte_mkhuge(pte);
flush_cache_page(vma, addr, pte_pfn(pte));
set_pte_at(mm, addr, ptep, pte);
- if (PageHuge(new)) {
- if (PageAnon(new))
- hugepage_add_anon_rmap(new, vma, addr);
- else
- page_dup_rmap(new);
- } else if (PageAnon(new))
+ if (PageAnon(new))
page_add_anon_rmap(new, vma, addr);
else
page_add_file_rmap(new);
@@ -292,59 +276,11 @@ static int migrate_page_move_mapping(struct address_space *mapping,
}
/*
- * The expected number of remaining references is the same as that
- * of migrate_page_move_mapping().
- */
-int migrate_huge_page_move_mapping(struct address_space *mapping,
- struct page *newpage, struct page *page)
-{
- int expected_count;
- void **pslot;
-
- if (!mapping) {
- if (page_count(page) != 1)
- return -EAGAIN;
- return 0;
- }
-
- spin_lock_irq(&mapping->tree_lock);
-
- pslot = radix_tree_lookup_slot(&mapping->page_tree,
- page_index(page));
-
- expected_count = 2 + page_has_private(page);
- if (page_count(page) != expected_count ||
- (struct page *)radix_tree_deref_slot(pslot) != page) {
- spin_unlock_irq(&mapping->tree_lock);
- return -EAGAIN;
- }
-
- if (!page_freeze_refs(page, expected_count)) {
- spin_unlock_irq(&mapping->tree_lock);
- return -EAGAIN;
- }
-
- get_page(newpage);
-
- radix_tree_replace_slot(pslot, newpage);
-
- page_unfreeze_refs(page, expected_count);
-
- __put_page(page);
-
- spin_unlock_irq(&mapping->tree_lock);
- return 0;
-}
-
-/*
* Copy the page to its new location
*/
-void migrate_page_copy(struct page *newpage, struct page *page)
+static void migrate_page_copy(struct page *newpage, struct page *page)
{
- if (PageHuge(page))
- copy_huge_page(newpage, page);
- else
- copy_highpage(newpage, page);
+ copy_highpage(newpage, page);
if (PageError(page))
SetPageError(newpage);
@@ -788,92 +724,6 @@ move_newpage:
}
/*
- * Counterpart of unmap_and_move_page() for hugepage migration.
- *
- * This function doesn't wait the completion of hugepage I/O
- * because there is no race between I/O and migration for hugepage.
- * Note that currently hugepage I/O occurs only in direct I/O
- * where no lock is held and PG_writeback is irrelevant,
- * and writeback status of all subpages are counted in the reference
- * count of the head page (i.e. if all subpages of a 2MB hugepage are
- * under direct I/O, the reference of the head page is 512 and a bit more.)
- * This means that when we try to migrate hugepage whose subpages are
- * doing direct I/O, some references remain after try_to_unmap() and
- * hugepage migration fails without data corruption.
- *
- * There is also no race when direct I/O is issued on the page under migration,
- * because then pte is replaced with migration swap entry and direct I/O code
- * will wait in the page fault for migration to complete.
- */
-static int unmap_and_move_huge_page(new_page_t get_new_page,
- unsigned long private, struct page *hpage,
- int force, int offlining)
-{
- int rc = 0;
- int *result = NULL;
- struct page *new_hpage = get_new_page(hpage, private, &result);
- int rcu_locked = 0;
- struct anon_vma *anon_vma = NULL;
-
- if (!new_hpage)
- return -ENOMEM;
-
- rc = -EAGAIN;
-
- if (!trylock_page(hpage)) {
- if (!force)
- goto out;
- lock_page(hpage);
- }
-
- if (PageAnon(hpage)) {
- rcu_read_lock();
- rcu_locked = 1;
-
- if (page_mapped(hpage)) {
- anon_vma = page_anon_vma(hpage);
- atomic_inc(&anon_vma->external_refcount);
- }
- }
-
- try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
-
- if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, 1);
-
- if (rc)
- remove_migration_ptes(hpage, hpage);
-
- if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
- &anon_vma->lock)) {
- int empty = list_empty(&anon_vma->head);
- spin_unlock(&anon_vma->lock);
- if (empty)
- anon_vma_free(anon_vma);
- }
-
- if (rcu_locked)
- rcu_read_unlock();
-out:
- unlock_page(hpage);
-
- if (rc != -EAGAIN) {
- list_del(&hpage->lru);
- put_page(hpage);
- }
-
- put_page(new_hpage);
-
- if (result) {
- if (rc)
- *result = rc;
- else
- *result = page_to_nid(new_hpage);
- }
- return rc;
-}
-
-/*
* migrate_pages
*
* The function takes one list of pages to migrate and a function
@@ -938,52 +788,6 @@ out:
return nr_failed + retry;
}
-int migrate_huge_pages(struct list_head *from,
- new_page_t get_new_page, unsigned long private, int offlining)
-{
- int retry = 1;
- int nr_failed = 0;
- int pass = 0;
- struct page *page;
- struct page *page2;
- int rc;
-
- for (pass = 0; pass < 10 && retry; pass++) {
- retry = 0;
-
- list_for_each_entry_safe(page, page2, from, lru) {
- cond_resched();
-
- rc = unmap_and_move_huge_page(get_new_page,
- private, page, pass > 2, offlining);
-
- switch(rc) {
- case -ENOMEM:
- goto out;
- case -EAGAIN:
- retry++;
- break;
- case 0:
- break;
- default:
- /* Permanent failure */
- nr_failed++;
- break;
- }
- }
- }
- rc = 0;
-out:
-
- list_for_each_entry_safe(page, page2, from, lru)
- put_page(page);
-
- if (rc)
- return rc;
-
- return nr_failed + retry;
-}
-
#ifdef CONFIG_NUMA
/*
* Move a list of individual pages
diff --git a/mm/rmap.c b/mm/rmap.c
index 1ad201707d05..87343122f390 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -774,10 +774,10 @@ void page_move_anon_rmap(struct page *page,
}
/**
- * __page_set_anon_rmap - set up new anonymous rmap
- * @page: Page to add to rmap
- * @vma: VM area to add page to.
- * @address: User virtual address of the mapping
+ * __page_set_anon_rmap - setup new anonymous rmap
+ * @page: the page to add the mapping to
+ * @vma: the vm area in which the mapping is added
+ * @address: the user virtual address mapped
* @exclusive: the page is exclusively owned by the current process
*/
static void __page_set_anon_rmap(struct page *page,
@@ -787,16 +787,25 @@ static void __page_set_anon_rmap(struct page *page,
BUG_ON(!anon_vma);
- if (PageAnon(page))
- return;
-
/*
* If the page isn't exclusively mapped into this vma,
* we must use the _oldest_ possible anon_vma for the
* page mapping!
*/
- if (!exclusive)
+ if (!exclusive) {
+ if (PageAnon(page))
+ return;
anon_vma = anon_vma->root;
+ } else {
+ /*
+ * In this case, swapped-out-but-not-discarded swap-cache
+ * is remapped. So, no need to update page->mapping here.
+ * We convice anon_vma poitned by page->mapping is not obsolete
+ * because vma->anon_vma is necessary to be a family of it.
+ */
+ if (PageAnon(page))
+ return;
+ }
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
page->mapping = (struct address_space *) anon_vma;