diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a558d67ee86f..fbfd3fad48f2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -171,6 +171,15 @@ enum pageflags { /* Compound pages. Stored in first tail page's flags */ PG_double_map = PG_workingset, +#ifdef CONFIG_MEMORY_FAILURE + /* + * Compound pages. Stored in first tail page's flags. + * Indicates that at least one subpage is hwpoisoned in the + * THP. + */ + PG_has_hwpoisoned = PG_mappedtodisk, +#endif + /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap) TESTSCFLAG_FALSE(DoubleMap) #endif +#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE) +/* + * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the + * compound page. + * + * This flag is set by hwpoison handler. Cleared by THP split or free page. + */ +PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) + TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND) +#else +PAGEFLAG_FALSE(HasHWPoisoned) + TESTSCFLAG_FALSE(HasHWPoisoned) +#endif + /* * Check if a page is currently marked HWPoisoned. 
Note that this check is * best effort only and inherently racy: there is no way to synchronize with diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 92192cb086c7..c5142d237e48 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list, /* lock lru list/PageCompound, ref frozen by page_ref_freeze */ lruvec = lock_page_lruvec(head); + ClearPageHasHWPoisoned(head); + for (i = nr - 1; i >= 1; i--) { __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 73f68699e7ab..bdbbb32211a5 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1694,6 +1694,20 @@ try_again: } if (PageTransHuge(hpage)) { + /* + * The flag must be set after the refcount is bumped + * otherwise it may race with THP split. + * And the flag can't be set in get_hwpoison_page() since + * it is called by soft offline too and it is just called + * for !MF_COUNT_INCREASE. So here seems to be the best + * place. + * + * No need to care about the above error handling paths for + * get_hwpoison_page() since they handle either free page + * or unhandlable page. The refcount is bumped iff the + * page is a valid handlable page. + */ + SetPageHasHWPoisoned(hpage); if (try_to_split_thp_page(p, "Memory Failure") < 0) { action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); res = -EBUSY; diff --git a/mm/memory.c b/mm/memory.c index adf9b9ef8277..c52be6d6b605 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page) if (compound_order(page) != HPAGE_PMD_ORDER) return ret; + /* + * Just back off if any subpage of a THP is corrupted, otherwise + * the corrupted page may be mapped by PMD silently to escape the + * check. This kind of THP can only be PTE mapped. Access to + * the corrupted subpage should trigger SIGBUS as expected. 
+ */ + if (unlikely(PageHasHWPoisoned(page))) + return ret; + /* * Archs like ppc64 need additional space to store information * related to pte entry. Use the preallocated table for that. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3ec39552d00f..23d3339ac4e8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page, VM_BUG_ON_PAGE(compound && compound_order(page) != order, page); - if (compound) + if (compound) { ClearPageDoubleMap(page); + ClearPageHasHWPoisoned(page); + } for (i = 1; i < (1 << order); i++) { if (compound) bad += free_tail_pages_check(page, page + i);