d744194956
Sebastian reported a crash caused by a jump label mismatch after resume.
This happens because we do not save the kernel text section during suspend
and therefore also do not restore it during resume, but use the kernel image
that restores the old system.

This means that after a suspend/resume cycle we lose all modifications done
to the kernel text section.
The reason for this is the pfn_is_nosave() function, which incorrectly
returns that read-only pages don't need to be saved. This is incorrect since
we mark the kernel text section read-only.
We still need to make sure to not save and restore pages contained within
NSS and DCSS segments.
To fix this, add an extra case for the kernel text section and only save
those pages if they are not contained within an NSS segment.

Fixes the following crash (and the above bugs as well):

Jump label code mismatch at netif_receive_skb_internal+0x28/0xd0
Found:    c0 04 00 00 00 00
Expected: c0 f4 00 00 00 11
New:      c0 04 00 00 00 00
Kernel panic - not syncing: Corrupted kernel text
CPU: 0 PID: 9 Comm: migration/0 Not tainted 3.19.0-01975-gb1b096e70f23 #4
Call Trace:
 [<0000000000113972>] show_stack+0x72/0xf0
 [<000000000081f15e>] dump_stack+0x6e/0x90
 [<000000000081c4e8>] panic+0x108/0x2b0
 [<000000000081be64>] jump_label_bug.isra.2+0x104/0x108
 [<0000000000112176>] __jump_label_transform+0x9e/0xd0
 [<00000000001121e6>] __sm_arch_jump_label_transform+0x3e/0x50
 [<00000000001d1136>] multi_cpu_stop+0x12e/0x170
 [<00000000001d1472>] cpu_stopper_thread+0xb2/0x168
 [<000000000015d2ac>] smpboot_thread_fn+0x134/0x1b0
 [<0000000000158baa>] kthread+0x10a/0x110
 [<0000000000824a86>] kernel_thread_starter+0x6/0xc

Reported-and-tested-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
226 lines
6.1 KiB
C
226 lines
6.1 KiB
C
/*
|
|
* Suspend support specific for s390.
|
|
*
|
|
* Copyright IBM Corp. 2009
|
|
*
|
|
* Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
|
|
*/
|
|
|
|
#include <linux/pfn.h>
|
|
#include <linux/suspend.h>
|
|
#include <linux/mm.h>
|
|
#include <asm/ctl_reg.h>
|
|
#include <asm/ipl.h>
|
|
#include <asm/cio.h>
|
|
#include <asm/pci.h>
|
|
#include <asm/sections.h>
|
|
#include "entry.h"
|
|
|
|
/*
 * The restore of the saved pages in a hibernation image will set
 * the change and referenced bits in the storage key for each page.
 * Overindication of the referenced bits after a hibernation cycle
 * does not cause any harm but the overindication of the change bits
 * would cause trouble.
 * Use the ARCH_SAVE_PAGE_KEYS hooks to save the storage key of each
 * page to the most significant byte of the associated page frame
 * number in the hibernation image.
 */
|
|
|
|
/*
 * Storage keys are buffered in a singly linked list of pages.
 * Every list page starts with the link to the next list page; the rest
 * of the page, PAGE_KEY_DATA_SIZE bytes, is the array of key bytes.
 */
struct page_key_data {
	struct page_key_data *next;	/* following list page */
	unsigned char data[];		/* one storage key per byte */
};

/* Number of key bytes that fit into one list page behind the link. */
#define PAGE_KEY_DATA_SIZE	(PAGE_SIZE - sizeof(struct page_key_data *))

/* Head of the list of key pages. */
static struct page_key_data *page_key_data;
/* List page the next key is read from (see page_key_write). */
static struct page_key_data *page_key_rp;
/* List page the next key is stored to (see page_key_memorize). */
static struct page_key_data *page_key_wp;
/* Index into page_key_rp->data of the next key to read. */
static unsigned long page_key_rx;
/* Index into page_key_wp->data of the next key to store. */
static unsigned long page_key_wx;
/* Address of the pages the PM notifier allocates for a suspend cycle. */
unsigned long suspend_zero_pages;
|
|
|
|
/*
|
|
* For each page in the hibernation image one additional byte is
|
|
* stored in the most significant byte of the page frame number.
|
|
* On suspend no additional memory is required but on resume the
|
|
* keys need to be memorized until the page data has been restored.
|
|
* Only then can the storage keys be set to their old state.
|
|
*/
|
|
unsigned long page_key_additional_pages(unsigned long pages)
|
|
{
|
|
return DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
|
|
}
|
|
|
|
/*
|
|
* Free page_key_data list of arrays.
|
|
*/
|
|
void page_key_free(void)
|
|
{
|
|
struct page_key_data *pkd;
|
|
|
|
while (page_key_data) {
|
|
pkd = page_key_data;
|
|
page_key_data = pkd->next;
|
|
free_page((unsigned long) pkd);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Allocate page_key_data list of arrays with enough room to store
|
|
* one byte for each page in the hibernation image.
|
|
*/
|
|
int page_key_alloc(unsigned long pages)
|
|
{
|
|
struct page_key_data *pk;
|
|
unsigned long size;
|
|
|
|
size = DIV_ROUND_UP(pages, PAGE_KEY_DATA_SIZE);
|
|
while (size--) {
|
|
pk = (struct page_key_data *) get_zeroed_page(GFP_KERNEL);
|
|
if (!pk) {
|
|
page_key_free();
|
|
return -ENOMEM;
|
|
}
|
|
pk->next = page_key_data;
|
|
page_key_data = pk;
|
|
}
|
|
page_key_rp = page_key_wp = page_key_data;
|
|
page_key_rx = page_key_wx = 0;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Fetch the storage key of the page with page frame number *@pfn and
 * store it in the first byte of *@pfn, i.e. the upper 8 bits of the page
 * frame number.
 */
void page_key_read(unsigned long *pfn)
{
	unsigned char *key_byte = (unsigned char *) pfn;
	unsigned long addr;

	/* Look up the page address first; the store below modifies *pfn. */
	addr = (unsigned long) page_address(pfn_to_page(*pfn));
	*key_byte = (unsigned char) page_get_storage_key(addr);
}
|
|
|
|
/*
|
|
* Extract the storage key from the upper 8 bits of the page frame number
|
|
* and store it in the page_key_data list of arrays.
|
|
*/
|
|
void page_key_memorize(unsigned long *pfn)
|
|
{
|
|
page_key_wp->data[page_key_wx] = *(unsigned char *) pfn;
|
|
*(unsigned char *) pfn = 0;
|
|
if (++page_key_wx < PAGE_KEY_DATA_SIZE)
|
|
return;
|
|
page_key_wp = page_key_wp->next;
|
|
page_key_wx = 0;
|
|
}
|
|
|
|
/*
|
|
* Get the next key from the page_key_data list of arrays and set the
|
|
* storage key of the page referred by @address. If @address refers to
|
|
* a "safe" page the swsusp_arch_resume code will transfer the storage
|
|
* key from the buffer page to the original page.
|
|
*/
|
|
void page_key_write(void *address)
|
|
{
|
|
page_set_storage_key((unsigned long) address,
|
|
page_key_rp->data[page_key_rx], 0);
|
|
if (++page_key_rx >= PAGE_KEY_DATA_SIZE)
|
|
return;
|
|
page_key_rp = page_key_rp->next;
|
|
page_key_rx = 0;
|
|
}
|
|
|
|
int pfn_is_nosave(unsigned long pfn)
|
|
{
|
|
unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
|
|
unsigned long nosave_end_pfn = PFN_DOWN(__pa(&__nosave_end));
|
|
unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1;
|
|
unsigned long stext_pfn = PFN_DOWN(__pa(&_stext));
|
|
|
|
/* Always save lowcore pages (LC protection might be enabled). */
|
|
if (pfn <= LC_PAGES)
|
|
return 0;
|
|
if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn)
|
|
return 1;
|
|
/* Skip memory holes and read-only pages (NSS, DCSS, ...). */
|
|
if (pfn >= stext_pfn && pfn <= eshared_pfn)
|
|
return ipl_info.type == IPL_TYPE_NSS ? 1 : 0;
|
|
if (tprot(PFN_PHYS(pfn)))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* PM notifier callback for suspend
|
|
*/
|
|
static int suspend_pm_cb(struct notifier_block *nb, unsigned long action,
|
|
void *ptr)
|
|
{
|
|
switch (action) {
|
|
case PM_SUSPEND_PREPARE:
|
|
case PM_HIBERNATION_PREPARE:
|
|
suspend_zero_pages = __get_free_pages(GFP_KERNEL, LC_ORDER);
|
|
if (!suspend_zero_pages)
|
|
return NOTIFY_BAD;
|
|
break;
|
|
case PM_POST_SUSPEND:
|
|
case PM_POST_HIBERNATION:
|
|
free_pages(suspend_zero_pages, LC_ORDER);
|
|
break;
|
|
default:
|
|
return NOTIFY_DONE;
|
|
}
|
|
return NOTIFY_OK;
|
|
}
|
|
|
|
/*
 * Hook suspend_pm_cb() into the PM notifications. Runs as an
 * arch_initcall and always reports success.
 */
static int __init suspend_pm_init(void)
{
	/* NOTE(review): the 0 is presumably the notifier priority - confirm. */
	pm_notifier(suspend_pm_cb, 0);

	return 0;
}
arch_initcall(suspend_pm_init);
|
|
|
|
void save_processor_state(void)
|
|
{
|
|
/* swsusp_arch_suspend() actually saves all cpu register contents.
|
|
* Machine checks must be disabled since swsusp_arch_suspend() stores
|
|
* register contents to their lowcore save areas. That's the same
|
|
* place where register contents on machine checks would be saved.
|
|
* To avoid register corruption disable machine checks.
|
|
* We must also disable machine checks in the new psw mask for
|
|
* program checks, since swsusp_arch_suspend() may generate program
|
|
* checks. Disabling machine checks for all other new psw masks is
|
|
* just paranoia.
|
|
*/
|
|
local_mcck_disable();
|
|
/* Disable lowcore protection */
|
|
__ctl_clear_bit(0,28);
|
|
S390_lowcore.external_new_psw.mask &= ~PSW_MASK_MCHECK;
|
|
S390_lowcore.svc_new_psw.mask &= ~PSW_MASK_MCHECK;
|
|
S390_lowcore.io_new_psw.mask &= ~PSW_MASK_MCHECK;
|
|
S390_lowcore.program_new_psw.mask &= ~PSW_MASK_MCHECK;
|
|
}
|
|
|
|
void restore_processor_state(void)
|
|
{
|
|
S390_lowcore.external_new_psw.mask |= PSW_MASK_MCHECK;
|
|
S390_lowcore.svc_new_psw.mask |= PSW_MASK_MCHECK;
|
|
S390_lowcore.io_new_psw.mask |= PSW_MASK_MCHECK;
|
|
S390_lowcore.program_new_psw.mask |= PSW_MASK_MCHECK;
|
|
/* Enable lowcore protection */
|
|
__ctl_set_bit(0,28);
|
|
local_mcck_enable();
|
|
}
|
|
|
|
/*
 * Called at the end of swsusp_arch_resume.
 *
 * Runs the early resume steps in a fixed order: log the lgr info,
 * reinitialize the channel subsystem, then rescan zpci.
 */
void s390_early_resume(void)
{
	lgr_info_log();

	channel_subsystem_reinit();

	zpci_rescan();
}
|