From 8cfd9e923be54ef66ce174a93f4592b444b96407 Mon Sep 17 00:00:00 2001 From: Russell King <rmk@dyn-67.arm.linux.org.uk> Date: Sun, 22 Feb 2009 12:36:55 +0000 Subject: [PATCH 01/25] [ARM] RiscPC: Fix etherh oops The 8390 driver was structured by Al Viro to allow the flexibility required by platforms. lib8390.c contains the core code which drivers explicitly include: - 8390.c includes lib8390.c to provide the standard ISA based driver. - etherh.c includes it with the accessors defined for RiscPC platforms, where it is addressed via the MMIO accessors with a device dependent register spacing. Other platform drivers do something similar. However, b9a9b4b caused the kernel to contain not only the etherh private build of lib8390 (included in etherh.c) but also lib8390.c itself, and referred the new net_device_ops methods to the ISA version. The result of this is is not pretty: Unable to handle kernel paging request at virtual address 12032030 pgd = c8330000 [12032030] *pgd=00000000 Internal error: Oops: 18331805 [#1] Modules linked in: ipv6 CPU: 0 Not tainted (2.6.29-rc3 #167) PC is at do_set_multicast_list+0xd0/0x190 LR is at bitrev32+0x28/0x34 pc : [<c017aab4>] lr : [<c0139120>] psr: a0000093 sp : c8321d9c ip : c8321d84 fp : c8321dbc r10: c80c6800 r9 : 00000000 r8 : c80c6b60 r7 : c80c6b80 r6 : cc80c800 r5 : c80c6800 r4 : 00000000 r3 : cc80c80c r2 : 00000004 r1 : 00000007 r0 : e0000000 Flags: NzCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user ... Fix up b9a9b4b by making etherh's net_device_ops refer to the internal lib8390 functions, and remove the build of the ISA 8390.c driver. Acked-by: David S. Miller <davem@davemloft.net> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- drivers/net/arm/Makefile | 2 +- drivers/net/arm/etherh.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/arm/Makefile b/drivers/net/arm/Makefile index c69c0cdba4a2..811a3ccd14c1 100644 --- a/drivers/net/arm/Makefile +++ b/drivers/net/arm/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_ARM_AM79C961A) += am79c961a.o -obj-$(CONFIG_ARM_ETHERH) += etherh.o ../8390.o +obj-$(CONFIG_ARM_ETHERH) += etherh.o obj-$(CONFIG_ARM_ETHER3) += ether3.o obj-$(CONFIG_ARM_ETHER1) += ether1.o obj-$(CONFIG_ARM_AT91_ETHER) += at91_ether.o diff --git a/drivers/net/arm/etherh.c b/drivers/net/arm/etherh.c index 54b52e5b1821..f52f668c49bf 100644 --- a/drivers/net/arm/etherh.c +++ b/drivers/net/arm/etherh.c @@ -641,15 +641,15 @@ static const struct net_device_ops etherh_netdev_ops = { .ndo_open = etherh_open, .ndo_stop = etherh_close, .ndo_set_config = etherh_set_config, - .ndo_start_xmit = ei_start_xmit, - .ndo_tx_timeout = ei_tx_timeout, - .ndo_get_stats = ei_get_stats, - .ndo_set_multicast_list = ei_set_multicast_list, + .ndo_start_xmit = __ei_start_xmit, + .ndo_tx_timeout = __ei_tx_timeout, + .ndo_get_stats = __ei_get_stats, + .ndo_set_multicast_list = __ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = ei_poll, + .ndo_poll_controller = __ei_poll, #endif }; From d82ad6d6833ee8248e6c34e1b9fc3c4e61e3f035 Mon Sep 17 00:00:00 2001 From: Andrei Birjukov <andrei.birjukov@artecdesign.ee> Date: Sun, 22 Feb 2009 22:37:21 +0000 Subject: [PATCH 02/25] [ARM] at91: fix for Atmel AT91 powersaving We've discovered that our AT91SAM9260 board consumed too much power when returning from a slowclock low-power mode. 
RAM self-refresh is enabled in a bootloader in our case, this is how we saw a difference. Estimated ca. 30mA more on 4V battery than the same state before powersaving. After a small research we found that there seems to be a bogus sdram_selfrefresh_disable() call at the end of at91_pm_enter() call, which overwrites the LPR register with uninitialized value. Please find the suggested patch attached. This patch fixes correct restoring of LPR register of the Atmel AT91 SDRAM controller when returning from a power saving mode. Signed-off-by: Andrei Birjukov <andrei.birjukov@artecdesign.ee> Acked-by: Andrew Victor <linux@maxim.org.za> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- arch/arm/mach-at91/pm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 9bb4f043aa22..7ac812dc055a 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -332,7 +332,6 @@ static int at91_pm_enter(suspend_state_t state) at91_sys_read(AT91_AIC_IPR) & at91_sys_read(AT91_AIC_IMR)); error: - sdram_selfrefresh_disable(); target_state = PM_SUSPEND_ON; at91_irq_resume(); at91_gpio_resume(); From c8532db7f2661b63f658b9a08cf4053a3e6abb78 Mon Sep 17 00:00:00 2001 From: Mark Brown <broonie@sirena.org.uk> Date: Tue, 24 Feb 2009 15:55:48 +0100 Subject: [PATCH 03/25] [ARM] 5411/1: S3C64XX: Fix EINT unmask Currently the unmask function for EINT interrupts was setting the mask bit rather than clearing it. This was also previously reported and fixed by Kyungmin Park <kyungmin.park@samsung.com> and others. Acked-By: Ben Dooks <ben-linux@fluff.org> Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- arch/arm/plat-s3c64xx/irq-eint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-s3c64xx/irq-eint.c b/arch/arm/plat-s3c64xx/irq-eint.c index 1f7cc0067f5c..ebb305ce7689 100644 --- a/arch/arm/plat-s3c64xx/irq-eint.c +++ b/arch/arm/plat-s3c64xx/irq-eint.c @@ -55,7 +55,7 @@ static void s3c_irq_eint_unmask(unsigned int irq) u32 mask; mask = __raw_readl(S3C64XX_EINT0MASK); - mask |= eint_irq_to_bit(irq); + mask &= ~eint_irq_to_bit(irq); __raw_writel(mask, S3C64XX_EINT0MASK); } From a682604838763981613e42015cd0e39f2989d6bb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Date: Wed, 25 Feb 2009 18:03:42 -0800 Subject: [PATCH 04/25] rcu: Teach RCU that idle task is not quiscent state at boot This patch fixes a bug located by Vegard Nossum with the aid of kmemcheck, updated based on review comments from Nick Piggin, Ingo Molnar, and Andrew Morton. And cleans up the variable-name and function-name language. ;-) The boot CPU runs in the context of its idle thread during boot-up. During this time, idle_cpu(0) will always return nonzero, which will fool Classic and Hierarchical RCU into deciding that a large chunk of the boot-up sequence is a big long quiescent state. This in turn causes RCU to prematurely end grace periods during this time. This patch changes the rcutree.c and rcuclassic.c rcu_check_callbacks() function to ignore the idle task as a quiescent state until the system has started up the scheduler in rest_init(), introducing a new non-API function rcu_idle_now_means_idle() to inform RCU of this transition. 
RCU maintains an internal rcu_idle_cpu_truthful variable to track this state, which is then used by rcu_check_callback() to determine if it should believe idle_cpu(). Because this patch has the effect of disallowing RCU grace periods during long stretches of the boot-up sequence, this patch also introduces Josh Triplett's UP-only optimization that makes synchronize_rcu() be a no-op if num_online_cpus() returns 1. This allows boot-time code that calls synchronize_rcu() to proceed normally. Note, however, that RCU callbacks registered by call_rcu() will likely queue up until later in the boot sequence. Although rcuclassic and rcutree can also use this same optimization after boot completes, rcupreempt must restrict its use of this optimization to the portion of the boot sequence before the scheduler starts up, given that an rcupreempt RCU read-side critical section may be preeempted. In addition, this patch takes Nick Piggin's suggestion to make the system_state global variable be __read_mostly. Changes since v4: o Changes the name of the introduced function and variable to be less emotional. ;-) Changes since v3: o WARN_ON(nr_context_switches() > 0) to verify that RCU switches out of boot-time mode before the first context switch, as suggested by Nick Piggin. Changes since v2: o Created rcu_blocking_is_gp() internal-to-RCU API that determines whether a call to synchronize_rcu() is itself a grace period. o The definition of rcu_blocking_is_gp() for rcuclassic and rcutree checks to see if but a single CPU is online. o The definition of rcu_blocking_is_gp() for rcupreempt checks to see both if but a single CPU is online and if the system is still in early boot. This allows rcupreempt to again work correctly if running on a single CPU after booting is complete. o Added check to rcupreempt's synchronize_sched() for there being but one online CPU. Tested all three variants both SMP and !SMP, booted fine, passed a short rcutorture test on both x86 and Power. Located-by: Vegard Nossum <vegard.nossum@gmail.com> Tested-by: Vegard Nossum <vegard.nossum@gmail.com> Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- include/linux/rcuclassic.h | 6 ++++++ include/linux/rcupdate.h | 4 ++++ include/linux/rcupreempt.h | 15 +++++++++++++++ include/linux/rcutree.h | 6 ++++++ init/main.c | 3 ++- kernel/rcuclassic.c | 4 ++-- kernel/rcupdate.c | 12 ++++++++++++ kernel/rcupreempt.c | 3 +++ kernel/rcutree.c | 4 ++-- 9 files changed, 52 insertions(+), 5 deletions(-) diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index f3f697df1d71..80044a4f3ab9 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -181,4 +181,10 @@ extern long rcu_batches_completed_bh(void); #define rcu_enter_nohz() do { } while (0) #define rcu_exit_nohz() do { } while (0) +/* A context switch is a grace period for rcuclassic. */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1; +} + #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 921340a7b71c..528343e6da51 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,6 +52,9 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; +/* Internal to kernel, but needed by rcupreempt.h. 
*/ +extern int rcu_scheduler_active; + #if defined(CONFIG_CLASSIC_RCU) #include <linux/rcuclassic.h> #elif defined(CONFIG_TREE_RCU) @@ -265,6 +268,7 @@ extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); +extern void rcu_scheduler_starting(void); extern int rcu_needs_cpu(int cpu); #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 3e05c09b54a2..74304b4538d8 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -142,4 +142,19 @@ static inline void rcu_exit_nohz(void) #define rcu_exit_nohz() do { } while (0) #endif /* CONFIG_NO_HZ */ +/* + * A context switch is a grace period for rcupreempt synchronize_rcu() + * only during early boot, before the scheduler has been initialized. + * So, how the heck do we get a context switch? Well, if the caller + * invokes synchronize_rcu(), they are willing to accept a context + * switch, so we simply pretend that one happened. + * + * After boot, there might be a blocked or preempted task in an RCU + * read-side critical section, so we cannot then take the fastpath. + */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1 && !rcu_scheduler_active; +} + #endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d4368b7975c3..a722fb67bb2d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -326,4 +326,10 @@ static inline void rcu_exit_nohz(void) } #endif /* CONFIG_NO_HZ */ +/* A context switch is a grace period for rcutree. */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1; +} + #endif /* __LINUX_RCUTREE_H */ diff --git a/init/main.c b/init/main.c index 844209453c02..83697e160b3a 100644 --- a/init/main.c +++ b/init/main.c @@ -97,7 +97,7 @@ static inline void mark_rodata_ro(void) { } extern void tc_init(void); #endif -enum system_states system_state; +enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); /* @@ -463,6 +463,7 @@ static noinline void __init_refok rest_init(void) * at least once to get things moving: */ init_idle_bootup_task(current); + rcu_scheduler_starting(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index bd5a9003497c..654c640a6b9c 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && rcu_scheduler_active && + !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index d92a76a881aa..cae8a059cf47 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,6 +44,7 @@ #include <linux/cpu.h> #include <linux/mutex.h> #include <linux/module.h> +#include <linux/kernel_stat.h> enum rcu_barrier { RCU_BARRIER_STD, @@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; +int rcu_scheduler_active __read_mostly; /* * Awaken the corresponding synchronize_rcu() instance now that a @@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head) void synchronize_rcu(void) { struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + 
init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); @@ -175,3 +181,9 @@ void __init rcu_init(void) __rcu_init(); } +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 33cfc50781f9..5d59e850fb71 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1181,6 +1181,9 @@ void __synchronize_sched(void) { struct rcu_synchronize rcu; + if (num_online_cpus() == 1) + return; /* blocking is gp if only one CPU! */ + init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b2fd602a6f6f..97ce31579ec0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && rcu_scheduler_active && + !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user From a760a6656e6f00bb0144a42a048cf0266646e22c Mon Sep 17 00:00:00 2001 From: Herbert Xu <herbert@gondor.apana.org.au> Date: Thu, 26 Feb 2009 14:06:31 +0800 Subject: [PATCH 05/25] crypto: api - Fix module load deadlock with fallback algorithms With the mandatory algorithm testing at registration, we have now created a deadlock with algorithms requiring fallbacks. This can happen if the module containing the algorithm requiring fallback is loaded first, without the fallback module being loaded first. The system will then try to test the new algorithm, find that it needs to load a fallback, and then try to load that. As both algorithms share the same module alias, it can attempt to load the original algorithm again and block indefinitely. As algorithms requiring fallbacks are a special case, we can fix this by giving them a different module alias than the rest. Then it's just a matter of using the right aliases according to what algorithms we're trying to find. 
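To make the alias split concrete, here is a small sketch (illustration only, not taken from this patch; the file placement is an example): a pure software implementation keeps the bare algorithm name as its module alias, while an implementation that itself requires a fallback now advertises only the "-all" variant, so the request_module() issued while its fallback is being instantiated can no longer pull it in a second time:

    #include <linux/module.h>

    /* in a software implementation, e.g. crypto/aes_generic.c:
     * reachable through the plain algorithm name */
    MODULE_ALIAS("aes");

    /* in an accelerated driver that needs a software fallback
     * (a separate module): reachable only through the new alias */
    MODULE_ALIAS("aes-all");

The crypto/api.c hunk below then teaches the lookup code when to request which alias.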
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- arch/s390/crypto/aes_s390.c | 2 +- crypto/api.c | 15 +++++++++++++-- drivers/crypto/padlock-aes.c | 2 +- drivers/crypto/padlock-sha.c | 4 ++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index c42cd898f68b..6118890c946d 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -556,7 +556,7 @@ static void __exit aes_s390_fini(void) module_init(aes_s390_init); module_exit(aes_s390_fini); -MODULE_ALIAS("aes"); +MODULE_ALIAS("aes-all"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); diff --git a/crypto/api.c b/crypto/api.c index efe77df6863f..38a2bc02a98c 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -215,8 +215,19 @@ struct crypto_alg *crypto_larval_lookup(const char *name, u32 type, u32 mask) mask &= ~(CRYPTO_ALG_LARVAL | CRYPTO_ALG_DEAD); type &= mask; - alg = try_then_request_module(crypto_alg_lookup(name, type, mask), - name); + alg = crypto_alg_lookup(name, type, mask); + if (!alg) { + char tmp[CRYPTO_MAX_ALG_NAME]; + + request_module(name); + + if (!((type ^ CRYPTO_ALG_NEED_FALLBACK) & mask) && + snprintf(tmp, sizeof(tmp), "%s-all", name) < sizeof(tmp)) + request_module(tmp); + + alg = crypto_alg_lookup(name, type, mask); + } + if (alg) return crypto_is_larval(alg) ? crypto_larval_wait(alg) : alg; diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 856b3cc25583..3f0fdd18255d 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -489,4 +489,4 @@ MODULE_DESCRIPTION("VIA PadLock AES algorithm support"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("aes"); +MODULE_ALIAS("aes-all"); diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index a7fbadebf623..a2c8e8514b63 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -304,7 +304,7 @@ MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Michal Ludvig"); -MODULE_ALIAS("sha1"); -MODULE_ALIAS("sha256"); +MODULE_ALIAS("sha1-all"); +MODULE_ALIAS("sha256-all"); MODULE_ALIAS("sha1-padlock"); MODULE_ALIAS("sha256-padlock"); From cac64d00c256e65776d575e82aaf540632b66178 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> Date: Wed, 25 Feb 2009 09:59:26 -0800 Subject: [PATCH 06/25] sched_rt: don't start timer when rt bandwidth disabled Impact: fix incorrect condition check No need to start rt bandwidth timer when rt bandwidth is disabled. If this timer starts, it may stop at sched_rt_period_timer() on the first time. 
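Spelled out (an illustrative rewrite of the same test, not an additional change), the timer should be armed only when RT bandwidth is both enabled and finite, and the corrected condition is simply the negation of that:

    /* inside start_rt_bandwidth(): skip the timer if it can never be needed */
    if (!(rt_bandwidth_enabled() && rt_b->rt_runtime != RUNTIME_INF))
        return; /* i.e. !enabled || runtime == RUNTIME_INF */

The old test (enabled && runtime == RUNTIME_INF) returned early in only one of those cases, so the timer was still started with RT bandwidth disabled.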
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 410eec404133..c3baa9653d1d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -223,7 +223,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) { ktime_t now; - if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF) + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) return; if (hrtimer_active(&rt_b->rt_period_timer)) From 54e991242850edc8c53f71fa5aa3ba7a93ce38f5 Mon Sep 17 00:00:00 2001 From: Dhaval Giani <dhaval@linux.vnet.ibm.com> Date: Fri, 27 Feb 2009 15:13:54 +0530 Subject: [PATCH 07/25] sched: don't allow setuid to succeed if the user does not have rt bandwidth Impact: fix hung task with certain (non-default) rt-limit settings Corey Hickey reported that on using setuid to change the uid of a rt process, the process would be unkillable and not be running. This is because there was no rt runtime for that user group. Add in a check to see if a user can attach an rt task to its task group. On failure, return EINVAL, which is also returned in CONFIG_CGROUP_SCHED. Reported-by: Corey Hickey <bugfood-ml@fatooh.org> Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- include/linux/sched.h | 4 ++++ kernel/sched.c | 13 +++++++++++-- kernel/sys.c | 31 ++++++++++++++++++++----------- kernel/user.c | 18 ++++++++++++++++++ 4 files changed, 53 insertions(+), 13 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8981e52c714f..8c216e057c94 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,9 +2291,13 @@ extern long sched_group_rt_runtime(struct task_group *tg); extern int sched_group_set_rt_period(struct task_group *tg, long rt_period_us); extern long sched_group_rt_period(struct task_group *tg); +extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif #endif +extern int task_can_switch_user(struct user_struct *up, + struct task_struct *tsk); + #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { diff --git a/kernel/sched.c b/kernel/sched.c index c3baa9653d1d..8e2558c2ba67 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9224,6 +9224,16 @@ static int sched_rt_global_constraints(void) return ret; } + +int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +{ + /* Don't accept realtime tasks when there is no way for them to run */ + if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) + return 0; + + return 1; +} + #else /* !CONFIG_RT_GROUP_SCHED */ static int sched_rt_global_constraints(void) { @@ -9317,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED - /* Don't accept realtime tasks when there is no way for them to run */ - if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0) + if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) return -EINVAL; #else /* We don't support RT-tasks being in separate groups */ diff --git a/kernel/sys.c b/kernel/sys.c index f145c415bc16..37f458e6882a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -559,7 +559,7 @@ error: abort_creds(new); return retval; } - + /* * change the user struct in a credentials set to match the new UID */ @@ 
-571,6 +571,11 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + if (!task_can_switch_user(new_user, current)) { + free_uid(new_user); + return -EINVAL; + } + if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && new_user != INIT_USER) { @@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) goto error; } - retval = -EAGAIN; - if (new->uid != old->uid && set_user(new) < 0) - goto error; - + if (new->uid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; + } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old->uid)) new->suid = new->euid; @@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) retval = -EPERM; if (capable(CAP_SETUID)) { new->suid = new->uid = uid; - if (uid != old->uid && set_user(new) < 0) { - retval = -EAGAIN; - goto error; + if (uid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; } } else if (uid != old->uid && uid != new->suid) { goto error; @@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) goto error; } - retval = -EAGAIN; if (ruid != (uid_t) -1) { new->uid = ruid; - if (ruid != old->uid && set_user(new) < 0) - goto error; + if (ruid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; + } } if (euid != (uid_t) -1) new->euid = euid; diff --git a/kernel/user.c b/kernel/user.c index 3551ac742395..6a9b696128c8 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -362,6 +362,24 @@ static void free_user(struct user_struct *up, unsigned long flags) #endif +#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED) +/* + * We need to check if a setuid can take place. This function should be called + * before successfully completing the setuid. + */ +int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) +{ + + return sched_rt_can_attach(up->tg, tsk); + +} +#else +int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) +{ + return 1; +} +#endif + /* * Locate the user_struct for the passed UID. If found, take a ref on it. The * caller must undo that ref with free_uid(). From fab852aaf761a00cfe16330429b7cac15cceaeb9 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Sun, 1 Mar 2009 16:09:14 +0200 Subject: [PATCH 08/25] x86: count errors in testmmiotrace.ko Check the read values against the written values in the MMIO read/write test. This test shows if the given MMIO test area really works as memory, which is a prerequisite for a successful mmiotrace test. 
Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Stuart Bennett <stuart@freedesktop.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/testmmiotrace.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index ab50a8d7402c..4b29f3b0ee01 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -1,5 +1,5 @@ /* - * Written by Pekka Paalanen, 2008 <pq@iki.fi> + * Written by Pekka Paalanen, 2008-2009 <pq@iki.fi> */ #include <linux/module.h> #include <linux/io.h> @@ -11,28 +11,51 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); +static unsigned v16(unsigned i) +{ + return i * 12 + 7; +} + +static unsigned v32(unsigned i) +{ + return i * 212371 + 13; +} + static void do_write_test(void __iomem *p) { unsigned int i; mmiotrace_printk("Write test.\n"); + for (i = 0; i < 256; i++) iowrite8(i, p + i); + for (i = 1024; i < (5 * 1024); i += 2) - iowrite16(i * 12 + 7, p + i); + iowrite16(v16(i), p + i); + for (i = (5 * 1024); i < (16 * 1024); i += 4) - iowrite32(i * 212371 + 13, p + i); + iowrite32(v32(i), p + i); } static void do_read_test(void __iomem *p) { unsigned int i; + unsigned errs[3] = { 0 }; mmiotrace_printk("Read test.\n"); + for (i = 0; i < 256; i++) - ioread8(p + i); + if (ioread8(p + i) != i) + ++errs[0]; + for (i = 1024; i < (5 * 1024); i += 2) - ioread16(p + i); + if (ioread16(p + i) != v16(i)) + ++errs[1]; + for (i = (5 * 1024); i < (16 * 1024); i += 4) - ioread32(p + i); + if (ioread32(p + i) != v32(i)) + ++errs[2]; + + mmiotrace_printk("Read errors: 8-bit %d, 16-bit %d, 32-bit %d.\n", + errs[0], errs[1], errs[2]); } static void do_test(void) From 5ff93697fcfe1e2b9e61db82961d8f50d1ad5d57 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Sun, 1 Mar 2009 16:10:08 +0200 Subject: [PATCH 09/25] x86: add far read test to testmmiotrace Apparently pages far into an ioremapped region might not actually be mapped during ioremap(). Add an optional read test to try to trigger a multiply faulting MMIO access. Also add more messages to the kernel log to help debugging. This patch is based on a patch suggested by Stuart Bennett <stuart@freedesktop.org> who discovered bugs in mmiotrace related to normal kernel space faults. 
Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Stuart Bennett <stuart@freedesktop.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/testmmiotrace.c | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index 4b29f3b0ee01..427fd1b56df5 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -9,7 +9,13 @@ static unsigned long mmio_address; module_param(mmio_address, ulong, 0); -MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); +MODULE_PARM_DESC(mmio_address, " Start address of the mapping of 16 kB " + "(or 8 MB if read_far is non-zero)."); + +static unsigned long read_far = 0x400100; +module_param(read_far, ulong, 0); +MODULE_PARM_DESC(read_far, " Offset of a 32-bit read within 8 MB " + "(default: 0x400100)."); static unsigned v16(unsigned i) { @@ -24,6 +30,7 @@ static unsigned v32(unsigned i) static void do_write_test(void __iomem *p) { unsigned int i; + pr_info(MODULE_NAME ": write test.\n"); mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) @@ -40,6 +47,7 @@ static void do_read_test(void __iomem *p) { unsigned int i; unsigned errs[3] = { 0 }; + pr_info(MODULE_NAME ": read test.\n"); mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) @@ -58,9 +66,17 @@ static void do_read_test(void __iomem *p) errs[0], errs[1], errs[2]); } -static void do_test(void) +static void do_read_far_test(void __iomem *p) { - void __iomem *p = ioremap_nocache(mmio_address, 0x4000); + pr_info(MODULE_NAME ": read far test.\n"); + mmiotrace_printk("Read far test.\n"); + + ioread32(p + read_far); +} + +static void do_test(unsigned long size) +{ + void __iomem *p = ioremap_nocache(mmio_address, size); if (!p) { pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; @@ -68,11 +84,15 @@ static void do_test(void) mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); + if (read_far && read_far < size - 4) + do_read_far_test(p); iounmap(p); } static int __init init(void) { + unsigned long size = (read_far) ? (8 << 20) : (16 << 10); + if (mmio_address == 0) { pr_err(MODULE_NAME ": you have to use the module argument " "mmio_address.\n"); @@ -81,10 +101,11 @@ static int __init init(void) return -ENXIO; } - pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " - "in PCI address space, and writing " - "rubbish in there.\n", mmio_address); - do_test(); + pr_warning(MODULE_NAME ": WARNING: mapping %lu kB @ 0x%08lx in PCI " + "address space, and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); + do_test(size); + pr_info(MODULE_NAME ": All done.\n"); return 0; } From e9d54cae8f03e7f963a12f44bd50d68f49b9ea36 Mon Sep 17 00:00:00 2001 From: Stuart Bennett <stuart@freedesktop.org> Date: Fri, 30 Jan 2009 17:38:59 +0000 Subject: [PATCH 10/25] x86 mmiotrace: WARN_ONCE if dis/arming a page fails Print a full warning once, if arming or disarming a page fails. Also, if initial arming fails, do not handle the page further. This avoids the possibility of a page failing to arm and then later claiming to have handled any fault on that page. WARN_ONCE added by Pekka Paalanen. 
Signed-off-by: Stuart Bennett <stuart@freedesktop.org> Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/kmmio.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 93d82038af4b..fb1f11546fcd 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -105,7 +105,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static void set_page_present(unsigned long addr, bool present, +static int set_page_present(unsigned long addr, bool present, unsigned int *pglevel) { pteval_t pteval; @@ -116,7 +116,7 @@ static void set_page_present(unsigned long addr, bool present, if (!pte) { pr_err("kmmio: no pte for page 0x%08lx\n", addr); - return; + return -1; } if (pglevel) @@ -140,22 +140,27 @@ static void set_page_present(unsigned long addr, bool present, default: pr_err("kmmio: unexpected page level 0x%x.\n", level); - return; + return -1; } __flush_tlb_one(addr); + + return 0; } /** Mark the given page as not present. Access to it will trigger a fault. */ -static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, false, pglevel); + int ret = set_page_present(page & PAGE_MASK, false, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + return ret; } /** Mark the given page as present. */ static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) { - set_page_present(page & PAGE_MASK, true, pglevel); + int ret = set_page_present(page & PAGE_MASK, true, pglevel); + WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); } /* @@ -326,9 +331,13 @@ static int add_kmmio_fault_page(unsigned long page) f->count = 1; f->page = page; - list_add_rcu(&f->list, kmmio_page_list(f->page)); - arm_kmmio_fault_page(f->page, NULL); + if (arm_kmmio_fault_page(f->page, NULL)) { + kfree(f); + return -1; + } + + list_add_rcu(&f->list, kmmio_page_list(f->page)); return 0; } From 5359b585fb5edb3db34d6cd491e1475b098c61d3 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Sun, 1 Mar 2009 16:11:58 +0200 Subject: [PATCH 11/25] x86 mmiotrace: fix save/restore page table state From baa99e2b32449ec7bf147c234adfa444caecac8a Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Sun, 22 Feb 2009 20:02:43 +0200 Blindly setting _PAGE_PRESENT in disarm_kmmio_fault_page() overlooks the possibility, that the page was not present when it was armed. Make arm_kmmio_fault_page() store the previous page presence in struct kmmio_fault_page and use it on disarm. This patch was originally written by Stuart Bennett, but Pekka Paalanen rewrote it a little different. 
Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Stuart Bennett <stuart@freedesktop.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/kmmio.c | 66 ++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index fb1f11546fcd..be361eb828c8 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -32,6 +32,8 @@ struct kmmio_fault_page { struct list_head list; struct kmmio_fault_page *release_next; unsigned long page; /* location of the fault page */ + bool old_presence; /* page presence prior to arming */ + bool armed; /* * Number of times this page has been registered as a part @@ -105,8 +107,7 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } -static int set_page_present(unsigned long addr, bool present, - unsigned int *pglevel) +static int set_page_presence(unsigned long addr, bool present, bool *old) { pteval_t pteval; pmdval_t pmdval; @@ -119,20 +120,21 @@ static int set_page_present(unsigned long addr, bool present, return -1; } - if (pglevel) - *pglevel = level; - switch (level) { case PG_LEVEL_2M: pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; + pmdval = pmd_val(*pmd); + *old = !!(pmdval & _PAGE_PRESENT); + pmdval &= ~_PAGE_PRESENT; if (present) pmdval |= _PAGE_PRESENT; set_pmd(pmd, __pmd(pmdval)); break; case PG_LEVEL_4K: - pteval = pte_val(*pte) & ~_PAGE_PRESENT; + pteval = pte_val(*pte); + *old = !!(pteval & _PAGE_PRESENT); + pteval &= ~_PAGE_PRESENT; if (present) pteval |= _PAGE_PRESENT; set_pte_atomic(pte, __pte(pteval)); @@ -148,19 +150,39 @@ static int set_page_present(unsigned long addr, bool present, return 0; } -/** Mark the given page as not present. Access to it will trigger a fault. */ -static int arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/* + * Mark the given page as not present. Access to it will trigger a fault. + * + * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the + * protection is ignored here. RCU read lock is assumed held, so the struct + * will not disappear unexpectedly. Furthermore, the caller must guarantee, + * that double arming the same virtual address (page) cannot occur. + * + * Double disarming on the other hand is allowed, and may occur when a fault + * and mmiotrace shutdown happen simultaneously. + */ +static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, false, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", page); + int ret; + WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + if (f->armed) { + pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, f->old_presence); + } + ret = set_page_presence(f->page, false, &f->old_presence); + WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + f->armed = true; return ret; } -/** Mark the given page as present. */ -static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) +/** Restore the given page to saved presence state. 
*/ +static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) { - int ret = set_page_present(page & PAGE_MASK, true, pglevel); - WARN_ONCE(ret < 0, KERN_ERR "kmmio disarming 0x%08lx failed.\n", page); + bool tmp; + int ret = set_page_presence(f->page, f->old_presence, &tmp); + WARN_ONCE(ret < 0, + KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); + f->armed = false; } /* @@ -207,7 +229,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage->page, NULL); + disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* * On SMP we sometimes get recursive probe hits on the @@ -249,7 +271,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) regs->flags &= ~X86_EFLAGS_IF; /* Now we set present bit in PTE and single step. */ - disarm_kmmio_fault_page(ctx->fpage->page, NULL); + disarm_kmmio_fault_page(ctx->fpage); /* * If another cpu accesses the same page while we are stepping, @@ -288,7 +310,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage->page, NULL); + arm_kmmio_fault_page(ctx->fpage); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; @@ -320,19 +342,19 @@ static int add_kmmio_fault_page(unsigned long page) f = get_kmmio_fault_page(page); if (f) { if (!f->count) - arm_kmmio_fault_page(f->page, NULL); + arm_kmmio_fault_page(f); f->count++; return 0; } - f = kmalloc(sizeof(*f), GFP_ATOMIC); + f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) return -1; f->count = 1; f->page = page; - if (arm_kmmio_fault_page(f->page, NULL)) { + if (arm_kmmio_fault_page(f)) { kfree(f); return -1; } @@ -356,7 +378,7 @@ static void release_kmmio_fault_page(unsigned long page, f->count--; BUG_ON(f->count < 0); if (!f->count) { - disarm_kmmio_fault_page(f->page, NULL); + disarm_kmmio_fault_page(f); f->release_next = *release_list; *release_list = f; } From 0b700a6a253b6a3b3059bb9a9247a73490ee33fb Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Sun, 1 Mar 2009 16:12:48 +0200 Subject: [PATCH 12/25] x86 mmiotrace: split set_page_presence() From 36772dcb6ffbbb68254cbfc379a103acd2fbfefc Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Sat, 28 Feb 2009 21:34:59 +0200 Split set_page_presence() in kmmio.c into two more functions set_pmd_presence() and set_pte_presence(). Purely code reorganization, no functional changes. 
Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Stuart Bennett <stuart@freedesktop.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/kmmio.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index be361eb828c8..d29777520af3 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -107,12 +107,29 @@ static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) return NULL; } +static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) +{ + pmdval_t v = pmd_val(*pmd); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pmd(pmd, __pmd(v)); +} + +static void set_pte_presence(pte_t *pte, bool present, bool *old) +{ + pteval_t v = pte_val(*pte); + *old = !!(v & _PAGE_PRESENT); + v &= ~_PAGE_PRESENT; + if (present) + v |= _PAGE_PRESENT; + set_pte_atomic(pte, __pte(v)); +} + static int set_page_presence(unsigned long addr, bool present, bool *old) { - pteval_t pteval; - pmdval_t pmdval; unsigned int level; - pmd_t *pmd; pte_t *pte = lookup_address(addr, &level); if (!pte) { @@ -122,31 +139,17 @@ static int set_page_presence(unsigned long addr, bool present, bool *old) switch (level) { case PG_LEVEL_2M: - pmd = (pmd_t *)pte; - pmdval = pmd_val(*pmd); - *old = !!(pmdval & _PAGE_PRESENT); - pmdval &= ~_PAGE_PRESENT; - if (present) - pmdval |= _PAGE_PRESENT; - set_pmd(pmd, __pmd(pmdval)); + set_pmd_presence((pmd_t *)pte, present, old); break; - case PG_LEVEL_4K: - pteval = pte_val(*pte); - *old = !!(pteval & _PAGE_PRESENT); - pteval &= ~_PAGE_PRESENT; - if (present) - pteval |= _PAGE_PRESENT; - set_pte_atomic(pte, __pte(pteval)); + set_pte_presence(pte, present, old); break; - default: pr_err("kmmio: unexpected page level 0x%x.\n", level); return -1; } __flush_tlb_one(addr); - return 0; } From 3e39aa156a24ce386da378784edd0f748c770087 Mon Sep 17 00:00:00 2001 From: Stuart Bennett <stuart@freedesktop.org> Date: Thu, 5 Feb 2009 11:02:02 +0000 Subject: [PATCH 13/25] x86 mmiotrace: improve handling of secondary faults Upgrade some kmmio.c debug messages to warnings. Allow secondary faults on probed pages to fall through, and only log secondary faults that are not due to non-present pages. Patch edited by Pekka Paalanen. Signed-off-by: Stuart Bennett <stuart@freedesktop.org> Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/kmmio.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index d29777520af3..4c66bd3a240d 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -232,28 +232,32 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) ctx = &get_cpu_var(kmmio_ctx); if (ctx->active) { - disarm_kmmio_fault_page(faultpage); if (addr == ctx->addr) { /* - * On SMP we sometimes get recursive probe hits on the - * same address. Context is already saved, fall out. + * A second fault on the same page means some other + * condition needs handling by do_page_fault(), the + * page really not being present is the most common. */ - pr_debug("kmmio: duplicate probe hit on CPU %d, for " - "address 0x%08lx.\n", - smp_processor_id(), addr); - ret = 1; - goto no_kmmio_ctx; - } - /* - * Prevent overwriting already in-flight context. 
- * This should not happen, let's hope disarming at least - * prevents a panic. - */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " + pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); + + if (!faultpage->old_presence) + pr_info("kmmio: unexpected secondary hit for " + "address 0x%08lx on CPU %d.\n", addr, + smp_processor_id()); + } else { + /* + * Prevent overwriting already in-flight context. + * This should not happen, let's hope disarming at + * least prevents a panic. + */ + pr_emerg("kmmio: recursive probe hit on CPU %d, " "for address 0x%08lx. Ignoring.\n", smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("kmmio: previous hit was at 0x%08lx.\n", + ctx->addr); + disarm_kmmio_fault_page(faultpage); + } goto no_kmmio_ctx; } ctx->active++; @@ -305,7 +309,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); if (!ctx->active) { - pr_debug("kmmio: spurious debug trap on CPU %d.\n", + pr_warning("kmmio: spurious debug trap on CPU %d.\n", smp_processor_id()); goto out; } From 340430c572f7b2b275d39965e88bafa71693cb23 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen <pq@iki.fi> Date: Tue, 24 Feb 2009 21:44:15 +0200 Subject: [PATCH 14/25] x86 mmiotrace: fix race with release_kmmio_fault_page() There was a theoretical possibility to a race between arming a page in post_kmmio_handler() and disarming the page in release_kmmio_fault_page(): cpu0 cpu1 ------------------------------------------------------------------ mmiotrace shutdown enter release_kmmio_fault_page fault on the page disarm the page disarm the page handle the MMIO access re-arm the page put the page on release list remove_kmmio_fault_pages() fault on the page page not known to mmiotrace fall back to do_page_fault() *KABOOM* (This scenario also shows the double disarm case which is allowed.) Fixed by acquiring kmmio_lock in post_kmmio_handler() and checking if the page is being released from mmiotrace. Signed-off-by: Pekka Paalanen <pq@iki.fi> Cc: Stuart Bennett <stuart@freedesktop.org> Cc: Steven Rostedt <rostedt@goodmis.org> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/mm/kmmio.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 4c66bd3a240d..9f205030d9aa 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -38,7 +38,8 @@ struct kmmio_fault_page { /* * Number of times this page has been registered as a part * of a probe. If zero, page is disarmed and this may be freed. - * Used only by writers (RCU). + * Used only by writers (RCU) and post_kmmio_handler(). + * Protected by kmmio_lock, when linked into kmmio_page_table. */ int count; }; @@ -317,7 +318,11 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) if (ctx->probe && ctx->probe->post_handler) ctx->probe->post_handler(ctx->probe, condition, regs); - arm_kmmio_fault_page(ctx->fpage); + /* Prevent racing against release_kmmio_fault_page(). 
*/ + spin_lock(&kmmio_lock); + if (ctx->fpage->count) + arm_kmmio_fault_page(ctx->fpage); + spin_unlock(&kmmio_lock); regs->flags &= ~X86_EFLAGS_TF; regs->flags |= ctx->saved_flags; From 25ef4a67e78e1322d55f0a38783537ed89addc02 Mon Sep 17 00:00:00 2001 From: Seth Forshee <seth.forshee@gmail.com> Date: Mon, 2 Mar 2009 22:39:36 +0100 Subject: [PATCH 15/25] [ARM] 5416/1: Use unused address in v6_early_abort The target of the strex instruction to clear the exlusive monitor is currently the top of the stack. If the store succeeeds this corrupts r0 in pt_regs. Use the next stack location instead of the current one to prevent any chance of corrupting an in-use address. Signed-off-by: Seth Forshee <seth.forshee@gmail.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- arch/arm/mm/abort-ev6.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 8a7f65ba14b7..94077fbd96b7 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -23,7 +23,8 @@ ENTRY(v6_early_abort) #ifdef CONFIG_CPU_32v6K clrex #else - strex r0, r1, [sp] @ Clear the exclusive monitor + sub r1, sp, #4 @ Get unused stack location + strex r0, r1, [r1] @ Clear the exclusive monitor #endif mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR From b57ee99fab25dbc12150fe66fe54dc52bc6de784 Mon Sep 17 00:00:00 2001 From: Catalin Marinas <catalin.marinas@arm.com> Date: Tue, 3 Mar 2009 11:44:12 +0100 Subject: [PATCH 16/25] [ARM] 5417/1: Set the correct cacheid for ARMv6 CPUs with ARMv7 style MMU The cacheid_init() function assumes that if cpu_architecture() returns 7, the caches are VIPT_NONALIASING. The cpu_architecture() function returns the version of the supported MMU features (e.g. TEX remapping) but it doesn't make any assumptions about the cache type. The patch adds the checking of the Cache Type Register for the ARMv7 format. 
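For reference, a sketch of the Cache Type Register fields the new check relies on (the macro names are made up for illustration; the hunk below uses the raw constants):

    /* CTR bits [31:29]: register format, 0b100 = ARMv7 layout */
    #define CTR_FORMAT_MASK     (7 << 29)
    #define CTR_FORMAT_ARMV7    (4 << 29)
    /* ARMv7 layout, bits [15:14]: L1 I-cache policy, 0b01 = ASID-tagged VIVT */
    #define CTR_L1IP_MASK       (3 << 14)
    #define CTR_L1IP_AIVIVT     (1 << 14)
    /* ARMv6 layout, bit 23: data cache aliasing (page colouring) restriction */
    #define CTR_V6_ALIASING     (1 << 23)

So an ARMv6 CPU that already reports the ARMv7 register format is treated as VIPT non-aliasing, which is what the setup.c change below does.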
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- arch/arm/kernel/setup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 7049815d66d5..68d6494c0389 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -233,12 +233,13 @@ static void __init cacheid_init(void) unsigned int cachetype = read_cpuid_cachetype(); unsigned int arch = cpu_architecture(); - if (arch >= CPU_ARCH_ARMv7) { - cacheid = CACHEID_VIPT_NONALIASING; - if ((cachetype & (3 << 14)) == 1 << 14) - cacheid |= CACHEID_ASID_TAGGED; - } else if (arch >= CPU_ARCH_ARMv6) { - if (cachetype & (1 << 23)) + if (arch >= CPU_ARCH_ARMv6) { + if ((cachetype & (7 << 29)) == 4 << 29) { + /* ARMv7 register format */ + cacheid = CACHEID_VIPT_NONALIASING; + if ((cachetype & (3 << 14)) == 1 << 14) + cacheid |= CACHEID_ASID_TAGGED; + } else if (cachetype & (1 << 23)) cacheid = CACHEID_VIPT_ALIASING; else cacheid = CACHEID_VIPT_NONALIASING; From bdf602bd737eb07d63d6fa2da826b4751fdf9bab Mon Sep 17 00:00:00 2001 From: Russell King <rmk@dyn-67.arm.linux.org.uk> Date: Tue, 3 Mar 2009 13:43:47 +0000 Subject: [PATCH 17/25] [ARM] fix lots of ARM __devexit sillyness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `iop_adma_remove' referenced in section `.data' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o `mv_xor_remove' referenced in section `.data' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o `mv64xxx_i2c_unmap_regs' referenced in section `.devinit.text' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o `mv64xxx_i2c_remove' referenced in section `.data' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o `orion_nand_remove' referenced in section `.data' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o `pxafb_remove' referenced in section `.data' of drivers/built-in.o: defined in discarded section `.devexit.text' of drivers/built-in.o Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- drivers/dma/iop-adma.c | 2 +- drivers/dma/mv_xor.c | 2 +- drivers/i2c/busses/i2c-mv64xxx.c | 4 ++-- drivers/mtd/nand/orion_nand.c | 2 +- drivers/video/pxafb.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index ea5440dd10dc..647374acba94 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1401,7 +1401,7 @@ MODULE_ALIAS("platform:iop-adma"); static struct platform_driver iop_adma_driver = { .probe = iop_adma_probe, - .remove = iop_adma_remove, + .remove = __devexit_p(iop_adma_remove), .driver = { .owner = THIS_MODULE, .name = "iop-adma", diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index d35cbd1ff0b3..5d5d5b31867f 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1287,7 +1287,7 @@ mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp, static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, - .remove = mv_xor_remove, + .remove = __devexit_p(mv_xor_remove), .driver = { .owner = THIS_MODULE, .name = MV_XOR_NAME, diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index eeda276f8f16..7f186bbcb99d 100644 --- 
a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -482,7 +482,7 @@ mv64xxx_i2c_map_regs(struct platform_device *pd, return 0; } -static void __devexit +static void mv64xxx_i2c_unmap_regs(struct mv64xxx_i2c_data *drv_data) { if (drv_data->reg_base) { @@ -577,7 +577,7 @@ mv64xxx_i2c_remove(struct platform_device *dev) static struct platform_driver mv64xxx_i2c_driver = { .probe = mv64xxx_i2c_probe, - .remove = mv64xxx_i2c_remove, + .remove = __devexit_p(mv64xxx_i2c_remove), .driver = { .owner = THIS_MODULE, .name = MV64XXX_I2C_CTLR_NAME, diff --git a/drivers/mtd/nand/orion_nand.c b/drivers/mtd/nand/orion_nand.c index 917cf8d3ae95..c2dfd3ea353d 100644 --- a/drivers/mtd/nand/orion_nand.c +++ b/drivers/mtd/nand/orion_nand.c @@ -149,7 +149,7 @@ static int __devexit orion_nand_remove(struct platform_device *pdev) static struct platform_driver orion_nand_driver = { .probe = orion_nand_probe, - .remove = orion_nand_remove, + .remove = __devexit_p(orion_nand_remove), .driver = { .name = "orion_nand", .owner = THIS_MODULE, diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index 48ff701d3a72..2552b9f325ee 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -2230,7 +2230,7 @@ static int __devexit pxafb_remove(struct platform_device *dev) static struct platform_driver pxafb_driver = { .probe = pxafb_probe, - .remove = pxafb_remove, + .remove = __devexit_p(pxafb_remove), .suspend = pxafb_suspend, .resume = pxafb_resume, .driver = { From 1777f1a978153e8b887c1e1eb5160ac46098b142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Ha=C5=82asa?= <khc@pm.waw.pl> Date: Wed, 4 Mar 2009 08:01:22 +0800 Subject: [PATCH 18/25] crypto: ixp4xx - Fix qmgr_request_queue build failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is another user of IXP4xx queue manager, fix it. Signed-off-by: Krzysztof Hałasa <khc@pm.waw.pl> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/ixp4xx_crypto.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 2d637e0fbc03..d9e751be8c5f 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -457,10 +457,12 @@ static int init_ixp_crypto(void) if (!ctx_pool) { goto err; } - ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0); + ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0, + "ixp_crypto:out", NULL); if (ret) goto err; - ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0); + ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0, + "ixp_crypto:in", NULL); if (ret) { qmgr_release_queue(SEND_QID); goto err; From fec6c6fec3e20637bee5d276fb61dd8b49a3f9cc Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Tue, 3 Mar 2009 17:05:22 -0800 Subject: [PATCH 19/25] Linux 2.6.29-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index df6ce3e80090..d04ee0ad1dcc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 29 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Erotic Pickled Herring # *DOCUMENTATION* From 368a12117dd8abf6eaefa37c21ac313b517128b9 Mon Sep 17 00:00:00 2001 From: Tony Breeds <tony@bakeyournoodle.com> Date: Tue, 3 Mar 2009 17:59:30 +0000 Subject: [PATCH 20/25] powerpc: Run sbc610 USB fixup code only on the appropriate platform. 
commit a969e76a7101bf5f3d369563df1ca1253dd6131b (powerpc: Correct USB support for GE Fanuc SBC610) introduced a fixup for NEC usb controllers. This fixup should only run on GEF SBC610 boards. Fixes Fedora bug #486511. (https://bugzilla.redhat.com/show_bug.cgi?id=486511) Signed-off-by: Tony Breeds <tony@bakeyournoodle.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> --- arch/powerpc/platforms/86xx/gef_sbc610.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index fb371f5ce132..d6b772ba3b8f 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -142,6 +142,10 @@ static void __init gef_sbc610_nec_fixup(struct pci_dev *pdev) { unsigned int val; + /* Do not do the fixup on other platforms! */ + if (!machine_is(gef_sbc610)) + return; + printk(KERN_INFO "Running NEC uPD720101 Fixup\n"); /* Ensure ports 1, 2, 3, 4 & 5 are enabled */ From 5ad8b7d12605e88d1e532061699102797fdefe08 Mon Sep 17 00:00:00 2001 From: Helge Bahmann <helge.bahmann@secunet.com> Date: Wed, 4 Mar 2009 21:49:14 +1000 Subject: [PATCH 21/25] drm: fix double lock typo [airlied: you shall not retype patches from other trees half asleep] Signed-of-by: Dave Airlie <airlied@redhat.com> --- drivers/gpu/drm/drm_stub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c index 096e2a37446d..7c8b15b22bf2 100644 --- a/drivers/gpu/drm/drm_stub.c +++ b/drivers/gpu/drm/drm_stub.c @@ -168,7 +168,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, file_priv->minor->master != file_priv->master) { mutex_lock(&dev->struct_mutex); file_priv->minor->master = drm_master_get(file_priv->master); - mutex_lock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); } return 0; From ff0c0874905fb312ca1491bbdac2653b0b48c20b Mon Sep 17 00:00:00 2001 From: Brian Maly <bmaly@redhat.com> Date: Tue, 3 Mar 2009 21:55:31 -0500 Subject: [PATCH 22/25] x86: fix DMI on EFI Impact: reactivate DMI quirks on EFI hardware DMI tables are loaded by EFI, so the dmi calls must happen after efi_init() and not before. Currently Apple hardware uses DMI to determine the framebuffer mappings for efifb. Without DMI working you also have no video on MacBook Pro. This patch resolves the DMI issue for EFI hardware (DMI is now properly detected at boot), and additionally efifb now loads on Apple hardware (i.e. video works). 
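The dependency, for context (a heavily abridged sketch paraphrasing the 2.6.29-era drivers/firmware/dmi_scan.c, not part of this patch): on EFI systems the SMBIOS entry point is not found by scanning the legacy BIOS segment but taken from the EFI configuration table, which only efi_init() fills in:

    /* dmi_scan_machine(), abridged sketch */
    if (efi_enabled) {
        /* efi.smbios stays EFI_INVALID_TABLE_ADDR until efi_init()
         * has parsed the EFI configuration tables */
        if (efi.smbios == EFI_INVALID_TABLE_ADDR)
            goto error;         /* no DMI data at all */
        /* ... map and parse the table at efi.smbios ... */
    } else {
        /* legacy path: scan the 0xF0000-0xFFFFF BIOS segment */
    }

Hence efi_init() has to run before dmi_scan_machine(), which is all this patch reorders.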
Signed-off-by: Brian Maly <bmaly@redhat> Acked-by: Yinghai Lu <yinghai@kernel.org> Cc: ying.huang@intel.com LKML-Reference: <49ADEDA3.1030406@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c461f6d69074..6a8811a69324 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -770,6 +770,9 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); + if (efi_enabled) + efi_init(); + dmi_scan_machine(); dmi_check_system(bad_bios_dmi_table); @@ -789,8 +792,6 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - if (efi_enabled) - efi_init(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) {
From dd39ecf522ba86c70809715af46e6557f6491131 Mon Sep 17 00:00:00 2001 From: Huang Ying <ying.huang@intel.com> Date: Wed, 4 Mar 2009 10:58:33 +0800 Subject: [PATCH 23/25] x86: EFI: Back efi_ioremap with init_memory_mapping instead of FIX_MAP Impact: Fix boot failure on EFI system with large runtime memory range Brian Maly reported that some EFI systems with a large runtime memory range cannot boot, because the FIX_MAP area used to map the runtime memory range is smaller than the runtime memory range itself. This patch fixes the issue by re-implementing efi_ioremap() with init_memory_mapping(). Reported-and-tested-by: Brian Maly <bmaly@redhat.com> Signed-off-by: Huang Ying <ying.huang@intel.com> Cc: Brian Maly <bmaly@redhat.com> Cc: Yinghai Lu <yinghai@kernel.org> LKML-Reference: <1236135513.6204.306.camel@yhuang-dev.sh.intel.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/efi.h | 2 -- arch/x86/include/asm/fixmap_64.h | 4 ---- arch/x86/kernel/efi.c | 7 +++++-- arch/x86/kernel/efi_64.c | 21 ++++----------------- 4 files changed, 9 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index ca5ffb2856b6..edc90f23e708 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -37,8 +37,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #else /* !CONFIG_X86_32 */ -#define MAX_EFI_IO_PAGES 100 - extern u64 efi_call0(void *fp); extern u64 efi_call1(void *fp, u64 arg1); extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); diff --git a/arch/x86/include/asm/fixmap_64.h b/arch/x86/include/asm/fixmap_64.h index 00a30ab9b1a5..8be740977db8 100644 --- a/arch/x86/include/asm/fixmap_64.h +++ b/arch/x86/include/asm/fixmap_64.h @@ -16,7 +16,6 @@ #include <asm/apicdef.h> #include <asm/page.h> #include <asm/vsyscall.h> -#include <asm/efi.h> /* * Here we define all the compile-time 'special' virtual @@ -43,9 +42,6 @@ enum fixed_addresses { FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ FIX_IO_APIC_BASE_0, FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, - FIX_EFI_IO_MAP_LAST_PAGE, - FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE - + MAX_EFI_IO_PAGES - 1, #ifdef CONFIG_PARAVIRT FIX_PARAVIRT_BOOTMAP, #endif diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1119d247fe11..eb1ef3b67dd5 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -467,7 +467,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab, addr, npages; + u64 end, systab, addr, npages, end_pfn; void *p, *va;
efi.systab = NULL; @@ -479,7 +479,10 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if (PFN_UP(end) <= max_low_pfn_mapped) + end_pfn = PFN_UP(end); + if (end_pfn <= max_low_pfn_mapped + || (end_pfn > (1UL << (32 - PAGE_SHIFT)) + && end_pfn <= max_pfn_mapped)) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 652c5287215f..cb783b92c50c 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -99,24 +99,11 @@ void __init efi_call_phys_epilog(void) void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped __initdata; - unsigned i, pages; - unsigned long offset; + unsigned long last_map_pfn; - pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); - offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; - - if (pages_mapped + pages > MAX_EFI_IO_PAGES) + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); + if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) return NULL; - for (i = 0; i < pages; i++) { - __set_fixmap(FIX_EFI_IO_MAP_FIRST_PAGE - pages_mapped, - phys_addr, PAGE_KERNEL); - phys_addr += PAGE_SIZE; - pages_mapped++; - } - - return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)) + offset; + return (void __iomem *)__va(phys_addr); } From ab9e18587f4cdb5f3fb3854c732f27a36f98e8f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= <dg@emlix.com> Date: Wed, 4 Mar 2009 19:42:27 +0100 Subject: [PATCH 24/25] x86, math-emu: fix init_fpu for task != current MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: fix math-emu related crash while using GDB/ptrace init_fpu() calls finit to initialize a task's xstate, while finit always works on the current task. If we use PTRACE_GETFPREGS on another process and both processes did not already use floating point, we get a null pointer exception in finit. This patch creates a new function finit_task that takes a task_struct parameter. finit becomes a wrapper that simply calls finit_task with current. On the plus side this avoids many calls to get_current which would each resolve to an inline assembler mov instruction. An empty finit_task has been added to i387.h to avoid linker errors in case the compiler still emits the call in init_fpu when CONFIG_MATH_EMULATION is not defined. The declaration of finit in i387.h has been removed as the remaining code using this function gets its prototype from fpu_proto.h. 
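The wrapper arrangement described above boils down to the following shape (a condensed sketch of the fpu_aux.c change shown in the diff below; only a few of the fields are spelled out here):

	/* Sketch: finit_task() initialises the soft-FPU state of an arbitrary
	 * task, and finit() keeps its old meaning by delegating to it. */
	void finit_task(struct task_struct *tsk)
	{
		struct i387_soft_struct *soft = &tsk->thread.xstate->soft;

		soft->cwd = 0x037f;	/* default control word */
		soft->swd = 0;		/* clear status word */
		soft->ftop = 0;
		soft->twd = 0xffff;
		/* ... address registers and no_update handled as in the diff ... */
	}

	void finit(void)
	{
		finit_task(current);	/* preserve the old entry point */
	}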
Signed-off-by: Daniel Glöckner <dg@emlix.com> Cc: Suresh Siddha <suresh.b.siddha@intel.com> Cc: "Pallipadi Venkatesh" <venkatesh.pallipadi@intel.com> Cc: Arjan van de Ven <arjan@infradead.org> Cc: Bill Metzenthen <billm@melbpc.org.au> LKML-Reference: <E1Lew31-0004il-Fg@mailer.emlix.com> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/i387.h | 8 +++++++- arch/x86/kernel/i387.c | 2 +- arch/x86/math-emu/fpu_aux.c | 31 ++++++++++++++++++++----------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 48f0004db8c9..71c9e5183982 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -172,7 +172,13 @@ static inline void __save_init_fpu(struct task_struct *tsk) #else /* CONFIG_X86_32 */ -extern void finit(void); +#ifdef CONFIG_MATH_EMULATION +extern void finit_task(struct task_struct *tsk); +#else +static inline void finit_task(struct task_struct *tsk) +{ +} +#endif static inline void tolerant_fwait(void) { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index b0f61f0dcd0a..f2f8540a7f3d 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -136,7 +136,7 @@ int init_fpu(struct task_struct *tsk) #ifdef CONFIG_X86_32 if (!HAVE_HWFP) { memset(tsk->thread.xstate, 0, xstate_size); - finit(); + finit_task(tsk); set_stopped_child_used_math(tsk); return 0; } diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index 491e737ce547..aa0987088774 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -30,20 +30,29 @@ static void fclex(void) } /* Needs to be externally visible */ -void finit(void) +void finit_task(struct task_struct *tsk) { - control_word = 0x037f; - partial_status = 0; - top = 0; /* We don't keep top in the status word internally. */ - fpu_tag_word = 0xffff; + struct i387_soft_struct *soft = &tsk->thread.xstate->soft; + struct address *oaddr, *iaddr; + soft->cwd = 0x037f; + soft->swd = 0; + soft->ftop = 0; /* We don't keep top in the status word internally. */ + soft->twd = 0xffff; /* The behaviour is different from that detailed in Section 15.1.6 of the Intel manual */ - operand_address.offset = 0; - operand_address.selector = 0; - instruction_address.offset = 0; - instruction_address.selector = 0; - instruction_address.opcode = 0; - no_ip_update = 1; + oaddr = (struct address *)&soft->foo; + oaddr->offset = 0; + oaddr->selector = 0; + iaddr = (struct address *)&soft->fip; + iaddr->offset = 0; + iaddr->selector = 0; + iaddr->opcode = 0; + soft->no_update = 1; +} + +void finit(void) +{ + finit_task(current); } /* From dd4124a8a06bca89c077a16437edac010f0bb993 Mon Sep 17 00:00:00 2001 From: Leann Ogasawara <leann.ogasawara@canonical.com> Date: Wed, 4 Mar 2009 11:53:00 -0800 Subject: [PATCH 25/25] x86: add Dell XPS710 reboot quirk Dell XPS710 will hang on reboot. This is resolved by adding a quirk to set bios reboot. 
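As context for how the quirk takes effect (a hedged sketch; the table walk itself is existing kernel code, not part of this patch): the new entry is matched against the machine's DMI strings, and dmi_check_system() invokes its .callback, here set_bios_reboot(), to switch the reboot method.

	/* Sketch: how a reboot_dmi_table entry is consulted. The entry added by
	 * the patch below matches on DMI vendor and product name, after which
	 * set_bios_reboot() forces the BIOS reboot path for that machine. */
	static void apply_reboot_quirks(void)	/* illustrative wrapper, not a real kernel symbol */
	{
		dmi_check_system(reboot_dmi_table);
	}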
Signed-off-by: Leann Ogasawara <leann.ogasawara@canonical.com> Signed-off-by: Tim Gardner <tim.gardner@canonical.com> Cc: "manoj.iyer" <manoj.iyer@canonical.com> Cc: <stable@kernel.org> LKML-Reference: <1236196380.3231.89.camel@emiko> Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b46eb41643b..4526b3a75ed2 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -217,6 +217,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"), }, }, + { /* Handle problems with rebooting on Dell XPS710 */ + .callback = set_bios_reboot, + .ident = "Dell XPS710", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Dell XPS710"), + }, + }, { } };