mirror of
https://github.com/torvalds/linux.git
synced 2024-11-24 05:02:12 +00:00
45f81b1c96
On i386 (not x86_64) early implementations of gcc would have a bug
with asm goto causing it to produce code like the following:
(This was noticed by Peter Zijlstra)
56 pushl 0
67 nopl jmp 0x6f
popl
jmp 0x8c
6f mov
test
je 0x8c
8c mov
call *(%esp)
The jump added in the asm goto skipped over the popl that matched
the pushl 0, which lead up to a quick crash of the system when
the jump was enabled. The nopl is defined in the asm goto () statement
and when tracepoints are enabled, the nop changes to a jump to the label
that was specified by the asm goto. asm goto is suppose to tell gcc that
the code in the asm might jump to an external label. Here gcc obviously
fails to make that work.
The bug report for gcc is here:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226
The bug only appears on x86 when not compiled with
-maccumulate-outgoing-args. This option is always set on x86_64 and it
is also the work around for a function graph tracer i386 bug.
(See commit: 746357d6a5
)
This explains why the bug only showed up on i386 when function graph
tracer was not enabled.
This patch now adds a CONFIG_JUMP_LABEL option that is default
off instead of using jump labels by default. When jump labels are
enabled, the -maccumulate-outgoing-args will be used (causing a
slightly larger kernel image on i386). This option will exist
until we have a way to detect if the gcc compiler in use is safe
to use on all configurations without the work around.
Note, there exists such a test, but for now we will keep the enabling
of jump label as a manual option.
Archs that know the compiler is safe with asm goto, may choose to
select JUMP_LABEL and enable it by default.
Reported-by: Ingo Molnar <mingo@elte.hu>
Cause-discovered-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Baron <jbaron@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: David Daney <ddaney@caviumnetworks.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Miller <davem@davemloft.net>
Cc: Richard Henderson <rth@redhat.com>
LKML-Reference: <1288028746.3673.11.camel@laptop>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
72 lines
3.2 KiB
Plaintext
72 lines
3.2 KiB
Plaintext
# CPU tuning section - shared with UML.
|
|
# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
|
|
|
|
#-mtune exists since gcc 3.4
|
|
HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
|
|
ifeq ($(HAS_MTUNE),y)
|
|
tune = $(call cc-option,-mtune=$(1),$(2))
|
|
else
|
|
tune = $(call cc-option,-mcpu=$(1),$(2))
|
|
endif
|
|
|
|
align := $(cc-option-align)
|
|
cflags-$(CONFIG_M386) += -march=i386
|
|
cflags-$(CONFIG_M486) += -march=i486
|
|
cflags-$(CONFIG_M586) += -march=i586
|
|
cflags-$(CONFIG_M586TSC) += -march=i586
|
|
cflags-$(CONFIG_M586MMX) += -march=pentium-mmx
|
|
cflags-$(CONFIG_M686) += -march=i686
|
|
cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2)
|
|
cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3)
|
|
cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3)
|
|
cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4)
|
|
cflags-$(CONFIG_MK6) += -march=k6
|
|
# Please note, that patches that add -march=athlon-xp and friends are pointless.
|
|
# They make zero difference whatsosever to performance at this time.
|
|
cflags-$(CONFIG_MK7) += -march=athlon
|
|
cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
|
|
cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
|
cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
|
cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
|
|
cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
|
|
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
|
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
|
|
cflags-$(CONFIG_MVIAC7) += -march=i686
|
|
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
|
|
cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
|
|
$(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
|
|
|
|
# AMD Elan support
|
|
cflags-$(CONFIG_X86_ELAN) += -march=i486
|
|
|
|
# Geode GX1 support
|
|
cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
|
|
cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
|
|
# add at the end to overwrite eventual tuning options from earlier
|
|
# cpu entries
|
|
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
|
|
|
|
# Work around the pentium-mmx code generator madness of gcc4.4.x which
|
|
# does stack alignment by generating horrible code _before_ the mcount
|
|
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
|
|
# tracer assumptions. For i686, generic, core2 this is set by the
|
|
# compiler anyway
|
|
ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
|
|
ADD_ACCUMULATE_OUTGOING_ARGS := y
|
|
endif
|
|
|
|
# Work around to a bug with asm goto with first implementations of it
|
|
# in gcc causing gcc to mess up the push and pop of the stack in some
|
|
# uses of asm goto.
|
|
ifeq ($(CONFIG_JUMP_LABEL), y)
|
|
ADD_ACCUMULATE_OUTGOING_ARGS := y
|
|
endif
|
|
|
|
cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)
|
|
|
|
# Bug fix for binutils: this option is required in order to keep
|
|
# binutils from generating NOPL instructions against our will.
|
|
ifneq ($(CONFIG_X86_P6_NOP),y)
|
|
cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,)
|
|
endif
|