From cfb361f13c8136de78c406745abc4e4456e6d480 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 18 Sep 2008 15:49:14 +0800 Subject: [PATCH 01/41] [IA64] utrace syscall.h support for ia64 Add asm/syscall.h for IA64. Utrace requires this. Signed-off-by: Shaohua Li Signed-off-by: Tony Luck --- arch/ia64/include/asm/ptrace.h | 6 +++ arch/ia64/include/asm/syscall.h | 92 +++++++++++++++++++++++++++++++++ arch/ia64/kernel/ptrace.c | 65 +++++++++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 arch/ia64/include/asm/syscall.h diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index 15f8dcfe6eee..14055c636adf 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -240,6 +240,12 @@ struct switch_stack { */ # define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri) +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + /* FIXME: should this be bspstore + nr_dirty regs? */ + return regs->ar_bspstore; +} + #define regs_return_value(regs) ((regs)->r8) /* Conserve space in histogram by encoding slot bits in address diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h new file mode 100644 index 000000000000..3fd4fa6c48dd --- /dev/null +++ b/arch/ia64/include/asm/syscall.h @@ -0,0 +1,92 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Intel Corp. Shaohua Li + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include +#include + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + + if ((long)regs->cr_ifs < 0) /* Not a syscall */ + return -1; + return regs->r15; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + + return regs->r10 == -1 ? regs->r8:0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + + return regs->r8; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + + if (error) { + /* error < 0, but ia64 uses > 0 return value */ + regs->r8 = -error; + regs->r10 = -1; + } else { + regs->r8 = val; + regs->r10 = 0; + } +} + +extern void ia64_syscall_get_set_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned int i, unsigned int n, + unsigned long *args, int rw); +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + BUG_ON(i + n > 6); + + ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(IS_IA32_PROCESS(regs)); + BUG_ON(i + n > 6); + + ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); +} +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 2a9943b5947f..12b1e9f0b7ae 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2199,3 +2199,68 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) #endif return &user_ia64_view; } + +struct syscall_get_set_args { + unsigned int i; + unsigned int n; + unsigned long *args; + struct pt_regs *regs; + int rw; +}; + +static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) +{ + struct syscall_get_set_args *args = data; + struct pt_regs *pt = args->regs; + unsigned long *krbs, cfm, ndirty; + int i, count; + + if (unw_unwind_to_user(info) < 0) + return; + + cfm = pt->cr_ifs; + krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + count = 0; + if (in_syscall(pt)) + count = min_t(int, args->n, cfm & 0x7f); + + for (i = 0; i < count; i++) { + if (args->rw) + *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = + args->args[i]; + else + args->args[i] = *ia64_rse_skip_regs(krbs, + ndirty + i + args->i); + } + + if (!args->rw) { + while (i < args->n) { + args->args[i] = 0; + i++; + } + } +} + +void ia64_syscall_get_set_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned int i, unsigned int n, + unsigned long *args, int rw) +{ + struct syscall_get_set_args data = { + .i = i, + .n = n, + .args = args, + .regs = regs, + .rw = rw, + }; + + if (task == current) + unw_init_running(syscall_get_set_args_cb, &data); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, task); + syscall_get_set_args_cb(&ufi, &data); + } +} From f14488ccfe0f41207e40520fab60dce356ed9e57 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 6 Oct 2008 10:43:06 -0700 Subject: [PATCH 02/41] [IA64] utrace use generic trace hook Make IA64 use generic trace hook in some paths. Signed-off-by: Shaohua Li Signed-off-by: Tony Luck --- arch/ia64/ia32/ia32_entry.S | 5 +++ arch/ia64/include/asm/thread_info.h | 3 -- arch/ia64/kernel/entry.S | 5 +++ arch/ia64/kernel/perfmon.c | 7 ++--- arch/ia64/kernel/process.c | 21 ++++--------- arch/ia64/kernel/ptrace.c | 47 +++++++++-------------------- arch/ia64/kernel/signal.c | 8 +++++ 7 files changed, 41 insertions(+), 55 deletions(-) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index ff88c48c5d19..b905dcb791ff 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -108,6 +108,11 @@ GLOBAL_ENTRY(ia32_trace_syscall) ;; st8 [r2]=r3 // initialize return code to -ENOSYS br.call.sptk.few rp=syscall_trace_enter // give parent a chance to catch syscall args + cmp.lt p6,p0=r8,r0 // check tracehook + adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8 + ;; +(p6) st8.spill [r2]=r8 // store return value in slot for r8 +(p6) br.spnt.few .ret4 .ret2: // Need to reload arguments (they may be changed by the tracing process) adds r2=IA64_PT_REGS_R1_OFFSET+16,sp // r2 = &pt_regs.r1 adds r3=IA64_PT_REGS_R13_OFFSET+16,sp // r3 = &pt_regs.r13 diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 7c60fcdd2efd..ae6922626bf4 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -87,9 +87,6 @@ struct thread_info { #define alloc_task_struct() ((struct task_struct *)__get_free_pages(GFP_KERNEL | __GFP_COMP, KERNEL_STACK_SIZE_ORDER)) #define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) -#define tsk_set_notify_resume(tsk) \ - set_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME) -extern void tsk_clear_notify_resume(struct task_struct *tsk); #endif /* !__ASSEMBLY */ /* diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0dd6c1419d8d..7ef0c594f5ed 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -534,6 +534,11 @@ GLOBAL_ENTRY(ia64_trace_syscall) stf.spill [r16]=f10 stf.spill [r17]=f11 br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args + cmp.lt p6,p0=r8,r0 // check tracehook + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> adds r16=PT(F6)+16,sp adds r17=PT(F7)+16,sp ;; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index fc8f3509df27..ada4605d1223 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -3684,7 +3685,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) PFM_SET_WORK_PENDING(task, 1); - tsk_set_notify_resume(task); + set_notify_resume(task); /* * XXX: send reschedule if task runs on another CPU @@ -5044,8 +5045,6 @@ pfm_handle_work(void) PFM_SET_WORK_PENDING(current, 0); - tsk_clear_notify_resume(current); - regs = task_pt_regs(current); /* @@ -5414,7 +5413,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * when coming from ctxsw, current still points to the * previous task, therefore we must work with task and not current. */ - tsk_set_notify_resume(task); + set_notify_resume(task); } /* * defer until state is changed (shorten spin window). the context is locked diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3ab8373103ec..341a0319a5ba 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -160,21 +161,6 @@ show_regs (struct pt_regs *regs) show_stack(NULL, NULL); } -void tsk_clear_notify_resume(struct task_struct *tsk) -{ -#ifdef CONFIG_PERFMON - if (tsk->thread.pfm_needs_checking) - return; -#endif - if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE)) - return; - clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME); -} - -/* - * do_notify_resume_user(): - * Called from notify_resume_user at entry.S, with interrupts disabled. - */ void do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) { @@ -203,6 +189,11 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) ia64_do_signal(scr, in_syscall); } + if (test_thread_flag(TIF_NOTIFY_RESUME)) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(&scr->pt); + } + /* copy user rbs to kernel rbs */ if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) { local_irq_enable(); /* force interrupt enable */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 12b1e9f0b7ae..92c9689b7d97 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -603,7 +604,7 @@ void ia64_ptrace_stop(void) { if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE)) return; - tsk_set_notify_resume(current); + set_notify_resume(current); unw_init_running(do_sync_rbs, ia64_sync_user_rbs); } @@ -613,7 +614,6 @@ void ia64_ptrace_stop(void) void ia64_sync_krbs(void) { clear_tsk_thread_flag(current, TIF_RESTORE_RSE); - tsk_clear_notify_resume(current); unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs); } @@ -644,7 +644,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) spin_lock_irq(&child->sighand->siglock); if (child->state == TASK_STOPPED && !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { - tsk_set_notify_resume(child); + set_notify_resume(child); child->state = TASK_TRACED; stopped = 1; @@ -1232,37 +1232,16 @@ arch_ptrace (struct task_struct *child, long request, long addr, long data) } -static void -syscall_trace (void) -{ - /* - * The 0x80 provides a way for the tracing parent to - * distinguish between a syscall stop and SIGTRAP delivery. - */ - ptrace_notify(SIGTRAP - | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); - - /* - * This isn't the same as continuing with a signal, but it - * will do for normal use. strace only continues with a - * signal if the stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} - /* "asmlinkage" so the input arguments are preserved... */ -asmlinkage void +asmlinkage long syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(); + if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (tracehook_report_syscall_entry(®s)) + return -ENOSYS; /* copy user rbs to kernel rbs */ if (test_thread_flag(TIF_RESTORE_RSE)) @@ -1283,6 +1262,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3); } + return 0; } /* "asmlinkage" so the input arguments are preserved... */ @@ -1292,6 +1272,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { + int step; + if (unlikely(current->audit_context)) { int success = AUDITSC_RESULT(regs.r10); long result = regs.r8; @@ -1301,10 +1283,9 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, audit_syscall_exit(success, result); } - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - syscall_trace(); + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(®s, step); /* copy user rbs to kernel rbs */ if (test_thread_flag(TIF_RESTORE_RSE)) @@ -1940,7 +1921,7 @@ gpregs_writeback(struct task_struct *target, { if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) return 0; - tsk_set_notify_resume(target); + set_notify_resume(target); return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, NULL, NULL); } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 19c5a78636fc..e12500a9c443 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -439,6 +440,13 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse sigaddset(¤t->blocked, sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + /* + * Let tracing know that we've done the handler setup. + */ + tracehook_signal_handler(sig, info, ka, &scr->pt, + test_thread_flag(TIF_SINGLESTEP)); + return 1; } From 9690ad031290d86979b284bd6243313f58271bcc Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 1 Oct 2008 13:57:14 -0700 Subject: [PATCH 03/41] [IA64] utrace Enable trace hook config Signed-off-by: Shaohua Li Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 48e496fe1e75..a36b014ee662 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -21,6 +21,7 @@ config IA64 select HAVE_KRETPROBES select HAVE_DMA_ATTRS select HAVE_KVM + select HAVE_ARCH_TRACEHOOK default y help The Itanium Processor Family is Intel's 64-bit successor to From 680973edf122fd95735ecfc077cf79645d2e5081 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 18 Sep 2008 15:50:26 +0800 Subject: [PATCH 04/41] [IA64] utrace Convert compat ptrace to use compat_sys_ptrace Convert IA64 32-bit ptrace to use compat_sys_ptrace. Signed-off-by: Shaohua Li Signed-off-by: Tony Luck --- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/ia32/sys_ia32.c | 83 +++--------------------------- arch/ia64/include/asm/ptrace.h | 2 + arch/ia64/include/asm/syscall.h | 89 +++++++++++++++++++++++++++++---- 4 files changed, 89 insertions(+), 87 deletions(-) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index b905dcb791ff..a73ec0089d43 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -204,7 +204,7 @@ ia32_syscall_table: data8 sys_setuid /* 16-bit version */ data8 sys_getuid /* 16-bit version */ data8 compat_sys_stime /* 25 */ - data8 sys32_ptrace + data8 compat_sys_ptrace data8 sys32_alarm data8 sys_ni_syscall data8 sys32_pause diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index bf196cbb3796..5df5e4c90e4d 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1300,25 +1300,6 @@ sys32_waitpid (int pid, unsigned int *stat_addr, int options) return compat_sys_wait4(pid, stat_addr, options, NULL); } -static unsigned int -ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val) -{ - size_t copied; - unsigned int ret; - - copied = access_process_vm(child, addr, val, sizeof(*val), 0); - return (copied != sizeof(ret)) ? -EIO : 0; -} - -static unsigned int -ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val) -{ - - if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val)) - return -EIO; - return 0; -} - /* * The order in which registers are stored in the ptrace regs structure */ @@ -1616,49 +1597,15 @@ restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __u return 0; } -asmlinkage long -sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - struct task_struct *child; - unsigned int value, tmp; + unsigned long addr = caddr; + unsigned long data = cdata; + unsigned int tmp; long i, ret; - lock_kernel(); - if (request == PTRACE_TRACEME) { - ret = ptrace_traceme(); - goto out; - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) { - ret = PTR_ERR(child); - goto out; - } - - if (request == PTRACE_ATTACH) { - ret = sys_ptrace(request, pid, addr, data); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: /* read word at location addr */ - ret = ia32_peek(child, addr, &value); - if (ret == 0) - ret = put_user(value, (unsigned int __user *) compat_ptr(data)); - else - ret = -EIO; - goto out_tsk; - - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: /* write the word at location addr */ - ret = ia32_poke(child, addr, data); - goto out_tsk; - case PTRACE_PEEKUSR: /* read word at addr in USER area */ ret = -EIO; if ((addr & 3) || addr > 17*sizeof(int)) @@ -1723,27 +1670,9 @@ sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data) compat_ptr(data)); break; - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data)); - break; - - case PTRACE_SYSCALL: /* continue, stop after next syscall */ - case PTRACE_CONT: /* restart after signal. */ - case PTRACE_KILL: - case PTRACE_SINGLESTEP: /* execute chile for one instruction */ - case PTRACE_DETACH: /* detach a process */ - ret = sys_ptrace(request, pid, addr, data); - break; - default: - ret = ptrace_request(child, request, addr, data); - break; - + return compat_ptrace_request(child, request, caddr, cdata); } - out_tsk: - put_task_struct(child); - out: - unlock_kernel(); return ret; } diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h index 14055c636adf..6417c1ecb44e 100644 --- a/arch/ia64/include/asm/ptrace.h +++ b/arch/ia64/include/asm/ptrace.h @@ -325,6 +325,8 @@ static inline unsigned long user_stack_pointer(struct pt_regs *regs) #define arch_has_block_step() (1) extern void user_enable_block_step(struct task_struct *); +#define __ARCH_WANT_COMPAT_SYS_PTRACE + #endif /* !__KERNEL__ */ /* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */ diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 3fd4fa6c48dd..2f758a42f94b 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -19,24 +19,35 @@ static inline long syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { - BUG_ON(IS_IA32_PROCESS(regs)); - if ((long)regs->cr_ifs < 0) /* Not a syscall */ return -1; + +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) + return regs->r1; +#endif + return regs->r15; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { - BUG_ON(IS_IA32_PROCESS(regs)); +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) + regs->r8 = regs->r1; +#endif + /* do nothing */ } static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - BUG_ON(IS_IA32_PROCESS(regs)); +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) + return regs->r8; +#endif return regs->r10 == -1 ? regs->r8:0; } @@ -44,8 +55,6 @@ static inline long syscall_get_error(struct task_struct *task, static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { - BUG_ON(IS_IA32_PROCESS(regs)); - return regs->r8; } @@ -53,7 +62,12 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - BUG_ON(IS_IA32_PROCESS(regs)); +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) { + regs->r8 = (long) error ? error : val; + return; + } +#endif if (error) { /* error < 0, but ia64 uses > 0 return value */ @@ -73,9 +87,39 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(IS_IA32_PROCESS(regs)); BUG_ON(i + n > 6); +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) { + switch (i + n) { + case 6: + if (!n--) break; + *args++ = regs->r13; + case 5: + if (!n--) break; + *args++ = regs->r15; + case 4: + if (!n--) break; + *args++ = regs->r14; + case 3: + if (!n--) break; + *args++ = regs->r10; + case 2: + if (!n--) break; + *args++ = regs->r9; + case 1: + if (!n--) break; + *args++ = regs->r11; + case 0: + if (!n--) break; + default: + BUG(); + break; + } + + return; + } +#endif ia64_syscall_get_set_arguments(task, regs, i, n, args, 0); } @@ -84,9 +128,36 @@ static inline void syscall_set_arguments(struct task_struct *task, unsigned int i, unsigned int n, unsigned long *args) { - BUG_ON(IS_IA32_PROCESS(regs)); BUG_ON(i + n > 6); +#ifdef CONFIG_IA32_SUPPORT + if (IS_IA32_PROCESS(regs)) { + switch (i + n) { + case 6: + if (!n--) break; + regs->r13 = *args++; + case 5: + if (!n--) break; + regs->r15 = *args++; + case 4: + if (!n--) break; + regs->r14 = *args++; + case 3: + if (!n--) break; + regs->r10 = *args++; + case 2: + if (!n--) break; + regs->r9 = *args++; + case 1: + if (!n--) break; + regs->r11 = *args++; + case 0: + if (!n--) break; + } + + return; + } +#endif ia64_syscall_get_set_arguments(task, regs, i, n, args, 1); } #endif /* _ASM_SYSCALL_H */ From 749da7912e1270abbbaef04112a7a78febf3f0f4 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:41 +0900 Subject: [PATCH 05/41] ia64/pv_ops: fix paravirtualization of ivt.S with CONFIG_SMP=n When CONFIG_SMP=n, three instruction in ivt.S were missed to paravirtualize. paravirtualize them. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 416a952b19bd..f675d8e33853 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -580,7 +580,7 @@ ENTRY(dirty_bit) mov b0=r29 // restore b0 ;; st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE + ITC_D(p0, r18, r16) // install updated PTE #endif mov pr=r31,-1 // restore pr RFI @@ -646,7 +646,7 @@ ENTRY(iaccess_bit) mov b0=r29 // restore b0 ;; st8 [r17]=r18 // store back updated PTE - itc.i r18 // install updated PTE + ITC_I(p0, r18, r16) // install updated PTE #endif /* !CONFIG_SMP */ mov pr=r31,-1 RFI @@ -698,7 +698,7 @@ ENTRY(daccess_bit) or r18=_PAGE_A,r18 // set the accessed bit ;; st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE + ITC_D(p0, r18, r16) // install updated PTE #endif mov b0=r29 // restore b0 mov pr=r31,-1 From ce1fc742f9703eeda0787b449ac57a780585bc97 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:42 +0900 Subject: [PATCH 06/41] ia64/pv_ops: avoid name conflict of get_irq_chip(). The macro get_irq_chip() is defined in linux/include/linux/irq.h which cause name conflict with one in linux/arch/ia64/include/asm/paravirt.h. rename the latter to __get_irq_chip(). Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/paravirt.h | 4 ++-- arch/ia64/kernel/paravirt.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/include/asm/paravirt.h b/arch/ia64/include/asm/paravirt.h index 660cab044834..2bf3636473fe 100644 --- a/arch/ia64/include/asm/paravirt.h +++ b/arch/ia64/include/asm/paravirt.h @@ -117,7 +117,7 @@ static inline void paravirt_post_smp_prepare_boot_cpu(void) struct pv_iosapic_ops { void (*pcat_compat_init)(void); - struct irq_chip *(*get_irq_chip)(unsigned long trigger); + struct irq_chip *(*__get_irq_chip)(unsigned long trigger); unsigned int (*__read)(char __iomem *iosapic, unsigned int reg); void (*__write)(char __iomem *iosapic, unsigned int reg, u32 val); @@ -135,7 +135,7 @@ iosapic_pcat_compat_init(void) static inline struct irq_chip* iosapic_get_irq_chip(unsigned long trigger) { - return pv_iosapic_ops.get_irq_chip(trigger); + return pv_iosapic_ops.__get_irq_chip(trigger); } static inline unsigned int diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index afaf5b9a2cf0..de35d8e8b7d2 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -332,7 +332,7 @@ ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) struct pv_iosapic_ops pv_iosapic_ops = { .pcat_compat_init = ia64_native_iosapic_pcat_compat_init, - .get_irq_chip = ia64_native_iosapic_get_irq_chip, + .__get_irq_chip = ia64_native_iosapic_get_irq_chip, .__read = ia64_native_iosapic_read, .__write = ia64_native_iosapic_write, From 41a6ba09839cc5b114ea2c2c064b056e35cf93f1 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:43 +0900 Subject: [PATCH 07/41] ia64/pv_ops: update native/inst.h to clobber predicate. add CLOBBER_PRED() to clobber predicate register. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/native/inst.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h index c8efbf7b849e..0a1026cca4fa 100644 --- a/arch/ia64/include/asm/native/inst.h +++ b/arch/ia64/include/asm/native/inst.h @@ -36,8 +36,13 @@ ;; \ movl clob = PARAVIRT_POISON; \ ;; +# define CLOBBER_PRED(pred_clob) \ + ;; \ + cmp.eq pred_clob, p0 = r0, r0 \ + ;; #else -# define CLOBBER(clob) /* nothing */ +# define CLOBBER(clob) /* nothing */ +# define CLOBBER_PRED(pred_clob) /* nothing */ #endif #define MOV_FROM_IFA(reg) \ @@ -136,7 +141,8 @@ #define SSM_PSR_I(pred, pred_clob, clob) \ (pred) ssm psr.i \ - CLOBBER(clob) + CLOBBER(clob) \ + CLOBBER_PRED(pred_clob) #define RSM_PSR_I(pred, clob0, clob1) \ (pred) rsm psr.i \ From 1b4a18fcfadcc51987682fc266bb88ed59d12935 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:44 +0900 Subject: [PATCH 08/41] ia64: move function declaration, ia64_cpu_local_tick() from .c to .h eliminate the function declaration ia64_cpu_local_tick() in process.c by defining in arch/ia64/include/asm/timex.h The same function will be used in a different .c file later. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/timex.h | 2 ++ arch/ia64/kernel/process.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h index 05a6baf8a472..4e03cfe74a0c 100644 --- a/arch/ia64/include/asm/timex.h +++ b/arch/ia64/include/asm/timex.h @@ -39,4 +39,6 @@ get_cycles (void) return ret; } +extern void ia64_cpu_local_tick (void); + #endif /* _ASM_IA64_TIMEX_H */ diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3ab8373103ec..8de0f460c88d 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -251,7 +251,6 @@ default_idle (void) /* We don't actually take CPU down, just spin without interrupts. */ static inline void play_dead(void) { - extern void ia64_cpu_local_tick (void); unsigned int this_cpu = smp_processor_id(); /* Ack it */ From da0ac27213d6f8f7d6ca918031572eaf7f14dca3 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:45 +0900 Subject: [PATCH 09/41] ia64/xen: reserve "break" numbers used for xen hypercalls. reserve "break" numbers used for xen hypercalls to avoid reuse for something else. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/break.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/ia64/include/asm/break.h b/arch/ia64/include/asm/break.h index f03402039896..e90c40ec9edf 100644 --- a/arch/ia64/include/asm/break.h +++ b/arch/ia64/include/asm/break.h @@ -20,4 +20,13 @@ */ #define __IA64_BREAK_SYSCALL 0x100000 +/* + * Xen specific break numbers: + */ +#define __IA64_XEN_HYPERCALL 0x1000 +/* [__IA64_XEN_HYPERPRIVOP_START, __IA64_XEN_HYPERPRIVOP_MAX] is used + for xen hyperprivops */ +#define __IA64_XEN_HYPERPRIVOP_START 0x1 +#define __IA64_XEN_HYPERPRIVOP_MAX 0x1a + #endif /* _ASM_IA64_BREAK_H */ From 1b051c6be34b3b06fa1244d9fcd47f91fb6aca1c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:46 +0900 Subject: [PATCH 10/41] ia64/xen: introduce sync bitops which is necessary for ia64/xen support. define sync bitops which is necessary for ia64/xen. This bit operation is used to communicate with VMM or other guest kernel Even when this kernel is built for UP, VMM might be SMP so that those operation must always use atomic operation. Cc: Robin Holt Cc: Jeremy Fitzhardinge Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/sync_bitops.h | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 arch/ia64/include/asm/sync_bitops.h diff --git a/arch/ia64/include/asm/sync_bitops.h b/arch/ia64/include/asm/sync_bitops.h new file mode 100644 index 000000000000..593c12eeb270 --- /dev/null +++ b/arch/ia64/include/asm/sync_bitops.h @@ -0,0 +1,51 @@ +#ifndef _ASM_IA64_SYNC_BITOPS_H +#define _ASM_IA64_SYNC_BITOPS_H + +/* + * Copyright (C) 2008 Isaku Yamahata + * + * Based on synch_bitops.h which Dan Magenhaimer wrote. + * + * bit operations which provide guaranteed strong synchronisation + * when communicating with Xen or other guest OSes running on other CPUs. + */ + +static inline void sync_set_bit(int nr, volatile void *addr) +{ + set_bit(nr, addr); +} + +static inline void sync_clear_bit(int nr, volatile void *addr) +{ + clear_bit(nr, addr); +} + +static inline void sync_change_bit(int nr, volatile void *addr) +{ + change_bit(nr, addr); +} + +static inline int sync_test_and_set_bit(int nr, volatile void *addr) +{ + return test_and_set_bit(nr, addr); +} + +static inline int sync_test_and_clear_bit(int nr, volatile void *addr) +{ + return test_and_clear_bit(nr, addr); +} + +static inline int sync_test_and_change_bit(int nr, volatile void *addr) +{ + return test_and_change_bit(nr, addr); +} + +static inline int sync_test_bit(int nr, const volatile void *addr) +{ + return test_bit(nr, addr); +} + +#define sync_cmpxchg(ptr, old, new) \ + ((__typeof__(*(ptr)))cmpxchg_acq((ptr), (old), (new))) + +#endif /* _ASM_IA64_SYNC_BITOPS_H */ From 627308c90775399ea15f890723926d144874d782 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:47 +0900 Subject: [PATCH 11/41] ia64/xen: increase IA64_MAX_RSVD_REGIONS. Xenlinux/ia64 needs to reserve one more region passed from xen hypervisor as start info. Cc: Robin Holt Cc: Bjorn Helgaas Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/meminit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h index 7245a5781594..6bc96ee54327 100644 --- a/arch/ia64/include/asm/meminit.h +++ b/arch/ia64/include/asm/meminit.h @@ -18,10 +18,11 @@ * - crash dumping code reserved region * - Kernel memory map built from EFI memory map * - ELF core header + * - xen start info if CONFIG_XEN * * More could be added if necessary */ -#define IA64_MAX_RSVD_REGIONS 8 +#define IA64_MAX_RSVD_REGIONS 9 struct rsvd_region { unsigned long start; /* virtual address of beginning of element */ From ffcc99e992aef3bb5ab4b177b6bd84954ac2e327 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:48 +0900 Subject: [PATCH 12/41] ia64/xen: introduce definitions necessary for ia64/xen hypercalls. import arch/ia64/include/asm/xen/interface.h to introduce definitions necessary for ia64/xen hypercalls. They are basic structures to communicate with xen hypervisor and will be used later. Cc: Robin Holt Cc: Jeremy Fitzhardinge Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/interface.h | 346 ++++++++++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 arch/ia64/include/asm/xen/interface.h diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h new file mode 100644 index 000000000000..f00fab40854d --- /dev/null +++ b/arch/ia64/include/asm/xen/interface.h @@ -0,0 +1,346 @@ +/****************************************************************************** + * arch-ia64/hypervisor-if.h + * + * Guest OS interface to IA64 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright by those who contributed. (in alphabetical order) + * + * Anthony Xu + * Eddie Dong + * Fred Yang + * Kevin Tian + * Alex Williamson + * Chris Wright + * Christian Limpach + * Dietmar Hahn + * Hollis Blanchard + * Isaku Yamahata + * Jan Beulich + * John Levon + * Kazuhiro Suzuki + * Keir Fraser + * Kouya Shimura + * Masaki Kanno + * Matt Chapman + * Matthew Chapman + * Samuel Thibault + * Tomonari Horikoshi + * Tristan Gingold + * Tsunehisa Doi + * Yutaka Ezaki + * Zhang Xin + * Zhang xiantao + * dan.magenheimer@hp.com + * ian.pratt@cl.cam.ac.uk + * michael.fetterman@cl.cam.ac.uk + */ + +#ifndef _ASM_IA64_XEN_INTERFACE_H +#define _ASM_IA64_XEN_INTERFACE_H + +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name +#define GUEST_HANDLE_64(name) GUEST_HANDLE(name) +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +__DEFINE_GUEST_HANDLE(ulong, unsigned long); +__DEFINE_GUEST_HANDLE(u64, unsigned long); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(long); +DEFINE_GUEST_HANDLE(void); + +typedef unsigned long xen_pfn_t; +DEFINE_GUEST_HANDLE(xen_pfn_t); +#define PRI_xen_pfn "lx" +#endif + +/* Arch specific VIRQs definition */ +#define VIRQ_ITC VIRQ_ARCH_0 /* V. Virtual itc timer */ +#define VIRQ_MCA_CMC VIRQ_ARCH_1 /* MCA cmc interrupt */ +#define VIRQ_MCA_CPE VIRQ_ARCH_2 /* MCA cpe interrupt */ + +/* Maximum number of virtual CPUs in multi-processor guests. */ +/* keep sizeof(struct shared_page) <= PAGE_SIZE. + * this is checked in arch/ia64/xen/hypervisor.c. */ +#define MAX_VIRT_CPUS 64 + +#ifndef __ASSEMBLY__ + +#define INVALID_MFN (~0UL) + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; + +struct mapped_regs { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + union { + unsigned long vgr[16]; + unsigned long bank1_regs[16]; /* bank1 regs (r16-r31) + when bank0 active */ + }; + union { + unsigned long vbgr[16]; + unsigned long bank0_regs[16]; /* bank0 regs (r16-r31) + when bank1 active */ + }; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; /* CR0 */ + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; /* CR8 */ + unsigned long rsv2[7]; + unsigned long ipsr; /* CR16 */ + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; /* CR24 */ + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; /* CR64 */ + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; /* CR72 */ + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; /* CR80 */ + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + union { + unsigned long reserved5[128]; + struct { + unsigned long precover_ifs; + unsigned long unat; /* not sure if this is needed + until NaT arch is done */ + int interrupt_collection_enabled; /* virtual psr.ic */ + + /* virtual interrupt deliverable flag is + * evtchn_upcall_mask in shared info area now. + * interrupt_mask_addr is the address + * of evtchn_upcall_mask for current vcpu + */ + unsigned char *interrupt_mask_addr; + int pending_interruption; + unsigned char vpsr_pp; + unsigned char vpsr_dfh; + unsigned char hpsr_dfh; + unsigned char hpsr_mfh; + unsigned long reserved5_1[4]; + int metaphysical_mode; /* 1 = use metaphys mapping + 0 = use virtual */ + int banknum; /* 0 or 1, which virtual + register bank is active */ + unsigned long rrs[8]; /* region registers */ + unsigned long krs[8]; /* kernel registers */ + unsigned long tmp[16]; /* temp registers + (e.g. for hyperprivops) */ + }; + }; +}; + +struct arch_vcpu_info { + /* nothing */ +}; + +/* + * This structure is used for magic page in domain pseudo physical address + * space and the result of XENMEM_machine_memory_map. + * As the XENMEM_machine_memory_map result, + * xen_memory_map::nr_entries indicates the size in bytes + * including struct xen_ia64_memmap_info. Not the number of entries. + */ +struct xen_ia64_memmap_info { + uint64_t efi_memmap_size; /* size of EFI memory map */ + uint64_t efi_memdesc_size; /* size of an EFI memory map + * descriptor */ + uint32_t efi_memdesc_version; /* memory descriptor version */ + void *memdesc[0]; /* array of efi_memory_desc_t */ +}; + +struct arch_shared_info { + /* PFN of the start_info page. */ + unsigned long start_info_pfn; + + /* Interrupt vector for event channel. */ + int evtchn_vector; + + /* PFN of memmap_info page */ + unsigned int memmap_info_num_pages; /* currently only = 1 case is + supported. */ + unsigned long memmap_info_pfn; + + uint64_t pad[31]; +}; + +struct xen_callback { + unsigned long ip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +/* Size of the shared_info area (this is not related to page size). */ +#define XSI_SHIFT 14 +#define XSI_SIZE (1 << XSI_SHIFT) +/* Log size of mapped_regs area (64 KB - only 4KB is used). */ +#define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) +/* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ +#define XMAPPEDREGS_OFS XSI_SIZE + +/* Hyperprivops. */ +#define HYPERPRIVOP_START 0x1 +#define HYPERPRIVOP_RFI (HYPERPRIVOP_START + 0x0) +#define HYPERPRIVOP_RSM_DT (HYPERPRIVOP_START + 0x1) +#define HYPERPRIVOP_SSM_DT (HYPERPRIVOP_START + 0x2) +#define HYPERPRIVOP_COVER (HYPERPRIVOP_START + 0x3) +#define HYPERPRIVOP_ITC_D (HYPERPRIVOP_START + 0x4) +#define HYPERPRIVOP_ITC_I (HYPERPRIVOP_START + 0x5) +#define HYPERPRIVOP_SSM_I (HYPERPRIVOP_START + 0x6) +#define HYPERPRIVOP_GET_IVR (HYPERPRIVOP_START + 0x7) +#define HYPERPRIVOP_GET_TPR (HYPERPRIVOP_START + 0x8) +#define HYPERPRIVOP_SET_TPR (HYPERPRIVOP_START + 0x9) +#define HYPERPRIVOP_EOI (HYPERPRIVOP_START + 0xa) +#define HYPERPRIVOP_SET_ITM (HYPERPRIVOP_START + 0xb) +#define HYPERPRIVOP_THASH (HYPERPRIVOP_START + 0xc) +#define HYPERPRIVOP_PTC_GA (HYPERPRIVOP_START + 0xd) +#define HYPERPRIVOP_ITR_D (HYPERPRIVOP_START + 0xe) +#define HYPERPRIVOP_GET_RR (HYPERPRIVOP_START + 0xf) +#define HYPERPRIVOP_SET_RR (HYPERPRIVOP_START + 0x10) +#define HYPERPRIVOP_SET_KR (HYPERPRIVOP_START + 0x11) +#define HYPERPRIVOP_FC (HYPERPRIVOP_START + 0x12) +#define HYPERPRIVOP_GET_CPUID (HYPERPRIVOP_START + 0x13) +#define HYPERPRIVOP_GET_PMD (HYPERPRIVOP_START + 0x14) +#define HYPERPRIVOP_GET_EFLAG (HYPERPRIVOP_START + 0x15) +#define HYPERPRIVOP_SET_EFLAG (HYPERPRIVOP_START + 0x16) +#define HYPERPRIVOP_RSM_BE (HYPERPRIVOP_START + 0x17) +#define HYPERPRIVOP_GET_PSR (HYPERPRIVOP_START + 0x18) +#define HYPERPRIVOP_SET_RR0_TO_RR4 (HYPERPRIVOP_START + 0x19) +#define HYPERPRIVOP_MAX (0x1a) + +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi __HYPERVISOR_arch_1 + +/* Xencomm macros. */ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#ifndef __ASSEMBLY__ + +/* + * Optimization features. + * The hypervisor may do some special optimizations for guests. This hypercall + * can be used to switch on/of these special optimizations. + */ +#define __HYPERVISOR_opt_feature 0x700UL + +#define XEN_IA64_OPTF_OFF 0x0 +#define XEN_IA64_OPTF_ON 0x1 + +/* + * If this feature is switched on, the hypervisor inserts the + * tlb entries without calling the guests traphandler. + * This is useful in guests using region 7 for identity mapping + * like the linux kernel does. + */ +#define XEN_IA64_OPTF_IDENT_MAP_REG7 1 + +/* Identity mapping of region 4 addresses in HVM. */ +#define XEN_IA64_OPTF_IDENT_MAP_REG4 2 + +/* Identity mapping of region 5 addresses in HVM. */ +#define XEN_IA64_OPTF_IDENT_MAP_REG5 3 + +#define XEN_IA64_OPTF_IDENT_MAP_NOT_SET (0) + +struct xen_ia64_opt_feature { + unsigned long cmd; /* Which feature */ + unsigned char on; /* Switch feature on/off */ + union { + struct { + /* The page protection bit mask of the pte. + * This will be or'ed with the pte. */ + unsigned long pgprot; + unsigned long key; /* A protection key for itir.*/ + }; + }; +}; + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_XEN_INTERFACE_H */ From b31c09bd82731600a72c83894e7c6a53b36b6c83 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:49 +0900 Subject: [PATCH 13/41] ia64/xen: define several constants for ia64/xen. define several constants for ia64/xen. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/asm-offsets.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 94c44b1ccfd0..eaa988baa877 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -16,6 +16,8 @@ #include #include +#include + #include "../kernel/sigframe.h" #include "../kernel/fsyscall_gtod_data.h" @@ -286,4 +288,29 @@ void foo(void) offsetof (struct itc_jitter_data_t, itc_jitter)); DEFINE(IA64_ITC_LASTCYCLE_OFFSET, offsetof (struct itc_jitter_data_t, itc_lastcycle)); + +#ifdef CONFIG_XEN + BLANK(); + +#define DEFINE_MAPPED_REG_OFS(sym, field) \ + DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field))) + + DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr); + DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr); + DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip); + DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs); + DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs); + DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr); + DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa); + DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa); + DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim); + DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha); + DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir); + DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); + DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); + DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat); + DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat); +#endif /* CONFIG_XEN */ } From 25c7bfaef201ab8cef400ea96e27803e1d32697a Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:50 +0900 Subject: [PATCH 14/41] ia64/xen: add a necessary header file to compile include/xen/interface/xen.h Create include/asm-ia64/pvclock-abi.h to compile which contains the same definitions of include/asm-x86/pvclock-abi.h because ia64/xen uses same structure. Hopefully include/asm-x86/pvclock-abi.h would be moved to somewhere more generic. Another approach is to include include/asm-x86/pvclock-abi.h from include/asm-ia64/pvclock-abi.h. But this would break if/when x86 header files are moved under arch/x86. So for now, same definitions are duplicated as suggested by Tony. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/pvclock-abi.h | 48 +++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 arch/ia64/include/asm/pvclock-abi.h diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h new file mode 100644 index 000000000000..44ef9ef8f5b3 --- /dev/null +++ b/arch/ia64/include/asm/pvclock-abi.h @@ -0,0 +1,48 @@ +/* + * same structure to x86's + * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic. + * For now, define same duplicated definitions. + */ + +#ifndef _ASM_IA64__PVCLOCK_ABI_H +#define _ASM_IA64__PVCLOCK_ABI_H +#ifndef __ASSEMBLY__ + +/* + * These structs MUST NOT be changed. + * They are the ABI between hypervisor and guest OS. + * Both Xen and KVM are using this. + * + * pvclock_vcpu_time_info holds the system time and the tsc timestamp + * of the last update. So the guest can use the tsc delta to get a + * more precise system time. There is one per virtual cpu. + * + * pvclock_wall_clock references the point in time when the system + * time was zero (usually boot time), thus the guest calculates the + * current wall clock by adding the system time. + * + * Protocol for the "version" fields is: hypervisor raises it (making + * it uneven) before it starts updating the fields and raises it again + * (making it even) when it is done. Thus the guest can make sure the + * time values it got are consistent by checking the version before + * and after reading them. + */ + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_IA64__PVCLOCK_ABI_H */ From 67fe8d27a8b0845d6a5237af340fe5039335bf99 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:51 +0900 Subject: [PATCH 15/41] ia64/xen: define helper functions for xen related address conversion. Xen needs some address conversions between pseudo physical address (guest phsyical address), guest machine address (real machine address) and dma address. Define helper functions for those address conversion. Cc: Jeremy Fitzhardinge Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/page.h | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 arch/ia64/include/asm/xen/page.h diff --git a/arch/ia64/include/asm/xen/page.h b/arch/ia64/include/asm/xen/page.h new file mode 100644 index 000000000000..03441a780b5b --- /dev/null +++ b/arch/ia64/include/asm/xen/page.h @@ -0,0 +1,65 @@ +/****************************************************************************** + * arch/ia64/include/asm/xen/page.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _ASM_IA64_XEN_PAGE_H +#define _ASM_IA64_XEN_PAGE_H + +#define INVALID_P2M_ENTRY (~0UL) + +static inline unsigned long mfn_to_pfn(unsigned long mfn) +{ + return mfn; +} + +static inline unsigned long pfn_to_mfn(unsigned long pfn) +{ + return pfn; +} + +#define phys_to_machine_mapping_valid(_x) (1) + +static inline void *mfn_to_virt(unsigned long mfn) +{ + return __va(mfn << PAGE_SHIFT); +} + +static inline unsigned long virt_to_mfn(void *virt) +{ + return __pa(virt) >> PAGE_SHIFT; +} + +/* for tpmfront.c */ +static inline unsigned long virt_to_machine(void *virt) +{ + return __pa(virt); +} + +static inline void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + /* nothing */ +} + +#define pte_mfn(_x) pte_pfn(_x) +#define mfn_pte(_x, _y) __pte_ma(0) /* unmodified use */ +#define __pte_ma(_x) ((pte_t) {(_x)}) /* unmodified use */ + +#endif /* _ASM_IA64_XEN_PAGE_H */ From ed50bd6096dab96e4c501d600776b75687dd2cf0 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:52 +0900 Subject: [PATCH 16/41] ia64/xen: define helper functions for xen hypercalls. introduce helper functions for xen hypercalls which traps to hypervisor. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/hypercall.h | 265 ++++++++++++++++++++++++++ arch/ia64/include/asm/xen/privop.h | 129 +++++++++++++ arch/ia64/xen/Makefile | 5 + arch/ia64/xen/hypercall.S | 91 +++++++++ 4 files changed, 490 insertions(+) create mode 100644 arch/ia64/include/asm/xen/hypercall.h create mode 100644 arch/ia64/include/asm/xen/privop.h create mode 100644 arch/ia64/xen/Makefile create mode 100644 arch/ia64/xen/hypercall.S diff --git a/arch/ia64/include/asm/xen/hypercall.h b/arch/ia64/include/asm/xen/hypercall.h new file mode 100644 index 000000000000..96fc62366aa4 --- /dev/null +++ b/arch/ia64/include/asm/xen/hypercall.h @@ -0,0 +1,265 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_IA64_XEN_HYPERCALL_H +#define _ASM_IA64_XEN_HYPERCALL_H + +#include +#include +#include +#include +struct xencomm_handle; +extern unsigned long __hypercall(unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, + unsigned long a5, unsigned long cmd); + +/* + * Assembler stubs for hyper-calls. + */ + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + __res = __hypercall(0, 0, 0, 0, 0, __HYPERVISOR_##name);\ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res; \ + __res = __hypercall((unsigned long)a1, \ + 0, 0, 0, 0, __HYPERVISOR_##name); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + 0, 0, 0, __HYPERVISOR_##name); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + (unsigned long)a3, \ + 0, 0, __HYPERVISOR_##name); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + (unsigned long)a3, \ + (unsigned long)a4, \ + 0, __HYPERVISOR_##name); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res; \ + __res = __hypercall((unsigned long)a1, \ + (unsigned long)a2, \ + (unsigned long)a3, \ + (unsigned long)a4, \ + (unsigned long)a5, \ + __HYPERVISOR_##name); \ + (type)__res; \ +}) + + +static inline int +xencomm_arch_hypercall_sched_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, sched_op_new, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op(u64 timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout >> 32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} + +static inline int +xencomm_arch_hypercall_multicall(struct xencomm_handle *call_list, + int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +xencomm_arch_hypercall_memory_op(unsigned int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_event_channel_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, event_channel_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_xen_version(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_console_io(int cmd, int count, + struct xencomm_handle *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +xencomm_arch_hypercall_physdev_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, physdev_op, cmd, arg); +} + +static inline int +xencomm_arch_hypercall_grant_table_op(unsigned int cmd, + struct xencomm_handle *uop, + unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count); + +extern int xencomm_arch_hypercall_suspend(struct xencomm_handle *arg); + +static inline int +xencomm_arch_hypercall_callback_op(int cmd, struct xencomm_handle *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline long +xencomm_arch_hypercall_vcpu_op(int cmd, int cpu, void *arg) +{ + return _hypercall3(long, vcpu_op, cmd, cpu, arg); +} + +static inline int +HYPERVISOR_physdev_op(int cmd, void *arg) +{ + switch (cmd) { + case PHYSDEVOP_eoi: + return _hypercall1(int, ia64_fast_eoi, + ((struct physdev_eoi *)arg)->irq); + default: + return xencomm_hypercall_physdev_op(cmd, arg); + } +} + +static inline long +xencomm_arch_hypercall_opt_feature(struct xencomm_handle *arg) +{ + return _hypercall1(long, opt_feature, arg); +} + +/* for balloon driver */ +#define HYPERVISOR_update_va_mapping(va, new_val, flags) (0) + +/* Use xencomm to do hypercalls. */ +#define HYPERVISOR_sched_op xencomm_hypercall_sched_op +#define HYPERVISOR_event_channel_op xencomm_hypercall_event_channel_op +#define HYPERVISOR_callback_op xencomm_hypercall_callback_op +#define HYPERVISOR_multicall xencomm_hypercall_multicall +#define HYPERVISOR_xen_version xencomm_hypercall_xen_version +#define HYPERVISOR_console_io xencomm_hypercall_console_io +#define HYPERVISOR_memory_op xencomm_hypercall_memory_op +#define HYPERVISOR_suspend xencomm_hypercall_suspend +#define HYPERVISOR_vcpu_op xencomm_hypercall_vcpu_op +#define HYPERVISOR_opt_feature xencomm_hypercall_opt_feature + +/* to compile gnttab_copy_grant_page() in drivers/xen/core/gnttab.c */ +#define HYPERVISOR_mmu_update(req, count, success_count, domid) ({ BUG(); 0; }) + +static inline int +HYPERVISOR_shutdown( + unsigned int reason) +{ + struct sched_shutdown sched_shutdown = { + .reason = reason + }; + + int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); + + return rc; +} + +/* for netfront.c, netback.c */ +#define MULTI_UVMFLAGS_INDEX 0 /* XXX any value */ + +static inline void +MULTI_update_va_mapping( + struct multicall_entry *mcl, unsigned long va, + pte_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->result = 0; +} + +static inline void +MULTI_grant_table_op(struct multicall_entry *mcl, unsigned int cmd, + void *uop, unsigned int count) +{ + mcl->op = __HYPERVISOR_grant_table_op; + mcl->args[0] = cmd; + mcl->args[1] = (unsigned long)uop; + mcl->args[2] = count; +} + +static inline void +MULTI_mmu_update(struct multicall_entry *mcl, struct mmu_update *req, + int count, int *success_count, domid_t domid) +{ + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)req; + mcl->args[1] = count; + mcl->args[2] = (unsigned long)success_count; + mcl->args[3] = domid; +} + +#endif /* _ASM_IA64_XEN_HYPERCALL_H */ diff --git a/arch/ia64/include/asm/xen/privop.h b/arch/ia64/include/asm/xen/privop.h new file mode 100644 index 000000000000..71ec7546e100 --- /dev/null +++ b/arch/ia64/include/asm/xen/privop.h @@ -0,0 +1,129 @@ +#ifndef _ASM_IA64_XEN_PRIVOP_H +#define _ASM_IA64_XEN_PRIVOP_H + +/* + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * + * Paravirtualizations of privileged operations for Xen/ia64 + * + * + * inline privop and paravirt_alt support + * Copyright (c) 2007 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + */ + +#ifndef __ASSEMBLY__ +#include /* arch-ia64.h requires uint64_t */ +#endif +#include + +/* At 1 MB, before per-cpu space but still addressable using addl instead + of movl. */ +#define XSI_BASE 0xfffffffffff00000 + +/* Address of mapped regs. */ +#define XMAPPEDREGS_BASE (XSI_BASE + XSI_SIZE) + +#ifdef __ASSEMBLY__ +#define XEN_HYPER_RFI break HYPERPRIVOP_RFI +#define XEN_HYPER_RSM_PSR_DT break HYPERPRIVOP_RSM_DT +#define XEN_HYPER_SSM_PSR_DT break HYPERPRIVOP_SSM_DT +#define XEN_HYPER_COVER break HYPERPRIVOP_COVER +#define XEN_HYPER_ITC_D break HYPERPRIVOP_ITC_D +#define XEN_HYPER_ITC_I break HYPERPRIVOP_ITC_I +#define XEN_HYPER_SSM_I break HYPERPRIVOP_SSM_I +#define XEN_HYPER_GET_IVR break HYPERPRIVOP_GET_IVR +#define XEN_HYPER_THASH break HYPERPRIVOP_THASH +#define XEN_HYPER_ITR_D break HYPERPRIVOP_ITR_D +#define XEN_HYPER_SET_KR break HYPERPRIVOP_SET_KR +#define XEN_HYPER_GET_PSR break HYPERPRIVOP_GET_PSR +#define XEN_HYPER_SET_RR0_TO_RR4 break HYPERPRIVOP_SET_RR0_TO_RR4 + +#define XSI_IFS (XSI_BASE + XSI_IFS_OFS) +#define XSI_PRECOVER_IFS (XSI_BASE + XSI_PRECOVER_IFS_OFS) +#define XSI_IFA (XSI_BASE + XSI_IFA_OFS) +#define XSI_ISR (XSI_BASE + XSI_ISR_OFS) +#define XSI_IIM (XSI_BASE + XSI_IIM_OFS) +#define XSI_ITIR (XSI_BASE + XSI_ITIR_OFS) +#define XSI_PSR_I_ADDR (XSI_BASE + XSI_PSR_I_ADDR_OFS) +#define XSI_PSR_IC (XSI_BASE + XSI_PSR_IC_OFS) +#define XSI_IPSR (XSI_BASE + XSI_IPSR_OFS) +#define XSI_IIP (XSI_BASE + XSI_IIP_OFS) +#define XSI_B1NAT (XSI_BASE + XSI_B1NATS_OFS) +#define XSI_BANK1_R16 (XSI_BASE + XSI_BANK1_R16_OFS) +#define XSI_BANKNUM (XSI_BASE + XSI_BANKNUM_OFS) +#define XSI_IHA (XSI_BASE + XSI_IHA_OFS) +#endif + +#ifndef __ASSEMBLY__ + +/************************************************/ +/* Instructions paravirtualized for correctness */ +/************************************************/ + +/* "fc" and "thash" are privilege-sensitive instructions, meaning they + * may have different semantics depending on whether they are executed + * at PL0 vs PL!=0. When paravirtualized, these instructions mustn't + * be allowed to execute directly, lest incorrect semantics result. */ +extern void xen_fc(unsigned long addr); +extern unsigned long xen_thash(unsigned long addr); + +/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag" + * is not currently used (though it may be in a long-format VHPT system!) + * and the semantics of cover only change if psr.ic is off which is very + * rare (and currently non-existent outside of assembly code */ + +/* There are also privilege-sensitive registers. These registers are + * readable at any privilege level but only writable at PL0. */ +extern unsigned long xen_get_cpuid(int index); +extern unsigned long xen_get_pmd(int index); + +extern unsigned long xen_get_eflag(void); /* see xen_ia64_getreg */ +extern void xen_set_eflag(unsigned long); /* see xen_ia64_setreg */ + +/************************************************/ +/* Instructions paravirtualized for performance */ +/************************************************/ + +/* Xen uses memory-mapped virtual privileged registers for access to many + * performance-sensitive privileged registers. Some, like the processor + * status register (psr), are broken up into multiple memory locations. + * Others, like "pend", are abstractions based on privileged registers. + * "Pend" is guaranteed to be set if reading cr.ivr would return a + * (non-spurious) interrupt. */ +#define XEN_MAPPEDREGS ((struct mapped_regs *)XMAPPEDREGS_BASE) + +#define XSI_PSR_I \ + (*XEN_MAPPEDREGS->interrupt_mask_addr) +#define xen_get_virtual_psr_i() \ + (!XSI_PSR_I) +#define xen_set_virtual_psr_i(_val) \ + ({ XSI_PSR_I = (uint8_t)(_val) ? 0 : 1; }) +#define xen_set_virtual_psr_ic(_val) \ + ({ XEN_MAPPEDREGS->interrupt_collection_enabled = _val ? 1 : 0; }) +#define xen_get_virtual_pend() \ + (*(((uint8_t *)XEN_MAPPEDREGS->interrupt_mask_addr) - 1)) + +/* Although all privileged operations can be left to trap and will + * be properly handled by Xen, some are frequent enough that we use + * hyperprivops for performance. */ +extern unsigned long xen_get_psr(void); +extern unsigned long xen_get_ivr(void); +extern unsigned long xen_get_tpr(void); +extern void xen_hyper_ssm_i(void); +extern void xen_set_itm(unsigned long); +extern void xen_set_tpr(unsigned long); +extern void xen_eoi(unsigned long); +extern unsigned long xen_get_rr(unsigned long index); +extern void xen_set_rr(unsigned long index, unsigned long val); +extern void xen_set_rr0_to_rr4(unsigned long val0, unsigned long val1, + unsigned long val2, unsigned long val3, + unsigned long val4); +extern void xen_set_kr(unsigned long index, unsigned long val); +extern void xen_ptcga(unsigned long addr, unsigned long size); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_XEN_PRIVOP_H */ diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile new file mode 100644 index 000000000000..c200704e2333 --- /dev/null +++ b/arch/ia64/xen/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Xen components +# + +obj-y := hypercall.o diff --git a/arch/ia64/xen/hypercall.S b/arch/ia64/xen/hypercall.S new file mode 100644 index 000000000000..d4ff0b9e79f1 --- /dev/null +++ b/arch/ia64/xen/hypercall.S @@ -0,0 +1,91 @@ +/* + * Support routines for Xen hypercalls + * + * Copyright (C) 2005 Dan Magenheimer + * Copyright (C) 2008 Yaozu (Eddie) Dong + */ + +#include +#include +#include + +/* + * Hypercalls without parameter. + */ +#define __HCALL0(name,hcall) \ + GLOBAL_ENTRY(name); \ + break hcall; \ + br.ret.sptk.many rp; \ + END(name) + +/* + * Hypercalls with 1 parameter. + */ +#define __HCALL1(name,hcall) \ + GLOBAL_ENTRY(name); \ + mov r8=r32; \ + break hcall; \ + br.ret.sptk.many rp; \ + END(name) + +/* + * Hypercalls with 2 parameters. + */ +#define __HCALL2(name,hcall) \ + GLOBAL_ENTRY(name); \ + mov r8=r32; \ + mov r9=r33; \ + break hcall; \ + br.ret.sptk.many rp; \ + END(name) + +__HCALL0(xen_get_psr, HYPERPRIVOP_GET_PSR) +__HCALL0(xen_get_ivr, HYPERPRIVOP_GET_IVR) +__HCALL0(xen_get_tpr, HYPERPRIVOP_GET_TPR) +__HCALL0(xen_hyper_ssm_i, HYPERPRIVOP_SSM_I) + +__HCALL1(xen_set_tpr, HYPERPRIVOP_SET_TPR) +__HCALL1(xen_eoi, HYPERPRIVOP_EOI) +__HCALL1(xen_thash, HYPERPRIVOP_THASH) +__HCALL1(xen_set_itm, HYPERPRIVOP_SET_ITM) +__HCALL1(xen_get_rr, HYPERPRIVOP_GET_RR) +__HCALL1(xen_fc, HYPERPRIVOP_FC) +__HCALL1(xen_get_cpuid, HYPERPRIVOP_GET_CPUID) +__HCALL1(xen_get_pmd, HYPERPRIVOP_GET_PMD) + +__HCALL2(xen_ptcga, HYPERPRIVOP_PTC_GA) +__HCALL2(xen_set_rr, HYPERPRIVOP_SET_RR) +__HCALL2(xen_set_kr, HYPERPRIVOP_SET_KR) + +#ifdef CONFIG_IA32_SUPPORT +__HCALL1(xen_get_eflag, HYPERPRIVOP_GET_EFLAG) +__HCALL1(xen_set_eflag, HYPERPRIVOP_SET_EFLAG) // refer SDM vol1 3.1.8 +#endif /* CONFIG_IA32_SUPPORT */ + +GLOBAL_ENTRY(xen_set_rr0_to_rr4) + mov r8=r32 + mov r9=r33 + mov r10=r34 + mov r11=r35 + mov r14=r36 + XEN_HYPER_SET_RR0_TO_RR4 + br.ret.sptk.many rp + ;; +END(xen_set_rr0_to_rr4) + +GLOBAL_ENTRY(xen_send_ipi) + mov r14=r32 + mov r15=r33 + mov r2=0x400 + break 0x1000 + ;; + br.ret.sptk.many rp + ;; +END(xen_send_ipi) + +GLOBAL_ENTRY(__hypercall) + mov r2=r37 + break 0x1000 + br.ret.sptk.many b0 + ;; +END(__hypercall) From 11d437789d0f35fa2e2ebcb4a983b29587bdfdc5 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:53 +0900 Subject: [PATCH 17/41] ia64/xen: implement the arch specific part of xencomm. On ia64/xen, pointer argument for the hypercall is passed by pseudo physical address (guest physical address.) So it is necessary to convert virtual address into pseudo physical address right before issuing hypercall. The frame work is called xencomm. This patch implements arch specific part. Signed-off-by: Alex Williamson Signed-off-by: Isaku Yamahata Cc: Akio Takebe Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/xencomm.h | 41 +++++++++++++ arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/xencomm.c | 94 +++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/include/asm/xen/xencomm.h create mode 100644 arch/ia64/xen/xencomm.c diff --git a/arch/ia64/include/asm/xen/xencomm.h b/arch/ia64/include/asm/xen/xencomm.h new file mode 100644 index 000000000000..28732cd931cc --- /dev/null +++ b/arch/ia64/include/asm/xen/xencomm.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2006 Hollis Blanchard , IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_IA64_XEN_XENCOMM_H +#define _ASM_IA64_XEN_XENCOMM_H + +#include +#include + +/* Must be called before any hypercall. */ +extern void xencomm_initialize(void); + +/* Check if virtual contiguity means physical contiguity + * where the passed address is a pointer value in virtual address. + * On ia64, identity mapping area in region 7 or the piece of region 5 + * that is mapped by itr[IA64_TR_KERNEL]/dtr[IA64_TR_KERNEL] + */ +static inline int xencomm_is_phys_contiguous(unsigned long addr) +{ + return (PAGE_OFFSET <= addr && + addr < (PAGE_OFFSET + (1UL << IA64_MAX_PHYS_BITS))) || + (KERNEL_START <= addr && + addr < KERNEL_START + KERNEL_TR_PAGE_SIZE); +} + +#endif /* _ASM_IA64_XEN_XENCOMM_H */ diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index c200704e2333..ad0c9f7ddffa 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -2,4 +2,4 @@ # Makefile for Xen components # -obj-y := hypercall.o +obj-y := hypercall.o xencomm.o diff --git a/arch/ia64/xen/xencomm.c b/arch/ia64/xen/xencomm.c new file mode 100644 index 000000000000..3dc307f0a40d --- /dev/null +++ b/arch/ia64/xen/xencomm.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2006 Hollis Blanchard , IBM Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +static unsigned long kernel_virtual_offset; + +void +xencomm_initialize(void) +{ + kernel_virtual_offset = KERNEL_START - ia64_tpa(KERNEL_START); +} + +/* Translate virtual address to physical address. */ +unsigned long +xencomm_vtop(unsigned long vaddr) +{ + struct page *page; + struct vm_area_struct *vma; + + if (vaddr == 0) + return 0UL; + + if (REGION_NUMBER(vaddr) == 5) { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep; + + /* On ia64, TASK_SIZE refers to current. It is not initialized + during boot. + Furthermore the kernel is relocatable and __pa() doesn't + work on addresses. */ + if (vaddr >= KERNEL_START + && vaddr < (KERNEL_START + KERNEL_TR_PAGE_SIZE)) + return vaddr - kernel_virtual_offset; + + /* In kernel area -- virtually mapped. */ + pgd = pgd_offset_k(vaddr); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return ~0UL; + + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud) || pud_bad(*pud)) + return ~0UL; + + pmd = pmd_offset(pud, vaddr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return ~0UL; + + ptep = pte_offset_kernel(pmd, vaddr); + if (!ptep) + return ~0UL; + + return (pte_val(*ptep) & _PFN_MASK) | (vaddr & ~PAGE_MASK); + } + + if (vaddr > TASK_SIZE) { + /* percpu variables */ + if (REGION_NUMBER(vaddr) == 7 && + REGION_OFFSET(vaddr) >= (1ULL << IA64_MAX_PHYS_BITS)) + ia64_tpa(vaddr); + + /* kernel address */ + return __pa(vaddr); + } + + /* XXX double-check (lack of) locking */ + vma = find_extend_vma(current->mm, vaddr); + if (!vma) + return ~0UL; + + /* We assume the page is modified. */ + page = follow_page(vma, vaddr, FOLL_WRITE | FOLL_TOUCH); + if (!page) + return ~0UL; + + return (page_to_pfn(page) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK); +} From f021c8b334cc00739b5d43b5be5f97a34b1ad16a Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:54 +0900 Subject: [PATCH 18/41] ia64/xen: xencomm conversion functions for hypercalls On ia64/xen, pointer arguments for hypercall is passed by pseudo physical address(guest physical address.) So such hypercalls needs address conversion functions. This patch implements concrete conversion functions for such hypercalls. Signed-off-by: Akio Takebe Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/xcom_hcall.h | 51 +++ arch/ia64/include/asm/xen/xencomm.h | 1 + arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/xcom_hcall.c | 441 +++++++++++++++++++++++++ arch/ia64/xen/xencomm.c | 11 + 5 files changed, 505 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/include/asm/xen/xcom_hcall.h create mode 100644 arch/ia64/xen/xcom_hcall.c diff --git a/arch/ia64/include/asm/xen/xcom_hcall.h b/arch/ia64/include/asm/xen/xcom_hcall.h new file mode 100644 index 000000000000..20b2950c71b6 --- /dev/null +++ b/arch/ia64/include/asm/xen/xcom_hcall.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2006 Tristan Gingold , Bull SAS + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _ASM_IA64_XEN_XCOM_HCALL_H +#define _ASM_IA64_XEN_XCOM_HCALL_H + +/* These function creates inline or mini descriptor for the parameters and + calls the corresponding xencomm_arch_hypercall_X. + Architectures should defines HYPERVISOR_xxx as xencomm_hypercall_xxx unless + they want to use their own wrapper. */ +extern int xencomm_hypercall_console_io(int cmd, int count, char *str); + +extern int xencomm_hypercall_event_channel_op(int cmd, void *op); + +extern int xencomm_hypercall_xen_version(int cmd, void *arg); + +extern int xencomm_hypercall_physdev_op(int cmd, void *op); + +extern int xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count); + +extern int xencomm_hypercall_sched_op(int cmd, void *arg); + +extern int xencomm_hypercall_multicall(void *call_list, int nr_calls); + +extern int xencomm_hypercall_callback_op(int cmd, void *arg); + +extern int xencomm_hypercall_memory_op(unsigned int cmd, void *arg); + +extern int xencomm_hypercall_suspend(unsigned long srec); + +extern long xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg); + +extern long xencomm_hypercall_opt_feature(void *arg); + +#endif /* _ASM_IA64_XEN_XCOM_HCALL_H */ diff --git a/arch/ia64/include/asm/xen/xencomm.h b/arch/ia64/include/asm/xen/xencomm.h index 28732cd931cc..cded677bebf2 100644 --- a/arch/ia64/include/asm/xen/xencomm.h +++ b/arch/ia64/include/asm/xen/xencomm.h @@ -24,6 +24,7 @@ /* Must be called before any hypercall. */ extern void xencomm_initialize(void); +extern int xencomm_is_initialized(void); /* Check if virtual contiguity means physical contiguity * where the passed address is a pointer value in virtual address. diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index ad0c9f7ddffa..ae0882233d59 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -2,4 +2,4 @@ # Makefile for Xen components # -obj-y := hypercall.o xencomm.o +obj-y := hypercall.o xencomm.o xcom_hcall.o diff --git a/arch/ia64/xen/xcom_hcall.c b/arch/ia64/xen/xcom_hcall.c new file mode 100644 index 000000000000..ccaf7431f7c8 --- /dev/null +++ b/arch/ia64/xen/xcom_hcall.c @@ -0,0 +1,441 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Tristan Gingold + * + * Copyright (c) 2007 + * Isaku Yamahata + * VA Linux Systems Japan K.K. + * consolidate mini and inline version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Xencomm notes: + * This file defines hypercalls to be used by xencomm. The hypercalls simply + * create inlines or mini descriptors for pointers and then call the raw arch + * hypercall xencomm_arch_hypercall_XXX + * + * If the arch wants to directly use these hypercalls, simply define macros + * in asm/xen/hypercall.h, eg: + * #define HYPERVISOR_sched_op xencomm_hypercall_sched_op + * + * The arch may also define HYPERVISOR_xxx as a function and do more operations + * before/after doing the hypercall. + * + * Note: because only inline or mini descriptors are created these functions + * must only be called with in kernel memory parameters. + */ + +int +xencomm_hypercall_console_io(int cmd, int count, char *str) +{ + /* xen early printk uses console io hypercall before + * xencomm initialization. In that case, we just ignore it. + */ + if (!xencomm_is_initialized()) + return 0; + + return xencomm_arch_hypercall_console_io + (cmd, count, xencomm_map_no_alloc(str, count)); +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_console_io); + +int +xencomm_hypercall_event_channel_op(int cmd, void *op) +{ + struct xencomm_handle *desc; + desc = xencomm_map_no_alloc(op, sizeof(struct evtchn_op)); + if (desc == NULL) + return -EINVAL; + + return xencomm_arch_hypercall_event_channel_op(cmd, desc); +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_event_channel_op); + +int +xencomm_hypercall_xen_version(int cmd, void *arg) +{ + struct xencomm_handle *desc; + unsigned int argsize; + + switch (cmd) { + case XENVER_version: + /* do not actually pass an argument */ + return xencomm_arch_hypercall_xen_version(cmd, 0); + case XENVER_extraversion: + argsize = sizeof(struct xen_extraversion); + break; + case XENVER_compile_info: + argsize = sizeof(struct xen_compile_info); + break; + case XENVER_capabilities: + argsize = sizeof(struct xen_capabilities_info); + break; + case XENVER_changeset: + argsize = sizeof(struct xen_changeset_info); + break; + case XENVER_platform_parameters: + argsize = sizeof(struct xen_platform_parameters); + break; + case XENVER_get_features: + argsize = (arg == NULL) ? 0 : sizeof(struct xen_feature_info); + break; + + default: + printk(KERN_DEBUG + "%s: unknown version op %d\n", __func__, cmd); + return -ENOSYS; + } + + desc = xencomm_map_no_alloc(arg, argsize); + if (desc == NULL) + return -EINVAL; + + return xencomm_arch_hypercall_xen_version(cmd, desc); +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_xen_version); + +int +xencomm_hypercall_physdev_op(int cmd, void *op) +{ + unsigned int argsize; + + switch (cmd) { + case PHYSDEVOP_apic_read: + case PHYSDEVOP_apic_write: + argsize = sizeof(struct physdev_apic); + break; + case PHYSDEVOP_alloc_irq_vector: + case PHYSDEVOP_free_irq_vector: + argsize = sizeof(struct physdev_irq); + break; + case PHYSDEVOP_irq_status_query: + argsize = sizeof(struct physdev_irq_status_query); + break; + + default: + printk(KERN_DEBUG + "%s: unknown physdev op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_physdev_op + (cmd, xencomm_map_no_alloc(op, argsize)); +} + +static int +xencommize_grant_table_op(struct xencomm_mini **xc_area, + unsigned int cmd, void *op, unsigned int count, + struct xencomm_handle **desc) +{ + struct xencomm_handle *desc1; + unsigned int argsize; + + switch (cmd) { + case GNTTABOP_map_grant_ref: + argsize = sizeof(struct gnttab_map_grant_ref); + break; + case GNTTABOP_unmap_grant_ref: + argsize = sizeof(struct gnttab_unmap_grant_ref); + break; + case GNTTABOP_setup_table: + { + struct gnttab_setup_table *setup = op; + + argsize = sizeof(*setup); + + if (count != 1) + return -EINVAL; + desc1 = __xencomm_map_no_alloc + (xen_guest_handle(setup->frame_list), + setup->nr_frames * + sizeof(*xen_guest_handle(setup->frame_list)), + *xc_area); + if (desc1 == NULL) + return -EINVAL; + (*xc_area)++; + set_xen_guest_handle(setup->frame_list, (void *)desc1); + break; + } + case GNTTABOP_dump_table: + argsize = sizeof(struct gnttab_dump_table); + break; + case GNTTABOP_transfer: + argsize = sizeof(struct gnttab_transfer); + break; + case GNTTABOP_copy: + argsize = sizeof(struct gnttab_copy); + break; + case GNTTABOP_query_size: + argsize = sizeof(struct gnttab_query_size); + break; + default: + printk(KERN_DEBUG "%s: unknown hypercall grant table op %d\n", + __func__, cmd); + BUG(); + } + + *desc = __xencomm_map_no_alloc(op, count * argsize, *xc_area); + if (*desc == NULL) + return -EINVAL; + (*xc_area)++; + + return 0; +} + +int +xencomm_hypercall_grant_table_op(unsigned int cmd, void *op, + unsigned int count) +{ + int rc; + struct xencomm_handle *desc; + XENCOMM_MINI_ALIGNED(xc_area, 2); + + rc = xencommize_grant_table_op(&xc_area, cmd, op, count, &desc); + if (rc) + return rc; + + return xencomm_arch_hypercall_grant_table_op(cmd, desc, count); +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_grant_table_op); + +int +xencomm_hypercall_sched_op(int cmd, void *arg) +{ + struct xencomm_handle *desc; + unsigned int argsize; + + switch (cmd) { + case SCHEDOP_yield: + case SCHEDOP_block: + argsize = 0; + break; + case SCHEDOP_shutdown: + argsize = sizeof(struct sched_shutdown); + break; + case SCHEDOP_poll: + { + struct sched_poll *poll = arg; + struct xencomm_handle *ports; + + argsize = sizeof(struct sched_poll); + ports = xencomm_map_no_alloc(xen_guest_handle(poll->ports), + sizeof(*xen_guest_handle(poll->ports))); + + set_xen_guest_handle(poll->ports, (void *)ports); + break; + } + default: + printk(KERN_DEBUG "%s: unknown sched op %d\n", __func__, cmd); + return -ENOSYS; + } + + desc = xencomm_map_no_alloc(arg, argsize); + if (desc == NULL) + return -EINVAL; + + return xencomm_arch_hypercall_sched_op(cmd, desc); +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_sched_op); + +int +xencomm_hypercall_multicall(void *call_list, int nr_calls) +{ + int rc; + int i; + struct multicall_entry *mce; + struct xencomm_handle *desc; + XENCOMM_MINI_ALIGNED(xc_area, nr_calls * 2); + + for (i = 0; i < nr_calls; i++) { + mce = (struct multicall_entry *)call_list + i; + + switch (mce->op) { + case __HYPERVISOR_update_va_mapping: + case __HYPERVISOR_mmu_update: + /* No-op on ia64. */ + break; + case __HYPERVISOR_grant_table_op: + rc = xencommize_grant_table_op + (&xc_area, + mce->args[0], (void *)mce->args[1], + mce->args[2], &desc); + if (rc) + return rc; + mce->args[1] = (unsigned long)desc; + break; + case __HYPERVISOR_memory_op: + default: + printk(KERN_DEBUG + "%s: unhandled multicall op entry op %lu\n", + __func__, mce->op); + return -ENOSYS; + } + } + + desc = xencomm_map_no_alloc(call_list, + nr_calls * sizeof(struct multicall_entry)); + if (desc == NULL) + return -EINVAL; + + return xencomm_arch_hypercall_multicall(desc, nr_calls); +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_multicall); + +int +xencomm_hypercall_callback_op(int cmd, void *arg) +{ + unsigned int argsize; + switch (cmd) { + case CALLBACKOP_register: + argsize = sizeof(struct callback_register); + break; + case CALLBACKOP_unregister: + argsize = sizeof(struct callback_unregister); + break; + default: + printk(KERN_DEBUG + "%s: unknown callback op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_callback_op + (cmd, xencomm_map_no_alloc(arg, argsize)); +} + +static int +xencommize_memory_reservation(struct xencomm_mini *xc_area, + struct xen_memory_reservation *mop) +{ + struct xencomm_handle *desc; + + desc = __xencomm_map_no_alloc(xen_guest_handle(mop->extent_start), + mop->nr_extents * + sizeof(*xen_guest_handle(mop->extent_start)), + xc_area); + if (desc == NULL) + return -EINVAL; + + set_xen_guest_handle(mop->extent_start, (void *)desc); + return 0; +} + +int +xencomm_hypercall_memory_op(unsigned int cmd, void *arg) +{ + GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} }; + struct xen_memory_reservation *xmr = NULL; + int rc; + struct xencomm_handle *desc; + unsigned int argsize; + XENCOMM_MINI_ALIGNED(xc_area, 2); + + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + xmr = (struct xen_memory_reservation *)arg; + set_xen_guest_handle(extent_start_va[0], + xen_guest_handle(xmr->extent_start)); + + argsize = sizeof(*xmr); + rc = xencommize_memory_reservation(xc_area, xmr); + if (rc) + return rc; + xc_area++; + break; + + case XENMEM_maximum_ram_page: + argsize = 0; + break; + + case XENMEM_add_to_physmap: + argsize = sizeof(struct xen_add_to_physmap); + break; + + default: + printk(KERN_DEBUG "%s: unknown memory op %d\n", __func__, cmd); + return -ENOSYS; + } + + desc = xencomm_map_no_alloc(arg, argsize); + if (desc == NULL) + return -EINVAL; + + rc = xencomm_arch_hypercall_memory_op(cmd, desc); + + switch (cmd) { + case XENMEM_increase_reservation: + case XENMEM_decrease_reservation: + case XENMEM_populate_physmap: + set_xen_guest_handle(xmr->extent_start, + xen_guest_handle(extent_start_va[0])); + break; + } + + return rc; +} +EXPORT_SYMBOL_GPL(xencomm_hypercall_memory_op); + +int +xencomm_hypercall_suspend(unsigned long srec) +{ + struct sched_shutdown arg; + + arg.reason = SHUTDOWN_suspend; + + return xencomm_arch_hypercall_sched_op( + SCHEDOP_shutdown, xencomm_map_no_alloc(&arg, sizeof(arg))); +} + +long +xencomm_hypercall_vcpu_op(int cmd, int cpu, void *arg) +{ + unsigned int argsize; + switch (cmd) { + case VCPUOP_register_runstate_memory_area: { + struct vcpu_register_runstate_memory_area *area = + (struct vcpu_register_runstate_memory_area *)arg; + argsize = sizeof(*arg); + set_xen_guest_handle(area->addr.h, + (void *)xencomm_map_no_alloc(area->addr.v, + sizeof(area->addr.v))); + break; + } + + default: + printk(KERN_DEBUG "%s: unknown vcpu op %d\n", __func__, cmd); + return -ENOSYS; + } + + return xencomm_arch_hypercall_vcpu_op(cmd, cpu, + xencomm_map_no_alloc(arg, argsize)); +} + +long +xencomm_hypercall_opt_feature(void *arg) +{ + return xencomm_arch_hypercall_opt_feature( + xencomm_map_no_alloc(arg, + sizeof(struct xen_ia64_opt_feature))); +} diff --git a/arch/ia64/xen/xencomm.c b/arch/ia64/xen/xencomm.c index 3dc307f0a40d..1f5d7ac82e97 100644 --- a/arch/ia64/xen/xencomm.c +++ b/arch/ia64/xen/xencomm.c @@ -19,11 +19,22 @@ #include static unsigned long kernel_virtual_offset; +static int is_xencomm_initialized; + +/* for xen early printk. It uses console io hypercall which uses xencomm. + * However early printk may use it before xencomm initialization. + */ +int +xencomm_is_initialized(void) +{ + return is_xencomm_initialized; +} void xencomm_initialize(void) { kernel_virtual_offset = KERNEL_START - ia64_tpa(KERNEL_START); + is_xencomm_initialized = 1; } /* Translate virtual address to physical address. */ From 7ad863132c05b6a746accf12734720abe34c3535 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:55 +0900 Subject: [PATCH 19/41] ia64/xen: implement arch specific part of xen grant table. Xen implements grant tables which is for sharing pages with guest domains. This patch implements arch specific part of grant table initialization. and xen_alloc_vm_area()/xen_free_vm_area() which are helper functions for xen grant table. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/grant_table.h | 29 +++++ arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/grant-table.c | 155 ++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/include/asm/xen/grant_table.h create mode 100644 arch/ia64/xen/grant-table.c diff --git a/arch/ia64/include/asm/xen/grant_table.h b/arch/ia64/include/asm/xen/grant_table.h new file mode 100644 index 000000000000..2b1fae0e2d11 --- /dev/null +++ b/arch/ia64/include/asm/xen/grant_table.h @@ -0,0 +1,29 @@ +/****************************************************************************** + * arch/ia64/include/asm/xen/grant_table.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _ASM_IA64_XEN_GRANT_TABLE_H +#define _ASM_IA64_XEN_GRANT_TABLE_H + +struct vm_struct *xen_alloc_vm_area(unsigned long size); +void xen_free_vm_area(struct vm_struct *area); + +#endif /* _ASM_IA64_XEN_GRANT_TABLE_H */ diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index ae0882233d59..eb595637a8ca 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -2,4 +2,4 @@ # Makefile for Xen components # -obj-y := hypercall.o xencomm.o xcom_hcall.o +obj-y := hypercall.o xencomm.o xcom_hcall.o grant-table.o diff --git a/arch/ia64/xen/grant-table.c b/arch/ia64/xen/grant-table.c new file mode 100644 index 000000000000..777dd9a9108b --- /dev/null +++ b/arch/ia64/xen/grant-table.c @@ -0,0 +1,155 @@ +/****************************************************************************** + * arch/ia64/xen/grant-table.c + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include + +#include +#include +#include + +#include + +struct vm_struct *xen_alloc_vm_area(unsigned long size) +{ + int order; + unsigned long virt; + unsigned long nr_pages; + struct vm_struct *area; + + order = get_order(size); + virt = __get_free_pages(GFP_KERNEL, order); + if (virt == 0) + goto err0; + nr_pages = 1 << order; + scrub_pages(virt, nr_pages); + + area = kmalloc(sizeof(*area), GFP_KERNEL); + if (area == NULL) + goto err1; + + area->flags = VM_IOREMAP; + area->addr = (void *)virt; + area->size = size; + area->pages = NULL; + area->nr_pages = nr_pages; + area->phys_addr = 0; /* xenbus_map_ring_valloc uses this field! */ + + return area; + +err1: + free_pages(virt, order); +err0: + return NULL; +} +EXPORT_SYMBOL_GPL(xen_alloc_vm_area); + +void xen_free_vm_area(struct vm_struct *area) +{ + unsigned int order = get_order(area->size); + unsigned long i; + unsigned long phys_addr = __pa(area->addr); + + /* This area is used for foreign page mappping. + * So underlying machine page may not be assigned. */ + for (i = 0; i < (1 << order); i++) { + unsigned long ret; + unsigned long gpfn = (phys_addr >> PAGE_SHIFT) + i; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &gpfn); + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, + &reservation); + BUG_ON(ret != 1); + } + free_pages((unsigned long)area->addr, order); + kfree(area); +} +EXPORT_SYMBOL_GPL(xen_free_vm_area); + + +/**************************************************************************** + * grant table hack + * cmd: GNTTABOP_xxx + */ + +int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes, + unsigned long max_nr_gframes, + struct grant_entry **__shared) +{ + *__shared = __va(frames[0] << PAGE_SHIFT); + return 0; +} + +void arch_gnttab_unmap_shared(struct grant_entry *shared, + unsigned long nr_gframes) +{ + /* nothing */ +} + +static void +gnttab_map_grant_ref_pre(struct gnttab_map_grant_ref *uop) +{ + uint32_t flags; + + flags = uop->flags; + + if (flags & GNTMAP_host_map) { + if (flags & GNTMAP_application_map) { + printk(KERN_DEBUG + "GNTMAP_application_map is not supported yet: " + "flags 0x%x\n", flags); + BUG(); + } + if (flags & GNTMAP_contains_pte) { + printk(KERN_DEBUG + "GNTMAP_contains_pte is not supported yet: " + "flags 0x%x\n", flags); + BUG(); + } + } else if (flags & GNTMAP_device_map) { + printk("GNTMAP_device_map is not supported yet 0x%x\n", flags); + BUG(); /* not yet. actually this flag is not used. */ + } else { + BUG(); + } +} + +int +HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count) +{ + if (cmd == GNTTABOP_map_grant_ref) { + unsigned int i; + for (i = 0; i < count; i++) { + gnttab_map_grant_ref_pre( + (struct gnttab_map_grant_ref *)uop + i); + } + } + return xencomm_hypercall_grant_table_op(cmd, uop, count); +} + +EXPORT_SYMBOL(HYPERVISOR_grant_table_op); From 8353b00f617c9bb432c6bcdf33a4f9c416dc5751 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:56 +0900 Subject: [PATCH 20/41] ia64/xen: add definitions necessary for xen event channel. Xen paravirtualizes interrupt as event channel. This patch defines arch specific part of xen event channel. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/events.h | 50 ++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 arch/ia64/include/asm/xen/events.h diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h new file mode 100644 index 000000000000..73248781fba8 --- /dev/null +++ b/arch/ia64/include/asm/xen/events.h @@ -0,0 +1,50 @@ +/****************************************************************************** + * arch/ia64/include/asm/xen/events.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _ASM_IA64_XEN_EVENTS_H +#define _ASM_IA64_XEN_EVENTS_H + +enum ipi_vector { + XEN_RESCHEDULE_VECTOR, + XEN_IPI_VECTOR, + XEN_CMCP_VECTOR, + XEN_CPEP_VECTOR, + + XEN_NR_IPIS, +}; + +static inline int xen_irqs_disabled(struct pt_regs *regs) +{ + return !(ia64_psr(regs)->i); +} + +static inline void xen_do_IRQ(int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs; + old_regs = set_irq_regs(regs); + irq_enter(); + __do_IRQ(irq); + irq_exit(); + set_irq_regs(old_regs); +} +#define irq_ctx_init(cpu) do { } while (0) + +#endif /* _ASM_IA64_XEN_EVENTS_H */ From c7fdaf338679f5e0343bfdfa7ae1e2fdb41ff0b1 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:57 +0900 Subject: [PATCH 21/41] ia64/xen: introduce helper function to identify domain mode. There are four operating modes Xen code may find itself running in: - native - hvm domain - pv dom0 - pv domU Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/hypervisor.h | 75 ++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 arch/ia64/include/asm/xen/hypervisor.h diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h new file mode 100644 index 000000000000..d1f84e12c0ed --- /dev/null +++ b/arch/ia64/include/asm/xen/hypervisor.h @@ -0,0 +1,75 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _ASM_IA64_XEN_HYPERVISOR_H +#define _ASM_IA64_XEN_HYPERVISOR_H + +#ifdef CONFIG_XEN + +#include +#include +#include /* to compile feature.c */ +#include /* to comiple xen-netfront.c */ +#include + +/* xen_domain_type is set before executing any C code by early_xen_setup */ +enum xen_domain_type { + XEN_NATIVE, + XEN_PV_DOMAIN, + XEN_HVM_DOMAIN, +}; + +extern enum xen_domain_type xen_domain_type; + +#define xen_domain() (xen_domain_type != XEN_NATIVE) +#define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) +#define xen_initial_domain() (xen_pv_domain() && \ + (xen_start_info->flags & SIF_INITDOMAIN)) +#define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) + +/* deprecated. remove this */ +#define is_running_on_xen() (xen_domain_type == XEN_PV_DOMAIN) + +extern struct start_info *xen_start_info; + +#else /* CONFIG_XEN */ + +#define xen_domain() (0) +#define xen_pv_domain() (0) +#define xen_initial_domain() (0) +#define xen_hvm_domain() (0) +#define is_running_on_xen() (0) /* deprecated. remove this */ +#endif + +#define is_initial_xendomain() (0) /* deprecated. remove this */ + +#endif /* _ASM_IA64_XEN_HYPERVISOR_H */ From 080104cd0f708b6bb5a121922801867a29ad63fc Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:58 +0900 Subject: [PATCH 22/41] ia64/pv_ops/xen: elf note based xen startup. This patch enables elf note based xen startup for IA-64, which gives the kernel an early hint for running on xen like x86 case. In order to avoid the multi entry point, presumably extending booting protocol(i.e. extending struct ia64_boot_param) would be necessary. It probably means that elilo also needs modification. Signed-off-by: Qing He Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/asm-offsets.c | 4 ++ arch/ia64/xen/Makefile | 3 +- arch/ia64/xen/xen_pv_ops.c | 65 ++++++++++++++++++++++++++ arch/ia64/xen/xensetup.S | 83 ++++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/xen/xen_pv_ops.c create mode 100644 arch/ia64/xen/xensetup.S diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index eaa988baa877..742dbb1d5a4f 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -17,6 +17,7 @@ #include #include +#include #include "../kernel/sigframe.h" #include "../kernel/fsyscall_gtod_data.h" @@ -292,6 +293,9 @@ void foo(void) #ifdef CONFIG_XEN BLANK(); + DEFINE(XEN_NATIVE_ASM, XEN_NATIVE); + DEFINE(XEN_PV_DOMAIN_ASM, XEN_PV_DOMAIN); + #define DEFINE_MAPPED_REG_OFS(sym, field) \ DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field))) diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index eb595637a8ca..abc356f0c5da 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -2,4 +2,5 @@ # Makefile for Xen components # -obj-y := hypercall.o xencomm.o xcom_hcall.o grant-table.o +obj-y := hypercall.o xensetup.o xen_pv_ops.o \ + xencomm.o xcom_hcall.o grant-table.o diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c new file mode 100644 index 000000000000..77db214b116c --- /dev/null +++ b/arch/ia64/xen/xen_pv_ops.c @@ -0,0 +1,65 @@ +/****************************************************************************** + * arch/ia64/xen/xen_pv_ops.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +/*************************************************************************** + * general info + */ +static struct pv_info xen_info __initdata = { + .kernel_rpl = 2, /* or 1: determin at runtime */ + .paravirt_enabled = 1, + .name = "Xen/ia64", +}; + +#define IA64_RSC_PL_SHIFT 2 +#define IA64_RSC_PL_BIT_SIZE 2 +#define IA64_RSC_PL_MASK \ + (((1UL << IA64_RSC_PL_BIT_SIZE) - 1) << IA64_RSC_PL_SHIFT) + +static void __init +xen_info_init(void) +{ + /* Xenified Linux/ia64 may run on pl = 1 or 2. + * determin at run time. */ + unsigned long rsc = ia64_getreg(_IA64_REG_AR_RSC); + unsigned int rpl = (rsc & IA64_RSC_PL_MASK) >> IA64_RSC_PL_SHIFT; + xen_info.kernel_rpl = rpl; +} + +/*************************************************************************** + * pv_ops initialization + */ + +void __init +xen_setup_pv_ops(void) +{ + xen_info_init(); + pv_info = xen_info; +} diff --git a/arch/ia64/xen/xensetup.S b/arch/ia64/xen/xensetup.S new file mode 100644 index 000000000000..28fed1fcc079 --- /dev/null +++ b/arch/ia64/xen/xensetup.S @@ -0,0 +1,83 @@ +/* + * Support routines for Xen + * + * Copyright (C) 2005 Dan Magenheimer + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + .section .data.read_mostly + .align 8 + .global xen_domain_type +xen_domain_type: + data4 XEN_NATIVE_ASM + .previous + + __INIT +ENTRY(startup_xen) + // Calculate load offset. + // The constant, LOAD_OFFSET, can't be used because the boot + // loader doesn't always load to the LMA specified by the vmlinux.lds. + mov r9=ip // must be the first instruction to make sure + // that r9 = the physical address of startup_xen. + // Usually r9 = startup_xen - LOAD_OFFSET + movl r8=startup_xen + ;; + sub r9=r9,r8 // Usually r9 = -LOAD_OFFSET. + + mov r10=PARAVIRT_HYPERVISOR_TYPE_XEN + movl r11=_start + ;; + add r11=r11,r9 + movl r8=hypervisor_type + ;; + add r8=r8,r9 + mov b0=r11 + ;; + st8 [r8]=r10 + br.cond.sptk.many b0 + ;; +END(startup_xen) + + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, data8.ua startup_xen - LOAD_OFFSET) + +#define isBP p3 // are we the Bootstrap Processor? + + .text + +GLOBAL_ENTRY(xen_setup_hook) + mov r8=XEN_PV_DOMAIN_ASM +(isBP) movl r9=xen_domain_type;; +(isBP) st4 [r9]=r8 + movl r10=xen_ivt;; + + mov cr.iva=r10 + + /* Set xsi base. */ +#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600 +(isBP) mov r2=FW_HYPERCALL_SET_SHARED_INFO_VA +(isBP) movl r28=XSI_BASE;; +(isBP) break 0x1000;; + + /* setup pv_ops */ +(isBP) mov r4=rp + ;; +(isBP) br.call.sptk.many rp=xen_setup_pv_ops + ;; +(isBP) mov rp=r4 + ;; + + br.ret.sptk.many rp + ;; +END(xen_setup_hook) From b5a26e4db818d4647a019d5c462f6778b8213112 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:59 +0900 Subject: [PATCH 23/41] ia64/pv_ops/xen: define xen pv_init_ops for various xen initialization. This patch implements xen version of pv_init_ops to do various xen initialization. This patch also includes ia64 counter part of x86 xen early printk support patches. Signed-off-by: Akio Takebe Signed-off-by: Alex Williamson Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/hypervisor.h | 14 ++++ arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/hypervisor.c | 96 +++++++++++++++++++++ arch/ia64/xen/xen_pv_ops.c | 110 +++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/xen/hypervisor.c diff --git a/arch/ia64/include/asm/xen/hypervisor.h b/arch/ia64/include/asm/xen/hypervisor.h index d1f84e12c0ed..7a804e80fc67 100644 --- a/arch/ia64/include/asm/xen/hypervisor.h +++ b/arch/ia64/include/asm/xen/hypervisor.h @@ -59,8 +59,22 @@ extern enum xen_domain_type xen_domain_type; /* deprecated. remove this */ #define is_running_on_xen() (xen_domain_type == XEN_PV_DOMAIN) +extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; +void __init xen_setup_vcpu_info_placement(void); +void force_evtchn_callback(void); + +/* for drivers/xen/balloon/balloon.c */ +#ifdef CONFIG_XEN_SCRUB_PAGES +#define scrub_pages(_p, _n) memset((void *)(_p), 0, (_n) << PAGE_SHIFT) +#else +#define scrub_pages(_p, _n) ((void)0) +#endif + +/* For setup_arch() in arch/ia64/kernel/setup.c */ +void xen_ia64_enable_opt_feature(void); + #else /* CONFIG_XEN */ #define xen_domain() (0) diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index abc356f0c5da..7cb4247f90be 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -3,4 +3,4 @@ # obj-y := hypercall.o xensetup.o xen_pv_ops.o \ - xencomm.o xcom_hcall.o grant-table.o + hypervisor.o xencomm.o xcom_hcall.o grant-table.o diff --git a/arch/ia64/xen/hypervisor.c b/arch/ia64/xen/hypervisor.c new file mode 100644 index 000000000000..cac4d97c0b5a --- /dev/null +++ b/arch/ia64/xen/hypervisor.c @@ -0,0 +1,96 @@ +/****************************************************************************** + * arch/ia64/xen/hypervisor.c + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include + +#include "irq_xen.h" + +struct shared_info *HYPERVISOR_shared_info __read_mostly = + (struct shared_info *)XSI_BASE; +EXPORT_SYMBOL(HYPERVISOR_shared_info); + +DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); + +struct start_info *xen_start_info; +EXPORT_SYMBOL(xen_start_info); + +EXPORT_SYMBOL(xen_domain_type); + +EXPORT_SYMBOL(__hypercall); + +/* Stolen from arch/x86/xen/enlighten.c */ +/* + * Flag to determine whether vcpu info placement is available on all + * VCPUs. We assume it is to start with, and then set it to zero on + * the first failure. This is because it can succeed on some VCPUs + * and not others, since it can involve hypervisor memory allocation, + * or because the guest failed to guarantee all the appropriate + * constraints on all VCPUs (ie buffer can't cross a page boundary). + * + * Note that any particular CPU may be using a placed vcpu structure, + * but we can only optimise if the all are. + * + * 0: not available, 1: available + */ + +static void __init xen_vcpu_setup(int cpu) +{ + /* + * WARNING: + * before changing MAX_VIRT_CPUS, + * check that shared_info fits on a page + */ + BUILD_BUG_ON(sizeof(struct shared_info) > PAGE_SIZE); + per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; +} + +void __init xen_setup_vcpu_info_placement(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + xen_vcpu_setup(cpu); +} + +void __cpuinit +xen_cpu_init(void) +{ + xen_smp_intr_init(); +} + +/************************************************************************** + * opt feature + */ +void +xen_ia64_enable_opt_feature(void) +{ + /* Enable region 7 identity map optimizations in Xen */ + struct xen_ia64_opt_feature optf; + + optf.cmd = XEN_IA64_OPTF_IDENT_MAP_REG7; + optf.on = XEN_IA64_OPTF_ON; + optf.pgprot = pgprot_val(PAGE_KERNEL); + optf.key = 0; /* No key on linux. */ + HYPERVISOR_opt_feature(&optf); +} diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index 77db214b116c..fc9d599b62ae 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -53,6 +53,115 @@ xen_info_init(void) xen_info.kernel_rpl = rpl; } +/*************************************************************************** + * pv_init_ops + * initialization hooks. + */ + +static void +xen_panic_hypercall(struct unw_frame_info *info, void *arg) +{ + current->thread.ksp = (__u64)info->sw - 16; + HYPERVISOR_shutdown(SHUTDOWN_crash); + /* we're never actually going to get here... */ +} + +static int +xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + unw_init_running(xen_panic_hypercall, NULL); + /* we're never actually going to get here... */ + return NOTIFY_DONE; +} + +static struct notifier_block xen_panic_block = { + xen_panic_event, NULL, 0 /* try to go last */ +}; + +static void xen_pm_power_off(void) +{ + local_irq_disable(); + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +} + +static void __init +xen_banner(void) +{ + printk(KERN_INFO + "Running on Xen! pl = %d start_info_pfn=0x%lx nr_pages=%ld " + "flags=0x%x\n", + xen_info.kernel_rpl, + HYPERVISOR_shared_info->arch.start_info_pfn, + xen_start_info->nr_pages, xen_start_info->flags); +} + +static int __init +xen_reserve_memory(struct rsvd_region *region) +{ + region->start = (unsigned long)__va( + (HYPERVISOR_shared_info->arch.start_info_pfn << PAGE_SHIFT)); + region->end = region->start + PAGE_SIZE; + return 1; +} + +static void __init +xen_arch_setup_early(void) +{ + struct shared_info *s; + BUG_ON(!xen_pv_domain()); + + s = HYPERVISOR_shared_info; + xen_start_info = __va(s->arch.start_info_pfn << PAGE_SHIFT); + + /* Must be done before any hypercall. */ + xencomm_initialize(); + + xen_setup_features(); + /* Register a call for panic conditions. */ + atomic_notifier_chain_register(&panic_notifier_list, + &xen_panic_block); + pm_power_off = xen_pm_power_off; + + xen_ia64_enable_opt_feature(); +} + +static void __init +xen_arch_setup_console(char **cmdline_p) +{ + add_preferred_console("xenboot", 0, NULL); + add_preferred_console("tty", 0, NULL); + /* use hvc_xen */ + add_preferred_console("hvc", 0, NULL); + +#if !defined(CONFIG_VT) || !defined(CONFIG_DUMMY_CONSOLE) + conswitchp = NULL; +#endif +} + +static int __init +xen_arch_setup_nomca(void) +{ + return 1; +} + +static void __init +xen_post_smp_prepare_boot_cpu(void) +{ + xen_setup_vcpu_info_placement(); +} + +static const struct pv_init_ops xen_init_ops __initdata = { + .banner = xen_banner, + + .reserve_memory = xen_reserve_memory, + + .arch_setup_early = xen_arch_setup_early, + .arch_setup_console = xen_arch_setup_console, + .arch_setup_nomca = xen_arch_setup_nomca, + + .post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu, +}; + /*************************************************************************** * pv_ops initialization */ @@ -62,4 +171,5 @@ xen_setup_pv_ops(void) { xen_info_init(); pv_info = xen_info; + pv_init_ops = xen_init_ops; } From 4b83ce4367943aa3eb960df56759f45d70722a4c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:00 +0900 Subject: [PATCH 24/41] ia64/pv_ops/xen: define xen pv_cpu_ops. define xen pv_cpu_ops which implementes xen paravirtualized privileged instructions. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/xen/xen_pv_ops.c | 114 +++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index fc9d599b62ae..c236f04ffad5 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -162,6 +162,119 @@ static const struct pv_init_ops xen_init_ops __initdata = { .post_smp_prepare_boot_cpu = xen_post_smp_prepare_boot_cpu, }; +/*************************************************************************** + * pv_cpu_ops + * intrinsics hooks. + */ + +static void xen_setreg(int regnum, unsigned long val) +{ + switch (regnum) { + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_KR7: + xen_set_kr(regnum - _IA64_REG_AR_KR0, val); + break; +#ifdef CONFIG_IA32_SUPPORT + case _IA64_REG_AR_EFLAG: + xen_set_eflag(val); + break; +#endif + case _IA64_REG_CR_TPR: + xen_set_tpr(val); + break; + case _IA64_REG_CR_ITM: + xen_set_itm(val); + break; + case _IA64_REG_CR_EOI: + xen_eoi(val); + break; + default: + ia64_native_setreg_func(regnum, val); + break; + } +} + +static unsigned long xen_getreg(int regnum) +{ + unsigned long res; + + switch (regnum) { + case _IA64_REG_PSR: + res = xen_get_psr(); + break; +#ifdef CONFIG_IA32_SUPPORT + case _IA64_REG_AR_EFLAG: + res = xen_get_eflag(); + break; +#endif + case _IA64_REG_CR_IVR: + res = xen_get_ivr(); + break; + case _IA64_REG_CR_TPR: + res = xen_get_tpr(); + break; + default: + res = ia64_native_getreg_func(regnum); + break; + } + return res; +} + +/* turning on interrupts is a bit more complicated.. write to the + * memory-mapped virtual psr.i bit first (to avoid race condition), + * then if any interrupts were pending, we have to execute a hyperprivop + * to ensure the pending interrupt gets delivered; else we're done! */ +static void +xen_ssm_i(void) +{ + int old = xen_get_virtual_psr_i(); + xen_set_virtual_psr_i(1); + barrier(); + if (!old && xen_get_virtual_pend()) + xen_hyper_ssm_i(); +} + +/* turning off interrupts can be paravirtualized simply by writing + * to a memory-mapped virtual psr.i bit (implemented as a 16-bit bool) */ +static void +xen_rsm_i(void) +{ + xen_set_virtual_psr_i(0); + barrier(); +} + +static unsigned long +xen_get_psr_i(void) +{ + return xen_get_virtual_psr_i() ? IA64_PSR_I : 0; +} + +static void +xen_intrin_local_irq_restore(unsigned long mask) +{ + if (mask & IA64_PSR_I) + xen_ssm_i(); + else + xen_rsm_i(); +} + +static const struct pv_cpu_ops xen_cpu_ops __initdata = { + .fc = xen_fc, + .thash = xen_thash, + .get_cpuid = xen_get_cpuid, + .get_pmd = xen_get_pmd, + .getreg = xen_getreg, + .setreg = xen_setreg, + .ptcga = xen_ptcga, + .get_rr = xen_get_rr, + .set_rr = xen_set_rr, + .set_rr0_to_rr4 = xen_set_rr0_to_rr4, + .ssm_i = xen_ssm_i, + .rsm_i = xen_rsm_i, + .get_psr_i = xen_get_psr_i, + .intrin_local_irq_restore + = xen_intrin_local_irq_restore, +}; + /*************************************************************************** * pv_ops initialization */ @@ -172,4 +285,5 @@ xen_setup_pv_ops(void) xen_info_init(); pv_info = xen_info; pv_init_ops = xen_init_ops; + pv_cpu_ops = xen_cpu_ops; } From d65b503edd7361097974f909c05f26699aff4057 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:01 +0900 Subject: [PATCH 25/41] ia64/pv_ops/xen: define xen paravirtualized instructions for hand written assembly code define xen paravirtualized instructions for hand written assembly code. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Cc: Akio Takebe Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/inst.h | 447 +++++++++++++++++++++++++++++++ 1 file changed, 447 insertions(+) create mode 100644 arch/ia64/include/asm/xen/inst.h diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h new file mode 100644 index 000000000000..03895e985509 --- /dev/null +++ b/arch/ia64/include/asm/xen/inst.h @@ -0,0 +1,447 @@ +/****************************************************************************** + * arch/ia64/include/asm/xen/inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +#define MOV_FROM_IFA(reg) \ + movl reg = XSI_IFA; \ + ;; \ + ld8 reg = [reg] + +#define MOV_FROM_ITIR(reg) \ + movl reg = XSI_ITIR; \ + ;; \ + ld8 reg = [reg] + +#define MOV_FROM_ISR(reg) \ + movl reg = XSI_ISR; \ + ;; \ + ld8 reg = [reg] + +#define MOV_FROM_IHA(reg) \ + movl reg = XSI_IHA; \ + ;; \ + ld8 reg = [reg] + +#define MOV_FROM_IPSR(pred, reg) \ +(pred) movl reg = XSI_IPSR; \ + ;; \ +(pred) ld8 reg = [reg] + +#define MOV_FROM_IIM(reg) \ + movl reg = XSI_IIM; \ + ;; \ + ld8 reg = [reg] + +#define MOV_FROM_IIP(reg) \ + movl reg = XSI_IIP; \ + ;; \ + ld8 reg = [reg] + +.macro __MOV_FROM_IVR reg, clob + .ifc "\reg", "r8" + XEN_HYPER_GET_IVR + .exitm + .endif + .ifc "\clob", "r8" + XEN_HYPER_GET_IVR + ;; + mov \reg = r8 + .exitm + .endif + + mov \clob = r8 + ;; + XEN_HYPER_GET_IVR + ;; + mov \reg = r8 + ;; + mov r8 = \clob +.endm +#define MOV_FROM_IVR(reg, clob) __MOV_FROM_IVR reg, clob + +.macro __MOV_FROM_PSR pred, reg, clob + .ifc "\reg", "r8" + (\pred) XEN_HYPER_GET_PSR; + .exitm + .endif + .ifc "\clob", "r8" + (\pred) XEN_HYPER_GET_PSR + ;; + (\pred) mov \reg = r8 + .exitm + .endif + + (\pred) mov \clob = r8 + (\pred) XEN_HYPER_GET_PSR + ;; + (\pred) mov \reg = r8 + (\pred) mov r8 = \clob +.endm +#define MOV_FROM_PSR(pred, reg, clob) __MOV_FROM_PSR pred, reg, clob + + +#define MOV_TO_IFA(reg, clob) \ + movl clob = XSI_IFA; \ + ;; \ + st8 [clob] = reg \ + +#define MOV_TO_ITIR(pred, reg, clob) \ +(pred) movl clob = XSI_ITIR; \ + ;; \ +(pred) st8 [clob] = reg + +#define MOV_TO_IHA(pred, reg, clob) \ +(pred) movl clob = XSI_IHA; \ + ;; \ +(pred) st8 [clob] = reg + +#define MOV_TO_IPSR(pred, reg, clob) \ +(pred) movl clob = XSI_IPSR; \ + ;; \ +(pred) st8 [clob] = reg; \ + ;; + +#define MOV_TO_IFS(pred, reg, clob) \ +(pred) movl clob = XSI_IFS; \ + ;; \ +(pred) st8 [clob] = reg; \ + ;; + +#define MOV_TO_IIP(reg, clob) \ + movl clob = XSI_IIP; \ + ;; \ + st8 [clob] = reg + +.macro ____MOV_TO_KR kr, reg, clob0, clob1 + .ifc "\clob0", "r9" + .error "clob0 \clob0 must not be r9" + .endif + .ifc "\clob1", "r8" + .error "clob1 \clob1 must not be r8" + .endif + + .ifnc "\reg", "r9" + .ifnc "\clob1", "r9" + mov \clob1 = r9 + .endif + mov r9 = \reg + .endif + .ifnc "\clob0", "r8" + mov \clob0 = r8 + .endif + mov r8 = \kr + ;; + XEN_HYPER_SET_KR + + .ifnc "\reg", "r9" + .ifnc "\clob1", "r9" + mov r9 = \clob1 + .endif + .endif + .ifnc "\clob0", "r8" + mov r8 = \clob0 + .endif +.endm + +.macro __MOV_TO_KR kr, reg, clob0, clob1 + .ifc "\clob0", "r9" + ____MOV_TO_KR \kr, \reg, \clob1, \clob0 + .exitm + .endif + .ifc "\clob1", "r8" + ____MOV_TO_KR \kr, \reg, \clob1, \clob0 + .exitm + .endif + + ____MOV_TO_KR \kr, \reg, \clob0, \clob1 +.endm + +#define MOV_TO_KR(kr, reg, clob0, clob1) \ + __MOV_TO_KR IA64_KR_ ## kr, reg, clob0, clob1 + + +.macro __ITC_I pred, reg, clob + .ifc "\reg", "r8" + (\pred) XEN_HYPER_ITC_I + .exitm + .endif + .ifc "\clob", "r8" + (\pred) mov r8 = \reg + ;; + (\pred) XEN_HYPER_ITC_I + .exitm + .endif + + (\pred) mov \clob = r8 + (\pred) mov r8 = \reg + ;; + (\pred) XEN_HYPER_ITC_I + ;; + (\pred) mov r8 = \clob + ;; +.endm +#define ITC_I(pred, reg, clob) __ITC_I pred, reg, clob + +.macro __ITC_D pred, reg, clob + .ifc "\reg", "r8" + (\pred) XEN_HYPER_ITC_D + ;; + .exitm + .endif + .ifc "\clob", "r8" + (\pred) mov r8 = \reg + ;; + (\pred) XEN_HYPER_ITC_D + ;; + .exitm + .endif + + (\pred) mov \clob = r8 + (\pred) mov r8 = \reg + ;; + (\pred) XEN_HYPER_ITC_D + ;; + (\pred) mov r8 = \clob + ;; +.endm +#define ITC_D(pred, reg, clob) __ITC_D pred, reg, clob + +.macro __ITC_I_AND_D pred_i, pred_d, reg, clob + .ifc "\reg", "r8" + (\pred_i)XEN_HYPER_ITC_I + ;; + (\pred_d)XEN_HYPER_ITC_D + ;; + .exitm + .endif + .ifc "\clob", "r8" + mov r8 = \reg + ;; + (\pred_i)XEN_HYPER_ITC_I + ;; + (\pred_d)XEN_HYPER_ITC_D + ;; + .exitm + .endif + + mov \clob = r8 + mov r8 = \reg + ;; + (\pred_i)XEN_HYPER_ITC_I + ;; + (\pred_d)XEN_HYPER_ITC_D + ;; + mov r8 = \clob + ;; +.endm +#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \ + __ITC_I_AND_D pred_i, pred_d, reg, clob + +.macro __THASH pred, reg0, reg1, clob + .ifc "\reg0", "r8" + (\pred) mov r8 = \reg1 + (\pred) XEN_HYPER_THASH + .exitm + .endc + .ifc "\reg1", "r8" + (\pred) XEN_HYPER_THASH + ;; + (\pred) mov \reg0 = r8 + ;; + .exitm + .endif + .ifc "\clob", "r8" + (\pred) mov r8 = \reg1 + (\pred) XEN_HYPER_THASH + ;; + (\pred) mov \reg0 = r8 + ;; + .exitm + .endif + + (\pred) mov \clob = r8 + (\pred) mov r8 = \reg1 + (\pred) XEN_HYPER_THASH + ;; + (\pred) mov \reg0 = r8 + (\pred) mov r8 = \clob + ;; +.endm +#define THASH(pred, reg0, reg1, clob) __THASH pred, reg0, reg1, clob + +#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1) \ + mov clob0 = 1; \ + movl clob1 = XSI_PSR_IC; \ + ;; \ + st4 [clob1] = clob0 \ + ;; + +#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1) \ + ;; \ + srlz.d; \ + mov clob1 = 1; \ + movl clob0 = XSI_PSR_IC; \ + ;; \ + st4 [clob0] = clob1 + +#define RSM_PSR_IC(clob) \ + movl clob = XSI_PSR_IC; \ + ;; \ + st4 [clob] = r0; \ + ;; + +/* pred will be clobbered */ +#define MASK_TO_PEND_OFS (-1) +#define SSM_PSR_I(pred, pred_clob, clob) \ +(pred) movl clob = XSI_PSR_I_ADDR \ + ;; \ +(pred) ld8 clob = [clob] \ + ;; \ + /* if (pred) vpsr.i = 1 */ \ + /* if (pred) (vcpu->vcpu_info->evtchn_upcall_mask)=0 */ \ +(pred) st1 [clob] = r0, MASK_TO_PEND_OFS \ + ;; \ + /* if (vcpu->vcpu_info->evtchn_upcall_pending) */ \ +(pred) ld1 clob = [clob] \ + ;; \ +(pred) cmp.ne.unc pred_clob, p0 = clob, r0 \ + ;; \ +(pred_clob)XEN_HYPER_SSM_I /* do areal ssm psr.i */ + +#define RSM_PSR_I(pred, clob0, clob1) \ + movl clob0 = XSI_PSR_I_ADDR; \ + mov clob1 = 1; \ + ;; \ + ld8 clob0 = [clob0]; \ + ;; \ +(pred) st1 [clob0] = clob1 + +#define RSM_PSR_I_IC(clob0, clob1, clob2) \ + movl clob0 = XSI_PSR_I_ADDR; \ + movl clob1 = XSI_PSR_IC; \ + ;; \ + ld8 clob0 = [clob0]; \ + mov clob2 = 1; \ + ;; \ + /* note: clears both vpsr.i and vpsr.ic! */ \ + st1 [clob0] = clob2; \ + st4 [clob1] = r0; \ + ;; + +#define RSM_PSR_DT \ + XEN_HYPER_RSM_PSR_DT + +#define SSM_PSR_DT_AND_SRLZ_I \ + XEN_HYPER_SSM_PSR_DT + +#define BSW_0(clob0, clob1, clob2) \ + ;; \ + /* r16-r31 all now hold bank1 values */ \ + mov clob2 = ar.unat; \ + movl clob0 = XSI_BANK1_R16; \ + movl clob1 = XSI_BANK1_R16 + 8; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r16, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r17, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r18, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r19, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r20, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r21, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r22, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r23, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r24, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r25, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r26, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r27, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r28, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r29, 16; \ + ;; \ +.mem.offset 0, 0; st8.spill [clob0] = r30, 16; \ +.mem.offset 8, 0; st8.spill [clob1] = r31, 16; \ + ;; \ + mov clob1 = ar.unat; \ + movl clob0 = XSI_B1NAT; \ + ;; \ + st8 [clob0] = clob1; \ + mov ar.unat = clob2; \ + movl clob0 = XSI_BANKNUM; \ + ;; \ + st4 [clob0] = r0 + + + /* FIXME: THIS CODE IS NOT NaT SAFE! */ +#define XEN_BSW_1(clob) \ + mov clob = ar.unat; \ + movl r30 = XSI_B1NAT; \ + ;; \ + ld8 r30 = [r30]; \ + mov r31 = 1; \ + ;; \ + mov ar.unat = r30; \ + movl r30 = XSI_BANKNUM; \ + ;; \ + st4 [r30] = r31; \ + movl r30 = XSI_BANK1_R16; \ + movl r31 = XSI_BANK1_R16+8; \ + ;; \ + ld8.fill r16 = [r30], 16; \ + ld8.fill r17 = [r31], 16; \ + ;; \ + ld8.fill r18 = [r30], 16; \ + ld8.fill r19 = [r31], 16; \ + ;; \ + ld8.fill r20 = [r30], 16; \ + ld8.fill r21 = [r31], 16; \ + ;; \ + ld8.fill r22 = [r30], 16; \ + ld8.fill r23 = [r31], 16; \ + ;; \ + ld8.fill r24 = [r30], 16; \ + ld8.fill r25 = [r31], 16; \ + ;; \ + ld8.fill r26 = [r30], 16; \ + ld8.fill r27 = [r31], 16; \ + ;; \ + ld8.fill r28 = [r30], 16; \ + ld8.fill r29 = [r31], 16; \ + ;; \ + ld8.fill r30 = [r30]; \ + ld8.fill r31 = [r31]; \ + ;; \ + mov ar.unat = clob + +#define BSW_1(clob0, clob1) XEN_BSW_1(clob1) + + +#define COVER \ + XEN_HYPER_COVER + +#define RFI \ + XEN_HYPER_RFI; \ + dv_serialize_data From 21820cce1701cf978310efb47d90e5e6a927f6ae Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:02 +0900 Subject: [PATCH 26/41] ia64/pv_ops/xen: paravirtualize DO_SAVE_MIN for xen. paravirtualize DO_SAVE_MIN in minstate.h for xen. Signed-off-by: Yaozu (Eddie) Dong Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/inst.h | 2 + arch/ia64/include/asm/xen/minstate.h | 134 +++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 arch/ia64/include/asm/xen/minstate.h diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h index 03895e985509..1e92ed078379 100644 --- a/arch/ia64/include/asm/xen/inst.h +++ b/arch/ia64/include/asm/xen/inst.h @@ -22,6 +22,8 @@ #include +#define DO_SAVE_MIN XEN_DO_SAVE_MIN + #define MOV_FROM_IFA(reg) \ movl reg = XSI_IFA; \ ;; \ diff --git a/arch/ia64/include/asm/xen/minstate.h b/arch/ia64/include/asm/xen/minstate.h new file mode 100644 index 000000000000..4d92d9bbda7b --- /dev/null +++ b/arch/ia64/include/asm/xen/minstate.h @@ -0,0 +1,134 @@ +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * CONFIG_XEN note: p6/p7 are not preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define XEN_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \ + mov r16=IA64_KR(CURRENT); /* M */ \ + mov r27=ar.rsc; /* M */ \ + mov r20=r1; /* A */ \ + mov r25=ar.unat; /* M */ \ + MOV_FROM_IPSR(p0,r29); /* M */ \ + MOV_FROM_IIP(r28); /* M */ \ + mov r21=ar.fpsr; /* M */ \ + mov r26=ar.pfs; /* I */ \ + __COVER; /* B;; (or nothing) */ \ + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ + ;; \ + ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ + st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ + adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + /* SAVE_IFS;*/ /* see xen special handling below */ \ + cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ + ;; \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ +(pUStk) mov.m r24=ar.rnat; \ +(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(pKStk) mov r1=sp; /* get sp */ \ + ;; \ +(pUStk) lfetch.fault.excl.nt1 [r22]; \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ + ;; \ +(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ + ;; \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ + adds r16=PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16]=r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ + mov r29=b0 \ + ;; \ + WORKAROUND; \ + adds r16=PT(R8),r1; /* initialize first base pointer */ \ + adds r17=PT(R9),r1; /* initialize second base pointer */ \ +(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r8,16; \ +.mem.offset 8,0; st8.spill [r17]=r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r10,24; \ + movl r8=XSI_PRECOVER_IFS; \ +.mem.offset 8,0; st8.spill [r17]=r11,24; \ + ;; \ + /* xen special handling for possibly lazy cover */ \ + /* SAVE_MIN case in dispatch_ia32_handler: mov r30=r0 */ \ + ld8 r30=[r8]; \ +(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ + st8 [r16]=r28,16; /* save cr.iip */ \ + ;; \ + st8 [r17]=r30,16; /* save cr.ifs */ \ + mov r8=ar.ccv; \ + mov r9=ar.csd; \ + mov r10=ar.ssd; \ + movl r11=FPSR_DEFAULT; /* L-unit */ \ + ;; \ + st8 [r16]=r25,16; /* save ar.unat */ \ + st8 [r17]=r26,16; /* save ar.pfs */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r27,16; /* save ar.rsc */ \ +(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ +(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ + ;; /* avoid RAW on r16 & r17 */ \ +(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ + st8 [r17]=r31,16; /* save predicates */ \ +(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ + ;; \ + st8 [r16]=r29,16; /* save b0 */ \ + st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17]=r12,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r13,16; \ +.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ + mov r13=IA64_KR(CURRENT); /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r15,16; \ +.mem.offset 8,0; st8.spill [r17]=r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r2,16; \ +.mem.offset 8,0; st8.spill [r17]=r3,16; \ + ACCOUNT_GET_STAMP \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + ACCOUNT_SYS_ENTER \ + BSW_1(r3,r14); /* switch back to bank 1 (must be last in insn group) */ \ + ;; From 5142ec4690943eefc86b01396addf70083a2b9fb Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:03 +0900 Subject: [PATCH 27/41] ia64/pv_ops/xen: paravirtualize ivt.S for xen. paravirtualize ivt.S for xen by multi compile. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/inst.h | 1 + arch/ia64/xen/Makefile | 16 +++++++++- arch/ia64/xen/xenivt.S | 52 ++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/xen/xenivt.S diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h index 1e92ed078379..e6a25c34c7a8 100644 --- a/arch/ia64/include/asm/xen/inst.h +++ b/arch/ia64/include/asm/xen/inst.h @@ -22,6 +22,7 @@ #include +#define ia64_ivt xen_ivt #define DO_SAVE_MIN XEN_DO_SAVE_MIN #define MOV_FROM_IFA(reg) \ diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index 7cb4247f90be..5c87e4a79d39 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -2,5 +2,19 @@ # Makefile for Xen components # -obj-y := hypercall.o xensetup.o xen_pv_ops.o \ +obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o \ hypervisor.o xencomm.o xcom_hcall.o grant-table.o + +AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN + +# xen multi compile +ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S +ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o)) +obj-y += $(ASM_PARAVIRT_OBJS) +define paravirtualized_xen +AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_XEN +endef +$(foreach o,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_xen,$(o)))) + +$(obj)/xen-%.o: $(src)/../kernel/%.S FORCE + $(call if_changed_dep,as_o_S) diff --git a/arch/ia64/xen/xenivt.S b/arch/ia64/xen/xenivt.S new file mode 100644 index 000000000000..3e71d50584d9 --- /dev/null +++ b/arch/ia64/xen/xenivt.S @@ -0,0 +1,52 @@ +/* + * arch/ia64/xen/ivt.S + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + */ + +#include +#include +#include + +#include "../kernel/minstate.h" + + .section .text,"ax" +GLOBAL_ENTRY(xen_event_callback) + mov r31=pr // prepare to save predicates + ;; + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + ;; + movl r3=XSI_PSR_IC + mov r14=1 + ;; + st4 [r3]=r14 + ;; + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; + SAVE_REST + ;; +1: + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + add out0=16,sp // pass pointer to pt_regs as first arg + ;; + br.call.sptk.many b0=xen_evtchn_do_upcall + ;; + movl r20=XSI_PSR_I_ADDR + ;; + ld8 r20=[r20] + ;; + adds r20=-1,r20 // vcpu_info->evtchn_upcall_pending + ;; + ld1 r20=[r20] + ;; + cmp.ne p6,p0=r20,r0 // if there are pending events, + (p6) br.spnt.few 1b // call evtchn_do_upcall again. + br.sptk.many xen_leave_kernel // we know ia64_leave_kernel is + // paravirtualized as xen_leave_kernel +END(xen_event_callback) From 16583bc0b4871299a60cbcdd1c6e102e3f9b8e4b Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:04 +0900 Subject: [PATCH 28/41] ia64/pv_ops/xen: paravirtualize entry.S for ia64/xen. paravirtualize entry.S for ia64/xen by multi compile. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/inst.h | 8 ++++++++ arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/xen_pv_ops.c | 18 ++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/ia64/include/asm/xen/inst.h b/arch/ia64/include/asm/xen/inst.h index e6a25c34c7a8..19c2ae1d878a 100644 --- a/arch/ia64/include/asm/xen/inst.h +++ b/arch/ia64/include/asm/xen/inst.h @@ -25,6 +25,14 @@ #define ia64_ivt xen_ivt #define DO_SAVE_MIN XEN_DO_SAVE_MIN +#define __paravirt_switch_to xen_switch_to +#define __paravirt_leave_syscall xen_leave_syscall +#define __paravirt_work_processed_syscall xen_work_processed_syscall +#define __paravirt_leave_kernel xen_leave_kernel +#define __paravirt_pending_syscall_end xen_work_pending_syscall_end +#define __paravirt_work_processed_syscall_target \ + xen_work_processed_syscall + #define MOV_FROM_IFA(reg) \ movl reg = XSI_IFA; \ ;; \ diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index 5c87e4a79d39..9b77e8ae4f34 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -8,7 +8,7 @@ obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o \ AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN # xen multi compile -ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S +ASM_PARAVIRT_MULTI_COMPILE_SRCS = ivt.S entry.S ASM_PARAVIRT_OBJS = $(addprefix xen-,$(ASM_PARAVIRT_MULTI_COMPILE_SRCS:.S=.o)) obj-y += $(ASM_PARAVIRT_OBJS) define paravirtualized_xen diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index c236f04ffad5..5b23cd5e9153 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -275,6 +275,22 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { = xen_intrin_local_irq_restore, }; +/****************************************************************************** + * replacement of hand written assembly codes. + */ + +extern char xen_switch_to; +extern char xen_leave_syscall; +extern char xen_work_processed_syscall; +extern char xen_leave_kernel; + +const struct pv_cpu_asm_switch xen_cpu_asm_switch = { + .switch_to = (unsigned long)&xen_switch_to, + .leave_syscall = (unsigned long)&xen_leave_syscall, + .work_processed_syscall = (unsigned long)&xen_work_processed_syscall, + .leave_kernel = (unsigned long)&xen_leave_kernel, +}; + /*************************************************************************** * pv_ops initialization */ @@ -286,4 +302,6 @@ xen_setup_pv_ops(void) pv_info = xen_info; pv_init_ops = xen_init_ops; pv_cpu_ops = xen_cpu_ops; + + paravirt_cpu_asm_init(&xen_cpu_asm_switch); } From bcdd48739d1336332e4086c9e1c9f225290b8194 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:05 +0900 Subject: [PATCH 29/41] ia64/pv_ops/xen: implement xen pv_iosapic_ops. implement xen pv_iosapic_ops for xen paravirtualized iosapic. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/xen/xen_pv_ops.c | 52 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index 5b23cd5e9153..41a6cbfab150 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -291,6 +291,57 @@ const struct pv_cpu_asm_switch xen_cpu_asm_switch = { .leave_kernel = (unsigned long)&xen_leave_kernel, }; +/*************************************************************************** + * pv_iosapic_ops + * iosapic read/write hooks. + */ +static void +xen_pcat_compat_init(void) +{ + /* nothing */ +} + +static struct irq_chip* +xen_iosapic_get_irq_chip(unsigned long trigger) +{ + return NULL; +} + +static unsigned int +xen_iosapic_read(char __iomem *iosapic, unsigned int reg) +{ + struct physdev_apic apic_op; + int ret; + + apic_op.apic_physbase = (unsigned long)iosapic - + __IA64_UNCACHED_OFFSET; + apic_op.reg = reg; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op); + if (ret) + return ret; + return apic_op.value; +} + +static void +xen_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) +{ + struct physdev_apic apic_op; + + apic_op.apic_physbase = (unsigned long)iosapic - + __IA64_UNCACHED_OFFSET; + apic_op.reg = reg; + apic_op.value = val; + HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op); +} + +static const struct pv_iosapic_ops xen_iosapic_ops __initdata = { + .pcat_compat_init = xen_pcat_compat_init, + .__get_irq_chip = xen_iosapic_get_irq_chip, + + .__read = xen_iosapic_read, + .__write = xen_iosapic_write, +}; + /*************************************************************************** * pv_ops initialization */ @@ -302,6 +353,7 @@ xen_setup_pv_ops(void) pv_info = xen_info; pv_init_ops = xen_init_ops; pv_cpu_ops = xen_cpu_ops; + pv_iosapic_ops = xen_iosapic_ops; paravirt_cpu_asm_init(&xen_cpu_asm_switch); } From 78c2ae4a0ebd1ab46160e163bf4ca1b7e9463301 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:06 +0900 Subject: [PATCH 30/41] ia64/pv_ops/xen: define the nubmer of irqs which xen needs. define arch/ia64/include/asm/xen/irq.h to define the number of irqs which xen needs. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/xen/irq.h | 44 +++++++++++++++++++++++++++++++++ arch/ia64/kernel/nr-irqs.c | 1 + 2 files changed, 45 insertions(+) create mode 100644 arch/ia64/include/asm/xen/irq.h diff --git a/arch/ia64/include/asm/xen/irq.h b/arch/ia64/include/asm/xen/irq.h new file mode 100644 index 000000000000..a90450983003 --- /dev/null +++ b/arch/ia64/include/asm/xen/irq.h @@ -0,0 +1,44 @@ +/****************************************************************************** + * arch/ia64/include/asm/xen/irq.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _ASM_IA64_XEN_IRQ_H +#define _ASM_IA64_XEN_IRQ_H + +/* + * The flat IRQ space is divided into two regions: + * 1. A one-to-one mapping of real physical IRQs. This space is only used + * if we have physical device-access privilege. This region is at the + * start of the IRQ space so that existing device drivers do not need + * to be modified to translate physical IRQ numbers into our IRQ space. + * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These + * are bound using the provided bind/unbind functions. + */ + +#define XEN_PIRQ_BASE 0 +#define XEN_NR_PIRQS 256 + +#define XEN_DYNIRQ_BASE (XEN_PIRQ_BASE + XEN_NR_PIRQS) +#define XEN_NR_DYNIRQS (NR_CPUS * 8) + +#define XEN_NR_IRQS (XEN_NR_PIRQS + XEN_NR_DYNIRQS) + +#endif /* _ASM_IA64_XEN_IRQ_H */ diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c index 8273afc32db8..ee564575148e 100644 --- a/arch/ia64/kernel/nr-irqs.c +++ b/arch/ia64/kernel/nr-irqs.c @@ -10,6 +10,7 @@ #include #include #include +#include void foo(void) { From 7477de989faffd4be4adfa3d3e1bf35bdf2e0f75 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:07 +0900 Subject: [PATCH 31/41] ia64/pv_ops/xen: implement xen pv_irq_ops. implement xen pv_irq_ops to paravirtualize irq handling with xen event channel. Cc: Jeremy Fitzhardinge Signed-off-by: Akio Takebe Signed-off-by: Alex Williamson Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/irq_xen.c | 435 +++++++++++++++++++++++++++++++++++++ arch/ia64/xen/irq_xen.h | 34 +++ arch/ia64/xen/xen_pv_ops.c | 3 + 4 files changed, 473 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/xen/irq_xen.c create mode 100644 arch/ia64/xen/irq_xen.h diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index 9b77e8ae4f34..01c4289f0a40 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -2,7 +2,7 @@ # Makefile for Xen components # -obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o \ +obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \ hypervisor.o xencomm.o xcom_hcall.o grant-table.o AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN diff --git a/arch/ia64/xen/irq_xen.c b/arch/ia64/xen/irq_xen.c new file mode 100644 index 000000000000..af93aadb68bb --- /dev/null +++ b/arch/ia64/xen/irq_xen.c @@ -0,0 +1,435 @@ +/****************************************************************************** + * arch/ia64/xen/irq_xen.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include + +#include +#include +#include + +#include + +#include "irq_xen.h" + +/*************************************************************************** + * pv_irq_ops + * irq operations + */ + +static int +xen_assign_irq_vector(int irq) +{ + struct physdev_irq irq_op; + + irq_op.irq = irq; + if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) + return -ENOSPC; + + return irq_op.vector; +} + +static void +xen_free_irq_vector(int vector) +{ + struct physdev_irq irq_op; + + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) + return; + + irq_op.vector = vector; + if (HYPERVISOR_physdev_op(PHYSDEVOP_free_irq_vector, &irq_op)) + printk(KERN_WARNING "%s: xen_free_irq_vecotr fail vector=%d\n", + __func__, vector); +} + + +static DEFINE_PER_CPU(int, timer_irq) = -1; +static DEFINE_PER_CPU(int, ipi_irq) = -1; +static DEFINE_PER_CPU(int, resched_irq) = -1; +static DEFINE_PER_CPU(int, cmc_irq) = -1; +static DEFINE_PER_CPU(int, cmcp_irq) = -1; +static DEFINE_PER_CPU(int, cpep_irq) = -1; +#define NAME_SIZE 15 +static DEFINE_PER_CPU(char[NAME_SIZE], timer_name); +static DEFINE_PER_CPU(char[NAME_SIZE], ipi_name); +static DEFINE_PER_CPU(char[NAME_SIZE], resched_name); +static DEFINE_PER_CPU(char[NAME_SIZE], cmc_name); +static DEFINE_PER_CPU(char[NAME_SIZE], cmcp_name); +static DEFINE_PER_CPU(char[NAME_SIZE], cpep_name); +#undef NAME_SIZE + +struct saved_irq { + unsigned int irq; + struct irqaction *action; +}; +/* 16 should be far optimistic value, since only several percpu irqs + * are registered early. + */ +#define MAX_LATE_IRQ 16 +static struct saved_irq saved_percpu_irqs[MAX_LATE_IRQ]; +static unsigned short late_irq_cnt; +static unsigned short saved_irq_cnt; +static int xen_slab_ready; + +#ifdef CONFIG_SMP +/* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ, + * it ends up to issue several memory accesses upon percpu data and + * thus adds unnecessary traffic to other paths. + */ +static irqreturn_t +xen_dummy_handler(int irq, void *dev_id) +{ + + return IRQ_HANDLED; +} + +static struct irqaction xen_ipi_irqaction = { + .handler = handle_IPI, + .flags = IRQF_DISABLED, + .name = "IPI" +}; + +static struct irqaction xen_resched_irqaction = { + .handler = xen_dummy_handler, + .flags = IRQF_DISABLED, + .name = "resched" +}; + +static struct irqaction xen_tlb_irqaction = { + .handler = xen_dummy_handler, + .flags = IRQF_DISABLED, + .name = "tlb_flush" +}; +#endif + +/* + * This is xen version percpu irq registration, which needs bind + * to xen specific evtchn sub-system. One trick here is that xen + * evtchn binding interface depends on kmalloc because related + * port needs to be freed at device/cpu down. So we cache the + * registration on BSP before slab is ready and then deal them + * at later point. For rest instances happening after slab ready, + * we hook them to xen evtchn immediately. + * + * FIXME: MCA is not supported by far, and thus "nomca" boot param is + * required. + */ +static void +__xen_register_percpu_irq(unsigned int cpu, unsigned int vec, + struct irqaction *action, int save) +{ + irq_desc_t *desc; + int irq = 0; + + if (xen_slab_ready) { + switch (vec) { + case IA64_TIMER_VECTOR: + snprintf(per_cpu(timer_name, cpu), + sizeof(per_cpu(timer_name, cpu)), + "%s%d", action->name, cpu); + irq = bind_virq_to_irqhandler(VIRQ_ITC, cpu, + action->handler, action->flags, + per_cpu(timer_name, cpu), action->dev_id); + per_cpu(timer_irq, cpu) = irq; + break; + case IA64_IPI_RESCHEDULE: + snprintf(per_cpu(resched_name, cpu), + sizeof(per_cpu(resched_name, cpu)), + "%s%d", action->name, cpu); + irq = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR, cpu, + action->handler, action->flags, + per_cpu(resched_name, cpu), action->dev_id); + per_cpu(resched_irq, cpu) = irq; + break; + case IA64_IPI_VECTOR: + snprintf(per_cpu(ipi_name, cpu), + sizeof(per_cpu(ipi_name, cpu)), + "%s%d", action->name, cpu); + irq = bind_ipi_to_irqhandler(XEN_IPI_VECTOR, cpu, + action->handler, action->flags, + per_cpu(ipi_name, cpu), action->dev_id); + per_cpu(ipi_irq, cpu) = irq; + break; + case IA64_CMC_VECTOR: + snprintf(per_cpu(cmc_name, cpu), + sizeof(per_cpu(cmc_name, cpu)), + "%s%d", action->name, cpu); + irq = bind_virq_to_irqhandler(VIRQ_MCA_CMC, cpu, + action->handler, + action->flags, + per_cpu(cmc_name, cpu), + action->dev_id); + per_cpu(cmc_irq, cpu) = irq; + break; + case IA64_CMCP_VECTOR: + snprintf(per_cpu(cmcp_name, cpu), + sizeof(per_cpu(cmcp_name, cpu)), + "%s%d", action->name, cpu); + irq = bind_ipi_to_irqhandler(XEN_CMCP_VECTOR, cpu, + action->handler, + action->flags, + per_cpu(cmcp_name, cpu), + action->dev_id); + per_cpu(cmcp_irq, cpu) = irq; + break; + case IA64_CPEP_VECTOR: + snprintf(per_cpu(cpep_name, cpu), + sizeof(per_cpu(cpep_name, cpu)), + "%s%d", action->name, cpu); + irq = bind_ipi_to_irqhandler(XEN_CPEP_VECTOR, cpu, + action->handler, + action->flags, + per_cpu(cpep_name, cpu), + action->dev_id); + per_cpu(cpep_irq, cpu) = irq; + break; + case IA64_CPE_VECTOR: + case IA64_MCA_RENDEZ_VECTOR: + case IA64_PERFMON_VECTOR: + case IA64_MCA_WAKEUP_VECTOR: + case IA64_SPURIOUS_INT_VECTOR: + /* No need to complain, these aren't supported. */ + break; + default: + printk(KERN_WARNING "Percpu irq %d is unsupported " + "by xen!\n", vec); + break; + } + BUG_ON(irq < 0); + + if (irq > 0) { + /* + * Mark percpu. Without this, migrate_irqs() will + * mark the interrupt for migrations and trigger it + * on cpu hotplug. + */ + desc = irq_desc + irq; + desc->status |= IRQ_PER_CPU; + } + } + + /* For BSP, we cache registered percpu irqs, and then re-walk + * them when initializing APs + */ + if (!cpu && save) { + BUG_ON(saved_irq_cnt == MAX_LATE_IRQ); + saved_percpu_irqs[saved_irq_cnt].irq = vec; + saved_percpu_irqs[saved_irq_cnt].action = action; + saved_irq_cnt++; + if (!xen_slab_ready) + late_irq_cnt++; + } +} + +static void +xen_register_percpu_irq(ia64_vector vec, struct irqaction *action) +{ + __xen_register_percpu_irq(smp_processor_id(), vec, action, 1); +} + +static void +xen_bind_early_percpu_irq(void) +{ + int i; + + xen_slab_ready = 1; + /* There's no race when accessing this cached array, since only + * BSP will face with such step shortly + */ + for (i = 0; i < late_irq_cnt; i++) + __xen_register_percpu_irq(smp_processor_id(), + saved_percpu_irqs[i].irq, + saved_percpu_irqs[i].action, 0); +} + +/* FIXME: There's no obvious point to check whether slab is ready. So + * a hack is used here by utilizing a late time hook. + */ + +#ifdef CONFIG_HOTPLUG_CPU +static int __devinit +unbind_evtchn_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + if (action == CPU_DEAD) { + /* Unregister evtchn. */ + if (per_cpu(cpep_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(cpep_irq, cpu), NULL); + per_cpu(cpep_irq, cpu) = -1; + } + if (per_cpu(cmcp_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(cmcp_irq, cpu), NULL); + per_cpu(cmcp_irq, cpu) = -1; + } + if (per_cpu(cmc_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(cmc_irq, cpu), NULL); + per_cpu(cmc_irq, cpu) = -1; + } + if (per_cpu(ipi_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(ipi_irq, cpu), NULL); + per_cpu(ipi_irq, cpu) = -1; + } + if (per_cpu(resched_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(resched_irq, cpu), + NULL); + per_cpu(resched_irq, cpu) = -1; + } + if (per_cpu(timer_irq, cpu) >= 0) { + unbind_from_irqhandler(per_cpu(timer_irq, cpu), NULL); + per_cpu(timer_irq, cpu) = -1; + } + } + return NOTIFY_OK; +} + +static struct notifier_block unbind_evtchn_notifier = { + .notifier_call = unbind_evtchn_callback, + .priority = 0 +}; +#endif + +void xen_smp_intr_init_early(unsigned int cpu) +{ +#ifdef CONFIG_SMP + unsigned int i; + + for (i = 0; i < saved_irq_cnt; i++) + __xen_register_percpu_irq(cpu, saved_percpu_irqs[i].irq, + saved_percpu_irqs[i].action, 0); +#endif +} + +void xen_smp_intr_init(void) +{ +#ifdef CONFIG_SMP + unsigned int cpu = smp_processor_id(); + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = { .ip = (unsigned long)&xen_event_callback }, + }; + + if (cpu == 0) { + /* Initialization was already done for boot cpu. */ +#ifdef CONFIG_HOTPLUG_CPU + /* Register the notifier only once. */ + register_cpu_notifier(&unbind_evtchn_notifier); +#endif + return; + } + + /* This should be piggyback when setup vcpu guest context */ + BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event)); +#endif /* CONFIG_SMP */ +} + +void __init +xen_irq_init(void) +{ + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = { .ip = (unsigned long)&xen_event_callback }, + }; + + xen_init_IRQ(); + BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event)); + late_time_init = xen_bind_early_percpu_irq; +} + +void +xen_platform_send_ipi(int cpu, int vector, int delivery_mode, int redirect) +{ +#ifdef CONFIG_SMP + /* TODO: we need to call vcpu_up here */ + if (unlikely(vector == ap_wakeup_vector)) { + /* XXX + * This should be in __cpu_up(cpu) in ia64 smpboot.c + * like x86. But don't want to modify it, + * keep it untouched. + */ + xen_smp_intr_init_early(cpu); + + xen_send_ipi(cpu, vector); + /* vcpu_prepare_and_up(cpu); */ + return; + } +#endif + + switch (vector) { + case IA64_IPI_VECTOR: + xen_send_IPI_one(cpu, XEN_IPI_VECTOR); + break; + case IA64_IPI_RESCHEDULE: + xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); + break; + case IA64_CMCP_VECTOR: + xen_send_IPI_one(cpu, XEN_CMCP_VECTOR); + break; + case IA64_CPEP_VECTOR: + xen_send_IPI_one(cpu, XEN_CPEP_VECTOR); + break; + case IA64_TIMER_VECTOR: { + /* this is used only once by check_sal_cache_flush() + at boot time */ + static int used = 0; + if (!used) { + xen_send_ipi(cpu, IA64_TIMER_VECTOR); + used = 1; + break; + } + /* fallthrough */ + } + default: + printk(KERN_WARNING "Unsupported IPI type 0x%x\n", + vector); + notify_remote_via_irq(0); /* defaults to 0 irq */ + break; + } +} + +static void __init +xen_register_ipi(void) +{ +#ifdef CONFIG_SMP + register_percpu_irq(IA64_IPI_VECTOR, &xen_ipi_irqaction); + register_percpu_irq(IA64_IPI_RESCHEDULE, &xen_resched_irqaction); + register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &xen_tlb_irqaction); +#endif +} + +static void +xen_resend_irq(unsigned int vector) +{ + (void)resend_irq_on_evtchn(vector); +} + +const struct pv_irq_ops xen_irq_ops __initdata = { + .register_ipi = xen_register_ipi, + + .assign_irq_vector = xen_assign_irq_vector, + .free_irq_vector = xen_free_irq_vector, + .register_percpu_irq = xen_register_percpu_irq, + + .resend_irq = xen_resend_irq, +}; diff --git a/arch/ia64/xen/irq_xen.h b/arch/ia64/xen/irq_xen.h new file mode 100644 index 000000000000..26110f330c87 --- /dev/null +++ b/arch/ia64/xen/irq_xen.h @@ -0,0 +1,34 @@ +/****************************************************************************** + * arch/ia64/xen/irq_xen.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef IRQ_XEN_H +#define IRQ_XEN_H + +extern void (*late_time_init)(void); +extern char xen_event_callback; +void __init xen_init_IRQ(void); + +extern const struct pv_irq_ops xen_irq_ops __initdata; +extern void xen_smp_intr_init(void); +extern void xen_send_ipi(int cpu, int vec); + +#endif /* IRQ_XEN_H */ diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index 41a6cbfab150..4fe4e621b7b6 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -29,6 +29,8 @@ #include #include +#include "irq_xen.h" + /*************************************************************************** * general info */ @@ -354,6 +356,7 @@ xen_setup_pv_ops(void) pv_init_ops = xen_init_ops; pv_cpu_ops = xen_cpu_ops; pv_iosapic_ops = xen_iosapic_ops; + pv_irq_ops = xen_irq_ops; paravirt_cpu_asm_init(&xen_cpu_asm_switch); } From dcbbecdad53bd6273891ad018f9c6bb8f2b7813c Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:08 +0900 Subject: [PATCH 32/41] ia64/pv_ops/xen: implement xen pv_time_ops. implement xen pv_time_ops to account steal time. Cc: Jeremy Fitzhardinge Signed-off-by: Alex Williamson Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/time.c | 180 +++++++++++++++++++++++++++++++++++++ arch/ia64/xen/time.h | 23 +++++ arch/ia64/xen/xen_pv_ops.c | 2 + 4 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/xen/time.c create mode 100644 arch/ia64/xen/time.h diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index 01c4289f0a40..ed31c76d2bf0 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -3,7 +3,7 @@ # obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \ - hypervisor.o xencomm.o xcom_hcall.o grant-table.o + hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c new file mode 100644 index 000000000000..ec168ec754b2 --- /dev/null +++ b/arch/ia64/xen/time.c @@ -0,0 +1,180 @@ +/****************************************************************************** + * arch/ia64/xen/time.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include + +#include + +#include + +#include "../kernel/fsyscall_gtod_data.h" + +DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); +DEFINE_PER_CPU(unsigned long, processed_stolen_time); +DEFINE_PER_CPU(unsigned long, processed_blocked_time); + +/* taken from i386/kernel/time-xen.c */ +static void xen_init_missing_ticks_accounting(int cpu) +{ + struct vcpu_register_runstate_memory_area area; + struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu); + int rc; + + memset(runstate, 0, sizeof(*runstate)); + + area.addr.v = runstate; + rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, + &area); + WARN_ON(rc && rc != -ENOSYS); + + per_cpu(processed_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; + per_cpu(processed_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] + + runstate->time[RUNSTATE_offline]; +} + +/* + * Runstate accounting + */ +/* stolen from arch/x86/xen/time.c */ +static void get_runstate_snapshot(struct vcpu_runstate_info *res) +{ + u64 state_time; + struct vcpu_runstate_info *state; + + BUG_ON(preemptible()); + + state = &__get_cpu_var(runstate); + + /* + * The runstate info is always updated by the hypervisor on + * the current CPU, so there's no need to use anything + * stronger than a compiler barrier when fetching it. + */ + do { + state_time = state->state_entry_time; + rmb(); + *res = *state; + rmb(); + } while (state->state_entry_time != state_time); +} + +#define NS_PER_TICK (1000000000LL/HZ) + +static unsigned long +consider_steal_time(unsigned long new_itm) +{ + unsigned long stolen, blocked; + unsigned long delta_itm = 0, stolentick = 0; + int cpu = smp_processor_id(); + struct vcpu_runstate_info runstate; + struct task_struct *p = current; + + get_runstate_snapshot(&runstate); + + /* + * Check for vcpu migration effect + * In this case, itc value is reversed. + * This causes huge stolen value. + * This function just checks and reject this effect. + */ + if (!time_after_eq(runstate.time[RUNSTATE_blocked], + per_cpu(processed_blocked_time, cpu))) + blocked = 0; + + if (!time_after_eq(runstate.time[RUNSTATE_runnable] + + runstate.time[RUNSTATE_offline], + per_cpu(processed_stolen_time, cpu))) + stolen = 0; + + if (!time_after(delta_itm + new_itm, ia64_get_itc())) + stolentick = ia64_get_itc() - new_itm; + + do_div(stolentick, NS_PER_TICK); + stolentick++; + + do_div(stolen, NS_PER_TICK); + + if (stolen > stolentick) + stolen = stolentick; + + stolentick -= stolen; + do_div(blocked, NS_PER_TICK); + + if (blocked > stolentick) + blocked = stolentick; + + if (stolen > 0 || blocked > 0) { + account_steal_time(NULL, jiffies_to_cputime(stolen)); + account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked)); + run_local_timers(); + + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_mode(get_irq_regs())); + + scheduler_tick(); + run_posix_cpu_timers(p); + delta_itm += local_cpu_data->itm_delta * (stolen + blocked); + + if (cpu == time_keeper_id) { + write_seqlock(&xtime_lock); + do_timer(stolen + blocked); + local_cpu_data->itm_next = delta_itm + new_itm; + write_sequnlock(&xtime_lock); + } else { + local_cpu_data->itm_next = delta_itm + new_itm; + } + per_cpu(processed_stolen_time, cpu) += NS_PER_TICK * stolen; + per_cpu(processed_blocked_time, cpu) += NS_PER_TICK * blocked; + } + return delta_itm; +} + +static int xen_do_steal_accounting(unsigned long *new_itm) +{ + unsigned long delta_itm; + delta_itm = consider_steal_time(*new_itm); + *new_itm += delta_itm; + if (time_after(*new_itm, ia64_get_itc()) && delta_itm) + return 1; + + return 0; +} + +static void xen_itc_jitter_data_reset(void) +{ + u64 lcycle, ret; + + do { + lcycle = itc_jitter_data.itc_lastcycle; + ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0); + } while (unlikely(ret != lcycle)); +} + +struct pv_time_ops xen_time_ops __initdata = { + .init_missing_ticks_accounting = xen_init_missing_ticks_accounting, + .do_steal_accounting = xen_do_steal_accounting, + .clocksource_resume = xen_itc_jitter_data_reset, +}; diff --git a/arch/ia64/xen/time.h b/arch/ia64/xen/time.h new file mode 100644 index 000000000000..b9c7ec5f9cfa --- /dev/null +++ b/arch/ia64/xen/time.h @@ -0,0 +1,23 @@ +/****************************************************************************** + * arch/ia64/xen/time.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +extern struct pv_time_ops xen_time_ops __initdata; diff --git a/arch/ia64/xen/xen_pv_ops.c b/arch/ia64/xen/xen_pv_ops.c index 4fe4e621b7b6..04cd12350455 100644 --- a/arch/ia64/xen/xen_pv_ops.c +++ b/arch/ia64/xen/xen_pv_ops.c @@ -30,6 +30,7 @@ #include #include "irq_xen.h" +#include "time.h" /*************************************************************************** * general info @@ -357,6 +358,7 @@ xen_setup_pv_ops(void) pv_cpu_ops = xen_cpu_ops; pv_iosapic_ops = xen_iosapic_ops; pv_irq_ops = xen_irq_ops; + pv_time_ops = xen_time_ops; paravirt_cpu_asm_init(&xen_cpu_asm_switch); } From a0df655ccd0669bd3efc85346dc816833dd1197f Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:09 +0900 Subject: [PATCH 33/41] ia64/xen: define xen machine vector for domU. define xen machine vector for domU. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/Makefile | 2 ++ arch/ia64/include/asm/machvec.h | 2 ++ arch/ia64/include/asm/machvec_xen.h | 22 ++++++++++++++++++++++ arch/ia64/kernel/acpi.c | 5 +++++ arch/ia64/xen/Makefile | 2 ++ arch/ia64/xen/machvec.c | 4 ++++ 6 files changed, 37 insertions(+) create mode 100644 arch/ia64/include/asm/machvec_xen.h create mode 100644 arch/ia64/xen/machvec.c diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 905d25b13d5a..40242501bcdd 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -56,9 +56,11 @@ core-$(CONFIG_IA64_DIG) += arch/ia64/dig/ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ +core-$(CONFIG_IA64_XEN_GUEST) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/ core-$(CONFIG_KVM) += arch/ia64/kvm/ +core-$(CONFIG_XEN) += arch/ia64/xen/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 2b850ccafef5..de99cb2799cf 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -128,6 +128,8 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # include # elif defined (CONFIG_IA64_SGI_UV) # include +# elif defined (CONFIG_IA64_XEN_GUEST) +# include # elif defined (CONFIG_IA64_GENERIC) # ifdef MACHVEC_PLATFORM_HEADER diff --git a/arch/ia64/include/asm/machvec_xen.h b/arch/ia64/include/asm/machvec_xen.h new file mode 100644 index 000000000000..55f9228056cd --- /dev/null +++ b/arch/ia64/include/asm/machvec_xen.h @@ -0,0 +1,22 @@ +#ifndef _ASM_IA64_MACHVEC_XEN_h +#define _ASM_IA64_MACHVEC_XEN_h + +extern ia64_mv_setup_t dig_setup; +extern ia64_mv_cpu_init_t xen_cpu_init; +extern ia64_mv_irq_init_t xen_irq_init; +extern ia64_mv_send_ipi_t xen_platform_send_ipi; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. + */ +#define platform_name "xen" +#define platform_setup dig_setup +#define platform_cpu_init xen_cpu_init +#define platform_irq_init xen_irq_init +#define platform_send_ipi xen_platform_send_ipi + +#endif /* _ASM_IA64_MACHVEC_XEN_h */ diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5d1eb7ee2bf6..00936491933e 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -52,6 +52,7 @@ #include #include #include +#include #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ @@ -121,6 +122,8 @@ acpi_get_sysname(void) return "uv"; else return "sn2"; + } else if (xen_pv_domain() && !strcmp(hdr->oem_id, "XEN")) { + return "xen"; } return "dig"; @@ -137,6 +140,8 @@ acpi_get_sysname(void) return "uv"; # elif defined (CONFIG_IA64_DIG) return "dig"; +# elif defined (CONFIG_IA64_XEN_GUEST) + return "xen"; # else # error Unknown platform. Fix acpi.c. # endif diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index ed31c76d2bf0..972d085567d9 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -5,6 +5,8 @@ obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \ hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o +obj-$(CONFIG_IA64_GENERIC) += machvec.o + AFLAGS_xenivt.o += -D__IA64_ASM_PARAVIRTUALIZED_XEN # xen multi compile diff --git a/arch/ia64/xen/machvec.c b/arch/ia64/xen/machvec.c new file mode 100644 index 000000000000..4ad588a7c279 --- /dev/null +++ b/arch/ia64/xen/machvec.c @@ -0,0 +1,4 @@ +#define MACHVEC_PLATFORM_NAME xen +#define MACHVEC_PLATFORM_HEADER +#include + From 91834e685d2059b69c4e8e3d707f35d94438de94 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:10 +0900 Subject: [PATCH 34/41] ia64/xen: preliminary support for save/restore. preliminary support for save/restore. Although Save/restore isn't fully working yet, this patch is necessary to compile. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/xen/Makefile | 2 +- arch/ia64/xen/suspend.c | 64 +++++++++++++++++++++++++++++++++++++++++ arch/ia64/xen/time.c | 33 +++++++++++++++++++++ arch/ia64/xen/time.h | 1 + 4 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/xen/suspend.c diff --git a/arch/ia64/xen/Makefile b/arch/ia64/xen/Makefile index 972d085567d9..0ad0224693d9 100644 --- a/arch/ia64/xen/Makefile +++ b/arch/ia64/xen/Makefile @@ -3,7 +3,7 @@ # obj-y := hypercall.o xenivt.o xensetup.o xen_pv_ops.o irq_xen.o \ - hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o + hypervisor.o xencomm.o xcom_hcall.o grant-table.o time.o suspend.o obj-$(CONFIG_IA64_GENERIC) += machvec.o diff --git a/arch/ia64/xen/suspend.c b/arch/ia64/xen/suspend.c new file mode 100644 index 000000000000..fd66b048c6fa --- /dev/null +++ b/arch/ia64/xen/suspend.c @@ -0,0 +1,64 @@ +/****************************************************************************** + * arch/ia64/xen/suspend.c + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * suspend/resume + */ + +#include +#include +#include "time.h" + +void +xen_mm_pin_all(void) +{ + /* nothing */ +} + +void +xen_mm_unpin_all(void) +{ + /* nothing */ +} + +void xen_pre_device_suspend(void) +{ + /* nothing */ +} + +void +xen_pre_suspend() +{ + /* nothing */ +} + +void +xen_post_suspend(int suspend_cancelled) +{ + if (suspend_cancelled) + return; + + xen_ia64_enable_opt_feature(); + /* add more if necessary */ +} + +void xen_arch_resume(void) +{ + xen_timer_resume_on_aps(); +} diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index ec168ec754b2..d15a94c330fb 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -26,6 +26,8 @@ #include #include +#include + #include #include @@ -178,3 +180,34 @@ struct pv_time_ops xen_time_ops __initdata = { .do_steal_accounting = xen_do_steal_accounting, .clocksource_resume = xen_itc_jitter_data_reset, }; + +/* Called after suspend, to resume time. */ +static void xen_local_tick_resume(void) +{ + /* Just trigger a tick. */ + ia64_cpu_local_tick(); + touch_softlockup_watchdog(); +} + +void +xen_timer_resume(void) +{ + unsigned int cpu; + + xen_local_tick_resume(); + + for_each_online_cpu(cpu) + xen_init_missing_ticks_accounting(cpu); +} + +static void ia64_cpu_local_tick_fn(void *unused) +{ + xen_local_tick_resume(); + xen_init_missing_ticks_accounting(smp_processor_id()); +} + +void +xen_timer_resume_on_aps(void) +{ + smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1); +} diff --git a/arch/ia64/xen/time.h b/arch/ia64/xen/time.h index b9c7ec5f9cfa..f98d7e1a42f0 100644 --- a/arch/ia64/xen/time.h +++ b/arch/ia64/xen/time.h @@ -21,3 +21,4 @@ */ extern struct pv_time_ops xen_time_ops __initdata; +void xen_timer_resume_on_aps(void); From ba9cc328bfeec3ec5fad3356dd53bc0c1ff506b0 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:11 +0900 Subject: [PATCH 35/41] ia64/pv_ops: update Kconfig for paravirtualized guest and xen. introduce CONFIG_PARAVIRT_GUEST, CONFIG_PARAVIRT for paravirtualized guest. introduce CONFIG_XEN, CONFIG_IA64_XEN_GUEST for xen. Signed-off-by: Alex Williamson Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 32 ++++++++++++++++++++++++++++++++ arch/ia64/xen/Kconfig | 26 ++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 arch/ia64/xen/Kconfig diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3b7aa38254a8..9318dee22537 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -108,6 +108,33 @@ config AUDIT_ARCH bool default y +menuconfig PARAVIRT_GUEST + bool "Paravirtualized guest support" + help + Say Y here to get to see options related to running Linux under + various hypervisors. This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if PARAVIRT_GUEST + +config PARAVIRT + bool "Enable paravirtualization code" + depends on PARAVIRT_GUEST + default y + bool + default y + help + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor + the kernel is theoretically slower and slightly larger. + + +source "arch/ia64/xen/Kconfig" + +endif + choice prompt "System type" default IA64_GENERIC @@ -129,6 +156,7 @@ config IA64_GENERIC SGI-SN2 For SGI Altix systems SGI-UV For SGI UV systems Ski-simulator For the HP simulator + Xen-domU For xen domU system If you don't know what to do, choose "generic". @@ -179,6 +207,10 @@ config IA64_HP_SIM bool "Ski-simulator" select SWIOTLB +config IA64_XEN_GUEST + bool "Xen guest" + depends on XEN + endchoice choice diff --git a/arch/ia64/xen/Kconfig b/arch/ia64/xen/Kconfig new file mode 100644 index 000000000000..f1683a20275b --- /dev/null +++ b/arch/ia64/xen/Kconfig @@ -0,0 +1,26 @@ +# +# This Kconfig describes xen/ia64 options +# + +config XEN + bool "Xen hypervisor support" + default y + depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB && EXPERIMENTAL + select XEN_XENCOMM + select NO_IDLE_HZ + + # those are required to save/restore. + select ARCH_SUSPEND_POSSIBLE + select SUSPEND + select PM_SLEEP + help + Enable Xen hypervisor support. Resulting kernel runs + both as a guest OS on Xen and natively on hardware. + +config XEN_XENCOMM + depends on XEN + bool + +config NO_IDLE_HZ + depends on XEN + bool From 98c99d7c27973538081a809c114b8d5c6195ecfa Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Wed, 28 May 2008 17:13:53 +0900 Subject: [PATCH 36/41] ia64/xen: a recipe for using xen/ia64 with pv_ops. Recipe for using xen/ia64 with pv_ops domU. Signed-off-by: Akio Takebe Signed-off-by: Isaku Yamahata Cc: Randy Dunlap Signed-off-by: Tony Luck --- Documentation/ia64/xen.txt | 183 +++++++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 Documentation/ia64/xen.txt diff --git a/Documentation/ia64/xen.txt b/Documentation/ia64/xen.txt new file mode 100644 index 000000000000..c61a99f7c8bb --- /dev/null +++ b/Documentation/ia64/xen.txt @@ -0,0 +1,183 @@ + Recipe for getting/building/running Xen/ia64 with pv_ops + -------------------------------------------------------- + +This recipe describes how to get xen-ia64 source and build it, +and run domU with pv_ops. + +============ +Requirements +============ + + - python + - mercurial + it (aka "hg") is an open-source source code + management software. See the below. + http://www.selenic.com/mercurial/wiki/ + - git + - bridge-utils + +================================= +Getting and Building Xen and Dom0 +================================= + + My environment is; + Machine : Tiger4 + Domain0 OS : RHEL5 + DomainU OS : RHEL5 + + 1. Download source + # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg + # cd xen-unstable.hg + # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg + + 2. # make world + + 3. # make install-tools + + 4. copy kernels and xen + # cp xen/xen.gz /boot/efi/efi/redhat/ + # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \ + /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen + + 5. make initrd for Dom0/DomU + # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \ + O=$(/bin/pwd)/build-linux-2.6.18-xen_ia64 + # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \ + 2.6.18.8-xen --builtin mptspi --builtin mptbase \ + --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \ + --builtin ehci-hcd + +================================ +Making a disk image for guest OS +================================ + + 1. make file + # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0 + # mke2fs -F -j /root/rhel5.img + # mount -o loop /root/rhel5.img /mnt + # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt + # mkdir /mnt/{root,proc,sys,home,tmp} + + Note: You may miss some device files. If so, please create them + with mknod. Or you can use tar instead of cp. + + 2. modify DomU's fstab + # vi /mnt/etc/fstab + /dev/xvda1 / ext3 defaults 1 1 + none /dev/pts devpts gid=5,mode=620 0 0 + none /dev/shm tmpfs defaults 0 0 + none /proc proc defaults 0 0 + none /sys sysfs defaults 0 0 + + 3. modify inittab + set runlevel to 3 to avoid X trying to start + # vi /mnt/etc/inittab + id:3:initdefault: + Start a getty on the hvc0 console + X0:2345:respawn:/sbin/mingetty hvc0 + tty1-6 mingetty can be commented out + + 4. add hvc0 into /etc/securetty + # vi /mnt/etc/securetty (add hvc0) + + 5. umount + # umount /mnt + +FYI, virt-manager can also make a disk image for guest OS. +It's GUI tools and easy to make it. + +================== +Boot Xen & Domain0 +================== + + 1. replace elilo + elilo of RHEL5 can boot Xen and Dom0. + If you use old elilo (e.g RHEL4), please download from the below + http://elilo.sourceforge.net/cgi-bin/blosxom + and copy into /boot/efi/efi/redhat/ + # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi + + 2. modify elilo.conf (like the below) + # vi /boot/efi/efi/redhat/elilo.conf + prompt + timeout=20 + default=xen + relocatable + + image=vmlinuz-2.6.18.8-xen + label=xen + vmm=xen.gz + initrd=initrd-2.6.18.8-xen.img + read-only + append=" -- rhgb root=/dev/sda2" + +The append options before "--" are for xen hypervisor, +the options after "--" are for dom0. + +FYI, your machine may need console options like +"com1=19200,8n1 console=vga,com1". For example, +append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \ +console=ttyS0 root=/dev/sda2" + +===================================== +Getting and Building domU with pv_ops +===================================== + + 1. get pv_ops tree + # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/ + + 2. git branch (if necessary) + # cd linux-2.6-xen-ia64/ + # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19 + (Note: The current branch is xen-ia64-domu-minimal-2008may19. + But you would find the new branch. You can see with + "git branch -r" to get the branch lists. + http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/ + is also available. The tree is based on + git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test) + + + 3. copy .config for pv_ops of domU + # cp arch/ia64/configs/xen_domu_wip_defconfig .config + + 4. make kernel with pv_ops + # make oldconfig + # make + + 5. install the kernel and initrd + # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU + # make modules_install + # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \ + 2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \ + --builtin mptbase --builtin mptscsih --builtin uhci-hcd \ + --builtin ohci-hcd --builtin ehci-hcd + +======================== +Boot DomainU with pv_ops +======================== + + 1. make config of DomU + # vi /etc/xen/rhel5 + kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU" + ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img" + vcpus = 1 + memory = 512 + name = "rhel5" + disk = [ 'file:/root/rhel5.img,xvda1,w' ] + root = "/dev/xvda1 ro" + extra= "rhgb console=hvc0" + + 2. After boot xen and dom0, start xend + # /etc/init.d/xend start + ( In the debugging case, # XEND_DEBUG=1 xend trace_start ) + + 3. start domU + # xm create -c rhel5 + +========= +Reference +========= +- Wiki of Xen/IA64 upstream merge + http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge + +Written by Akio Takebe on 28 May 2008 From f8d1f99f3958c46cdc983743d75d0b31b9accb80 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:13 +0900 Subject: [PATCH 37/41] ia64/pv_ops: paravirtualized instruction checker. This patch implements a checker to detect instructions which should be paravirtualized instead of direct writing raw instruction. This patch does rough check so that it doesn't fully cover all cases, but it can detects most cases of paravirtualization breakage of hand written assembly codes. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/include/asm/native/pvchk_inst.h | 263 ++++++++++++++++++++++ arch/ia64/kernel/Makefile | 18 ++ arch/ia64/kernel/paravirt_inst.h | 4 +- arch/ia64/scripts/pvcheck.sed | 32 +++ 4 files changed, 316 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/include/asm/native/pvchk_inst.h create mode 100644 arch/ia64/scripts/pvcheck.sed diff --git a/arch/ia64/include/asm/native/pvchk_inst.h b/arch/ia64/include/asm/native/pvchk_inst.h new file mode 100644 index 000000000000..b8e6eb1090d7 --- /dev/null +++ b/arch/ia64/include/asm/native/pvchk_inst.h @@ -0,0 +1,263 @@ +#ifndef _ASM_NATIVE_PVCHK_INST_H +#define _ASM_NATIVE_PVCHK_INST_H + +/****************************************************************************** + * arch/ia64/include/asm/native/pvchk_inst.h + * Checker for paravirtualizations of privileged operations. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +/********************************************** + * Instructions paravirtualized for correctness + **********************************************/ + +/* "fc" and "thash" are privilege-sensitive instructions, meaning they + * may have different semantics depending on whether they are executed + * at PL0 vs PL!=0. When paravirtualized, these instructions mustn't + * be allowed to execute directly, lest incorrect semantics result. + */ + +#define fc .error "fc should not be used directly." +#define thash .error "thash should not be used directly." + +/* Note that "ttag" and "cover" are also privilege-sensitive; "ttag" + * is not currently used (though it may be in a long-format VHPT system!) + * and the semantics of cover only change if psr.ic is off which is very + * rare (and currently non-existent outside of assembly code + */ +#define ttag .error "ttag should not be used directly." +#define cover .error "cover should not be used directly." + +/* There are also privilege-sensitive registers. These registers are + * readable at any privilege level but only writable at PL0. + */ +#define cpuid .error "cpuid should not be used directly." +#define pmd .error "pmd should not be used directly." + +/* + * mov ar.eflag = + * mov = ar.eflag + */ + +/********************************************** + * Instructions paravirtualized for performance + **********************************************/ +/* + * Those instructions include '.' which can't be handled by cpp. + * or can't be handled by cpp easily. + * They are handled by sed instead of cpp. + */ + +/* for .S + * itc.i + * itc.d + * + * bsw.0 + * bsw.1 + * + * ssm psr.ic | PSR_DEFAULT_BITS + * ssm psr.ic + * rsm psr.ic + * ssm psr.i + * rsm psr.i + * rsm psr.i | psr.ic + * rsm psr.dt + * ssm psr.dt + * + * mov = cr.ifa + * mov = cr.itir + * mov = cr.isr + * mov = cr.iha + * mov = cr.ipsr + * mov = cr.iim + * mov = cr.iip + * mov = cr.ivr + * mov = psr + * + * mov cr.ifa = + * mov cr.itir = + * mov cr.iha = + * mov cr.ipsr = + * mov cr.ifs = + * mov cr.iip = + * mov cr.kr = + */ + +/* for intrinsics + * ssm psr.i + * rsm psr.i + * mov = psr + * mov = ivr + * mov = tpr + * mov cr.itm = + * mov eoi = + * mov rr[] = + * mov = rr[] + * mov = kr + * mov kr = + * ptc.ga + */ + +/************************************************************* + * define paravirtualized instrcution macros as nop to ingore. + * and check whether arguments are appropriate. + *************************************************************/ + +/* check whether reg is a regular register */ +.macro is_rreg_in reg + .ifc "\reg", "r0" + nop 0 + .exitm + .endif + ;; + mov \reg = r0 + ;; +.endm +#define IS_RREG_IN(reg) is_rreg_in reg ; + +#define IS_RREG_OUT(reg) \ + ;; \ + mov reg = r0 \ + ;; + +#define IS_RREG_CLOB(reg) IS_RREG_OUT(reg) + +/* check whether pred is a predicate register */ +#define IS_PRED_IN(pred) \ + ;; \ + (pred) nop 0 \ + ;; + +#define IS_PRED_OUT(pred) \ + ;; \ + cmp.eq pred, p0 = r0, r0 \ + ;; + +#define IS_PRED_CLOB(pred) IS_PRED_OUT(pred) + + +#define DO_SAVE_MIN(__COVER, SAVE_IFS, EXTRA, WORKAROUND) \ + nop 0 +#define MOV_FROM_IFA(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_ITIR(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_ISR(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IHA(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IPSR(pred, reg) \ + IS_PRED_IN(pred) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IIM(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IIP(reg) \ + IS_RREG_OUT(reg) +#define MOV_FROM_IVR(reg, clob) \ + IS_RREG_OUT(reg) \ + IS_RREG_CLOB(clob) +#define MOV_FROM_PSR(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_OUT(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IFA(reg, clob) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_ITIR(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IHA(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IPSR(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IFS(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_IIP(reg, clob) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define MOV_TO_KR(kr, reg, clob0, clob1) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define ITC_I(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define ITC_D(pred, reg, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \ + IS_PRED_IN(pred_i) \ + IS_PRED_IN(pred_d) \ + IS_RREG_IN(reg) \ + IS_RREG_CLOB(clob) +#define THASH(pred, reg0, reg1, clob) \ + IS_PRED_IN(pred) \ + IS_RREG_OUT(reg0) \ + IS_RREG_IN(reg1) \ + IS_RREG_CLOB(clob) +#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define RSM_PSR_IC(clob) \ + IS_RREG_CLOB(clob) +#define SSM_PSR_I(pred, pred_clob, clob) \ + IS_PRED_IN(pred) \ + IS_PRED_CLOB(pred_clob) \ + IS_RREG_CLOB(clob) +#define RSM_PSR_I(pred, clob0, clob1) \ + IS_PRED_IN(pred) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define RSM_PSR_I_IC(clob0, clob1, clob2) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) \ + IS_RREG_CLOB(clob2) +#define RSM_PSR_DT \ + nop 0 +#define SSM_PSR_DT_AND_SRLZ_I \ + nop 0 +#define BSW_0(clob0, clob1, clob2) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) \ + IS_RREG_CLOB(clob2) +#define BSW_1(clob0, clob1) \ + IS_RREG_CLOB(clob0) \ + IS_RREG_CLOB(clob1) +#define COVER \ + nop 0 +#define RFI \ + br.ret.sptk.many rp /* defining nop causes dependency error */ + +#endif /* _ASM_NATIVE_PVCHK_INST_H */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 87fea11aecb7..55e6ca8eebda 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -112,5 +112,23 @@ clean-files += $(objtree)/include/asm-ia64/nr-irqs.h ASM_PARAVIRT_OBJS = ivt.o entry.o define paravirtualized_native AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE +AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK +extra-y += pvchk-$(1) endef $(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj)))) + +# +# Checker for paravirtualizations of privileged operations. +# +quiet_cmd_pv_check_sed = PVCHK $@ +define cmd_pv_check_sed + sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@ +endef + +$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE + $(call if_changed_dep,as_s_S) +$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE + $(call if_changed,pv_check_sed) +$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE + $(call if_changed,as_o_S) +.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h index 5cad6fb2ed19..64d6d810c64b 100644 --- a/arch/ia64/kernel/paravirt_inst.h +++ b/arch/ia64/kernel/paravirt_inst.h @@ -20,7 +20,9 @@ * */ -#ifdef __IA64_ASM_PARAVIRTUALIZED_XEN +#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK +#include +#elif defined(__IA64_ASM_PARAVIRTUALIZED_XEN) #include #include #else diff --git a/arch/ia64/scripts/pvcheck.sed b/arch/ia64/scripts/pvcheck.sed new file mode 100644 index 000000000000..ba66ac2e4c60 --- /dev/null +++ b/arch/ia64/scripts/pvcheck.sed @@ -0,0 +1,32 @@ +# +# Checker for paravirtualizations of privileged operations. +# +s/ssm.*psr\.ic.*/.warning \"ssm psr.ic should not be used directly\"/g +s/rsm.*psr\.ic.*/.warning \"rsm psr.ic should not be used directly\"/g +s/ssm.*psr\.i.*/.warning \"ssm psr.i should not be used directly\"/g +s/rsm.*psr\.i.*/.warning \"rsm psr.i should not be used directly\"/g +s/ssm.*psr\.dt.*/.warning \"ssm psr.dt should not be used directly\"/g +s/rsm.*psr\.dt.*/.warning \"rsm psr.dt should not be used directly\"/g +s/mov.*=.*cr\.ifa/.warning \"cr.ifa should not used directly\"/g +s/mov.*=.*cr\.itir/.warning \"cr.itir should not used directly\"/g +s/mov.*=.*cr\.isr/.warning \"cr.isr should not used directly\"/g +s/mov.*=.*cr\.iha/.warning \"cr.iha should not used directly\"/g +s/mov.*=.*cr\.ipsr/.warning \"cr.ipsr should not used directly\"/g +s/mov.*=.*cr\.iim/.warning \"cr.iim should not used directly\"/g +s/mov.*=.*cr\.iip/.warning \"cr.iip should not used directly\"/g +s/mov.*=.*cr\.ivr/.warning \"cr.ivr should not used directly\"/g +s/mov.*=[^\.]*psr/.warning \"psr should not used directly\"/g # avoid ar.fpsr +s/mov.*=.*ar\.eflags/.warning \"ar.eflags should not used directly\"/g +s/mov.*cr\.ifa.*=.*/.warning \"cr.ifa should not used directly\"/g +s/mov.*cr\.itir.*=.*/.warning \"cr.itir should not used directly\"/g +s/mov.*cr\.iha.*=.*/.warning \"cr.iha should not used directly\"/g +s/mov.*cr\.ipsr.*=.*/.warning \"cr.ipsr should not used directly\"/g +s/mov.*cr\.ifs.*=.*/.warning \"cr.ifs should not used directly\"/g +s/mov.*cr\.iip.*=.*/.warning \"cr.iip should not used directly\"/g +s/mov.*cr\.kr.*=.*/.warning \"cr.kr should not used directly\"/g +s/mov.*ar\.eflags.*=.*/.warning \"ar.eflags should not used directly\"/g +s/itc\.i.*/.warning \"itc.i should not be used directly.\"/g +s/itc\.d.*/.warning \"itc.d should not be used directly.\"/g +s/bsw\.0/.warning \"bsw.0 should not be used directly.\"/g +s/bsw\.1/.warning \"bsw.1 should not be used directly.\"/g +s/ptc\.ga.*/.warning \"ptc.ga should not be used directly.\"/g From 62fdd7678a26efadd6ac5c2869543caff77d2df0 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 17 Oct 2008 12:14:13 -0700 Subject: [PATCH 38/41] [IA64] Add Variable Page Size and IA64 Support in Intel IOMMU The patch contains Intel IOMMU IA64 specific code. It defines new machvec dig_vtd, hooks for IOMMU, DMAR table detection, cache line flush function, etc. For a generic kernel with CONFIG_DMAR=y, if Intel IOMMU is detected, dig_vtd is used for machinve vector. Otherwise, kernel falls back to dig machine vector. Kernel parameter "machvec=dig" or "intel_iommu=off" can be used to force kernel to boot dig machine vector. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 17 ++++ arch/ia64/Makefile | 1 + arch/ia64/configs/generic_defconfig | 2 + arch/ia64/configs/tiger_defconfig | 2 + arch/ia64/dig/Makefile | 5 + arch/ia64/dig/dig_vtd_iommu.c | 59 +++++++++++ arch/ia64/dig/machvec_vtd.c | 3 + arch/ia64/include/asm/cacheflush.h | 2 + arch/ia64/include/asm/device.h | 3 + arch/ia64/include/asm/dma-mapping.h | 50 +++++++++ arch/ia64/include/asm/iommu.h | 16 +++ arch/ia64/include/asm/machvec.h | 2 + arch/ia64/include/asm/machvec_dig_vtd.h | 38 +++++++ arch/ia64/include/asm/machvec_init.h | 1 + arch/ia64/include/asm/pci.h | 3 + arch/ia64/include/asm/swiotlb.h | 56 ++++++++++ arch/ia64/kernel/Makefile | 4 + arch/ia64/kernel/acpi.c | 17 ++++ arch/ia64/kernel/msi_ia64.c | 80 +++++++++++++++ arch/ia64/kernel/pci-dma.c | 129 ++++++++++++++++++++++++ arch/ia64/kernel/pci-swiotlb.c | 46 +++++++++ arch/ia64/kernel/setup.c | 42 +++++--- arch/ia64/lib/flush.S | 55 ++++++++++ 23 files changed, 620 insertions(+), 13 deletions(-) create mode 100644 arch/ia64/dig/dig_vtd_iommu.c create mode 100644 arch/ia64/dig/machvec_vtd.c create mode 100644 arch/ia64/include/asm/iommu.h create mode 100644 arch/ia64/include/asm/machvec_dig_vtd.h create mode 100644 arch/ia64/include/asm/swiotlb.h create mode 100644 arch/ia64/kernel/pci-dma.c create mode 100644 arch/ia64/kernel/pci-swiotlb.c diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3b7aa38254a8..a924a84df325 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -117,6 +117,7 @@ config IA64_GENERIC select NUMA select ACPI_NUMA select SWIOTLB + select PCI_MSI help This selects the system type of your hardware. A "generic" kernel will run on any supported IA-64 system. However, if you configure @@ -124,6 +125,7 @@ config IA64_GENERIC generic For any supported IA-64 system DIG-compliant For DIG ("Developer's Interface Guide") compliant systems + DIG+Intel+IOMMU For DIG systems with Intel IOMMU HP-zx1/sx1000 For HP systems HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices. SGI-SN2 For SGI Altix systems @@ -136,6 +138,11 @@ config IA64_DIG bool "DIG-compliant" select SWIOTLB +config IA64_DIG_VTD + bool "DIG+Intel+IOMMU" + select DMAR + select PCI_MSI + config IA64_HP_ZX1 bool "HP-zx1/sx1000" help @@ -581,6 +588,16 @@ source "drivers/pci/hotplug/Kconfig" source "drivers/pcmcia/Kconfig" +config DMAR + bool "Support for DMA Remapping Devices (EXPERIMENTAL)" + depends on IA64_GENERIC && ACPI && EXPERIMENTAL + help + DMA remapping (DMAR) devices support enables independent address + translations for Direct Memory Access (DMA) from devices. + These DMA remapping devices are reported via ACPI tables + and include PCI device scope covered by these DMA + remapping devices. + endmenu endif diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 905d25b13d5a..04cecee1ed1b 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -53,6 +53,7 @@ libs-y += arch/ia64/lib/ core-y += arch/ia64/kernel/ arch/ia64/mm/ core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/ core-$(CONFIG_IA64_DIG) += arch/ia64/dig/ +core-$(CONFIG_IA64_DIG_VTD) += arch/ia64/dig/ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index 9f483976228f..e05f9e1d3faa 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -233,6 +233,8 @@ CONFIG_DMIID=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m +# CONFIG_DMAR is not set + # # Power management and ACPI # diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 797acf9066c1..c522edf23c62 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -172,6 +172,8 @@ CONFIG_DMIID=y CONFIG_BINFMT_ELF=y CONFIG_BINFMT_MISC=m +# CONFIG_DMAR is not set + # # Power management and ACPI # diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile index 971cd7870dd4..5c0283830bd6 100644 --- a/arch/ia64/dig/Makefile +++ b/arch/ia64/dig/Makefile @@ -6,4 +6,9 @@ # obj-y := setup.o +ifeq ($(CONFIG_DMAR), y) +obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o dig_vtd_iommu.o +else obj-$(CONFIG_IA64_GENERIC) += machvec.o +endif +obj-$(CONFIG_IA64_DIG_VTD) += dig_vtd_iommu.o diff --git a/arch/ia64/dig/dig_vtd_iommu.c b/arch/ia64/dig/dig_vtd_iommu.c new file mode 100644 index 000000000000..1c8a079017a3 --- /dev/null +++ b/arch/ia64/dig/dig_vtd_iommu.c @@ -0,0 +1,59 @@ +#include +#include +#include +#include + +void * +vtd_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flags) +{ + return intel_alloc_coherent(dev, size, dma_handle, flags); +} +EXPORT_SYMBOL_GPL(vtd_alloc_coherent); + +void +vtd_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + intel_free_coherent(dev, size, vaddr, dma_handle); +} +EXPORT_SYMBOL_GPL(vtd_free_coherent); + +dma_addr_t +vtd_map_single_attrs(struct device *dev, void *addr, size_t size, + int dir, struct dma_attrs *attrs) +{ + return intel_map_single(dev, (phys_addr_t)addr, size, dir); +} +EXPORT_SYMBOL_GPL(vtd_map_single_attrs); + +void +vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, + int dir, struct dma_attrs *attrs) +{ + intel_unmap_single(dev, iova, size, dir); +} +EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs); + +int +vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, + int dir, struct dma_attrs *attrs) +{ + return intel_map_sg(dev, sglist, nents, dir); +} +EXPORT_SYMBOL_GPL(vtd_map_sg_attrs); + +void +vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, + int nents, int dir, struct dma_attrs *attrs) +{ + intel_unmap_sg(dev, sglist, nents, dir); +} +EXPORT_SYMBOL_GPL(vtd_unmap_sg_attrs); + +int +vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ + return 0; +} +EXPORT_SYMBOL_GPL(vtd_dma_mapping_error); diff --git a/arch/ia64/dig/machvec_vtd.c b/arch/ia64/dig/machvec_vtd.c new file mode 100644 index 000000000000..7cd3eb471cad --- /dev/null +++ b/arch/ia64/dig/machvec_vtd.c @@ -0,0 +1,3 @@ +#define MACHVEC_PLATFORM_NAME dig_vtd +#define MACHVEC_PLATFORM_HEADER +#include diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h index afcfbda76e20..c8ce2719fee8 100644 --- a/arch/ia64/include/asm/cacheflush.h +++ b/arch/ia64/include/asm/cacheflush.h @@ -34,6 +34,8 @@ do { \ #define flush_dcache_mmap_unlock(mapping) do { } while (0) extern void flush_icache_range (unsigned long start, unsigned long end); +extern void clflush_cache_range(void *addr, int size); + #define flush_icache_user_range(vma, page, user_addr, len) \ do { \ diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index 3db6daf7f251..41ab85d66f33 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -10,6 +10,9 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif +#ifdef CONFIG_DMAR + void *iommu; /* hook for IOMMU specific extension */ +#endif }; #endif /* _ASM_IA64_DEVICE_H */ diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 06ff1ba21465..bbab7e2b0fc9 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -7,6 +7,49 @@ */ #include #include +#include + +struct dma_mapping_ops { + int (*mapping_error)(struct device *dev, + dma_addr_t dma_addr); + void* (*alloc_coherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp); + void (*free_coherent)(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr, + size_t size, int direction); + void (*unmap_single)(struct device *dev, dma_addr_t addr, + size_t size, int direction); + void (*sync_single_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_for_device)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_range_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_single_range_for_device)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_sg_for_cpu)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + void (*sync_sg_for_device)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + int (*map_sg)(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); + void (*unmap_sg)(struct device *hwdev, + struct scatterlist *sg, int nents, + int direction); + int (*dma_supported_op)(struct device *hwdev, u64 mask); + int is_phys; +}; + +extern struct dma_mapping_ops *dma_ops; +extern struct ia64_machine_vector ia64_mv; +extern void set_iommu_machvec(void); #define dma_alloc_coherent(dev, size, handle, gfp) \ platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA) @@ -96,4 +139,11 @@ dma_cache_sync (struct device *dev, void *vaddr, size_t size, #define dma_is_consistent(d, h) (1) /* all we do is coherent memory... */ +static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) +{ + return dma_ops; +} + + + #endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h new file mode 100644 index 000000000000..5fb2bb93de3b --- /dev/null +++ b/arch/ia64/include/asm/iommu.h @@ -0,0 +1,16 @@ +#ifndef _ASM_IA64_IOMMU_H +#define _ASM_IA64_IOMMU_H 1 + +#define cpu_has_x2apic 0 +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) + +extern void pci_iommu_shutdown(void); +extern void no_iommu_init(void); +extern int force_iommu, no_iommu; +extern int iommu_detected; +extern void iommu_dma_init(void); +extern void machvec_init(const char *name); +extern int forbid_dac; + +#endif diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 2b850ccafef5..ce9b1d05083a 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -120,6 +120,8 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # include # elif defined (CONFIG_IA64_DIG) # include +# elif defined(CONFIG_IA64_DIG_VTD) +# include # elif defined (CONFIG_IA64_HP_ZX1) # include # elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB) diff --git a/arch/ia64/include/asm/machvec_dig_vtd.h b/arch/ia64/include/asm/machvec_dig_vtd.h new file mode 100644 index 000000000000..3400b561e711 --- /dev/null +++ b/arch/ia64/include/asm/machvec_dig_vtd.h @@ -0,0 +1,38 @@ +#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h +#define _ASM_IA64_MACHVEC_DIG_VTD_h + +extern ia64_mv_setup_t dig_setup; +extern ia64_mv_dma_alloc_coherent vtd_alloc_coherent; +extern ia64_mv_dma_free_coherent vtd_free_coherent; +extern ia64_mv_dma_map_single_attrs vtd_map_single_attrs; +extern ia64_mv_dma_unmap_single_attrs vtd_unmap_single_attrs; +extern ia64_mv_dma_map_sg_attrs vtd_map_sg_attrs; +extern ia64_mv_dma_unmap_sg_attrs vtd_unmap_sg_attrs; +extern ia64_mv_dma_supported iommu_dma_supported; +extern ia64_mv_dma_mapping_error vtd_dma_mapping_error; +extern ia64_mv_dma_init pci_iommu_alloc; + +/* + * This stuff has dual use! + * + * For a generic kernel, the macros are used to initialize the + * platform's machvec structure. When compiling a non-generic kernel, + * the macros are used directly. + */ +#define platform_name "dig_vtd" +#define platform_setup dig_setup +#define platform_dma_init pci_iommu_alloc +#define platform_dma_alloc_coherent vtd_alloc_coherent +#define platform_dma_free_coherent vtd_free_coherent +#define platform_dma_map_single_attrs vtd_map_single_attrs +#define platform_dma_unmap_single_attrs vtd_unmap_single_attrs +#define platform_dma_map_sg_attrs vtd_map_sg_attrs +#define platform_dma_unmap_sg_attrs vtd_unmap_sg_attrs +#define platform_dma_sync_single_for_cpu machvec_dma_sync_single +#define platform_dma_sync_sg_for_cpu machvec_dma_sync_sg +#define platform_dma_sync_single_for_device machvec_dma_sync_single +#define platform_dma_sync_sg_for_device machvec_dma_sync_sg +#define platform_dma_supported iommu_dma_supported +#define platform_dma_mapping_error vtd_dma_mapping_error + +#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */ diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h index 7f21249fba3f..ef964b286842 100644 --- a/arch/ia64/include/asm/machvec_init.h +++ b/arch/ia64/include/asm/machvec_init.h @@ -1,3 +1,4 @@ +#include #include extern ia64_mv_send_ipi_t ia64_send_ipi; diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 0149097b736d..645ef06ddaa4 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -164,4 +164,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14); } +#ifdef CONFIG_DMAR +extern void pci_iommu_alloc(void); +#endif #endif /* _ASM_IA64_PCI_H */ diff --git a/arch/ia64/include/asm/swiotlb.h b/arch/ia64/include/asm/swiotlb.h new file mode 100644 index 000000000000..fb79423834d0 --- /dev/null +++ b/arch/ia64/include/asm/swiotlb.h @@ -0,0 +1,56 @@ +#ifndef ASM_IA64__SWIOTLB_H +#define ASM_IA64__SWIOTLB_H + +#include + +/* SWIOTLB interface */ + +extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, + size_t size, int dir); +extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags); +extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_cpu(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_for_device(struct device *hwdev, + dma_addr_t dev_addr, + size_t size, int dir); +extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, + dma_addr_t dev_addr, + unsigned long offset, + size_t size, int dir); +extern void swiotlb_sync_single_range_for_device(struct device *hwdev, + dma_addr_t dev_addr, + unsigned long offset, + size_t size, int dir); +extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern void swiotlb_sync_sg_for_device(struct device *hwdev, + struct scatterlist *sg, int nelems, + int dir); +extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); +extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); +extern void swiotlb_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); +extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); +extern void swiotlb_init(void); + +extern int swiotlb_force; + +#ifdef CONFIG_SWIOTLB +extern int swiotlb; +extern void pci_swiotlb_init(void); +#else +#define swiotlb 0 +static inline void pci_swiotlb_init(void) +{ +} +#endif + +#endif /* ASM_IA64__SWIOTLB_H */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 87fea11aecb7..af0e750705e3 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -42,6 +42,10 @@ obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper endif +obj-$(CONFIG_DMAR) += pci-dma.o +ifeq ($(CONFIG_DMAR), y) +obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +endif # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5d1eb7ee2bf6..8cc2f8a610c4 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -91,6 +91,9 @@ acpi_get_sysname(void) struct acpi_table_rsdp *rsdp; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; +#ifdef CONFIG_DMAR + u64 i, nentries; +#endif rsdp_phys = acpi_find_rsdp(); if (!rsdp_phys) { @@ -123,6 +126,18 @@ acpi_get_sysname(void) return "sn2"; } +#ifdef CONFIG_DMAR + /* Look for Intel IOMMU */ + nentries = (hdr->length - sizeof(*hdr)) / + sizeof(xsdt->table_offset_entry[0]); + for (i = 0; i < nentries; i++) { + hdr = __va(xsdt->table_offset_entry[i]); + if (strncmp(hdr->signature, ACPI_SIG_DMAR, + sizeof(ACPI_SIG_DMAR) - 1) == 0) + return "dig_vtd"; + } +#endif + return "dig"; #else # if defined (CONFIG_IA64_HP_SIM) @@ -137,6 +152,8 @@ acpi_get_sysname(void) return "uv"; # elif defined (CONFIG_IA64_DIG) return "dig"; +# elif defined(CONFIG_IA64_DIG_VTD) + return "dig_vtd"; # else # error Unknown platform. Fix acpi.c. # endif diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 60c6ef67ebb2..702a09c13238 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -162,3 +163,82 @@ void arch_teardown_msi_irq(unsigned int irq) return ia64_teardown_msi_irq(irq); } + +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + int cpu = first_cpu(mask); + + + if (!cpu_online(cpu)) + return; + + if (irq_prepare_move(irq, cpu)) + return; + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DESTID_MASK; + msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + + dmar_msi_write(irq, &msg); + irq_desc[irq].affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ia64_ack_msi_irq, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ia64_msi_retrigger_irq, +}; + +static int +msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned dest; + cpumask_t mask; + + cpus_and(mask, irq_to_domain(irq), cpu_online_map); + dest = cpu_physical_id(first_cpu(mask)); + + msg->address_hi = 0; + msg->address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DESTID_CPU(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(cfg->vector); + return 0; +} + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif /* CONFIG_DMAR */ + diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c new file mode 100644 index 000000000000..10a75b557650 --- /dev/null +++ b/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,129 @@ +/* + * Dynamic DMA mapping support. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_DMAR + +#include +#include + +#include +#include + +dma_addr_t bad_dma_address __read_mostly; +EXPORT_SYMBOL(bad_dma_address); + +static int iommu_sac_force __read_mostly; + +int no_iommu __read_mostly; +#ifdef CONFIG_IOMMU_DEBUG +int force_iommu __read_mostly = 1; +#else +int force_iommu __read_mostly; +#endif + +/* Set this to 1 if there is a HW IOMMU in the system */ +int iommu_detected __read_mostly; + +/* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to i386. */ +struct device fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, + .dma_mask = &fallback_dev.coherent_dma_mask, +}; + +void __init pci_iommu_alloc(void) +{ + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + +static int __init pci_iommu_init(void) +{ + if (iommu_detected) + intel_iommu_init(); + + return 0; +} + +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); + +void pci_iommu_shutdown(void) +{ + return; +} + +void __init +iommu_dma_init(void) +{ + return; +} + +struct dma_mapping_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + dev_info(dev, "Disallowing DAC for device\n"); + return 0; + } +#endif + + if (ops->dma_supported_op) + return ops->dma_supported_op(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_24BIT_MASK) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. + + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + dev_info(dev, "Force SAC with mask %lx\n", mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(iommu_dma_supported); + +#endif diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c new file mode 100644 index 000000000000..16c50516dbc1 --- /dev/null +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -0,0 +1,46 @@ +/* Glue code to lib/swiotlb.c */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +int swiotlb __read_mostly; +EXPORT_SYMBOL(swiotlb); + +struct dma_mapping_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc_coherent = swiotlb_alloc_coherent, + .free_coherent = swiotlb_free_coherent, + .map_single = swiotlb_map_single, + .unmap_single = swiotlb_unmap_single, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, + .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg, + .unmap_sg = swiotlb_unmap_sg, + .dma_supported_op = swiotlb_dma_supported, +}; + +void __init pci_swiotlb_init(void) +{ + if (!iommu_detected) { +#ifdef CONFIG_IA64_GENERIC + swiotlb = 1; + printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); + machvec_init("dig"); + swiotlb_init(); + dma_ops = &swiotlb_dma_ops; +#else + panic("Unable to find Intel IOMMU"); +#endif + } +} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index de636b215677..2a67a74a48fe 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -116,6 +116,13 @@ unsigned int num_io_spaces; */ #define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ unsigned long ia64_i_cache_stride_shift = ~0; +/* + * "clflush_cache_range()" needs to know what processor dependent stride size to + * use when it flushes cache lines including both d-cache and i-cache. + */ +/* Safest way to go: 32 bytes by 32 bytes */ +#define CACHE_STRIDE_SHIFT 5 +unsigned long ia64_cache_stride_shift = ~0; /* * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This @@ -847,13 +854,14 @@ setup_per_cpu_areas (void) } /* - * Calculate the max. cache line size. + * Do the following calculations: * - * In addition, the minimum of the i-cache stride sizes is calculated for - * "flush_icache_range()". + * 1. the max. cache line size. + * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". + * 3. the minimum of the cache stride sizes for "clflush_cache_range()". */ static void __cpuinit -get_max_cacheline_size (void) +get_cache_info(void) { unsigned long line_size, max = 1; u64 l, levels, unique_caches; @@ -867,12 +875,14 @@ get_max_cacheline_size (void) max = SMP_CACHE_BYTES; /* Safest setup for "flush_icache_range()" */ ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; + /* Safest setup for "clflush_cache_range()" */ + ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; goto out; } for (l = 0; l < levels; ++l) { - status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, - &cci); + /* cache_type (data_or_unified)=2 */ + status = ia64_pal_cache_config_info(l, 2, &cci); if (status != 0) { printk(KERN_ERR "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n", @@ -880,15 +890,21 @@ get_max_cacheline_size (void) max = SMP_CACHE_BYTES; /* The safest setup for "flush_icache_range()" */ cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + /* The safest setup for "clflush_cache_range()" */ + ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; cci.pcci_unified = 1; + } else { + if (cci.pcci_stride < ia64_cache_stride_shift) + ia64_cache_stride_shift = cci.pcci_stride; + + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; } - line_size = 1 << cci.pcci_line_size; - if (line_size > max) - max = line_size; + if (!cci.pcci_unified) { - status = ia64_pal_cache_config_info(l, - /* cache_type (instruction)= */ 1, - &cci); + /* cache_type (instruction)=1*/ + status = ia64_pal_cache_config_info(l, 1, &cci); if (status != 0) { printk(KERN_ERR "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n", @@ -942,7 +958,7 @@ cpu_init (void) } #endif - get_max_cacheline_size(); + get_cache_info(); /* * We can't pass "local_cpu_data" to identify_cpu() because we haven't called diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S index 2a0d27f2f21b..1d8c88860063 100644 --- a/arch/ia64/lib/flush.S +++ b/arch/ia64/lib/flush.S @@ -60,3 +60,58 @@ GLOBAL_ENTRY(flush_icache_range) mov ar.lc=r3 // restore ar.lc br.ret.sptk.many rp END(flush_icache_range) + + /* + * clflush_cache_range(start,size) + * + * Flush cache lines from start to start+size-1. + * + * Must deal with range from start to start+size-1 but nothing else + * (need to be careful not to touch addresses that may be + * unmapped). + * + * Note: "in0" and "in1" are preserved for debugging purposes. + */ + .section .kprobes.text,"ax" +GLOBAL_ENTRY(clflush_cache_range) + + .prologue + alloc r2=ar.pfs,2,0,0,0 + movl r3=ia64_cache_stride_shift + mov r21=1 + add r22=in1,in0 + ;; + ld8 r20=[r3] // r20: stride shift + sub r22=r22,r0,1 // last byte address + ;; + shr.u r23=in0,r20 // start / (stride size) + shr.u r22=r22,r20 // (last byte address) / (stride size) + shl r21=r21,r20 // r21: stride size of the i-cache(s) + ;; + sub r8=r22,r23 // number of strides - 1 + shl r24=r23,r20 // r24: addresses for "fc" = + // "start" rounded down to stride + // boundary + .save ar.lc,r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + mov ar.lc=r8 + ;; + /* + * 32 byte aligned loop, even number of (actually 2) bundles + */ +.Loop_fc: + fc r24 // issuable on M0 only + add r24=r21,r24 // we flush "stride size" bytes per iteration + nop.i 0 + br.cloop.sptk.few .Loop_fc + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(clflush_cache_range) From 9f3541ed2cc5e74a4ace7052701055dedbe46938 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Sep 2008 18:16:52 +0200 Subject: [PATCH 39/41] [IA64] remove sys32_pause It's just a duplicate of the native sys_pause, which we can use after defining __ARCH_WANT_SYS_PAUSE. Signed-off-by: Christoph Hellwig Signed-off-by: Tony Luck --- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/ia32/sys_ia32.c | 8 -------- arch/ia64/include/asm/unistd.h | 1 + 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 53505bb04771..fa9844305610 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -202,7 +202,7 @@ ia32_syscall_table: data8 sys32_ptrace data8 sys32_alarm data8 sys_ni_syscall - data8 sys32_pause + data8 sys_pause data8 compat_sys_utime /* 30 */ data8 sys_ni_syscall /* old stty syscall holder */ data8 sys_ni_syscall /* old gtty syscall holder */ diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index f4430bb4bbdc..0fd06b471bc1 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1703,14 +1703,6 @@ out: return ret; } -asmlinkage int -sys32_pause (void) -{ - current->state = TASK_INTERRUPTIBLE; - schedule(); - return -ERESTARTNOHAND; -} - asmlinkage int sys32_msync (unsigned int start, unsigned int len, int flags) { diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index d535833aab5e..f791576355ad 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -337,6 +337,7 @@ # define __ARCH_WANT_SYS_NICE # define __ARCH_WANT_SYS_OLD_GETRLIMIT # define __ARCH_WANT_SYS_OLDUMOUNT +# define __ARCH_WANT_SYS_PAUSE # define __ARCH_WANT_SYS_SIGPENDING # define __ARCH_WANT_SYS_SIGPROCMASK # define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND From 0f32dc9dc2cfe3e4608fd5cf7afa5d2acb6e3c10 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 Sep 2008 18:18:10 +0200 Subject: [PATCH 40/41] [IA64] kill sys32_pipe It's just a duplicate of the generic sys_pipe that still lacks the recently added error handling. Signed-off-by: Christoph Hellwig Signed-off-by: Tony Luck --- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/ia32/sys_ia32.c | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index fa9844305610..82a7a0bc5d2e 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -215,7 +215,7 @@ ia32_syscall_table: data8 sys_mkdir data8 sys_rmdir /* 40 */ data8 sys_dup - data8 sys32_pipe + data8 sys_pipe data8 compat_sys_times data8 sys_ni_syscall /* old prof syscall holder */ data8 sys32_brk /* 45 */ diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 0fd06b471bc1..800a7174a5cb 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1098,21 +1098,6 @@ sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len, return ret; } -asmlinkage long -sys32_pipe (int __user *fd) -{ - int retval; - int fds[2]; - - retval = do_pipe_flags(fds, 0); - if (retval) - goto out; - if (copy_to_user(fd, fds, sizeof(fds))) - retval = -EFAULT; - out: - return retval; -} - asmlinkage unsigned long sys32_alarm (unsigned int seconds) { From a9894a4a3c7fb53258d46dafe9dd4f45369fd9dd Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 17 Oct 2008 13:47:53 -0700 Subject: [PATCH 41/41] [IA64] Fix annoying IA64_TR_ALLOC_MAX message. Madison cpus support 64 TR registers. Increase IA64_TR_ALLOC_MAX to 64. Also fixup the messages that get printed when this limit is exceeded. Repeating for every cpu is too noisy. Signed-off-by: Tony Luck --- arch/ia64/include/asm/kregs.h | 2 +- arch/ia64/mm/tlb.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/ia64/include/asm/kregs.h b/arch/ia64/include/asm/kregs.h index aefcdfee7f23..39e65f6639f5 100644 --- a/arch/ia64/include/asm/kregs.h +++ b/arch/ia64/include/asm/kregs.h @@ -32,7 +32,7 @@ #define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */ #define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/ -#define IA64_TR_ALLOC_MAX 32 /* Max number for dynamic use*/ +#define IA64_TR_ALLOC_MAX 64 /* Max number for dynamic use*/ /* Processor status register bits: */ #define IA64_PSR_BE_BIT 1 diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 8caf42471f0d..bd9818a36b47 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -362,9 +362,13 @@ ia64_tlb_init (void) per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { + static int justonce = 1; per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; - printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!" - "IA64_TR_ALLOC_MAX should be extended\n"); + if (justonce) { + justonce = 0; + printk(KERN_DEBUG "TR register number exceeds " + "IA64_TR_ALLOC_MAX!\n"); + } } }