diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index ba86bfba95ac..d73bd4db5e84 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -706,6 +706,13 @@ config NR_CPUS This is purely to save memory - each supported CPU adds approximately eight kilobytes to the kernel image. +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" + depends on SMP && HOTPLUG && EXPERIMENTAL + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + source "kernel/Kconfig.preempt" config GUSA diff --git a/arch/sh/boards/board-urquell.c b/arch/sh/boards/board-urquell.c index a9bd6e3ee10b..d81c609decc7 100644 --- a/arch/sh/boards/board-urquell.c +++ b/arch/sh/boards/board-urquell.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * bit 1234 5678 @@ -203,6 +204,8 @@ static void __init urquell_setup(char **cmdline_p) printk(KERN_INFO "Renesas Technology Corp. Urquell support.\n"); pm_power_off = urquell_power_off; + + register_smp_ops(&shx3_smp_ops); } /* diff --git a/arch/sh/boards/mach-sdk7786/setup.c b/arch/sh/boards/mach-sdk7786/setup.c index 0c057a93fe29..2ec1ea5cf8ef 100644 --- a/arch/sh/boards/mach-sdk7786/setup.c +++ b/arch/sh/boards/mach-sdk7786/setup.c @@ -21,6 +21,7 @@ #include #include #include +#include static struct resource heartbeat_resource = { .start = 0x07fff8b0, @@ -189,6 +190,8 @@ static void __init sdk7786_setup(char **cmdline_p) machine_ops.restart = sdk7786_restart; pm_power_off = sdk7786_power_off; + + register_smp_ops(&shx3_smp_ops); } /* diff --git a/arch/sh/boards/mach-x3proto/setup.c b/arch/sh/boards/mach-x3proto/setup.c index e284592fd42a..102bf56befb4 100644 --- a/arch/sh/boards/mach-x3proto/setup.c +++ b/arch/sh/boards/mach-x3proto/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include static struct resource heartbeat_resources[] = { [0] = { @@ -152,7 +153,13 @@ static void __init x3proto_init_irq(void) __raw_writel(__raw_readl(0xfe410000) | (1 << 21), 0xfe410000); } +static void __init x3proto_setup(char **cmdline_p) +{ + register_smp_ops(&shx3_smp_ops); +} + static struct sh_machine_vector mv_x3proto __initmv = { .mv_name = "x3proto", + .mv_setup = x3proto_setup, .mv_init_irq = x3proto_init_irq, }; diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h index 99c593b3a827..02c2f0102cfa 100644 --- a/arch/sh/include/asm/irq.h +++ b/arch/sh/include/asm/irq.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_IRQ_H #define __ASM_SH_IRQ_H +#include #include /* @@ -50,6 +51,8 @@ static inline int generic_irq_demux(int irq) #define irq_demux(irq) sh_mv.mv_irq_demux(irq) void init_IRQ(void); +void migrate_irqs(void); + asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs); #ifdef CONFIG_IRQSTACKS diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h index 26b3f026eec9..0a58cb25a658 100644 --- a/arch/sh/include/asm/processor.h +++ b/arch/sh/include/asm/processor.h @@ -85,6 +85,10 @@ struct sh_cpuinfo { struct tlb_info itlb; struct tlb_info dtlb; +#ifdef CONFIG_SMP + struct task_struct *idle; +#endif + unsigned long flags; } __attribute__ ((aligned(L1_CACHE_BYTES))); diff --git a/arch/sh/include/asm/smp-ops.h b/arch/sh/include/asm/smp-ops.h new file mode 100644 index 000000000000..c590f76856f1 --- /dev/null +++ b/arch/sh/include/asm/smp-ops.h @@ -0,0 +1,51 @@ +#ifndef __ASM_SH_SMP_OPS_H +#define __ASM_SH_SMP_OPS_H + +struct plat_smp_ops { + void (*smp_setup)(void); + unsigned int (*smp_processor_id)(void); + void (*prepare_cpus)(unsigned int max_cpus); + void (*start_cpu)(unsigned int cpu, unsigned long entry_point); + void (*send_ipi)(unsigned int cpu, unsigned int message); + int (*cpu_disable)(unsigned int cpu); + void (*cpu_die)(unsigned int cpu); + void (*play_dead)(void); +}; + +extern struct plat_smp_ops *mp_ops; +extern struct plat_smp_ops shx3_smp_ops; + +#ifdef CONFIG_SMP + +static inline void plat_smp_setup(void) +{ + BUG_ON(!mp_ops); + mp_ops->smp_setup(); +} + +static inline void play_dead(void) +{ + mp_ops->play_dead(); +} + +extern void register_smp_ops(struct plat_smp_ops *ops); + +#else + +static inline void plat_smp_setup(void) +{ + /* UP, nothing to do ... */ +} + +static inline void register_smp_ops(struct plat_smp_ops *ops) +{ +} + +static inline void play_dead(void) +{ + BUG(); +} + +#endif /* CONFIG_SMP */ + +#endif /* __ASM_SH_SMP_OPS_H */ diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 53ef26ced75f..9070d943ddde 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -3,15 +3,16 @@ #include #include +#include #ifdef CONFIG_SMP #include #include #include +#include #define raw_smp_processor_id() (current_thread_info()->cpu) -#define hard_smp_processor_id() plat_smp_processor_id() /* Map from cpu id to sequential logical cpu number. */ extern int __cpu_number_map[NR_CPUS]; @@ -30,20 +31,43 @@ enum { SMP_MSG_NR, /* must be last */ }; +DECLARE_PER_CPU(int, cpu_state); + void smp_message_recv(unsigned int msg); void smp_timer_broadcast(const struct cpumask *mask); void local_timer_interrupt(void); void local_timer_setup(unsigned int cpu); - -void plat_smp_setup(void); -void plat_prepare_cpus(unsigned int max_cpus); -int plat_smp_processor_id(void); -void plat_start_cpu(unsigned int cpu, unsigned long entry_point); -void plat_send_ipi(unsigned int cpu, unsigned int message); +void local_timer_stop(unsigned int cpu); void arch_send_call_function_single_ipi(int cpu); -extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); +void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +void native_play_dead(void); +void native_cpu_die(unsigned int cpu); +int native_cpu_disable(unsigned int cpu); + +#ifdef CONFIG_HOTPLUG_CPU +void play_dead_common(void); +extern int __cpu_disable(void); + +static inline void __cpu_die(unsigned int cpu) +{ + extern struct plat_smp_ops *mp_ops; /* private */ + + mp_ops->cpu_die(cpu); +} +#endif + +static inline int hard_smp_processor_id(void) +{ + extern struct plat_smp_ops *mp_ops; /* private */ + + if (!mp_ops) + return 0; /* boot CPU */ + + return mp_ops->smp_processor_id(); +} #else diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index c98b4574c44e..de865cac02ee 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -1,7 +1,7 @@ /* * SH-X3 SMP * - * Copyright (C) 2007 - 2008 Paul Mundt + * Copyright (C) 2007 - 2010 Paul Mundt * Copyright (C) 2007 Magnus Damm * * This file is subject to the terms and conditions of the GNU General Public @@ -9,16 +9,22 @@ * for more details. */ #include +#include #include #include #include #include +#include +#include +#include +#include #define STBCR_REG(phys_id) (0xfe400004 | (phys_id << 12)) #define RESET_REG(phys_id) (0xfe400008 | (phys_id << 12)) #define STBCR_MSTP 0x00000001 #define STBCR_RESET 0x00000002 +#define STBCR_SLEEP 0x00000004 #define STBCR_LTSLP 0x80000000 static irqreturn_t ipi_interrupt_handler(int irq, void *arg) @@ -37,7 +43,7 @@ static irqreturn_t ipi_interrupt_handler(int irq, void *arg) return IRQ_HANDLED; } -void __init plat_smp_setup(void) +static void shx3_smp_setup(void) { unsigned int cpu = 0; int i, num; @@ -63,7 +69,7 @@ void __init plat_smp_setup(void) printk(KERN_INFO "Detected %i available secondary CPU(s)\n", num); } -void __init plat_prepare_cpus(unsigned int max_cpus) +static void shx3_prepare_cpus(unsigned int max_cpus) { int i; @@ -74,9 +80,12 @@ void __init plat_prepare_cpus(unsigned int max_cpus) for (i = 0; i < SMP_MSG_NR; i++) request_irq(104 + i, ipi_interrupt_handler, IRQF_DISABLED | IRQF_PERCPU, "IPI", (void *)(long)i); + + for (i = 0; i < max_cpus; i++) + set_cpu_present(i, true); } -void plat_start_cpu(unsigned int cpu, unsigned long entry_point) +static void shx3_start_cpu(unsigned int cpu, unsigned long entry_point) { if (__in_29bit_mode()) __raw_writel(entry_point, RESET_REG(cpu)); @@ -93,12 +102,12 @@ void plat_start_cpu(unsigned int cpu, unsigned long entry_point) __raw_writel(STBCR_RESET | STBCR_LTSLP, STBCR_REG(cpu)); } -int plat_smp_processor_id(void) +static unsigned int shx3_smp_processor_id(void) { return __raw_readl(0xff000048); /* CPIDR */ } -void plat_send_ipi(unsigned int cpu, unsigned int message) +static void shx3_send_ipi(unsigned int cpu, unsigned int message) { unsigned long addr = 0xfe410070 + (cpu * 4); @@ -106,3 +115,52 @@ void plat_send_ipi(unsigned int cpu, unsigned int message) __raw_writel(1 << (message << 2), addr); /* C0INTICI..CnINTICI */ } + +static void shx3_update_boot_vector(unsigned int cpu) +{ + __raw_writel(STBCR_MSTP, STBCR_REG(cpu)); + while (!(__raw_readl(STBCR_REG(cpu)) & STBCR_MSTP)) + cpu_relax(); + __raw_writel(STBCR_RESET, STBCR_REG(cpu)); +} + +static int __cpuinit +shx3_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned int)hcpu; + + switch (action) { + case CPU_UP_PREPARE: + shx3_update_boot_vector(cpu); + break; + case CPU_ONLINE: + pr_info("CPU %u is now online\n", cpu); + break; + case CPU_DEAD: + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata shx3_cpu_notifier = { + .notifier_call = shx3_cpu_callback, +}; + +static int __cpuinit register_shx3_cpu_notifier(void) +{ + register_hotcpu_notifier(&shx3_cpu_notifier); + return 0; +} +late_initcall(register_shx3_cpu_notifier); + +struct plat_smp_ops shx3_smp_ops = { + .smp_setup = shx3_smp_setup, + .prepare_cpus = shx3_prepare_cpus, + .start_cpu = shx3_start_cpu, + .smp_processor_id = shx3_smp_processor_id, + .send_ipi = shx3_send_ipi, + .cpu_die = native_cpu_die, + .cpu_disable = native_cpu_disable, + .play_dead = native_play_dead, +}; diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 273f890b17ae..425d604e3a28 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -19,6 +19,7 @@ #include #include #include +#include void (*pm_idle)(void) = NULL; @@ -89,10 +90,13 @@ void cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(1); - while (!need_resched() && cpu_online(cpu)) { + while (!need_resched()) { check_pgt_cache(); rmb(); + if (cpu_is_offline(cpu)) + play_dead(); + local_irq_disable(); /* Don't trace irqs off for idle */ stop_critical_timings(); @@ -133,7 +137,7 @@ static void do_nothing(void *unused) void stop_this_cpu(void *unused) { local_irq_disable(); - cpu_clear(smp_processor_id(), cpu_online_map); + set_cpu_online(smp_processor_id(), false); for (;;) cpu_sleep(); diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index f6a9319c28e2..257de1f0692b 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -292,3 +293,44 @@ int __init arch_probe_nr_irqs(void) return 0; } #endif + +#ifdef CONFIG_HOTPLUG_CPU +static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) +{ + printk(KERN_INFO "IRQ%u: moving from cpu%u to cpu%u\n", + irq, desc->node, cpu); + + raw_spin_lock_irq(&desc->lock); + desc->chip->set_affinity(irq, cpumask_of(cpu)); + raw_spin_unlock_irq(&desc->lock); +} + +/* + * The CPU has been marked offline. Migrate IRQs off this CPU. If + * the affinity settings do not allow other CPUs, force them onto any + * available CPU. + */ +void migrate_irqs(void) +{ + struct irq_desc *desc; + unsigned int irq, cpu = smp_processor_id(); + + for_each_irq_desc(irq, desc) { + if (desc->node == cpu) { + unsigned int newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); + if (newcpu >= nr_cpu_ids) { + if (printk_ratelimit()) + printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", + irq, cpu); + + cpumask_setall(desc->affinity); + newcpu = cpumask_any_and(desc->affinity, + cpu_online_mask); + } + + route_irq(desc, irq, newcpu); + } + } +} +#endif diff --git a/arch/sh/kernel/localtimer.c b/arch/sh/kernel/localtimer.c index 0b04e7d4a9b9..8bfc6dfa8b94 100644 --- a/arch/sh/kernel/localtimer.c +++ b/arch/sh/kernel/localtimer.c @@ -44,7 +44,7 @@ static void dummy_timer_set_mode(enum clock_event_mode mode, { } -void __cpuinit local_timer_setup(unsigned int cpu) +void local_timer_setup(unsigned int cpu) { struct clock_event_device *clk = &per_cpu(local_clockevent, cpu); @@ -60,3 +60,7 @@ void __cpuinit local_timer_setup(unsigned int cpu) clockevents_register_device(clk); } + +void local_timer_stop(unsigned int cpu) +{ +} diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 8870d6ba64bf..14735d5ede52 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -39,6 +39,7 @@ #include #include #include +#include #include /* @@ -459,9 +460,7 @@ void __init setup_arch(char **cmdline_p) if (likely(sh_mv.mv_setup)) sh_mv.mv_setup(cmdline_p); -#ifdef CONFIG_SMP plat_smp_setup(); -#endif } /* processor boot mode configuration */ diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 002cc612deef..86cd6f94b53b 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -3,7 +3,7 @@ * * SMP support for the SuperH processors. * - * Copyright (C) 2002 - 2008 Paul Mundt + * Copyright (C) 2002 - 2010 Paul Mundt * Copyright (C) 2006 - 2007 Akio Idehara * * This file is subject to the terms and conditions of the GNU General Public @@ -31,7 +31,20 @@ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ int __cpu_logical_map[NR_CPUS]; /* Map logical to physical */ -static inline void __init smp_store_cpu_info(unsigned int cpu) +struct plat_smp_ops *mp_ops = NULL; + +/* State of each CPU */ +DEFINE_PER_CPU(int, cpu_state) = { 0 }; + +void __cpuinit register_smp_ops(struct plat_smp_ops *ops) +{ + if (mp_ops) + printk(KERN_WARNING "Overriding previously set SMP ops\n"); + + mp_ops = ops; +} + +static inline void __cpuinit smp_store_cpu_info(unsigned int cpu) { struct sh_cpuinfo *c = cpu_data + cpu; @@ -46,14 +59,14 @@ void __init smp_prepare_cpus(unsigned int max_cpus) init_new_context(current, &init_mm); current_thread_info()->cpu = cpu; - plat_prepare_cpus(max_cpus); + mp_ops->prepare_cpus(max_cpus); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(&cpu_possible_map); #endif } -void __devinit smp_prepare_boot_cpu(void) +void __init smp_prepare_boot_cpu(void) { unsigned int cpu = smp_processor_id(); @@ -62,37 +75,137 @@ void __devinit smp_prepare_boot_cpu(void) set_cpu_online(cpu, true); set_cpu_possible(cpu, true); + + per_cpu(cpu_state, cpu) = CPU_ONLINE; } +#ifdef CONFIG_HOTPLUG_CPU +void native_cpu_die(unsigned int cpu) +{ + unsigned int i; + + for (i = 0; i < 10; i++) { + smp_rmb(); + if (per_cpu(cpu_state, cpu) == CPU_DEAD) { + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); + + return; + } + + msleep(100); + } + + pr_err("CPU %u didn't die...\n", cpu); +} + +int native_cpu_disable(unsigned int cpu) +{ + return cpu == 0 ? -EPERM : 0; +} + +void play_dead_common(void) +{ + idle_task_exit(); + irq_ctx_exit(raw_smp_processor_id()); + mb(); + + __get_cpu_var(cpu_state) = CPU_DEAD; + local_irq_disable(); +} + +void native_play_dead(void) +{ + play_dead_common(); +} + +int __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + struct task_struct *p; + int ret; + + ret = mp_ops->cpu_disable(cpu); + if (ret) + return ret; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Stop the local timer for this CPU. + */ + local_timer_stop(cpu); + + /* + * Flush user cache and TLB mappings, and then remove this CPU + * from the vm mask set of all processes. + */ + flush_cache_all(); + local_flush_tlb_all(); + + read_lock(&tasklist_lock); + for_each_process(p) + if (p->mm) + cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); + read_unlock(&tasklist_lock); + + return 0; +} +#else /* ... !CONFIG_HOTPLUG_CPU */ +int native_cpu_disable(void) +{ + return -ENOSYS; +} + +void native_cpu_die(unsigned int cpu) +{ + /* We said "no" in __cpu_disable */ + BUG(); +} + +void native_play_dead(void) +{ + BUG(); +} +#endif + asmlinkage void __cpuinit start_secondary(void) { - unsigned int cpu; + unsigned int cpu = smp_processor_id(); struct mm_struct *mm = &init_mm; enable_mmu(); atomic_inc(&mm->mm_count); atomic_inc(&mm->mm_users); current->active_mm = mm; - BUG_ON(current->mm); enter_lazy_tlb(mm, current); + local_flush_tlb_all(); per_cpu_trap_init(); preempt_disable(); - notify_cpu_starting(smp_processor_id()); + notify_cpu_starting(cpu); local_irq_enable(); - cpu = smp_processor_id(); - /* Enable local timers */ local_timer_setup(cpu); calibrate_delay(); smp_store_cpu_info(cpu); - cpu_set(cpu, cpu_online_map); + set_cpu_online(cpu, true); + per_cpu(cpu_state, cpu) = CPU_ONLINE; cpu_idle(); } @@ -111,12 +224,19 @@ int __cpuinit __cpu_up(unsigned int cpu) struct task_struct *tsk; unsigned long timeout; - tsk = fork_idle(cpu); - if (IS_ERR(tsk)) { - printk(KERN_ERR "Failed forking idle task for cpu %d\n", cpu); - return PTR_ERR(tsk); + tsk = cpu_data[cpu].idle; + if (!tsk) { + tsk = fork_idle(cpu); + if (IS_ERR(tsk)) { + pr_err("Failed forking idle task for cpu %d\n", cpu); + return PTR_ERR(tsk); + } + + cpu_data[cpu].idle = tsk; } + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Fill in data in head.S for secondary cpus */ stack_start.sp = tsk->thread.sp; stack_start.thread_info = tsk->stack; @@ -127,7 +247,7 @@ int __cpuinit __cpu_up(unsigned int cpu) (unsigned long)&stack_start + sizeof(stack_start)); wmb(); - plat_start_cpu(cpu, (unsigned long)_stext); + mp_ops->start_cpu(cpu, (unsigned long)_stext); timeout = jiffies + HZ; while (time_before(jiffies, timeout)) { @@ -135,6 +255,7 @@ int __cpuinit __cpu_up(unsigned int cpu) break; udelay(10); + barrier(); } if (cpu_online(cpu)) @@ -159,7 +280,7 @@ void __init smp_cpus_done(unsigned int max_cpus) void smp_send_reschedule(int cpu) { - plat_send_ipi(cpu, SMP_MSG_RESCHEDULE); + mp_ops->send_ipi(cpu, SMP_MSG_RESCHEDULE); } void smp_send_stop(void) @@ -172,12 +293,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - plat_send_ipi(cpu, SMP_MSG_FUNCTION); + mp_ops->send_ipi(cpu, SMP_MSG_FUNCTION); } void arch_send_call_function_single_ipi(int cpu) { - plat_send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); + mp_ops->send_ipi(cpu, SMP_MSG_FUNCTION_SINGLE); } void smp_timer_broadcast(const struct cpumask *mask) @@ -185,7 +306,7 @@ void smp_timer_broadcast(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - plat_send_ipi(cpu, SMP_MSG_TIMER); + mp_ops->send_ipi(cpu, SMP_MSG_TIMER); } static void ipi_timer(void) @@ -249,7 +370,6 @@ static void flush_tlb_mm_ipi(void *mm) * behalf of debugees, kswapd stealing pages from another process etc). * Kanoj 07/00. */ - void flush_tlb_mm(struct mm_struct *mm) { preempt_disable(); diff --git a/arch/sh/kernel/topology.c b/arch/sh/kernel/topology.c index 9b0b633b6c92..948fdb656933 100644 --- a/arch/sh/kernel/topology.c +++ b/arch/sh/kernel/topology.c @@ -52,7 +52,11 @@ static int __init topology_init(void) #endif for_each_present_cpu(i) { - ret = register_cpu(&per_cpu(cpu_devices, i), i); + struct cpu *c = &per_cpu(cpu_devices, i); + + c->hotpluggable = 1; + + ret = register_cpu(c, i); if (unlikely(ret)) printk(KERN_WARNING "%s: register_cpu %d failed (%d)\n", __func__, i, ret);