From 8fe7e94eb71430cf63a742f3c19739d82a662758 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 1 Jun 2011 15:31:44 +0200 Subject: [PATCH 1/5] oprofile, x86: Fix race in nmi handler while starting counters In some rare cases, nmis are generated immediately after the nmi handler of the cpu was started. This causes the counter not to be enabled. Before enabling the nmi handlers we need to set variable ctr_running first and make sure its value is written to memory. Also, the patch makes all existing barriers a memory barrier instead of a compiler barrier only. Reported-by: Suravee Suthikulpanit Cc: # .35+ Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cf9750004a08..68894fdc034b 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -112,8 +112,10 @@ static void nmi_cpu_start(void *dummy) static int nmi_start(void) { get_online_cpus(); - on_each_cpu(nmi_cpu_start, NULL, 1); ctr_running = 1; + /* make ctr_running visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_start, NULL, 1); put_online_cpus(); return 0; } @@ -504,15 +506,18 @@ static int nmi_setup(void) nmi_enabled = 0; ctr_running = 0; - barrier(); + /* make variables visible to the nmi handler: */ + smp_mb(); err = register_die_notifier(&profile_exceptions_nb); if (err) goto fail; get_online_cpus(); register_cpu_notifier(&oprofile_cpu_nb); - on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; + /* make nmi_enabled visible to the nmi handler: */ + smp_mb(); + on_each_cpu(nmi_cpu_setup, NULL, 1); put_online_cpus(); return 0; @@ -531,7 +536,8 @@ static void nmi_shutdown(void) nmi_enabled = 0; ctr_running = 0; put_online_cpus(); - barrier(); + /* make variables visible to the nmi handler: */ + smp_mb(); unregister_die_notifier(&profile_exceptions_nb); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); From 161b6ae0e067e421b20bb35caf66bdb405c929ac Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sat, 28 May 2011 13:23:42 +0200 Subject: [PATCH 2/5] debugobjects: Fix boot crash when kmemleak and debugobjects enabled Order of initialization look like this: ... debugobjects kmemleak ...(lots of other subsystems)... workqueues (through early initcall) ... debugobjects use schedule_work for batch freeing of its data and kmemleak heavily use debugobjects, so when it comes to freeing and workqueues were not initialized yet, kernel crashes: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __queue_work+0x29/0x41a [] queue_work_on+0x16/0x1d [] queue_work+0x29/0x55 [] schedule_work+0x13/0x15 [] free_object+0x90/0x95 [] debug_check_no_obj_freed+0x187/0x1d3 [] ? _raw_spin_unlock_irqrestore+0x30/0x4d [] ? free_object_rcu+0x68/0x6d [] kmem_cache_free+0x64/0x12c [] free_object_rcu+0x68/0x6d [] __rcu_process_callbacks+0x1b6/0x2d9 ... because system_wq is NULL. Fix it by checking if workqueues susbystem was initialized before using. Signed-off-by: Marcin Slusarz Cc: Catalin Marinas Cc: Tejun Heo Cc: Dipankar Sarma Cc: Paul E. McKenney Cc: stable@kernel.org Link: http://lkml.kernel.org/r/20110528112342.GA3068@joi.lan Signed-off-by: Thomas Gleixner --- lib/debugobjects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 9d86e45086f5..a78b7c6e042c 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -198,7 +198,7 @@ static void free_object(struct debug_obj *obj) * initialized: */ if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) - sched = !work_pending(&debug_obj_work); + sched = keventd_up() && !work_pending(&debug_obj_work); hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; obj_pool_used--; From 140fe3b1ab9c082182ef13359fab4ddba95c24c3 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 21 Jun 2011 10:35:55 +0800 Subject: [PATCH 3/5] jump_label: Fix jump_label update for modules The jump labels entries for modules do not stop at __stop__jump_table, but after mod->jump_entries + mod_num_jump_entries. By checking the wrong end point, module trace events never get enabled. Cc: Ingo Molnar Acked-by: Jason Baron Tested-by: Avi Kivity Tested-by: Johannes Berg Signed-off-by: Xiao Guangrong Link: http://lkml.kernel.org/r/4E00038B.2060404@cn.fujitsu.com Signed-off-by: Steven Rostedt --- kernel/jump_label.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index fa27e750dbc0..a8ce45097f3d 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -375,15 +375,19 @@ int jump_label_text_reserved(void *start, void *end) static void jump_label_update(struct jump_label_key *key, int enable) { - struct jump_entry *entry = key->entries; - - /* if there are no users, entry can be NULL */ - if (entry) - __jump_label_update(key, entry, __stop___jump_table, enable); + struct jump_entry *entry = key->entries, *stop = __stop___jump_table; #ifdef CONFIG_MODULES + struct module *mod = __module_address((jump_label_t)key); + __jump_label_mod_update(key, enable); + + if (mod) + stop = mod->jump_entries + mod->num_jump_entries; #endif + /* if there are no users, entry can be NULL */ + if (entry) + __jump_label_update(key, entry, stop, enable); } #endif From cd62287e364c0d15d517c6ced4e4808b54711475 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Sat, 4 Jun 2011 15:03:20 +0200 Subject: [PATCH 4/5] sched, cgroups: Fix MIN_SHARES on 64-bit boxen Commit c8b28116 ("sched: Increase SCHED_LOAD_SCALE resolution") intended to have no user-visible effect, but allows setting cpu.shares to < MIN_SHARES, which the user then sees. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Nikhil Rao Link: http://lkml.kernel.org/r/1307192600.8618.3.camel@marge.simson.net Signed-off-by: Ingo Molnar --- kernel/sched.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 3f2e502d609b..9769c756ad66 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -292,8 +292,8 @@ static DEFINE_SPINLOCK(task_group_lock); * (The default weight is 1024 - so there's no practical * limitation from this.) */ -#define MIN_SHARES 2 -#define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION)) +#define MIN_SHARES (1UL << 1) +#define MAX_SHARES (1UL << 18) static int root_task_group_load = ROOT_TASK_GROUP_LOAD; #endif @@ -8450,10 +8450,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) if (!tg->se[0]) return -EINVAL; - if (shares < MIN_SHARES) - shares = MIN_SHARES; - else if (shares > MAX_SHARES) - shares = MAX_SHARES; + shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); mutex_lock(&shares_mutex); if (tg->shares == shares) From e4c2fb0d5776b58049d2556b456144a4db3fe5a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Jul 2011 10:56:32 +0200 Subject: [PATCH 5/5] sched: Disable (revert) SCHED_LOAD_SCALE increase Alex reported that commit c8b281161df ("sched: Increase SCHED_LOAD_SCALE resolution") caused a power usage regression under light load as it increases the number of load-balance operations and keeps idle cpus from staying idle. Time has run out to find the root cause for this release so disable the feature for v3.0 until we can figure out what causes the problem. Reported-by: "Alex, Shi" Signed-off-by: Peter Zijlstra Cc: Nikhil Rao Cc: Ming Lei Cc: Mike Galbraith Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/n/tip-m4onxn0sxnyn5iz9o88eskc3@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index a837b20ba190..496770a96487 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -808,7 +808,7 @@ enum cpu_idle_type { * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the * increased costs. */ -#if BITS_PER_LONG > 32 +#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load */ # define SCHED_LOAD_RESOLUTION 10 # define scale_load(w) ((w) << SCHED_LOAD_RESOLUTION) # define scale_load_down(w) ((w) >> SCHED_LOAD_RESOLUTION)