3205f8063b
I was running the trace_event benchmark and noticed that the times to record a trace_event was all over the place. I looked at the assembly of the ring_buffer_lock_reserver() and saw this: <ring_buffer_lock_reserve>: 31 c0 xor %eax,%eax 48 83 3d 76 47 bd 00 cmpq $0x1,0xbd4776(%rip) # ffffffff81d10d60 <ring_buffer_flags> 01 55 push %rbp 48 89 e5 mov %rsp,%rbp 75 1d jne ffffffff8113c60d <ring_buffer_lock_reserve+0x2d> 65 ff 05 69 e3 ec 7e incl %gs:0x7eece369(%rip) # a960 <__preempt_count> 8b 47 08 mov 0x8(%rdi),%eax 85 c0 test %eax,%eax +---- 74 12 je ffffffff8113c610 <ring_buffer_lock_reserve+0x30> | 65 ff 0d 5b e3 ec 7e decl %gs:0x7eece35b(%rip) # a960 <__preempt_count> | 0f 84 85 00 00 00 je ffffffff8113c690 <ring_buffer_lock_reserve+0xb0> | 31 c0 xor %eax,%eax | 5d pop %rbp | c3 retq | 90 nop +---> 65 44 8b 05 48 e3 ec mov %gs:0x7eece348(%rip),%r8d # a960 <__preempt_count> 7e 41 81 e0 ff ff ff 7f and $0x7fffffff,%r8d b0 08 mov $0x8,%al 65 8b 0d 58 36 ed 7e mov %gs:0x7eed3658(%rip),%ecx # fc80 <current_context> 41 f7 c0 00 ff 1f 00 test $0x1fff00,%r8d 74 1e je ffffffff8113c64f <ring_buffer_lock_reserve+0x6f> 41 f7 c0 00 00 10 00 test $0x100000,%r8d b0 01 mov $0x1,%al 75 13 jne ffffffff8113c64f <ring_buffer_lock_reserve+0x6f> 41 81 e0 00 00 0f 00 and $0xf0000,%r8d 49 83 f8 01 cmp $0x1,%r8 19 c0 sbb %eax,%eax 83 e0 02 and $0x2,%eax 83 c0 02 add $0x2,%eax 85 c8 test %ecx,%eax 75 ab jne ffffffff8113c5fe <ring_buffer_lock_reserve+0x1e> 09 c8 or %ecx,%eax 65 89 05 24 36 ed 7e mov %eax,%gs:0x7eed3624(%rip) # fc80 <current_context> The arrow is the fast path. After adding the unlikely's, the fast path looks a bit better: <ring_buffer_lock_reserve>: 31 c0 xor %eax,%eax 48 83 3d 76 47 bd 00 cmpq $0x1,0xbd4776(%rip) # ffffffff81d10d60 <ring_buffer_flags> 01 55 push %rbp 48 89 e5 mov %rsp,%rbp 75 7b jne ffffffff8113c66b <ring_buffer_lock_reserve+0x8b> 65 ff 05 69 e3 ec 7e incl %gs:0x7eece369(%rip) # a960 <__preempt_count> 8b 47 08 mov 0x8(%rdi),%eax 85 c0 test %eax,%eax 0f 85 9f 00 00 00 jne ffffffff8113c6a1 <ring_buffer_lock_reserve+0xc1> 65 8b 0d 57 e3 ec 7e mov %gs:0x7eece357(%rip),%ecx # a960 <__preempt_count> 81 e1 ff ff ff 7f and $0x7fffffff,%ecx b0 08 mov $0x8,%al 65 8b 15 68 36 ed 7e mov %gs:0x7eed3668(%rip),%edx # fc80 <current_context> f7 c1 00 ff 1f 00 test $0x1fff00,%ecx 75 50 jne ffffffff8113c670 <ring_buffer_lock_reserve+0x90> 85 d0 test %edx,%eax 75 7d jne ffffffff8113c6a1 <ring_buffer_lock_reserve+0xc1> 09 d0 or %edx,%eax 65 89 05 53 36 ed 7e mov %eax,%gs:0x7eed3653(%rip) # fc80 <current_context> 65 8b 05 fc da ec 7e mov %gs:0x7eecdafc(%rip),%eax # a130 <cpu_number> 89 c2 mov %eax,%edx Signed-off-by: Steven Rostedt <rostedt@goodmis.org> |
||
---|---|---|
.. | ||
bpf | ||
configs | ||
debug | ||
events | ||
gcov | ||
irq | ||
livepatch | ||
locking | ||
power | ||
printk | ||
rcu | ||
sched | ||
time | ||
trace | ||
.gitignore | ||
acct.c | ||
async.c | ||
audit_tree.c | ||
audit_watch.c | ||
audit.c | ||
audit.h | ||
auditfilter.c | ||
auditsc.c | ||
backtracetest.c | ||
bounds.c | ||
capability.c | ||
cgroup_freezer.c | ||
cgroup.c | ||
compat.c | ||
configs.c | ||
context_tracking.c | ||
cpu_pm.c | ||
cpu.c | ||
cpuset.c | ||
crash_dump.c | ||
cred.c | ||
delayacct.c | ||
dma.c | ||
elfcore.c | ||
exec_domain.c | ||
exit.c | ||
extable.c | ||
fork.c | ||
freezer.c | ||
futex_compat.c | ||
futex.c | ||
groups.c | ||
hung_task.c | ||
irq_work.c | ||
jump_label.c | ||
kallsyms.c | ||
kcmp.c | ||
Kconfig.freezer | ||
Kconfig.hz | ||
Kconfig.locks | ||
Kconfig.preempt | ||
kexec.c | ||
kmod.c | ||
kprobes.c | ||
ksysfs.c | ||
kthread.c | ||
latencytop.c | ||
Makefile | ||
module_signing.c | ||
module-internal.h | ||
module.c | ||
notifier.c | ||
nsproxy.c | ||
padata.c | ||
panic.c | ||
params.c | ||
pid_namespace.c | ||
pid.c | ||
profile.c | ||
ptrace.c | ||
range.c | ||
reboot.c | ||
relay.c | ||
resource.c | ||
seccomp.c | ||
signal.c | ||
smp.c | ||
smpboot.c | ||
smpboot.h | ||
softirq.c | ||
stacktrace.c | ||
stop_machine.c | ||
sys_ni.c | ||
sys.c | ||
sysctl_binary.c | ||
sysctl.c | ||
system_certificates.S | ||
system_keyring.c | ||
task_work.c | ||
taskstats.c | ||
test_kprobes.c | ||
torture.c | ||
tracepoint.c | ||
tsacct.c | ||
uid16.c | ||
up.c | ||
user_namespace.c | ||
user-return-notifier.c | ||
user.c | ||
utsname_sysctl.c | ||
utsname.c | ||
watchdog.c | ||
workqueue_internal.h | ||
workqueue.c |