mirror of
https://github.com/torvalds/linux.git
synced 2024-11-13 23:51:39 +00:00
481eaec37e
This adds micro-benchmarks useful for tuning virtio ring layouts.

Three layouts are currently implemented:
- virtio 0.9 compatible one
- an experimental extension bypassing the ring index, polling ring itself instead
- an experimental extension bypassing avail and used ring completely

Typical use:
	sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring

It doesn't depend on the kernel directly, but it's handy to have as much virtio stuff as possible in one tree.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
120 lines
2.3 KiB
C
120 lines
2.3 KiB
C
/*
|
|
* Copyright (C) 2016 Red Hat, Inc.
|
|
* Author: Michael S. Tsirkin <mst@redhat.com>
|
|
* This work is licensed under the terms of the GNU GPL, version 2.
|
|
*
|
|
* Common macros and functions for ring benchmarking.
|
|
*/
|
|
#ifndef MAIN_H
|
|
#define MAIN_H
|
|
|
|
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
|
|
|
|
/*
 * Gate for vmexit()/vmentry() below: while this is false both helpers
 * return immediately without calling wait_cycles().
 */
extern bool do_exit;
|
|
|
|
#if defined(__x86_64__) || defined(__i386__)
|
|
#include "x86intrin.h"
|
|
|
|
/*
 * Spin until at least @cycles ticks of the time-stamp counter have elapsed
 * since entry.
 *
 * @cycles: number of TSC ticks to burn; 0 returns after two counter reads.
 */
static inline void wait_cycles(unsigned long long cycles)
{
	const unsigned long long start = __rdtsc();

	/* Unsigned difference stays correct even if the counter wraps. */
	while (__rdtsc() - start < cycles)
		;
}

/* Tick counts handed to wait_cycles() by vmexit() and vmentry(). */
#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500
|
|
|
|
#else
|
|
/*
 * Fallback for architectures without a cycle-counter implementation in this
 * header: a timed wait cannot be honoured, so terminate the process at once
 * with exit status 5 instead of silently skipping the delay.
 *
 * _Exit() is declared by <stdlib.h>.
 *
 * @cycles: ignored.
 */
static inline void wait_cycles(unsigned long long cycles)
{
	(void)cycles;	/* unused on this path; avoids -Wunused-parameter */
	_Exit(5);
}

/* Tick counts handed to wait_cycles() by vmexit() and vmentry(). */
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
|
|
#endif
|
|
|
|
static inline void vmexit(void)
|
|
{
|
|
if (!do_exit)
|
|
return;
|
|
|
|
wait_cycles(VMEXIT_CYCLES);
|
|
}
|
|
static inline void vmentry(void)
|
|
{
|
|
if (!do_exit)
|
|
return;
|
|
|
|
wait_cycles(VMENTRY_CYCLES);
|
|
}
|
|
|
|
/*
 * Implemented by ring.
 *
 * Functions that take no arguments are declared with (void) so that they are
 * real prototypes: with an empty parameter list a pre-C23 compiler would
 * accept calls passing stray arguments without any diagnostic.
 */
void alloc_ring(void);

/* guest side */
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
void disable_call(void);
bool enable_call(void);
void kick_available(void);
void poll_used(void);

/* host side */
void disable_kick(void);
bool enable_kick(void);
bool use_buf(unsigned *, void **);
void call_used(void);
void poll_avail(void);
|
|
|
|
/*
 * Implemented by main.
 *
 * Shared knobs first, then the notification entry points.
 * NOTE(review): judging by the guest-side kick_available() and host-side
 * call_used() declared in this header, kick presumably signals towards the
 * host and call towards the guest - confirm against the implementation.
 */
extern unsigned ring_size;
extern bool do_sleep;

void wait_for_call(void);
void call(void);
void wait_for_kick(void);
void kick(void);
|
|
|
|
/*
 * Compiler barrier - similar to what Linux uses.
 *
 * An empty asm statement with a "memory" clobber: it emits no instruction but
 * stops the compiler from caching memory values in registers or moving memory
 * accesses across it. Spelled with the __asm__/__volatile__ alternate
 * keywords so the header also builds in strict ISO modes (-std=c99/-std=c11),
 * where plain "asm" is not a keyword.
 */
#define barrier() __asm__ __volatile__("" ::: "memory")
|
|
|
|
/*
 * cpu_relax(): spin-wait hint for busy loops; also acts as a compiler barrier
 * through the "memory" clobber.
 *
 * Is there a portable way to do this? Until there is, each architecture gets
 * its own instruction ("rep; nop" is the x86 PAUSE encoding, "yield" the
 * aarch64 hint) and anything else trips an assertion, which relies on
 * <assert.h>. The __asm__ spelling keeps the macro usable in strict ISO
 * modes, where plain "asm" is not a keyword.
 */
#if defined(__x86_64__) || defined(__i386__)
#define cpu_relax() __asm__ __volatile__("rep; nop" ::: "memory")
#elif defined(__aarch64__)
#define cpu_relax() __asm__ __volatile__("yield" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif
|
|
|
|
/* Selects which of the two waits busy_wait() performs. */
extern bool do_relax;

/*
 * One iteration of a polling loop: cpu_relax() when do_relax is set,
 * otherwise only a compiler barrier.
 */
static inline void busy_wait(void)
{
	if (!do_relax) {
		/* keeps the compiler from optimising the busy loop away */
		barrier();
		return;
	}

	cpu_relax();
}
|
|
|
|
/*
 * Full memory barrier, built on the legacy __sync_synchronize() builtin.
 *
 * An __ATOMIC_SEQ_CST fence is deliberately not used: according to the gcc
 * documentation such operations are only synchronized with other
 * __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()
|
|
|
|
/*
 * Release fence. Borrows the atomic builtins' thread fence and puts an
 * explicit compiler barrier in front of it, so the compiler barrier comes
 * first and the release fence second.
 */
#define smp_release()					\
	do {						\
		barrier();				\
		__atomic_thread_fence(__ATOMIC_RELEASE);	\
	} while (0)
|
|
|
|
/*
 * Acquire fence. Issues the atomic builtins' acquire thread fence first and
 * follows it with an explicit compiler barrier.
 */
#define smp_acquire()					\
	do {						\
		__atomic_thread_fence(__ATOMIC_ACQUIRE);	\
		barrier();				\
	} while (0)
|
|
|
|
#endif
|