Merge tag 'blk-dim-v2' into rdma.git for-next

Generic DIM

From: Tal Gilboa and Yamin Fridman

Implement net DIM over a generic DIM library, add RDMA DIM

The dim.h library exposes an implementation of the DIM algorithm for
dynamically-tuned interrupt moderation of networking interfaces.

We want similar functionality for other protocols, which might need to
optimize interrupt moderation differently. The main motivation here is DIM
for the NVMf storage protocol.

The current DIM implementation prioritizes reducing interrupt overhead over
latency. Also, in order to reduce DIM's own overhead, the algorithm might
take some time to identify that it needs to change profiles. While this is
acceptable for networking, it might not work well in other scenarios.

Here we propose a new structure for DIM. The idea is to allow slightly
modified functionality without the risk of breaking Net DIM behavior for
netdev. We verified that the modified solution introduces no degradation
in current DIM behavior.

Suggested solution:
- Common logic is implemented in lib/dim/dim.c
- Net DIM (existing) logic is implemented in lib/dim/net_dim.c, which uses
  the common logic in dim.c
- Any new DIM logic will be implemented in "lib/dim/new_dim.c".
  This new implementation will expose modified versions of the profiles,
  dim_step() and dim_decision() (a rough sketch of such a variant follows
  the pros list below).
- DIM API is declared in include/linux/dim.h for all implementations
  (a consumer-side usage sketch follows this list).
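
A minimal consumer-side sketch of the common API, modeled on the drivers
converted in this series (my_ring and the my_set_coalesce() hardware hook
are hypothetical placeholders, not part of this patch set):

    /* Work item scheduled by the library when a new profile is chosen. */
    static void my_dim_work(struct work_struct *work)
    {
            struct dim *dim = container_of(work, struct dim, work);
            struct dim_cq_moder moder =
                    net_dim_get_rx_moderation(dim->mode, dim->profile_ix);

            my_set_coalesce(moder.usec, moder.pkts);  /* apply to HW */
            dim->state = DIM_START_MEASURE;
    }

    /* Called at the end of the NAPI poll / completion handling path. */
    static void my_poll_done(struct my_ring *ring)
    {
            struct dim_sample sample;

            dim_update_sample(ring->event_ctr, ring->packets,
                              ring->bytes, &sample);
            net_dim(&ring->dim, sample);
    }

At init time, the consumer only needs INIT_WORK(&ring->dim.work, my_dim_work)
and a CQ period mode such as DIM_CQ_PERIOD_MODE_START_FROM_EQE, as the
converted drivers below do.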

Pros for this solution are:
- Zero impact on existing net_dim implementation and usage
- Relatively more code reuse (compared to two separate solutions)
- Increased extensibility
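
To illustrate the extensibility point, a rough sketch of how a new variant
could supply its own step policy while reusing the helpers exported by
lib/dim/dim.c (new_dim_step() and NEW_DIM_NUM_PROFILES are hypothetical
names; this is not the actual RDMA DIM code of the following series):

    #define NEW_DIM_NUM_PROFILES 4  /* size of the variant's profile table */

    static int new_dim_step(struct dim *dim)
    {
            switch (dim->tune_state) {
            case DIM_PARKING_ON_TOP:
            case DIM_PARKING_TIRED:
                    break;
            case DIM_GOING_RIGHT:
                    if (dim->profile_ix == NEW_DIM_NUM_PROFILES - 1)
                            return DIM_ON_EDGE;
                    dim->profile_ix++;
                    dim->steps_right++;
                    break;
            case DIM_GOING_LEFT:
                    if (dim->profile_ix == 0)
                            return DIM_ON_EDGE;
                    dim->profile_ix--;
                    dim->steps_left++;
                    break;
            }
            return DIM_STEPPED;
    }

A matching decision function would then call dim_calc_stats(), dim_turn(),
dim_park_on_top() and dim_park_tired() from dim.c, in the same way that
net_dim_decision() does in lib/dim/net_dim.c.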

Required for dependencies in the next series.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Commit 2ef38e380e by Jason Gunthorpe, 2019-07-08 14:58:59 -03:00
23 files changed, 728 insertions(+), 489 deletions(-)


@@ -5600,7 +5600,8 @@ F: include/linux/dynamic_debug.h
DYNAMIC INTERRUPT MODERATION
M: Tal Gilboa <talgi@mellanox.com>
S: Maintained
F: include/linux/net_dim.h
F: include/linux/dim.h
F: lib/dim/
DZ DECSTATION DZ11 SERIAL DRIVER
M: "Maciej W. Rozycki" <macro@linux-mips.org>


@@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM
default y
depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \
SIBYTE_SB1xxx_SOC
select DIMLIB
---help---
If you have a network (Ethernet) chipset belonging to this class,
say Y.


@@ -609,7 +609,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev,
struct ethtool_coalesce *ec)
{
struct bcm_sysport_priv *priv = netdev_priv(dev);
struct net_dim_cq_moder moder;
struct dim_cq_moder moder;
u32 usecs, pkts;
unsigned int i;
@@ -992,7 +992,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
{
struct bcm_sysport_priv *priv =
container_of(napi, struct bcm_sysport_priv, napi);
struct net_dim_sample dim_sample;
struct dim_sample dim_sample;
unsigned int work_done = 0;
work_done = bcm_sysport_desc_rx(priv, budget);
@@ -1016,7 +1016,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget)
}
if (priv->dim.use_dim) {
net_dim_sample(priv->dim.event_ctr, priv->dim.packets,
dim_update_sample(priv->dim.event_ctr, priv->dim.packets,
priv->dim.bytes, &dim_sample);
net_dim(&priv->dim.dim, dim_sample);
}
@@ -1087,16 +1087,16 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv)
static void bcm_sysport_dim_work(struct work_struct *work)
{
struct net_dim *dim = container_of(work, struct net_dim, work);
struct dim *dim = container_of(work, struct dim, work);
struct bcm_sysport_net_dim *ndim =
container_of(dim, struct bcm_sysport_net_dim, dim);
struct bcm_sysport_priv *priv =
container_of(ndim, struct bcm_sysport_priv, dim);
struct net_dim_cq_moder cur_profile =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode,
dim->profile_ix);
bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts);
dim->state = NET_DIM_START_MEASURE;
dim->state = DIM_START_MEASURE;
}
/* RX and misc interrupt routine */
@@ -1437,7 +1437,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv,
struct bcm_sysport_net_dim *dim = &priv->dim;
INIT_WORK(&dim->dim.work, cb);
dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
dim->event_ctr = 0;
dim->packets = 0;
dim->bytes = 0;
@@ -1446,7 +1446,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv,
static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv)
{
struct bcm_sysport_net_dim *dim = &priv->dim;
struct net_dim_cq_moder moder;
struct dim_cq_moder moder;
u32 usecs, pkts;
usecs = priv->rx_coalesce_usecs;


@@ -11,7 +11,7 @@
#include <linux/bitmap.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/net_dim.h>
#include <linux/dim.h>
/* Receive/transmit descriptor format */
#define DESC_ADDR_HI_STATUS_LEN 0x00
@@ -702,7 +702,7 @@ struct bcm_sysport_net_dim {
u16 event_ctr;
unsigned long packets;
unsigned long bytes;
struct net_dim dim;
struct dim dim;
};
/* Software view of the TX ring */


@@ -2130,9 +2130,9 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
}
}
if (bp->flags & BNXT_FLAG_DIM) {
struct net_dim_sample dim_sample;
struct dim_sample dim_sample;
net_dim_sample(cpr->event_ctr,
dim_update_sample(cpr->event_ctr,
cpr->rx_packets,
cpr->rx_bytes,
&dim_sample);
@@ -7813,7 +7813,7 @@ static void bnxt_enable_napi(struct bnxt *bp)
if (bp->bnapi[i]->rx_ring) {
INIT_WORK(&cpr->dim.work, bnxt_dim_work);
cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
napi_enable(&bp->bnapi[i]->napi);
}


@@ -24,7 +24,7 @@
#include <net/devlink.h>
#include <net/dst_metadata.h>
#include <net/xdp.h>
#include <linux/net_dim.h>
#include <linux/dim.h>
struct tx_bd {
__le32 tx_bd_len_flags_type;
@@ -810,7 +810,7 @@ struct bnxt_cp_ring_info {
u64 rx_bytes;
u64 event_ctr;
struct net_dim dim;
struct dim dim;
union {
struct tx_cmp *cp_desc_ring[MAX_CP_PAGES];


@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include "bnxt_hsi.h"
#include <linux/net_dim.h>
#include <linux/dim.h>
#include "bnxt.h"
#include "bnxt_debugfs.h"
@@ -21,7 +21,7 @@ static ssize_t debugfs_dim_read(struct file *filep,
char __user *buffer,
size_t count, loff_t *ppos)
{
struct net_dim *dim = filep->private_data;
struct dim *dim = filep->private_data;
int len;
char *buf;
@@ -61,7 +61,7 @@ static const struct file_operations debugfs_dim_fops = {
.read = debugfs_dim_read,
};
static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx,
static struct dentry *debugfs_dim_ring_init(struct dim *dim, int ring_idx,
struct dentry *dd)
{
static char qname[16];


@@ -7,26 +7,25 @@
* the Free Software Foundation.
*/
#include <linux/net_dim.h>
#include <linux/dim.h>
#include "bnxt_hsi.h"
#include "bnxt.h"
void bnxt_dim_work(struct work_struct *work)
{
struct net_dim *dim = container_of(work, struct net_dim,
work);
struct dim *dim = container_of(work, struct dim, work);
struct bnxt_cp_ring_info *cpr = container_of(dim,
struct bnxt_cp_ring_info,
dim);
struct bnxt_napi *bnapi = container_of(cpr,
struct bnxt_napi,
cp_ring);
struct net_dim_cq_moder cur_moder =
struct dim_cq_moder cur_moder =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
cpr->rx_ring_coal.coal_ticks = cur_moder.usec;
cpr->rx_ring_coal.coal_bufs = cur_moder.pkts;
bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi);
dim->state = NET_DIM_START_MEASURE;
dim->state = DIM_START_MEASURE;
}


@@ -640,7 +640,7 @@ static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring,
static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring,
struct ethtool_coalesce *ec)
{
struct net_dim_cq_moder moder;
struct dim_cq_moder moder;
u32 usecs, pkts;
ring->rx_coalesce_usecs = ec->rx_coalesce_usecs;
@@ -1895,7 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
{
struct bcmgenet_rx_ring *ring = container_of(napi,
struct bcmgenet_rx_ring, napi);
struct net_dim_sample dim_sample;
struct dim_sample dim_sample;
unsigned int work_done;
work_done = bcmgenet_desc_rx(ring, budget);
@@ -1906,7 +1906,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
}
if (ring->dim.use_dim) {
net_dim_sample(ring->dim.event_ctr, ring->dim.packets,
dim_update_sample(ring->dim.event_ctr, ring->dim.packets,
ring->dim.bytes, &dim_sample);
net_dim(&ring->dim.dim, dim_sample);
}
@@ -1916,16 +1916,16 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget)
static void bcmgenet_dim_work(struct work_struct *work)
{
struct net_dim *dim = container_of(work, struct net_dim, work);
struct dim *dim = container_of(work, struct dim, work);
struct bcmgenet_net_dim *ndim =
container_of(dim, struct bcmgenet_net_dim, dim);
struct bcmgenet_rx_ring *ring =
container_of(ndim, struct bcmgenet_rx_ring, dim);
struct net_dim_cq_moder cur_profile =
struct dim_cq_moder cur_profile =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts);
dim->state = NET_DIM_START_MEASURE;
dim->state = DIM_START_MEASURE;
}
/* Assign skb to RX DMA descriptor. */
@@ -2082,7 +2082,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring,
struct bcmgenet_net_dim *dim = &ring->dim;
INIT_WORK(&dim->dim.work, cb);
dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
dim->event_ctr = 0;
dim->packets = 0;
dim->bytes = 0;
@@ -2091,7 +2091,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring,
static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring)
{
struct bcmgenet_net_dim *dim = &ring->dim;
struct net_dim_cq_moder moder;
struct dim_cq_moder moder;
u32 usecs, pkts;
usecs = ring->rx_coalesce_usecs;


@@ -13,7 +13,7 @@
#include <linux/mii.h>
#include <linux/if_vlan.h>
#include <linux/phy.h>
#include <linux/net_dim.h>
#include <linux/dim.h>
/* total number of Buffer Descriptors, same for Rx/Tx */
#define TOTAL_DESC 256
@@ -578,7 +578,7 @@ struct bcmgenet_net_dim {
u16 event_ctr;
unsigned long packets;
unsigned long bytes;
struct net_dim dim;
struct dim dim;
};
struct bcmgenet_rx_ring {


@@ -34,6 +34,7 @@ config MLX5_CORE_EN
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
depends on IPV6=y || IPV6=n || MLX5_CORE=m
select PAGE_POOL
select DIMLIB
default n
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.


@@ -48,7 +48,7 @@
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/xdp.h>
#include <linux/net_dim.h>
#include <linux/dim.h>
#include <linux/bits.h>
#include "wq.h"
#include "mlx5_core.h"
@@ -238,9 +238,9 @@ struct mlx5e_params {
u16 num_channels;
u8 num_tc;
bool rx_cqe_compress_def;
struct net_dim_cq_moder rx_cq_moderation;
struct net_dim_cq_moder tx_cq_moderation;
bool tunneled_offload_en;
struct dim_cq_moder rx_cq_moderation;
struct dim_cq_moder tx_cq_moderation;
bool lro_en;
u8 tx_min_inline_mode;
bool vlan_strip_disable;
@@ -356,7 +356,7 @@ struct mlx5e_txqsq {
/* dirtied @completion */
u16 cc;
u32 dma_fifo_cc;
struct net_dim dim; /* Adaptive Moderation */
struct dim dim; /* Adaptive Moderation */
/* dirtied @xmit */
u16 pc ____cacheline_aligned_in_smp;
@@ -596,7 +596,7 @@ struct mlx5e_rq {
int ix;
unsigned int hw_mtu;
struct net_dim dim; /* Dynamic Interrupt Moderation */
struct dim dim; /* Dynamic Interrupt Moderation */
/* XDP */
struct bpf_prog *xdp_prog;


@@ -30,22 +30,22 @@
* SOFTWARE.
*/
#include <linux/net_dim.h>
#include <linux/dim.h>
#include "en.h"
static void
mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder,
mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder,
struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq)
{
mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts);
dim->state = NET_DIM_START_MEASURE;
dim->state = DIM_START_MEASURE;
}
void mlx5e_rx_dim_work(struct work_struct *work)
{
struct net_dim *dim = container_of(work, struct net_dim, work);
struct dim *dim = container_of(work, struct dim, work);
struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim);
struct net_dim_cq_moder cur_moder =
struct dim_cq_moder cur_moder =
net_dim_get_rx_moderation(dim->mode, dim->profile_ix);
mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq);
@@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work)
void mlx5e_tx_dim_work(struct work_struct *work)
{
struct net_dim *dim = container_of(work, struct net_dim, work);
struct dim *dim = container_of(work, struct dim, work);
struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim);
struct net_dim_cq_moder cur_moder =
struct dim_cq_moder cur_moder =
net_dim_get_tx_moderation(dim->mode, dim->profile_ix);
mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq);


@@ -466,7 +466,7 @@ static int mlx5e_set_channels(struct net_device *dev,
int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
struct net_dim_cq_moder *rx_moder, *tx_moder;
struct dim_cq_moder *rx_moder, *tx_moder;
if (!MLX5_CAP_GEN(priv->mdev, cq_moderation))
return -EOPNOTSUPP;
@@ -521,7 +521,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal)
{
struct net_dim_cq_moder *rx_moder, *tx_moder;
struct dim_cq_moder *rx_moder, *tx_moder;
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_channels new_channels = {};
int err = 0;


@@ -584,11 +584,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
switch (params->rx_cq_moderation.cq_period_mode) {
case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE;
rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
break;
case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
default:
rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
rq->page_cache.head = 0;
@@ -1571,7 +1571,7 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
}
static int mlx5e_open_cq(struct mlx5e_channel *c,
struct net_dim_cq_moder moder,
struct dim_cq_moder moder,
struct mlx5e_cq_param *param,
struct mlx5e_cq *cq)
{
@@ -1776,7 +1776,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
struct mlx5e_channel **cp)
{
int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
struct net_dim_cq_moder icocq_moder = {0, 0};
struct dim_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
struct mlx5e_channel *c;
unsigned int irq;
@@ -2153,7 +2153,7 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
mlx5e_build_common_cq_param(priv, param);
param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
static void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
@@ -4421,9 +4421,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
}
static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
{
struct net_dim_cq_moder moder;
struct dim_cq_moder moder;
moder.cq_period_mode = cq_period_mode;
moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
@@ -4434,9 +4434,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
return moder;
}
static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
{
struct net_dim_cq_moder moder;
struct dim_cq_moder moder;
moder.cq_period_mode = cq_period_mode;
moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
@@ -4450,8 +4450,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
{
return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE :
NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE;
DIM_CQ_PERIOD_MODE_START_FROM_CQE :
DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}
void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)


@@ -48,26 +48,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq)
{
struct mlx5e_sq_stats *stats = sq->stats;
struct net_dim_sample dim_sample;
struct dim_sample dim_sample;
if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state)))
return;
net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes,
&dim_sample);
dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
net_dim(&sq->dim, dim_sample);
}
static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq)
{
struct mlx5e_rq_stats *stats = rq->stats;
struct net_dim_sample dim_sample;
struct dim_sample dim_sample;
if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state)))
return;
net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes,
&dim_sample);
dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample);
net_dim(&rq->dim, dim_sample);
}

include/linux/dim.h (new file, 366 lines)

@@ -0,0 +1,366 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2019 Mellanox Technologies. */
#ifndef DIM_H
#define DIM_H
#include <linux/module.h>
/**
* Number of events between DIM iterations.
* Causes a moderation of the algorithm run.
*/
#define DIM_NEVENTS 64
/**
* Check whether a difference between values justifies taking an action.
* We consider 10% difference as significant.
*/
#define IS_SIGNIFICANT_DIFF(val, ref) \
(((100UL * abs((val) - (ref))) / (ref)) > 10)
/**
* Calculate the gap between two values.
* Take wrap-around and variable size into consideration.
*/
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
& (BIT_ULL(bits) - 1))
/**
* Structure for CQ moderation values.
* Used for communications between DIM and its consumer.
*
* @usec: CQ timer suggestion (by DIM)
* @pkts: CQ packet counter suggestion (by DIM)
* @cq_period_mode: CQ period count mode (from CQE/EQE)
*/
struct dim_cq_moder {
u16 usec;
u16 pkts;
u16 comps;
u8 cq_period_mode;
};
/**
* Structure for DIM sample data.
* Used for communications between DIM and its consumer.
*
* @time: Sample timestamp
* @pkt_ctr: Number of packets
* @byte_ctr: Number of bytes
* @event_ctr: Number of events
*/
struct dim_sample {
ktime_t time;
u32 pkt_ctr;
u32 byte_ctr;
u16 event_ctr;
u32 comp_ctr;
};
/**
* Structure for DIM stats.
* Used for holding current measured rates.
*
* @ppms: Packets per msec
* @bpms: Bytes per msec
* @epms: Events per msec
*/
struct dim_stats {
int ppms; /* packets per msec */
int bpms; /* bytes per msec */
int epms; /* events per msec */
int cpms; /* completions per msec */
int cpe_ratio; /* ratio of completions to events */
};
/**
* Main structure for dynamic interrupt moderation (DIM).
* Used for holding all information about a specific DIM instance.
*
* @state: Algorithm state (see below)
* @prev_stats: Measured rates from previous iteration (for comparison)
* @start_sample: Sampled data at start of current iteration
* @work: Work to perform on action required
* @profile_ix: Current moderation profile
* @mode: CQ period count mode
* @tune_state: Algorithm tuning state (see below)
* @steps_right: Number of steps taken towards higher moderation
* @steps_left: Number of steps taken towards lower moderation
* @tired: Parking depth counter
*/
struct dim {
u8 state;
struct dim_stats prev_stats;
struct dim_sample start_sample;
struct dim_sample measuring_sample;
struct work_struct work;
u8 profile_ix;
u8 mode;
u8 tune_state;
u8 steps_right;
u8 steps_left;
u8 tired;
};
/**
* enum dim_cq_period_mode
*
* These are the modes for CQ period count.
*
* @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
* @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
* @DIM_CQ_PERIOD_NUM_MODES: Number of modes
*/
enum {
DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
DIM_CQ_PERIOD_NUM_MODES
};
/**
* enum dim_state
*
* These are the DIM algorithm states.
* These will determine if the algorithm is in a valid state to start an iteration.
*
* @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
* @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check if
* need to perform an action
* @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
*/
enum {
DIM_START_MEASURE,
DIM_MEASURE_IN_PROGRESS,
DIM_APPLY_NEW_PROFILE,
};
/**
* enum dim_tune_state
*
* These are the DIM algorithm tune states.
* These will determine which action the algorithm should perform.
*
* @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
* @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
* @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
* @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
*/
enum {
DIM_PARKING_ON_TOP,
DIM_PARKING_TIRED,
DIM_GOING_RIGHT,
DIM_GOING_LEFT,
};
/**
* enum dim_stats_state
*
* These are the DIM algorithm statistics states.
* These will determine the verdict of current iteration.
*
* @DIM_STATS_WORSE: Current iteration shows worse performance than before
* @DIM_STATS_SAME: Current iteration shows the same performance as before
* @DIM_STATS_BETTER: Current iteration shows better performance than before
*/
enum {
DIM_STATS_WORSE,
DIM_STATS_SAME,
DIM_STATS_BETTER,
};
/**
* enum dim_step_result
*
* These are the DIM algorithm step results.
* These describe the result of a step.
*
* @DIM_STEPPED: Performed a regular step
* @DIM_TOO_TIRED: Same kind of step was done multiple times - should go to
* tired parking
* @DIM_ON_EDGE: Stepped to the most left/right profile
*/
enum {
DIM_STEPPED,
DIM_TOO_TIRED,
DIM_ON_EDGE,
};
/**
* dim_on_top - check if current state is a good place to stop (top location)
* @dim: DIM context
*
* Check if current profile is a good place to park at.
* This will result in reducing the frequency of DIM checks, as we assume the
* profile should not change unless the traffic pattern changes.
*/
bool dim_on_top(struct dim *dim);
/**
* dim_turn - change profile altering direction
* @dim: DIM context
*
* Go left if we were going right and vice-versa.
* Do nothing if currently parking.
*/
void dim_turn(struct dim *dim);
/**
* dim_park_on_top - enter a parking state on a top location
* @dim: DIM context
*
* Enter parking state.
* Clear all movement history.
*/
void dim_park_on_top(struct dim *dim);
/**
* dim_park_tired - enter a tired parking state
* @dim: DIM context
*
* Enter parking state.
* Clear all movement history and cause DIM checks frequency to reduce.
*/
void dim_park_tired(struct dim *dim);
/**
* dim_calc_stats - calculate the difference between two samples
* @start: start sample
* @end: end sample
* @curr_stats: delta between samples
*
* Calculate the delta between two samples (in data rates).
* Takes into consideration counter wrap-around.
*/
void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
struct dim_stats *curr_stats);
/**
* dim_update_sample - set a sample's fields with given values
* @event_ctr: number of events to set
* @packets: number of packets to set
* @bytes: number of bytes to set
* @s: DIM sample
*/
static inline void
dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s)
{
s->time = ktime_get();
s->pkt_ctr = packets;
s->byte_ctr = bytes;
s->event_ctr = event_ctr;
}
/**
* dim_update_sample_with_comps - set a sample's fields with given
* values including the completion parameter
* @event_ctr: number of events to set
* @packets: number of packets to set
* @bytes: number of bytes to set
* @comps: number of completions to set
* @s: DIM sample
*/
static inline void
dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps,
struct dim_sample *s)
{
dim_update_sample(event_ctr, packets, bytes, s);
s->comp_ctr = comps;
}
/* Net DIM */
/*
* Net DIM profiles:
* There is a different set of profiles for each CQ period mode.
* There are different sets of profiles for RX and TX CQs.
* Each profile size must be NET_DIM_PARAMS_NUM_PROFILES.
*/
#define NET_DIM_PARAMS_NUM_PROFILES 5
#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
#define NET_DIM_RX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
}
#define NET_DIM_RX_CQE_PROFILES { \
{2, 256}, \
{8, 128}, \
{16, 64}, \
{32, 64}, \
{64, 64} \
}
#define NET_DIM_TX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
}
#define NET_DIM_TX_CQE_PROFILES { \
{5, 128}, \
{8, 64}, \
{16, 32}, \
{32, 32}, \
{64, 32} \
}
static const struct dim_cq_moder
rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_RX_EQE_PROFILES,
NET_DIM_RX_CQE_PROFILES,
};
static const struct dim_cq_moder
tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_TX_EQE_PROFILES,
NET_DIM_TX_CQE_PROFILES,
};
/**
* net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
* @cq_period_mode: CQ period mode
* @ix: Profile index
*/
struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
/**
* net_dim_get_def_rx_moderation - provide the default RX moderation
* @cq_period_mode: CQ period mode
*/
struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
/**
* net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
* @cq_period_mode: CQ period mode
* @ix: Profile index
*/
struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
/**
* net_dim_get_def_tx_moderation - provide the default TX moderation
* @cq_period_mode: CQ period mode
*/
struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
/**
* net_dim - main DIM algorithm entry point
* @dim: DIM instance information
* @end_sample: Current data measurement
*
* Called by the consumer.
* This is the main logic of the algorithm, where data is processed in order to decide on next
* required action.
*/
void net_dim(struct dim *dim, struct dim_sample end_sample);
#endif /* DIM_H */


@@ -1,418 +0,0 @@
/*
* Copyright (c) 2016, Mellanox Technologies. All rights reserved.
* Copyright (c) 2017-2018, Broadcom Limited. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef NET_DIM_H
#define NET_DIM_H
#include <linux/module.h>
struct net_dim_cq_moder {
u16 usec;
u16 pkts;
u8 cq_period_mode;
};
struct net_dim_sample {
ktime_t time;
u32 pkt_ctr;
u32 byte_ctr;
u16 event_ctr;
};
struct net_dim_stats {
int ppms; /* packets per msec */
int bpms; /* bytes per msec */
int epms; /* events per msec */
};
struct net_dim { /* Adaptive Moderation */
u8 state;
struct net_dim_stats prev_stats;
struct net_dim_sample start_sample;
struct work_struct work;
u8 profile_ix;
u8 mode;
u8 tune_state;
u8 steps_right;
u8 steps_left;
u8 tired;
};
enum {
NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
NET_DIM_CQ_PERIOD_NUM_MODES
};
/* Adaptive moderation logic */
enum {
NET_DIM_START_MEASURE,
NET_DIM_MEASURE_IN_PROGRESS,
NET_DIM_APPLY_NEW_PROFILE,
};
enum {
NET_DIM_PARKING_ON_TOP,
NET_DIM_PARKING_TIRED,
NET_DIM_GOING_RIGHT,
NET_DIM_GOING_LEFT,
};
enum {
NET_DIM_STATS_WORSE,
NET_DIM_STATS_SAME,
NET_DIM_STATS_BETTER,
};
enum {
NET_DIM_STEPPED,
NET_DIM_TOO_TIRED,
NET_DIM_ON_EDGE,
};
#define NET_DIM_PARAMS_NUM_PROFILES 5
/* Adaptive moderation profiles */
#define NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256
#define NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
/* All profiles sizes must be NET_PARAMS_DIM_NUM_PROFILES */
#define NET_DIM_RX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
{256, NET_DIM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \
}
#define NET_DIM_RX_CQE_PROFILES { \
{2, 256}, \
{8, 128}, \
{16, 64}, \
{32, 64}, \
{64, 64} \
}
#define NET_DIM_TX_EQE_PROFILES { \
{1, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{8, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{32, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{64, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE}, \
{128, NET_DIM_DEFAULT_TX_CQ_MODERATION_PKTS_FROM_EQE} \
}
#define NET_DIM_TX_CQE_PROFILES { \
{5, 128}, \
{8, 64}, \
{16, 32}, \
{32, 32}, \
{64, 32} \
}
static const struct net_dim_cq_moder
rx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_RX_EQE_PROFILES,
NET_DIM_RX_CQE_PROFILES,
};
static const struct net_dim_cq_moder
tx_profile[NET_DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
NET_DIM_TX_EQE_PROFILES,
NET_DIM_TX_CQE_PROFILES,
};
static inline struct net_dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
struct net_dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
static inline struct net_dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
}
static inline struct net_dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
struct net_dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
static inline struct net_dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
static inline bool net_dim_on_top(struct net_dim *dim)
{
switch (dim->tune_state) {
case NET_DIM_PARKING_ON_TOP:
case NET_DIM_PARKING_TIRED:
return true;
case NET_DIM_GOING_RIGHT:
return (dim->steps_left > 1) && (dim->steps_right == 1);
default: /* NET_DIM_GOING_LEFT */
return (dim->steps_right > 1) && (dim->steps_left == 1);
}
}
static inline void net_dim_turn(struct net_dim *dim)
{
switch (dim->tune_state) {
case NET_DIM_PARKING_ON_TOP:
case NET_DIM_PARKING_TIRED:
break;
case NET_DIM_GOING_RIGHT:
dim->tune_state = NET_DIM_GOING_LEFT;
dim->steps_left = 0;
break;
case NET_DIM_GOING_LEFT:
dim->tune_state = NET_DIM_GOING_RIGHT;
dim->steps_right = 0;
break;
}
}
static inline int net_dim_step(struct net_dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
return NET_DIM_TOO_TIRED;
switch (dim->tune_state) {
case NET_DIM_PARKING_ON_TOP:
case NET_DIM_PARKING_TIRED:
break;
case NET_DIM_GOING_RIGHT:
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
return NET_DIM_ON_EDGE;
dim->profile_ix++;
dim->steps_right++;
break;
case NET_DIM_GOING_LEFT:
if (dim->profile_ix == 0)
return NET_DIM_ON_EDGE;
dim->profile_ix--;
dim->steps_left++;
break;
}
dim->tired++;
return NET_DIM_STEPPED;
}
static inline void net_dim_park_on_top(struct net_dim *dim)
{
dim->steps_right = 0;
dim->steps_left = 0;
dim->tired = 0;
dim->tune_state = NET_DIM_PARKING_ON_TOP;
}
static inline void net_dim_park_tired(struct net_dim *dim)
{
dim->steps_right = 0;
dim->steps_left = 0;
dim->tune_state = NET_DIM_PARKING_TIRED;
}
static inline void net_dim_exit_parking(struct net_dim *dim)
{
dim->tune_state = dim->profile_ix ? NET_DIM_GOING_LEFT :
NET_DIM_GOING_RIGHT;
net_dim_step(dim);
}
#define IS_SIGNIFICANT_DIFF(val, ref) \
(((100UL * abs((val) - (ref))) / (ref)) > 10) /* more than 10% difference */
static inline int net_dim_stats_compare(struct net_dim_stats *curr,
struct net_dim_stats *prev)
{
if (!prev->bpms)
return curr->bpms ? NET_DIM_STATS_BETTER :
NET_DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
return (curr->bpms > prev->bpms) ? NET_DIM_STATS_BETTER :
NET_DIM_STATS_WORSE;
if (!prev->ppms)
return curr->ppms ? NET_DIM_STATS_BETTER :
NET_DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? NET_DIM_STATS_BETTER :
NET_DIM_STATS_WORSE;
if (!prev->epms)
return NET_DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? NET_DIM_STATS_BETTER :
NET_DIM_STATS_WORSE;
return NET_DIM_STATS_SAME;
}
static inline bool net_dim_decision(struct net_dim_stats *curr_stats,
struct net_dim *dim)
{
int prev_state = dim->tune_state;
int prev_ix = dim->profile_ix;
int stats_res;
int step_res;
switch (dim->tune_state) {
case NET_DIM_PARKING_ON_TOP:
stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
if (stats_res != NET_DIM_STATS_SAME)
net_dim_exit_parking(dim);
break;
case NET_DIM_PARKING_TIRED:
dim->tired--;
if (!dim->tired)
net_dim_exit_parking(dim);
break;
case NET_DIM_GOING_RIGHT:
case NET_DIM_GOING_LEFT:
stats_res = net_dim_stats_compare(curr_stats, &dim->prev_stats);
if (stats_res != NET_DIM_STATS_BETTER)
net_dim_turn(dim);
if (net_dim_on_top(dim)) {
net_dim_park_on_top(dim);
break;
}
step_res = net_dim_step(dim);
switch (step_res) {
case NET_DIM_ON_EDGE:
net_dim_park_on_top(dim);
break;
case NET_DIM_TOO_TIRED:
net_dim_park_tired(dim);
break;
}
break;
}
if ((prev_state != NET_DIM_PARKING_ON_TOP) ||
(dim->tune_state != NET_DIM_PARKING_ON_TOP))
dim->prev_stats = *curr_stats;
return dim->profile_ix != prev_ix;
}
static inline void net_dim_sample(u16 event_ctr,
u64 packets,
u64 bytes,
struct net_dim_sample *s)
{
s->time = ktime_get();
s->pkt_ctr = packets;
s->byte_ctr = bytes;
s->event_ctr = event_ctr;
}
#define NET_DIM_NEVENTS 64
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) & (BIT_ULL(bits) - 1))
static inline void net_dim_calc_stats(struct net_dim_sample *start,
struct net_dim_sample *end,
struct net_dim_stats *curr_stats)
{
/* u32 holds up to 71 minutes, should be enough */
u32 delta_us = ktime_us_delta(end->time, start->time);
u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
start->byte_ctr);
if (!delta_us)
return;
curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
curr_stats->epms = DIV_ROUND_UP(NET_DIM_NEVENTS * USEC_PER_MSEC,
delta_us);
}
static inline void net_dim(struct net_dim *dim,
struct net_dim_sample end_sample)
{
struct net_dim_stats curr_stats;
u16 nevents;
switch (dim->state) {
case NET_DIM_MEASURE_IN_PROGRESS:
nevents = BIT_GAP(BITS_PER_TYPE(u16),
end_sample.event_ctr,
dim->start_sample.event_ctr);
if (nevents < NET_DIM_NEVENTS)
break;
net_dim_calc_stats(&dim->start_sample, &end_sample,
&curr_stats);
if (net_dim_decision(&curr_stats, dim)) {
dim->state = NET_DIM_APPLY_NEW_PROFILE;
schedule_work(&dim->work);
break;
}
/* fall through */
case NET_DIM_START_MEASURE:
net_dim_sample(end_sample.event_ctr, end_sample.pkt_ctr, end_sample.byte_ctr,
&dim->start_sample);
dim->state = NET_DIM_MEASURE_IN_PROGRESS;
break;
case NET_DIM_APPLY_NEW_PROFILE:
break;
}
}
#endif /* NET_DIM_H */


@@ -562,6 +562,14 @@ config SIGNATURE
Digital signature verification. Currently only RSA is supported.
Implementation is done using GnuPG MPI library
config DIMLIB
bool "DIM library"
default y
help
Dynamic Interrupt Moderation library.
Implements an algorithm for dynamically changing CQ moderation values
according to run-time performance.
#
# libfdt files, only selected if needed.
#


@@ -202,6 +202,7 @@ obj-$(CONFIG_GLOB) += glob.o
obj-$(CONFIG_GLOB_SELFTEST) += globtest.o
obj-$(CONFIG_MPILIB) += mpi/
obj-$(CONFIG_DIMLIB) += dim/
obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o

lib/dim/Makefile (new file, 9 lines)

@@ -0,0 +1,9 @@
#
# DIM Dynamic Interrupt Moderation library
#
obj-$(CONFIG_DIMLIB) = net_dim.o
net_dim-y = \
dim.o \
net_dim.o

lib/dim/dim.c (new file, 83 lines)

@@ -0,0 +1,83 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
bool dim_on_top(struct dim *dim)
{
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
return true;
case DIM_GOING_RIGHT:
return (dim->steps_left > 1) && (dim->steps_right == 1);
default: /* DIM_GOING_LEFT */
return (dim->steps_right > 1) && (dim->steps_left == 1);
}
}
EXPORT_SYMBOL(dim_on_top);
void dim_turn(struct dim *dim)
{
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
dim->tune_state = DIM_GOING_LEFT;
dim->steps_left = 0;
break;
case DIM_GOING_LEFT:
dim->tune_state = DIM_GOING_RIGHT;
dim->steps_right = 0;
break;
}
}
EXPORT_SYMBOL(dim_turn);
void dim_park_on_top(struct dim *dim)
{
dim->steps_right = 0;
dim->steps_left = 0;
dim->tired = 0;
dim->tune_state = DIM_PARKING_ON_TOP;
}
EXPORT_SYMBOL(dim_park_on_top);
void dim_park_tired(struct dim *dim)
{
dim->steps_right = 0;
dim->steps_left = 0;
dim->tune_state = DIM_PARKING_TIRED;
}
EXPORT_SYMBOL(dim_park_tired);
void dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
struct dim_stats *curr_stats)
{
/* u32 holds up to 71 minutes, should be enough */
u32 delta_us = ktime_us_delta(end->time, start->time);
u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
start->byte_ctr);
u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr,
start->comp_ctr);
if (!delta_us)
return;
curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC,
delta_us);
curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us);
if (curr_stats->epms != 0)
curr_stats->cpe_ratio =
(curr_stats->cpms * 100) / curr_stats->epms;
else
curr_stats->cpe_ratio = 0;
}
EXPORT_SYMBOL(dim_calc_stats);

lib/dim/net_dim.c (new file, 190 lines)

@@ -0,0 +1,190 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/dim.h>
struct dim_cq_moder
net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = rx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_rx_moderation);
struct dim_cq_moder
net_dim_get_def_rx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_rx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
struct dim_cq_moder
net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
{
struct dim_cq_moder cq_moder = tx_profile[cq_period_mode][ix];
cq_moder.cq_period_mode = cq_period_mode;
return cq_moder;
}
EXPORT_SYMBOL(net_dim_get_tx_moderation);
struct dim_cq_moder
net_dim_get_def_tx_moderation(u8 cq_period_mode)
{
u8 profile_ix = cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE ?
NET_DIM_DEF_PROFILE_CQE : NET_DIM_DEF_PROFILE_EQE;
return net_dim_get_tx_moderation(cq_period_mode, profile_ix);
}
EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
static int net_dim_step(struct dim *dim)
{
if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
return DIM_TOO_TIRED;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
case DIM_PARKING_TIRED:
break;
case DIM_GOING_RIGHT:
if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
return DIM_ON_EDGE;
dim->profile_ix++;
dim->steps_right++;
break;
case DIM_GOING_LEFT:
if (dim->profile_ix == 0)
return DIM_ON_EDGE;
dim->profile_ix--;
dim->steps_left++;
break;
}
dim->tired++;
return DIM_STEPPED;
}
static void net_dim_exit_parking(struct dim *dim)
{
dim->tune_state = dim->profile_ix ? DIM_GOING_LEFT : DIM_GOING_RIGHT;
net_dim_step(dim);
}
static int net_dim_stats_compare(struct dim_stats *curr,
struct dim_stats *prev)
{
if (!prev->bpms)
return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->ppms)
return curr->ppms ? DIM_STATS_BETTER :
DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
if (!prev->epms)
return DIM_STATS_SAME;
if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms))
return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
DIM_STATS_WORSE;
return DIM_STATS_SAME;
}
static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
{
int prev_state = dim->tune_state;
int prev_ix = dim->profile_ix;
int stats_res;
int step_res;
switch (dim->tune_state) {
case DIM_PARKING_ON_TOP:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_SAME)
net_dim_exit_parking(dim);
break;
case DIM_PARKING_TIRED:
dim->tired--;
if (!dim->tired)
net_dim_exit_parking(dim);
break;
case DIM_GOING_RIGHT:
case DIM_GOING_LEFT:
stats_res = net_dim_stats_compare(curr_stats,
&dim->prev_stats);
if (stats_res != DIM_STATS_BETTER)
dim_turn(dim);
if (dim_on_top(dim)) {
dim_park_on_top(dim);
break;
}
step_res = net_dim_step(dim);
switch (step_res) {
case DIM_ON_EDGE:
dim_park_on_top(dim);
break;
case DIM_TOO_TIRED:
dim_park_tired(dim);
break;
}
break;
}
if (prev_state != DIM_PARKING_ON_TOP ||
dim->tune_state != DIM_PARKING_ON_TOP)
dim->prev_stats = *curr_stats;
return dim->profile_ix != prev_ix;
}
void net_dim(struct dim *dim, struct dim_sample end_sample)
{
struct dim_stats curr_stats;
u16 nevents;
switch (dim->state) {
case DIM_MEASURE_IN_PROGRESS:
nevents = BIT_GAP(BITS_PER_TYPE(u16),
end_sample.event_ctr,
dim->start_sample.event_ctr);
if (nevents < DIM_NEVENTS)
break;
dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats);
if (net_dim_decision(&curr_stats, dim)) {
dim->state = DIM_APPLY_NEW_PROFILE;
schedule_work(&dim->work);
break;
}
/* fall through */
case DIM_START_MEASURE:
dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
end_sample.byte_ctr, &dim->start_sample);
dim->state = DIM_MEASURE_IN_PROGRESS;
break;
case DIM_APPLY_NEW_PROFILE:
break;
}
}
EXPORT_SYMBOL(net_dim);