From 4ba67ef3a1fbb7d8dc5f00de9b93a583d05b38cc Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 11 Apr 2024 12:22:32 -0700 Subject: [PATCH] net: dqs: make struct dql more cache efficient With the previous change, struct dqs->stall_thrs will be in the hot path (at queue side), even if DQS is disabled. The other fields accessed in this function (last_obj_cnt and num_queued) are in the first cache line, let's move this field (stall_thrs) to the very first cache line, since there is a hole there. This does not change the structure size, since it moves an short (2 bytes) to 4-bytes whole in the first cache line. This is the new structure format now: struct dql { unsigned int num_queued; unsigned int last_obj_cnt; ... short unsigned int stall_thrs; /* XXX 2 bytes hole, try to pack */ ... /* --- cacheline 1 boundary (64 bytes) --- */ ... /* Longest stall detected, reported to user */ short unsigned int stall_max; /* XXX 2 bytes hole, try to pack */ }; Also, read the stall_thrs (now in the very first cache line) earlier, together with dql->num_queued (also in the first cache line). Suggested-by: Jakub Kicinski Suggested-by: Eric Dumazet Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20240411192241.2498631-5-leitao@debian.org Signed-off-by: Jakub Kicinski --- include/linux/dynamic_queue_limits.h | 5 +++-- lib/dynamic_queue_limits.c | 13 +++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 869afb800ea1..281298e77a15 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -50,6 +50,9 @@ struct dql { unsigned int adj_limit; /* limit + num_completed */ unsigned int last_obj_cnt; /* Count at last queuing */ + /* Stall threshold (in jiffies), defined by user */ + unsigned short stall_thrs; + unsigned long history_head; /* top 58 bits of jiffies */ /* stall entries, a bit per entry */ unsigned long history[DQL_HIST_LEN]; @@ -71,8 +74,6 @@ struct dql { unsigned int min_limit; /* Minimum limit */ unsigned int slack_hold_time; /* Time to measure slack */ - /* Stall threshold (in jiffies), defined by user */ - unsigned short stall_thrs; /* Longest stall detected, reported to user */ unsigned short stall_max; unsigned long last_reap; /* Last reap (in jiffies) */ diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index a1389db1c30a..e49deddd3de9 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -15,12 +15,10 @@ #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? (A) - (B) : 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) -static void dql_check_stall(struct dql *dql) +static void dql_check_stall(struct dql *dql, unsigned short stall_thrs) { - unsigned short stall_thrs; unsigned long now; - stall_thrs = READ_ONCE(dql->stall_thrs); if (!stall_thrs) return; @@ -86,9 +84,16 @@ void dql_completed(struct dql *dql, unsigned int count) { unsigned int inprogress, prev_inprogress, limit; unsigned int ovlimit, completed, num_queued; + unsigned short stall_thrs; bool all_prev_completed; num_queued = READ_ONCE(dql->num_queued); + /* Read stall_thrs in advance since it belongs to the same (first) + * cache line as ->num_queued. This way, dql_check_stall() does not + * need to touch the first cache line again later, reducing the window + * of possible false sharing. + */ + stall_thrs = READ_ONCE(dql->stall_thrs); /* Can't complete more than what's in queue */ BUG_ON(count > num_queued - dql->num_completed); @@ -178,7 +183,7 @@ void dql_completed(struct dql *dql, unsigned int count) dql->num_completed = completed; dql->prev_num_queued = num_queued; - dql_check_stall(dql); + dql_check_stall(dql, stall_thrs); } EXPORT_SYMBOL(dql_completed);