summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPraveen Kumar Kannoju <praveen.kannoju@oracle.com>2024-05-08 19:06:17 +0530
committerJakub Kicinski <kuba@kernel.org>2024-05-09 20:24:13 -0700
commit33fb988b67050d9bb512f77f08453fa00088943c (patch)
tree5651c6f9c19265e56528c1d519a7ffb61dd6b013
parent60e0f986e89f10f2de874ff3ce8e2230701c9706 (diff)
net/sched: adjust device watchdog timer to detect stopped queue at right time
Applications are sensitive to long network latency, particularly heartbeat monitoring ones. Longer the tx timeout recovery higher the risk with such applications on a production machines. This patch remedies, yet honoring device set tx timeout. Modify watchdog next timeout to be shorter than the device specified. Compute the next timeout be equal to device watchdog timeout less the how long ago queue stop had been done. At next watchdog timeout tx timeout handler is called into if still in stopped state. Either called or not called, restore the watchdog timeout back to device specified. Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com> Link: https://lore.kernel.org/r/20240508133617.4424-1-praveen.kannoju@oracle.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r--net/sched/sch_generic.c11
1 files changed, 7 insertions, 4 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d3f6006b563c..2a637a17061b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -506,19 +506,22 @@ static void dev_watchdog(struct timer_list *t)
unsigned int timedout_ms = 0;
unsigned int i;
unsigned long trans_start;
+ unsigned long oldest_start = jiffies;
for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
trans_start = READ_ONCE(txq->trans_start);
- if (netif_xmit_stopped(txq) &&
- time_after(jiffies, (trans_start +
- dev->watchdog_timeo))) {
+ if (!netif_xmit_stopped(txq))
+ continue;
+ if (time_after(jiffies, trans_start + dev->watchdog_timeo)) {
timedout_ms = jiffies_to_msecs(jiffies - trans_start);
atomic_long_inc(&txq->trans_timeout);
break;
}
+ if (time_after(oldest_start, trans_start))
+ oldest_start = trans_start;
}
if (unlikely(timedout_ms)) {
@@ -531,7 +534,7 @@ static void dev_watchdog(struct timer_list *t)
netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies +
+ round_jiffies(oldest_start +
dev->watchdog_timeo)))
release = false;
}