From a0e026e7b37e997f4fa3fcaa714e5484f3ce9e75 Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Fri, 18 Aug 2023 15:54:45 +0300 Subject: net: phy: Fix deadlocking in phy_error() invocation Since commit 91a7cda1f4b8 ("net: phy: Fix race condition on link status change") all the phy_error() method invocations have been causing the nested-mutex-lock deadlock because it's normally done in the PHY-driver threaded IRQ handlers which since that change have been called with the phydev->lock mutex held. Here is the calls thread: IRQ: phy_interrupt() +-> mutex_lock(&phydev->lock); <--------------------+ drv->handle_interrupt() | Deadlock due +-> ERROR: phy_error() + to the nested +-> phy_process_error() | mutex lock +-> mutex_lock(&phydev->lock); <-+ phydev->state = PHY_ERROR; mutex_unlock(&phydev->lock); mutex_unlock(&phydev->lock); The problem can be easily reproduced just by calling phy_error() from any PHY-device threaded interrupt handler. Fix it by dropping the phydev->lock mutex lock from the phy_process_error() method and printing a nasty error message to the system log if the mutex isn't held in the caller execution context. Note for the fix to work correctly in the PHY-subsystem itself the phydev->lock mutex locking must be added to the phy_error_precise() function. Link: https://lore.kernel.org/netdev/20230816180944.19262-1-fancer.lancer@gmail.com Fixes: 91a7cda1f4b8 ("net: phy: Fix race condition on link status change") Suggested-by: Andrew Lunn Signed-off-by: Serge Semin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index bdf00b2b2c1d..a9ecfdd19624 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1184,9 +1184,11 @@ void phy_stop_machine(struct phy_device *phydev) static void phy_process_error(struct phy_device *phydev) { - mutex_lock(&phydev->lock); + /* phydev->lock must be held for the state change to be safe */ + if (!mutex_is_locked(&phydev->lock)) + phydev_err(phydev, "PHY-device data unsafe context\n"); + phydev->state = PHY_ERROR; - mutex_unlock(&phydev->lock); phy_trigger_machine(phydev); } @@ -1195,7 +1197,9 @@ static void phy_error_precise(struct phy_device *phydev, const void *func, int err) { WARN(1, "%pS: returned: %d\n", func, err); + mutex_lock(&phydev->lock); phy_process_error(phydev); + mutex_unlock(&phydev->lock); } /** @@ -1204,8 +1208,7 @@ static void phy_error_precise(struct phy_device *phydev, * * Moves the PHY to the ERROR state in response to a read * or write error, and tells the controller the link is down. - * Must not be called from interrupt context, or while the - * phydev->lock is held. + * Must be called with phydev->lock held. */ void phy_error(struct phy_device *phydev) { -- cgit v1.2.3