diff options
author | Edward Cree <ecree@solarflare.com> | 2014-04-16 19:27:48 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-04-16 14:33:57 -0400 |
commit | e283546c0465dd3026bc94f7b1a9de7f6b8969ec (patch) | |
tree | 3828d4faeed3986b0f01b93416b910b11cd33280 /drivers/net/ethernet/sfc/mcdi.c | |
parent | 10ec34fcb100412ab186c141a9c3557d1270effd (diff) |
sfc:On MCDI timeout, issue an FLR (and mark MCDI to fail-fast)
When an MCDI command times out (whether or not we find it
completed when we poll), call efx_mcdi_abandon(), which tells
all subsequent MCDI calls to fail-fast, and queues up an FLR.
Because an FLR doesn't lead to receiving any reboot even from
the MC (unlike most other types of reset), we have to call
efx_ef10_reset_mc_allocations.
In efx_start_all(), if a reset (of any kind) is pending, we
bail out.
Without this, attempts to reconfigure (e.g. change mtu) can
cause driver/mc state inconsistency if the first MCDI call
triggers an FLR.
For similar reasons, on EF10, in
efx_reset_down(method=RESET_TYPE_MCDI_TIMEOUT), set the number
of active queues to zero before calling efx_stop_all().
And, on farch, in efx_reset_up(method=RESET_TYPE_MCDI_TIMEOUT),
set active_queues and flushes pending & outstanding to zero.
efx_mcdi_mode_{poll,event}() should not take us out of fail-fast
mode. Instead, this is done by efx_mcdi_reset() after the FLR
completes.
The new FLR reset_type RESET_TYPE_MCDI_TIMEOUT doesn't really
fit into the hierarchy of reset 'scopes' whereby efx_reset()
decides some resets subsume others. Thus, it uses separate logic.
Also, fixed up some inconsistency around RESET_TYPE_MC_BIST,
which was in the wrong place in that hierarchy.
Signed-off-by: Shradha Shah <sshah@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/sfc/mcdi.c')
-rw-r--r-- | drivers/net/ethernet/sfc/mcdi.c | 55 |
1 files changed, 44 insertions, 11 deletions
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 7bd4b14bf3b3..5239cf9bdc56 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -52,12 +52,7 @@ static void efx_mcdi_timeout_async(unsigned long context); static int efx_mcdi_drv_attach(struct efx_nic *efx, bool driver_operating, bool *was_attached_out); static bool efx_mcdi_poll_once(struct efx_nic *efx); - -static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx) -{ - EFX_BUG_ON_PARANOID(!efx->mcdi); - return &efx->mcdi->iface; -} +static void efx_mcdi_abandon(struct efx_nic *efx); int efx_mcdi_init(struct efx_nic *efx) { @@ -558,6 +553,8 @@ static int _efx_mcdi_rpc_finish(struct efx_nic *efx, unsigned cmd, size_t inlen, rc = 0; } + efx_mcdi_abandon(efx); + /* Close the race with efx_mcdi_ev_cpl() executing just too late * and completing a request we've just cancelled, by ensuring * that the seqno check therein fails. @@ -672,6 +669,9 @@ int efx_mcdi_rpc_start(struct efx_nic *efx, unsigned cmd, if (efx->mc_bist_for_other_fn) return -ENETDOWN; + if (mcdi->mode == MCDI_MODE_FAIL) + return -ENETDOWN; + efx_mcdi_acquire_sync(mcdi); efx_mcdi_send_request(efx, cmd, inbuf, inlen); return 0; @@ -812,7 +812,11 @@ void efx_mcdi_mode_poll(struct efx_nic *efx) return; mcdi = efx_mcdi(efx); - if (mcdi->mode == MCDI_MODE_POLL) + /* If already in polling mode, nothing to do. + * If in fail-fast state, don't switch to polled completion. + * FLR recovery will do that later. + */ + if (mcdi->mode == MCDI_MODE_POLL || mcdi->mode == MCDI_MODE_FAIL) return; /* We can switch from event completion to polled completion, because @@ -841,8 +845,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx) mcdi = efx_mcdi(efx); - /* We must be in polling mode so no more requests can be queued */ - BUG_ON(mcdi->mode != MCDI_MODE_POLL); + /* We must be in poll or fail mode so no more requests can be queued */ + BUG_ON(mcdi->mode == MCDI_MODE_EVENTS); del_timer_sync(&mcdi->async_timer); @@ -875,8 +879,11 @@ void efx_mcdi_mode_event(struct efx_nic *efx) return; mcdi = efx_mcdi(efx); - - if (mcdi->mode == MCDI_MODE_EVENTS) + /* If already in event completion mode, nothing to do. + * If in fail-fast state, don't switch to event completion. FLR + * recovery will do that later. + */ + if (mcdi->mode == MCDI_MODE_EVENTS || mcdi->mode == MCDI_MODE_FAIL) return; /* We can't switch from polled to event completion in the middle of a @@ -966,6 +973,19 @@ static void efx_mcdi_ev_bist(struct efx_nic *efx) spin_unlock(&mcdi->iface_lock); } +/* MCDI timeouts seen, so make all MCDI calls fail-fast and issue an FLR to try + * to recover. + */ +static void efx_mcdi_abandon(struct efx_nic *efx) +{ + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + + if (xchg(&mcdi->mode, MCDI_MODE_FAIL) == MCDI_MODE_FAIL) + return; /* it had already been done */ + netif_dbg(efx, hw, efx->net_dev, "MCDI is timing out; trying to recover\n"); + efx_schedule_reset(efx, RESET_TYPE_MCDI_TIMEOUT); +} + /* Called from falcon_process_eventq for MCDI events */ void efx_mcdi_process_event(struct efx_channel *channel, efx_qword_t *event) @@ -1512,6 +1532,19 @@ int efx_mcdi_reset(struct efx_nic *efx, enum reset_type method) { int rc; + /* If MCDI is down, we can't handle_assertion */ + if (method == RESET_TYPE_MCDI_TIMEOUT) { + rc = pci_reset_function(efx->pci_dev); + if (rc) + return rc; + /* Re-enable polled MCDI completion */ + if (efx->mcdi) { + struct efx_mcdi_iface *mcdi = efx_mcdi(efx); + mcdi->mode = MCDI_MODE_POLL; + } + return 0; + } + /* Recover from a failed assertion pre-reset */ rc = efx_mcdi_handle_assertion(efx); if (rc) |