summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/ata/libata-eh.c184
-rw-r--r--include/linux/libata.h1
2 files changed, 125 insertions, 60 deletions
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 1abfdba8d99b..31738627ec64 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -44,6 +44,12 @@
#include "libata.h"
+enum {
+ ATA_EH_SPDN_NCQ_OFF = (1 << 0),
+ ATA_EH_SPDN_SPEED_DOWN = (1 << 1),
+ ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2),
+};
+
static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
static void ata_eh_handle_port_suspend(struct ata_port *ap);
@@ -65,12 +71,9 @@ static void ata_ering_record(struct ata_ering *ering, int is_io,
ent->timestamp = get_jiffies_64();
}
-static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
+static void ata_ering_clear(struct ata_ering *ering)
{
- struct ata_ering_entry *ent = &ering->ring[ering->cursor];
- if (!ent->err_mask)
- return NULL;
- return ent;
+ memset(ering, 0, sizeof(*ering));
}
static int ata_ering_map(struct ata_ering *ering,
@@ -1159,87 +1162,99 @@ static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
return action;
}
-static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
+static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
{
- if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
+ if (err_mask & AC_ERR_ATA_BUS)
return 1;
- if (ent->is_io) {
- if (ent->err_mask & AC_ERR_HSM)
- return 1;
- if ((ent->err_mask &
- (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+ if (err_mask & AC_ERR_TIMEOUT)
+ return 2;
+
+ if (is_io) {
+ if (err_mask & AC_ERR_HSM)
return 2;
+ if ((err_mask &
+ (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
+ return 3;
}
return 0;
}
-struct speed_down_needed_arg {
+struct speed_down_verdict_arg {
u64 since;
- int nr_errors[3];
+ int nr_errors[4];
};
-static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
+static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
- struct speed_down_needed_arg *arg = void_arg;
+ struct speed_down_verdict_arg *arg = void_arg;
+ int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);
if (ent->timestamp < arg->since)
return -1;
- arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
+ arg->nr_errors[cat]++;
return 0;
}
/**
- * ata_eh_speed_down_needed - Determine wheter speed down is necessary
+ * ata_eh_speed_down_verdict - Determine speed down verdict
* @dev: Device of interest
*
* This function examines error ring of @dev and determines
- * whether speed down is necessary. Speed down is necessary if
- * there have been more than 3 of Cat-1 errors or 10 of Cat-2
- * errors during last 15 minutes.
+ * whether NCQ needs to be turned off, transfer speed should be
+ * stepped down, or falling back to PIO is necessary.
+ *
+ * Cat-1 is ATA_BUS error for any command.
*
- * Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
- * violation for known supported commands.
+ * Cat-2 is TIMEOUT for any command or HSM violation for known
+ * supported commands.
*
- * Cat-2 errors are unclassified DEV error for known supported
+ * Cat-3 is is unclassified DEV error for known supported
* command.
*
+ * NCQ needs to be turned off if there have been more than 3
+ * Cat-2 + Cat-3 errors during last 10 minutes.
+ *
+ * Speed down is necessary if there have been more than 3 Cat-1 +
+ * Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
+ *
+ * Falling back to PIO mode is necessary if there have been more
+ * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
+ *
* LOCKING:
* Inherited from caller.
*
* RETURNS:
- * 1 if speed down is necessary, 0 otherwise
+ * OR of ATA_EH_SPDN_* flags.
*/
-static int ata_eh_speed_down_needed(struct ata_device *dev)
+static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
- const u64 interval = 15LLU * 60 * HZ;
- static const int err_limits[3] = { -1, 3, 10 };
- struct speed_down_needed_arg arg;
- struct ata_ering_entry *ent;
- int err_cat;
- u64 j64;
+ const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
+ u64 j64 = get_jiffies_64();
+ struct speed_down_verdict_arg arg;
+ unsigned int verdict = 0;
- ent = ata_ering_top(&dev->ering);
- if (!ent)
- return 0;
+ /* scan past 10 mins of error history */
+ memset(&arg, 0, sizeof(arg));
+ arg.since = j64 - min(j64, j10mins);
+ ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
- err_cat = ata_eh_categorize_ering_entry(ent);
- if (err_cat == 0)
- return 0;
+ if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
+ verdict |= ATA_EH_SPDN_NCQ_OFF;
+ if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
+ verdict |= ATA_EH_SPDN_SPEED_DOWN;
+ /* scan past 3 mins of error history */
memset(&arg, 0, sizeof(arg));
+ arg.since = j64 - min(j64, j5mins);
+ ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
- j64 = get_jiffies_64();
- if (j64 >= interval)
- arg.since = j64 - interval;
- else
- arg.since = 0;
-
- ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
+ if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
+ verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
- return arg.nr_errors[err_cat] > err_limits[err_cat];
+ return verdict;
}
/**
@@ -1257,31 +1272,80 @@ static int ata_eh_speed_down_needed(struct ata_device *dev)
* Kernel thread context (may sleep).
*
* RETURNS:
- * 0 on success, -errno otherwise
+ * Determined recovery action.
*/
-static int ata_eh_speed_down(struct ata_device *dev, int is_io,
- unsigned int err_mask)
+static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
+ unsigned int err_mask)
{
- if (!err_mask)
+ unsigned int verdict;
+ unsigned int action = 0;
+
+ /* don't bother if Cat-0 error */
+ if (ata_eh_categorize_error(is_io, err_mask) == 0)
return 0;
/* record error and determine whether speed down is necessary */
ata_ering_record(&dev->ering, is_io, err_mask);
+ verdict = ata_eh_speed_down_verdict(dev);
- if (!ata_eh_speed_down_needed(dev))
- return 0;
+ /* turn off NCQ? */
+ if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
+ (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
+ ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
+ dev->flags |= ATA_DFLAG_NCQ_OFF;
+ ata_dev_printk(dev, KERN_WARNING,
+ "NCQ disabled due to excessive errors\n");
+ goto done;
+ }
+
+ /* speed down? */
+ if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
+ /* speed down SATA link speed if possible */
+ if (sata_down_spd_limit(dev->ap) == 0) {
+ action |= ATA_EH_HARDRESET;
+ goto done;
+ }
- /* speed down SATA link speed if possible */
- if (sata_down_spd_limit(dev->ap) == 0)
- return ATA_EH_HARDRESET;
+ /* lower transfer mode */
+ if (dev->spdn_cnt < 2) {
+ static const int dma_dnxfer_sel[] =
+ { ATA_DNXFER_DMA, ATA_DNXFER_40C };
+ static const int pio_dnxfer_sel[] =
+ { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
+ int sel;
- /* lower transfer mode */
- if (ata_down_xfermask_limit(dev, ATA_DNXFER_ANY) == 0)
- return ATA_EH_SOFTRESET;
+ if (dev->xfer_shift != ATA_SHIFT_PIO)
+ sel = dma_dnxfer_sel[dev->spdn_cnt];
+ else
+ sel = pio_dnxfer_sel[dev->spdn_cnt];
+
+ dev->spdn_cnt++;
+
+ if (ata_down_xfermask_limit(dev, sel) == 0) {
+ action |= ATA_EH_SOFTRESET;
+ goto done;
+ }
+ }
+ }
+
+ /* Fall back to PIO? Slowing down to PIO is meaningless for
+ * SATA. Consider it only for PATA.
+ */
+ if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
+ (dev->ap->cbl != ATA_CBL_SATA) &&
+ (dev->xfer_shift != ATA_SHIFT_PIO)) {
+ if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
+ dev->spdn_cnt = 0;
+ action |= ATA_EH_SOFTRESET;
+ goto done;
+ }
+ }
- ata_dev_printk(dev, KERN_ERR,
- "speed down requested but no transfer mode left\n");
return 0;
+ done:
+ /* device has been slowed down, blow error history */
+ ata_ering_clear(&dev->ering);
+ return action;
}
/**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 60dfc5f7e882..5db50fa594ac 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -495,6 +495,7 @@ struct ata_device {
/* error history */
struct ata_ering ering;
+ int spdn_cnt;
unsigned int horkage; /* List of broken features */
#ifdef CONFIG_SATA_ACPI
/* ACPI objects info */