diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2019-05-18 15:42:35 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2019-05-18 16:48:48 +0100 |
commit | ebc05fe448516d20167d9a62e975511977739e0e (patch) | |
tree | 619bd89562821f3cc24a746f436b7b1e1c844149 | |
parent | 96282bea5983aaa475d81b916e50f108a21467a0 (diff) |
tukey
-rw-r--r-- | ministat.c | 44 |
1 files changed, 25 insertions, 19 deletions
@@ -211,25 +211,25 @@ Stddev(struct dataset *ds) static int tukey(struct dataset *ds, int *low, int *high) { - int range = *high - *low; - int mid = range / 2 + *low; - double iqm = (ds->points[mid] + ds->points[ds->count - mid - 1]) / 2; - double iqr = ds->points[3*range/4 + *low] - ds->points[(range + 3)/4 + *low]; - double lower = iqm - 1.5 * iqr; - double upper = iqm + 1.5 * iqr; - int shrunk = 0; - int i; + double iqr = ds->points[(3**high + *low)/4] - ds->points[(*high + 3**low + 3)/4]; - for (i = *low; i < *high; i++) { - double a = ds->points[i]; + double median = Median(ds, *low, *high); + double dlow = fabs(ds->points[*low] - median); + double dhigh = fabs(ds->points[*high - 1] - median); - if (a < lower) - shrunk = 1, *low = i + 1; - if (a > upper) - shrunk = 1, *high = i; + if (dlow > dhigh) { + if (dlow > 1.5 * iqr) { + ++*low; + return 1; + } + } else { + if (dhigh > 1.5 * iqr) { + --*high; + return 1; + } } - return shrunk; + return 0; } static double tau(struct dataset *ds) @@ -276,12 +276,12 @@ thompson(struct dataset *ds, int *low, int *high) return 0; } -static void +static int FilterOutliers(struct dataset *ds) { int i, low = 0, high = ds->count; - while (tukey(ds, &low, &high)) + while (high > low && tukey(ds, &low, &high)) ; for (i = low; i < high; i++) { @@ -292,10 +292,11 @@ FilterOutliers(struct dataset *ds) ds->syy += a * a; } - while (thompson(ds, &low, &high)) + while (ds->n > 5 && thompson(ds, &low, &high)) ; ds->median = Median(ds, low, high); + return high - low; } static void @@ -664,8 +665,13 @@ ReadSet(const char *n, int column, const char *delim) "Dataset %s must contain at least 7 data points\n", n); return NULL; } + qsort(s->points, s->count, sizeof *s->points, dbl_cmp); - FilterOutliers(s); + if (FilterOutliers(s) < 3) { + fprintf(stderr, + "Dataset %s must contain at least 3 filtered data points\n", n); + return NULL; + } return (s); } |