summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2019-05-18 15:42:35 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2019-05-18 16:48:48 +0100
commitebc05fe448516d20167d9a62e975511977739e0e (patch)
tree619bd89562821f3cc24a746f436b7b1e1c844149
parent96282bea5983aaa475d81b916e50f108a21467a0 (diff)
tukey
-rw-r--r--ministat.c44
1 files changed, 25 insertions, 19 deletions
diff --git a/ministat.c b/ministat.c
index 3d2b5b1..52a7745 100644
--- a/ministat.c
+++ b/ministat.c
@@ -211,25 +211,25 @@ Stddev(struct dataset *ds)
static int
tukey(struct dataset *ds, int *low, int *high)
{
- int range = *high - *low;
- int mid = range / 2 + *low;
- double iqm = (ds->points[mid] + ds->points[ds->count - mid - 1]) / 2;
- double iqr = ds->points[3*range/4 + *low] - ds->points[(range + 3)/4 + *low];
- double lower = iqm - 1.5 * iqr;
- double upper = iqm + 1.5 * iqr;
- int shrunk = 0;
- int i;
+ double iqr = ds->points[(3**high + *low)/4] - ds->points[(*high + 3**low + 3)/4];
- for (i = *low; i < *high; i++) {
- double a = ds->points[i];
+ double median = Median(ds, *low, *high);
+ double dlow = fabs(ds->points[*low] - median);
+ double dhigh = fabs(ds->points[*high - 1] - median);
- if (a < lower)
- shrunk = 1, *low = i + 1;
- if (a > upper)
- shrunk = 1, *high = i;
+ if (dlow > dhigh) {
+ if (dlow > 1.5 * iqr) {
+ ++*low;
+ return 1;
+ }
+ } else {
+ if (dhigh > 1.5 * iqr) {
+ --*high;
+ return 1;
+ }
}
- return shrunk;
+ return 0;
}
static double tau(struct dataset *ds)
@@ -276,12 +276,12 @@ thompson(struct dataset *ds, int *low, int *high)
return 0;
}
-static void
+static int
FilterOutliers(struct dataset *ds)
{
int i, low = 0, high = ds->count;
- while (tukey(ds, &low, &high))
+ while (high > low && tukey(ds, &low, &high))
;
for (i = low; i < high; i++) {
@@ -292,10 +292,11 @@ FilterOutliers(struct dataset *ds)
ds->syy += a * a;
}
- while (thompson(ds, &low, &high))
+ while (ds->n > 5 && thompson(ds, &low, &high))
;
ds->median = Median(ds, low, high);
+ return high - low;
}
static void
@@ -664,8 +665,13 @@ ReadSet(const char *n, int column, const char *delim)
"Dataset %s must contain at least 7 data points\n", n);
return NULL;
}
+
qsort(s->points, s->count, sizeof *s->points, dbl_cmp);
- FilterOutliers(s);
+ if (FilterOutliers(s) < 3) {
+ fprintf(stderr,
+ "Dataset %s must contain at least 3 filtered data points\n", n);
+ return NULL;
+ }
return (s);
}