From 34860d9a50d0a62473d89b8753107e34c241c74d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 May 2019 16:06:38 +0100 Subject: welch --- ministat.c | 87 +++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 23 deletions(-) diff --git a/ministat.c b/ministat.c index e1c5982..96fb407 100644 --- a/ministat.c +++ b/ministat.c @@ -311,33 +311,74 @@ Vitals(struct dataset *ds, int flag) printf("\n"); } +static double dof(struct dataset *a, struct dataset *b) +{ + double v; + + v = Var(a) / a->sn + Var(b) / b->sn; + v *= v; + + v /= (Var(a) * Var(a) / (a->sn * a->sn * (a->sn - 1)) + + (Var(b) * Var(b) / (b->sn * b->sn * (b->sn - 1)))); + + return v; +} + +static double welch(struct dataset *a, struct dataset *b) +{ + return (Avg(a) - Avg(b)) / sqrt(Var(a) / a->n + Var(b) / b->n); +} + +struct welch_pvalue { + double w, v, gamma; +}; + +static double incomplete_bessel(double x, void *data) +{ + struct welch_pvalue *pv = data; + + return pow(x, pv->v / 2 - 1) / sqrt(1 - x) / pv->gamma; +} + +static double simpson(int n, double upper, + double (*fn)(double, void *), void *data) +{ + double dx = upper / n, x = dx; + double sum = 0; + + sum += fn(0, data) * dx; + sum += fn(dx / 2, data) * dx * 4; + while (--n) { + sum += fn(x, data) * dx * 2; + sum += fn(x + dx / 2, data) * dx * 4; + x += dx; + } + sum += fn(upper, data) * dx; + + return sum / 6; +} + +static double pvalue(double w, double v) +{ + struct welch_pvalue pv = { + .w = w, + .v = v, + .gamma = exp(lgamma(v / 2) + lgamma(.5) - lgamma(v / 2 + .5)), + }; + + return simpson(2000, v / (w * w + v), incomplete_bessel, &pv); +} + static void Relative(struct dataset *ds, struct dataset *rs, int confidx) { - double spool, s, d, e, t; - int i; + double p = 100 *(1 - pvalue(welch(ds, rs), dof(ds, rs))); - i = ds->sn + rs->sn - 2; - if (i > NSTUDENT) - t = student[0][confidx]; - else - t = student[i][confidx]; - spool = (ds->sn - 1) * Var(ds) + (rs->sn - 1) * Var(rs); - spool /= ds->sn + rs->sn - 2; - spool = sqrt(spool); - s = spool * sqrt(1.0 / ds->sn + 1.0 / rs->sn); - d = Avg(ds) - Avg(rs); - e = t * s; - - if (fabs(d) > e) { - - printf("Difference at %.1f%% confidence\n", studentpct[confidx]); - printf(" %g +/- %g\n", d, e); - printf(" %g%% +/- %g%%\n", d * 100 / Avg(rs), e * 100 / Avg(rs)); - printf(" (Student's t, pooled s = %g)\n", spool); - } else { - printf("No difference proven at %.1f%% confidence\n", - studentpct[confidx]); + if (p > studentpct[confidx]) { + printf("Difference at %.1f%% confidence: %g (%.2f%%)\n", + p, + Avg(ds) - Avg(rs), + (Avg(ds) - Avg(rs)) / Avg(ds) * 100); } } -- cgit v1.2.3