diff options
author | Jehan <jehan@girinstud.io> | 2021-11-09 15:05:38 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-14 00:24:53 +0100 |
commit | a3ff09bece9ee0e787d1964bda06c9ea341a8982 (patch) | |
tree | 908f07cb8e7b6910bbfe0c75fa364edcc54e57c2 | |
parent | c9446e540dda64cfdb9cb380f5cfaf6fa4378e84 (diff) |
test: improve test error output even more.
The failure message now reports the confidence of the detected result, as
well as the confidence of the candidate matching the expected (lang, charset)
pair and that candidate's rank in the candidate list, if any candidate matched.
-rw-r--r-- | test/uchardet-tests.c | 69 |
1 files changed, 61 insertions, 8 deletions
diff --git a/test/uchardet-tests.c b/test/uchardet-tests.c index 63cd5d9..5d9ee01 100644 --- a/test/uchardet-tests.c +++ b/test/uchardet-tests.c @@ -37,6 +37,7 @@ #include <assert.h> #include <ctype.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -53,7 +54,10 @@ #endif void -detect(FILE *fp, char **charset, char **lang) +detect(FILE *fp, + char *expected_charset, char *expected_lang, + float *expected_confidence, size_t *expected_candidate, + char **charset, char **lang, float *confidence) { uchardet_t handle = uchardet_new(); char buffer[BUFFER_SIZE]; @@ -79,12 +83,42 @@ detect(FILE *fp, char **charset, char **lang) *lang = strdup(uchardet_get_language(handle, 0)); else *lang = NULL; + *confidence = uchardet_get_confidence(handle, 0); for (i = 0; (*charset)[i]; i++) { /* Our test files are lowercase. */ (*charset)[i] = tolower((*charset)[i]); } + if (strcmp(expected_charset, *charset) != 0 || + *lang == NULL || + strcmp(expected_lang, *lang) != 0) + { + size_t n_candidates = uchardet_get_candidates(handle); + + *expected_confidence = 0.0f; + *expected_candidate = SIZE_MAX; + for (i = 0; i < n_candidates; i++) + { + if (uchardet_get_language(handle, i) == NULL) + /* Skip this case for now. */ + continue; + + if (strcasecmp(expected_charset, uchardet_get_encoding(handle, i)) == 0 && + strcasecmp(expected_lang, uchardet_get_language(handle, i)) == 0) + { + *expected_confidence = uchardet_get_confidence(handle, i); + *expected_candidate = i; + break; + } + } + } + else + { + *expected_confidence = *confidence; + *expected_candidate = 0; + } + uchardet_delete(handle); } @@ -98,6 +132,9 @@ main(int argc, char ** argv) char *expected_lang = NULL; char *charset; char *lang; + float expected_confidence = 0.0f; + float confidence = 0.0f; + size_t expected_candidate = 0; /* In a unit test, 0 means success, other returned values mean failure. 
*/ int success = 1; @@ -133,7 +170,7 @@ main(int argc, char ** argv) assert(expected_lang); expected_lang++; - detect(f, &charset, &lang); + detect(f, expected_charset, expected_lang, &expected_confidence, &expected_candidate, &charset, &lang, &confidence); fclose (f); /* No lang detection is a failure, except for a few charset for @@ -145,19 +182,35 @@ main(int argc, char ** argv) strcmp(expected_charset, "utf-16") == 0 || strcmp(expected_charset, "utf-32") == 0) { - success = (strcmp(charset, expected_charset) != 0); + success = (strcmp(charset, expected_charset) != 0); } else if (lang) { - success = (strcmp(charset, expected_charset) != 0) + - (strcmp(lang, expected_lang) != 0); + success = (strcmp(charset, expected_charset) != 0) + + (strcmp(lang, expected_lang) != 0); } - if (success == 1) + if (success != 0) { + char *candidate_str = NULL; + + if (expected_confidence != 0.0f) + { + size_t candidate_len; + + candidate_len = snprintf(candidate_str, 0, "%d", expected_candidate + 1); + candidate_str = malloc(candidate_len + 1); + snprintf(candidate_str, candidate_len + 1, "%d", expected_candidate + 1); + } + fprintf(stderr, - "uchardet-tests FAILED: found %s/%s - expecting %s/%s\n", - lang, charset, expected_lang, expected_charset); + "uchardet-tests FAILED: found %s/%s (confidence: %f) - expecting %s/%s (confidence: %f - candidate: %s)\n", + lang ? lang : "n/a", charset, confidence, + expected_lang, expected_charset, expected_confidence, + candidate_str == NULL ? "n/a" : candidate_str); + + if (candidate_str != NULL) + free(candidate_str); } free(path); |