test: improve test error output even more.

Report the confidence of the detected result, and also the confidence and candidate rank of the expected (lang, charset) pair, if any candidate matched it at all.
author: Jehan <jehan@girinstud.io> 2021-11-09 15:05:38 +0100
committer: Jehan <jehan@girinstud.io> 2022-12-14 00:24:53 +0100
commit: a3ff09bece9ee0e787d1964bda06c9ea341a8982 (patch)
tree: 908f07cb8e7b6910bbfe0c75fa364edcc54e57c2
parent: c9446e540dda64cfdb9cb380f5cfaf6fa4378e84 (diff)
1 files changed, 61 insertions, 8 deletions
diff --git a/test/uchardet-tests.c b/test/uchardet-tests.c
index 63cd5d9..5d9ee01 100644
--- a/test/uchardet-tests.c
+++ b/test/uchardet-tests.c
@@ -37,6 +37,7 @@
 
 #include <assert.h>
 #include <ctype.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -53,7 +54,10 @@
 #endif
 
 void
-detect(FILE *fp, char **charset, char **lang)
+detect(FILE *fp,
+       char *expected_charset, char *expected_lang,
+       float *expected_confidence, size_t *expected_candidate,
+       char **charset, char **lang, float *confidence)
 {
     uchardet_t  handle = uchardet_new();
     char        buffer[BUFFER_SIZE];
@@ -79,12 +83,42 @@ detect(FILE *fp, char **charset, char **lang)
       *lang = strdup(uchardet_get_language(handle, 0));
     else
       *lang = NULL;
+    *confidence = uchardet_get_confidence(handle, 0);
     for (i = 0; (*charset)[i]; i++)
     {
         /* Our test files are lowercase. */
         (*charset)[i] = tolower((*charset)[i]);
     }
 
+    if (strcmp(expected_charset, *charset) != 0 ||
+        *lang == NULL                           ||
+        strcmp(expected_lang, *lang) != 0)
+    {
+        size_t n_candidates = uchardet_get_candidates(handle);
+
+        *expected_confidence = 0.0f;
+        *expected_candidate = SIZE_MAX;
+        for (i = 0; i < n_candidates; i++)
+        {
+            if (uchardet_get_language(handle, i) == NULL)
+                /* Skip this case for now. */
+                continue;
+
+            if (strcasecmp(expected_charset, uchardet_get_encoding(handle, i)) == 0 &&
+                strcasecmp(expected_lang, uchardet_get_language(handle, i)) == 0)
+            {
+                *expected_confidence = uchardet_get_confidence(handle, i);
+                *expected_candidate = i;
+                break;
+            }
+        }
+    }
+    else
+    {
+        *expected_confidence = *confidence;
+        *expected_candidate = 0;
+    }
+
     uchardet_delete(handle);
 }
 
@@ -98,6 +132,9 @@ main(int argc, char ** argv)
     char *expected_lang = NULL;
     char *charset;
     char *lang;
+    float expected_confidence = 0.0f;
+    float confidence = 0.0f;
+    size_t expected_candidate = 0;
     /* In a unit test, 0 means success, other returned values mean failure. */
     int   success = 1;
 
@@ -133,7 +170,7 @@ main(int argc, char ** argv)
     assert(expected_lang);
     expected_lang++;
 
-    detect(f, &charset, &lang);
+    detect(f, expected_charset, expected_lang, &expected_confidence, &expected_candidate, &charset, &lang, &confidence);
     fclose (f);
 
     /* No lang detection is a failure, except for a few charset for
@@ -145,19 +182,35 @@ main(int argc, char ** argv)
         strcmp(expected_charset, "utf-16") == 0 ||
         strcmp(expected_charset, "utf-32") == 0)
     {
-      success = (strcmp(charset, expected_charset) != 0);
+        success = (strcmp(charset, expected_charset) != 0);
     }
     else if (lang)
     {
-      success = (strcmp(charset, expected_charset) != 0) +
-                (strcmp(lang, expected_lang) != 0);
+        success = (strcmp(charset, expected_charset) != 0) +
+                  (strcmp(lang, expected_lang) != 0);
     }
 
-    if (success == 1)
+    if (success != 0)
     {
+        char *candidate_str = NULL;
+
+        if (expected_confidence != 0.0f)
+        {
+            size_t candidate_len;
+
+            candidate_len = snprintf(candidate_str, 0, "%d", expected_candidate + 1);
+            candidate_str = malloc(candidate_len + 1);
+            snprintf(candidate_str, candidate_len + 1, "%d", expected_candidate + 1);
+        }
+
         fprintf(stderr,
-                "uchardet-tests FAILED: found %s/%s - expecting %s/%s\n",
-                lang, charset, expected_lang, expected_charset);
+                "uchardet-tests FAILED: found %s/%s (confidence: %f) - expecting %s/%s (confidence: %f - candidate: %s)\n",
+                lang ? lang : "n/a", charset, confidence,
+                expected_lang, expected_charset, expected_confidence,
+                candidate_str == NULL ? "n/a" : candidate_str);
+
+        if (candidate_str != NULL)
+            free(candidate_str);
     }
 
     free(path);
author	Jehan <jehan@girinstud.io>	2021-11-09 15:05:38 +0100
committer	Jehan <jehan@girinstud.io>	2022-12-14 00:24:53 +0100
commit	a3ff09bece9ee0e787d1964bda06c9ea341a8982 (patch)
tree	908f07cb8e7b6910bbfe0c75fa364edcc54e57c2
parent	c9446e540dda64cfdb9cb380f5cfaf6fa4378e84 (diff)