summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJehan <jehan@girinstud.io>2020-04-23 16:40:02 +0200
committerJehan <jehan@girinstud.io>2022-12-14 00:23:13 +0100
commit8118133e0017c4e1b3ddc9fad104c0ba19692cd7 (patch)
tree9bbbaa5c86fd0ef8959ab76cfea919b3b4d8e702
parent15fc8f0a0f55d6ec9373cf16ed96ebf6b35feef3 (diff)
src: new API to get all candidates and their confidence.
Adding uchardet_get_candidates(), uchardet_get_encoding() and uchardet_get_confidence(). Also deprecating uchardet_get_charset() so that developers look at the new API instead. I was unsure whether this should really be deprecated, as it keeps the basic case simple, but the new API is just as easy anyway: you can directly call uchardet_get_encoding() with candidate 0 (like uchardet_get_charset(), it returns "" when no candidate was found).
-rw-r--r--src/symbols.cmake3
-rw-r--r--src/uchardet.cpp33
-rw-r--r--src/uchardet.h18
3 files changed, 51 insertions, 3 deletions
diff --git a/src/symbols.cmake b/src/symbols.cmake
index c7b1652..a6690ff 100644
--- a/src/symbols.cmake
+++ b/src/symbols.cmake
@@ -6,6 +6,9 @@ set(
uchardet_data_end
uchardet_reset
uchardet_get_charset
+ uchardet_get_candidates
+ uchardet_get_encoding
+ uchardet_get_confidence
)
set (LINK_FLAGS "")
diff --git a/src/uchardet.cpp b/src/uchardet.cpp
index 08eec17..f5391ea 100644
--- a/src/uchardet.cpp
+++ b/src/uchardet.cpp
@@ -111,9 +111,19 @@ public:
candidates.clear();
}
- const char* GetCharset() const
+ size_t GetCandidates() const /* Returns how many entries `candidates` currently holds. */
{
- return (candidates.size() > 0) ? candidates[0].encoding : "";
+ return candidates.size();
+ }
+
+ const char* GetCharset(size_t i) const /* Encoding name stored for candidate i; the empty string when i is out of range. */
+ {
+ return (candidates.size() > i) ? candidates[i].encoding : ""; /* bounds check: never index past the end */
+ }
+
+ float GetConfidence(size_t i) const /* Confidence stored for candidate i; 0.0 when i is out of range. */
+ {
+ return (candidates.size() > i) ? candidates[i].confidence : 0.0; /* bounds check: never index past the end */
}
};
@@ -149,5 +159,22 @@ void uchardet_reset(uchardet_t ud)
const char* uchardet_get_charset(uchardet_t ud) /* C entry point: encoding name of candidate 0 for the detector behind `ud`. */
{
- return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset();
+ return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(0); /* the handle is cast back to the C++ detector object */
+}
+
+size_t uchardet_get_candidates (uchardet_t ud) /* C entry point: number of candidates held by the detector behind `ud`. */
+{
+ return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCandidates(); /* the handle is cast back to the C++ detector object */
+}
+
+float uchardet_get_confidence (uchardet_t ud, /* C entry point: confidence of one candidate of the detector behind `ud`. */
+ size_t candidate) /* index of the candidate; an out-of-range index yields 0.0 */
+{
+ return reinterpret_cast<HandleUniversalDetector*>(ud)->GetConfidence(candidate);
+}
+
+const char * uchardet_get_encoding (uchardet_t ud, /* C entry point: encoding name of one candidate of the detector behind `ud`. */
+ size_t candidate) /* index of the candidate; an out-of-range index yields the empty string */
+{
+ return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(candidate);
}
diff --git a/src/uchardet.h b/src/uchardet.h
index 271d98d..c452a69 100644
--- a/src/uchardet.h
+++ b/src/uchardet.h
@@ -54,6 +54,16 @@ extern "C" {
#define UCHARDET_INTERFACE
#endif
+#if defined(__cplusplus) && (__cplusplus >= 201402L) /* DEPRECATED(message): marks the declaration that follows as deprecated. C++14 and later use the standard attribute. */
+#define DEPRECATED(message) [[deprecated(message)]]
+#elif defined(__GNUC__) || defined(__clang__) /* GNU-style attribute; the message form needs GCC >= 4.5 or clang. */
+#define DEPRECATED(message) __attribute__ ((deprecated(message)))
+#elif defined(_MSC_VER) /* MSVC: nothing may follow the declspec here, any extra token is pasted in front of the declaration and breaks it. */
+#define DEPRECATED(message) __declspec(deprecated(message))
+#else /* Unknown compiler: DEPRECATED() expands to nothing, so declarations stay valid but unannotated. */
+#warning("DEPRECATED macro not available")
+#define DEPRECATED(message)
+#endif
/**
* A handle for a uchardet encoding detector.
@@ -102,8 +112,16 @@ UCHARDET_INTERFACE void uchardet_reset(uchardet_t ud);
* @param ud [in] handle of an instance of uchardet
* @return name of charset on success and "" on failure.
*/
+DEPRECATED("use uchardet_get_candidates() and uchardet_get_encoding() instead (since 0.1.0)")
UCHARDET_INTERFACE const char * uchardet_get_charset(uchardet_t ud);
+UCHARDET_INTERFACE size_t uchardet_get_candidates (uchardet_t ud); /* Number of candidates currently held by `ud`. */
+UCHARDET_INTERFACE float uchardet_get_confidence (uchardet_t ud, /* Confidence of candidate number `candidate`; 0.0 when the index is out of range. */
+ size_t candidate);
+UCHARDET_INTERFACE const char * uchardet_get_encoding (uchardet_t ud, /* Encoding name of candidate number `candidate`; "" when the index is out of range. */
+ size_t candidate);
+
+
#ifdef __cplusplus
}
#endif