diff options
author | Jehan <jehan@girinstud.io> | 2020-04-23 16:40:02 +0200 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2022-12-14 00:23:13 +0100 |
commit | 8118133e0017c4e1b3ddc9fad104c0ba19692cd7 (patch) | |
tree | 9bbbaa5c86fd0ef8959ab76cfea919b3b4d8e702 | |
parent | 15fc8f0a0f55d6ec9373cf16ed96ebf6b35feef3 (diff) |
src: new API to get all candidates and their confidence.
Adding:
- uchardet_get_candidates()
- uchardet_get_encoding()
- uchardet_get_confidence()
Also deprecating uchardet_get_charset() so that developers look at the
new API instead. I was unsure whether it should really be deprecated, as
it keeps the basic case simple, but the new API is just as easy anyway:
you can directly call uchardet_get_encoding() with candidate 0, which is
equivalent to uchardet_get_charset() and likewise returns "" when no
candidate was found.
-rw-r--r-- | src/symbols.cmake | 3 | ||||
-rw-r--r-- | src/uchardet.cpp | 33 | ||||
-rw-r--r-- | src/uchardet.h | 18 |
3 files changed, 51 insertions, 3 deletions
diff --git a/src/symbols.cmake b/src/symbols.cmake index c7b1652..a6690ff 100644 --- a/src/symbols.cmake +++ b/src/symbols.cmake @@ -6,6 +6,9 @@ set( uchardet_data_end uchardet_reset uchardet_get_charset + uchardet_get_candidates + uchardet_get_encoding + uchardet_get_confidence ) set (LINK_FLAGS "") diff --git a/src/uchardet.cpp b/src/uchardet.cpp index 08eec17..f5391ea 100644 --- a/src/uchardet.cpp +++ b/src/uchardet.cpp @@ -111,9 +111,19 @@ public: candidates.clear(); } - const char* GetCharset() const + size_t GetCandidates() const { - return (candidates.size() > 0) ? candidates[0].encoding : ""; + return candidates.size(); + } + + const char* GetCharset(size_t i) const + { + return (candidates.size() > i) ? candidates[i].encoding : ""; + } + + float GetConfidence(size_t i) const + { + return (candidates.size() > i) ? candidates[i].confidence : 0.0; } }; @@ -149,5 +159,22 @@ void uchardet_reset(uchardet_t ud) const char* uchardet_get_charset(uchardet_t ud) { - return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(); + return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(0); +} + +size_t uchardet_get_candidates (uchardet_t ud) +{ + return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCandidates(); +} + +float uchardet_get_confidence (uchardet_t ud, + size_t candidate) +{ + return reinterpret_cast<HandleUniversalDetector*>(ud)->GetConfidence(candidate); +} + +const char * uchardet_get_encoding (uchardet_t ud, + size_t candidate) +{ + return reinterpret_cast<HandleUniversalDetector*>(ud)->GetCharset(candidate); } diff --git a/src/uchardet.h b/src/uchardet.h index 271d98d..c452a69 100644 --- a/src/uchardet.h +++ b/src/uchardet.h @@ -54,6 +54,16 @@ extern "C" { #define UCHARDET_INTERFACE #endif +#if defined(__cplusplus) && (__cplusplus >= 201402L) +#define DEPRECATED(message) [[deprecated(message)]] +#elif defined(__GNUC__) || defined(__clang__) +#define DEPRECATED(message) __attribute__ ((deprecated)) +#elif defined(_MSC_VER) +#define 
DEPRECATED(message) __declspec(deprecated) func +#else +#warning("DEPRECATED macro not available") +#define DEPRECATED(message) +#endif /** * A handle for a uchardet encoding detector. @@ -102,8 +112,16 @@ UCHARDET_INTERFACE void uchardet_reset(uchardet_t ud); * @param ud [in] handle of an instance of uchardet * @return name of charset on success and "" on failure. */ +DEPRECATED("use uchardet_get_candidates() and uchardet_get_encoding() instead (since 0.1.0)") UCHARDET_INTERFACE const char * uchardet_get_charset(uchardet_t ud); +UCHARDET_INTERFACE size_t uchardet_get_candidates (uchardet_t ud); +UCHARDET_INTERFACE float uchardet_get_confidence (uchardet_t ud, + size_t candidate); +UCHARDET_INTERFACE const char * uchardet_get_encoding (uchardet_t ud, + size_t candidate); + + #ifdef __cplusplus } #endif |