diff options
author | Jehan <jehan@girinstud.io> | 2021-03-17 13:23:33 +0100 |
---|---|---|
committer | Jehan <jehan@girinstud.io> | 2021-03-17 13:29:13 +0100 |
commit | 49ed0e6f451c60a51e920a254e47a1bb38ed595d (patch) | |
tree | 1146ce1c8c1af036118a443a2847a1f53df7b096 | |
parent | 41fc0f235ba2d05e148e33c8c8fe37a74e1482b3 (diff) |
src: allow for nsCharSetProber to return several candidates.
No functional change yet because all probers still return 1 candidate.
However, we now add a GetCandidates() method which returns the number of
candidates.
GetCharSetName(), GetLanguage() and GetConfidence() now take a parameter
which is the candidate index (which must be below the return value of
GetCandidates()). We can now consider that nsCharSetProber computes a
(charset, language) pair and that the confidence applies to this specific
pair, not just to the charset detection.
-rw-r--r-- | src/nsBig5Prober.cpp | 4 | ||||
-rw-r--r-- | src/nsBig5Prober.h | 7 | ||||
-rw-r--r-- | src/nsCharSetProber.h | 7 | ||||
-rw-r--r-- | src/nsEUCJPProber.cpp | 4 | ||||
-rw-r--r-- | src/nsEUCJPProber.h | 7 | ||||
-rw-r--r-- | src/nsEUCKRProber.cpp | 4 | ||||
-rw-r--r-- | src/nsEUCKRProber.h | 7 | ||||
-rw-r--r-- | src/nsEUCTWProber.cpp | 4 | ||||
-rw-r--r-- | src/nsEUCTWProber.h | 7 | ||||
-rw-r--r-- | src/nsEscCharsetProber.h | 7 | ||||
-rw-r--r-- | src/nsGB2312Prober.cpp | 4 | ||||
-rw-r--r-- | src/nsGB2312Prober.h | 7 | ||||
-rw-r--r-- | src/nsHebrewProber.cpp | 4 | ||||
-rw-r--r-- | src/nsHebrewProber.h | 7 | ||||
-rw-r--r-- | src/nsLatin1Prober.cpp | 4 | ||||
-rw-r--r-- | src/nsLatin1Prober.h | 7 | ||||
-rw-r--r-- | src/nsMBCSGroupProber.cpp | 20 | ||||
-rw-r--r-- | src/nsMBCSGroupProber.h | 7 | ||||
-rw-r--r-- | src/nsSBCSGroupProber.cpp | 22 | ||||
-rw-r--r-- | src/nsSBCSGroupProber.h | 7 | ||||
-rw-r--r-- | src/nsSBCharSetProber.cpp | 14 | ||||
-rw-r--r-- | src/nsSBCharSetProber.h | 7 | ||||
-rw-r--r-- | src/nsSJISProber.cpp | 4 | ||||
-rw-r--r-- | src/nsSJISProber.h | 7 | ||||
-rw-r--r-- | src/nsUTF8Prober.cpp | 4 | ||||
-rw-r--r-- | src/nsUTF8Prober.h | 7 | ||||
-rw-r--r-- | src/nsUniversalDetector.cpp | 16 |
27 files changed, 110 insertions, 96 deletions
diff --git a/src/nsBig5Prober.cpp b/src/nsBig5Prober.cpp index 46aea0f..0a9b585 100644 --- a/src/nsBig5Prober.cpp +++ b/src/nsBig5Prober.cpp @@ -75,13 +75,13 @@ nsProbingState nsBig5Prober::HandleData(const char* aBuf, PRUint32 aLen, mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; return mState; } -float nsBig5Prober::GetConfidence(void) +float nsBig5Prober::GetConfidence(int candidate) { float distribCf = mDistributionAnalyser.GetConfidence(); diff --git a/src/nsBig5Prober.h b/src/nsBig5Prober.h index 61726af..a9bc918 100644 --- a/src/nsBig5Prober.h +++ b/src/nsBig5Prober.h @@ -52,11 +52,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "BIG5";} - const char* GetLanguage() {return "zh";} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "BIG5";} + const char* GetLanguage(int) {return "zh";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int); void SetOpion() {} protected: diff --git a/src/nsCharSetProber.h b/src/nsCharSetProber.h index 1aa7dbc..6228604 100644 --- a/src/nsCharSetProber.h +++ b/src/nsCharSetProber.h @@ -53,15 +53,16 @@ typedef enum { class nsCharSetProber { public: virtual ~nsCharSetProber() {} - virtual const char* GetCharSetName() = 0; - virtual const char* GetLanguage() = 0; + virtual int GetCandidates() = 0; + virtual const char* GetCharSetName(int candidate) = 0; + virtual const char* GetLanguage(int candidate) = 0; virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx) = 0; virtual bool DecodeToUnicode() {return false;} virtual nsProbingState GetState(void) = 0; virtual void 
Reset(void) = 0; - virtual float GetConfidence(void) = 0; + virtual float GetConfidence(int candidate) = 0; virtual void SetOpion() = 0; #ifdef DEBUG_chardet diff --git a/src/nsEUCJPProber.cpp b/src/nsEUCJPProber.cpp index 12c82a1..1c54029 100644 --- a/src/nsEUCJPProber.cpp +++ b/src/nsEUCJPProber.cpp @@ -85,13 +85,13 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen, mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mContextAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; return mState; } -float nsEUCJPProber::GetConfidence(void) +float nsEUCJPProber::GetConfidence(int candidate) { float contxtCf = mContextAnalyser.GetConfidence(); float distribCf = mDistributionAnalyser.GetConfidence(); diff --git a/src/nsEUCJPProber.h b/src/nsEUCJPProber.h index 16fe558..5468123 100644 --- a/src/nsEUCJPProber.h +++ b/src/nsEUCJPProber.h @@ -58,11 +58,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "EUC-JP";} - const char* GetLanguage() {return "ja";} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "EUC-JP";} + const char* GetLanguage(int) {return "ja";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int); void SetOpion() {} protected: diff --git a/src/nsEUCKRProber.cpp b/src/nsEUCKRProber.cpp index eff70ef..f9f1110 100644 --- a/src/nsEUCKRProber.cpp +++ b/src/nsEUCKRProber.cpp @@ -76,7 +76,7 @@ nsProbingState nsEUCKRProber::HandleData(const char* aBuf, PRUint32 aLen, mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; // 
else // mDistributionAnalyser.HandleData(aBuf, aLen); @@ -84,7 +84,7 @@ nsProbingState nsEUCKRProber::HandleData(const char* aBuf, PRUint32 aLen, return mState; } -float nsEUCKRProber::GetConfidence(void) +float nsEUCKRProber::GetConfidence(int candidate) { float distribCf = mDistributionAnalyser.GetConfidence(); diff --git a/src/nsEUCKRProber.h b/src/nsEUCKRProber.h index d41234f..7f61b53 100644 --- a/src/nsEUCKRProber.h +++ b/src/nsEUCKRProber.h @@ -53,16 +53,17 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); + virtual int GetCandidates() { return 1; } /* "Unified Hangul Code", also called "CP949" or "Windows-949" is a * superset of EUC-KR. Though not fully ok to return UHC here (a * separate prober would be better), it is acceptable, since many * Korean documents are actually created with this character set. */ - const char* GetCharSetName() {return "UHC";} - const char* GetLanguage() {return "ko";} + const char* GetCharSetName(int) {return "UHC";} + const char* GetLanguage(int) {return "ko";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int); void SetOpion() {} protected: diff --git a/src/nsEUCTWProber.cpp b/src/nsEUCTWProber.cpp index a11b81a..c7f6f05 100644 --- a/src/nsEUCTWProber.cpp +++ b/src/nsEUCTWProber.cpp @@ -76,7 +76,7 @@ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen, mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; // else // mDistributionAnalyser.HandleData(aBuf, aLen); @@ -84,7 +84,7 @@ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen, return mState; } -float nsEUCTWProber::GetConfidence(void) +float nsEUCTWProber::GetConfidence(int candidate) { float distribCf = 
mDistributionAnalyser.GetConfidence(); diff --git a/src/nsEUCTWProber.h b/src/nsEUCTWProber.h index 7e7faf3..012169b 100644 --- a/src/nsEUCTWProber.h +++ b/src/nsEUCTWProber.h @@ -52,11 +52,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "EUC-TW";} - const char* GetLanguage() {return "zh";} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "EUC-TW";} + const char* GetLanguage(int) {return "zh";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int); void SetOpion() {} protected: diff --git a/src/nsEscCharsetProber.h b/src/nsEscCharsetProber.h index 116153e..e3167da 100644 --- a/src/nsEscCharsetProber.h +++ b/src/nsEscCharsetProber.h @@ -52,11 +52,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return mDetectedCharset;} - const char* GetLanguage() {return NULL;} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return mDetectedCharset;} + const char* GetLanguage(int) {return NULL;} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void){return (float)0.99;} + float GetConfidence(int){return (float)0.99;} void SetOpion() {} protected: diff --git a/src/nsGB2312Prober.cpp b/src/nsGB2312Prober.cpp index 193358d..4383c97 100644 --- a/src/nsGB2312Prober.cpp +++ b/src/nsGB2312Prober.cpp @@ -81,7 +81,7 @@ nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen, mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; // else // mDistributionAnalyser.HandleData(aBuf, aLen); @@ -89,7 +89,7 @@ 
nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen, return mState; } -float nsGB18030Prober::GetConfidence(void) +float nsGB18030Prober::GetConfidence(int candidate) { float distribCf = mDistributionAnalyser.GetConfidence(); diff --git a/src/nsGB2312Prober.h b/src/nsGB2312Prober.h index 5290bd1..2566fb5 100644 --- a/src/nsGB2312Prober.h +++ b/src/nsGB2312Prober.h @@ -54,11 +54,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "GB18030";} - const char* GetLanguage() {return "zh";} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "GB18030";} + const char* GetLanguage(int) {return "zh";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int candidate); void SetOpion() {} protected: diff --git a/src/nsHebrewProber.cpp b/src/nsHebrewProber.cpp index ac5a9af..e4d2a9f 100644 --- a/src/nsHebrewProber.cpp +++ b/src/nsHebrewProber.cpp @@ -144,7 +144,7 @@ nsProbingState nsHebrewProber::HandleData(const char* aBuf, PRUint32 aLen, } // Make the decision: is it Logical or Visual? -const char* nsHebrewProber::GetCharSetName() +const char* nsHebrewProber::GetCharSetName(int candidate) { // If the final letter score distance is dominant enough, rely on it. PRInt32 finalsub = mFinalCharLogicalScore - mFinalCharVisualScore; @@ -154,7 +154,7 @@ const char* nsHebrewProber::GetCharSetName() return VISUAL_HEBREW_NAME; // It's not dominant enough, try to rely on the model scores instead. 
- float modelsub = mLogicalProb->GetConfidence() - mVisualProb->GetConfidence(); + float modelsub = mLogicalProb->GetConfidence(0) - mVisualProb->GetConfidence(0); if (modelsub > MIN_MODEL_DISTANCE) return LOGICAL_HEBREW_NAME; if (modelsub < -(MIN_MODEL_DISTANCE)) diff --git a/src/nsHebrewProber.h b/src/nsHebrewProber.h index 421a7aa..0b30d29 100644 --- a/src/nsHebrewProber.h +++ b/src/nsHebrewProber.h @@ -51,13 +51,14 @@ public: virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - virtual const char *GetCharSetName(); - virtual const char *GetLanguage(void) { return "he"; } + virtual int GetCandidates() { return 1; } + virtual const char *GetCharSetName(int candidate); + virtual const char *GetLanguage(int) { return "he"; } virtual void Reset(void); virtual nsProbingState GetState(void); - virtual float GetConfidence(void) { return (float)0.0; } + virtual float GetConfidence(int) { return (float)0.0; } virtual void SetOpion() {} void SetModelProbers(nsCharSetProber *logicalPrb, nsCharSetProber *visualPrb) diff --git a/src/nsLatin1Prober.cpp b/src/nsLatin1Prober.cpp index cffb391..9b33d20 100644 --- a/src/nsLatin1Prober.cpp +++ b/src/nsLatin1Prober.cpp @@ -146,7 +146,7 @@ nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen, return mState; } -float nsLatin1Prober::GetConfidence(void) +float nsLatin1Prober::GetConfidence(int candidate) { if (mState == eNotMe) return 0.01f; @@ -177,7 +177,7 @@ float nsLatin1Prober::GetConfidence(void) #ifdef DEBUG_chardet void nsLatin1Prober::DumpStatus() { - printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName()); + printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(0), GetCharSetName()); } #endif diff --git a/src/nsLatin1Prober.h b/src/nsLatin1Prober.h index 77ff331..e9615b9 100644 --- a/src/nsLatin1Prober.h +++ b/src/nsLatin1Prober.h @@ -52,11 +52,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** 
codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "WINDOWS-1252";} - const char* GetLanguage() {return NULL;} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "WINDOWS-1252";} + const char* GetLanguage(int) {return NULL;} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int candidate); void SetOpion() {} #ifdef DEBUG_chardet diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp index ea2f88f..3b21530 100644 --- a/src/nsMBCSGroupProber.cpp +++ b/src/nsMBCSGroupProber.cpp @@ -138,18 +138,18 @@ nsMBCSGroupProber::~nsMBCSGroupProber() } } -const char* nsMBCSGroupProber::GetCharSetName() +const char* nsMBCSGroupProber::GetCharSetName(int candidate) { if (mBestGuess == -1) { - GetConfidence(); + GetConfidence(0); if (mBestGuess == -1) mBestGuess = 0; } - return mProbers[mBestGuess]->GetCharSetName(); + return mProbers[mBestGuess]->GetCharSetName(0); } -const char* nsMBCSGroupProber::GetLanguage(void) +const char* nsMBCSGroupProber::GetLanguage(int candidate) { const char* maxLang = NULL; int maxLangIdx = -1; @@ -158,7 +158,7 @@ const char* nsMBCSGroupProber::GetLanguage(void) if (mBestGuess == -1) return NULL; else - maxLang = mProbers[mBestGuess]->GetLanguage(); + maxLang = mProbers[mBestGuess]->GetLanguage(0); if (maxLang == NULL && mProbers[mBestGuess]->DecodeToUnicode()) { @@ -299,7 +299,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen, return mState; } -float nsMBCSGroupProber::GetConfidence(void) +float nsMBCSGroupProber::GetConfidence(int candidate) { PRUint32 i; float bestConf = 0.0, cf; @@ -316,7 +316,7 @@ float nsMBCSGroupProber::GetConfidence(void) if (!mIsActive[i]) continue; - cf = mProbers[i]->GetConfidence(); + cf = mProbers[i]->GetConfidence(0); if (mProbers[i]->DecodeToUnicode()) { @@ -346,14 +346,14 @@ void nsMBCSGroupProber::DumpStatus() PRUint32 i; float cf; - 
GetConfidence(); + GetConfidence(0); for (i = 0; i < NUM_OF_PROBERS; i++) { if (!mIsActive[i]) printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]); else { - cf = mProbers[i]->GetConfidence(); + cf = mProbers[i]->GetConfidence(0); printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); } } @@ -366,7 +366,7 @@ void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&st for (PRUint32 i = 0; i < NUM_OF_PROBERS; ++i) { states[offset].name = ProberName[i]; states[offset].isActive = mIsActive[i]; - states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0; + states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence(0) : 0.0; ++offset; } } diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h index 2ed028e..2da8d79 100644 --- a/src/nsMBCSGroupProber.h +++ b/src/nsMBCSGroupProber.h @@ -57,11 +57,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName(); - const char* GetLanguage(); + int GetCandidates() { return 1; } + const char* GetCharSetName(int candidate); + const char* GetLanguage(int candidate); nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int candidate); void SetOpion() {} #ifdef DEBUG_chardet diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp index 8809576..a41768b 100644 --- a/src/nsSBCSGroupProber.cpp +++ b/src/nsSBCSGroupProber.cpp @@ -203,29 +203,29 @@ nsSBCSGroupProber::~nsSBCSGroupProber() } -const char* nsSBCSGroupProber::GetCharSetName() +const char* nsSBCSGroupProber::GetCharSetName(int candidate) { //if we have no answer yet if (mBestGuess == -1) { - GetConfidence(); + GetConfidence(0); //no charset seems positive if (mBestGuess == -1) //we will use default. 
mBestGuess = 0; } - return mProbers[mBestGuess]->GetCharSetName(); + return mProbers[mBestGuess]->GetCharSetName(0); } -const char* nsSBCSGroupProber::GetLanguage() +const char* nsSBCSGroupProber::GetLanguage(int candidate) { if (mBestGuess == -1) { - GetConfidence(); + GetConfidence(0); if (mBestGuess == -1) mBestGuess = 0; } - return mProbers[mBestGuess]->GetLanguage(); + return mProbers[mBestGuess]->GetLanguage(0); } void nsSBCSGroupProber::Reset(void) @@ -297,7 +297,7 @@ done: return mState; } -float nsSBCSGroupProber::GetConfidence(void) +float nsSBCSGroupProber::GetConfidence(int candidate) { PRUint32 i; float bestConf = 0.0, cf; @@ -313,7 +313,7 @@ float nsSBCSGroupProber::GetConfidence(void) { if (!mIsActive[i]) continue; - cf = mProbers[i]->GetConfidence(); + cf = mProbers[i]->GetConfidence(0); if (bestConf < cf) { bestConf = cf; @@ -330,16 +330,16 @@ void nsSBCSGroupProber::DumpStatus() PRUint32 i; float cf; - cf = GetConfidence(); + cf = GetConfidence(0); printf(" SBCS Group Prober --------begin status \r\n"); for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) { if (!mIsActive[i]) - printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName()); + printf(" inactive: [%s] (i.e. 
confidence is too low).\r\n", mProbers[i]->GetCharSetName(0)); else mProbers[i]->DumpStatus(); } printf(" SBCS Group found best match [%s] confidence %f.\r\n", - mProbers[mBestGuess]->GetCharSetName(), cf); + mProbers[mBestGuess]->GetCharSetName(0), cf); } #endif diff --git a/src/nsSBCSGroupProber.h b/src/nsSBCSGroupProber.h index 51a8930..56b15ec 100644 --- a/src/nsSBCSGroupProber.h +++ b/src/nsSBCSGroupProber.h @@ -50,11 +50,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName(); - const char* GetLanguage(); + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int); + const char* GetLanguage(int); nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int); void SetOpion() {} #ifdef DEBUG_chardet diff --git a/src/nsSBCharSetProber.cpp b/src/nsSBCharSetProber.cpp index 71f6be3..fe6fba1 100644 --- a/src/nsSBCharSetProber.cpp +++ b/src/nsSBCharSetProber.cpp @@ -82,7 +82,7 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32 if (mState == eDetecting) if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD) { - float cf = GetConfidence(); + float cf = GetConfidence(0); if (cf > POSITIVE_SHORTCUT_THRESHOLD) mState = eFoundIt; else if (cf < NEGATIVE_SHORTCUT_THRESHOLD) @@ -106,7 +106,7 @@ void nsSingleByteCharSetProber::Reset(void) //#define NEGATIVE_APPROACH 1 -float nsSingleByteCharSetProber::GetConfidence(void) +float nsSingleByteCharSetProber::GetConfidence(int candidate) { #ifdef NEGATIVE_APPROACH if (mTotalSeqs > 0) @@ -140,23 +140,23 @@ float nsSingleByteCharSetProber::GetConfidence(void) #endif } -const char* nsSingleByteCharSetProber::GetCharSetName() +const char* nsSingleByteCharSetProber::GetCharSetName(int candidate) { if (!mNameProber) return mModel->charsetName; - return mNameProber->GetCharSetName(); + return mNameProber->GetCharSetName(0); } -const char* 
nsSingleByteCharSetProber::GetLanguage() +const char* nsSingleByteCharSetProber::GetLanguage(int candidate) { if (!mNameProber) return mModel->langName; - return mNameProber->GetLanguage(); + return mNameProber->GetLanguage(0); } #ifdef DEBUG_chardet void nsSingleByteCharSetProber::DumpStatus() { - printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName()); + printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(0), GetCharSetName(0)); } #endif diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h index 735e3aa..05b8193 100644 --- a/src/nsSBCharSetProber.h +++ b/src/nsSBCharSetProber.h @@ -86,14 +86,15 @@ public: nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber) :mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); } - virtual const char* GetCharSetName(); - virtual const char* GetLanguage(); + virtual int GetCandidates() { return 1; } + virtual const char* GetCharSetName(int candidate); + virtual const char* GetLanguage(int candidate); virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); virtual nsProbingState GetState(void) {return mState;} virtual void Reset(void); - virtual float GetConfidence(void); + virtual float GetConfidence(int candidate); virtual void SetOpion() {} // This feature is not implemented yet. 
any current language model diff --git a/src/nsSJISProber.cpp b/src/nsSJISProber.cpp index 82b771e..5a82fe0 100644 --- a/src/nsSJISProber.cpp +++ b/src/nsSJISProber.cpp @@ -84,13 +84,13 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen, mLastChar[0] = aBuf[aLen-1]; if (mState == eDetecting) - if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) + if (mContextAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; return mState; } -float nsSJISProber::GetConfidence(void) +float nsSJISProber::GetConfidence(int candidate) { float contxtCf = mContextAnalyser.GetConfidence(); float distribCf = mDistributionAnalyser.GetConfidence(); diff --git a/src/nsSJISProber.h b/src/nsSJISProber.h index d211412..1d561a4 100644 --- a/src/nsSJISProber.h +++ b/src/nsSJISProber.h @@ -59,11 +59,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "SHIFT_JIS";} - const char* GetLanguage() {return "ja";} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "SHIFT_JIS";} + const char* GetLanguage(int) {return "ja";} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int candidate); void SetOpion() {} protected: diff --git a/src/nsUTF8Prober.cpp b/src/nsUTF8Prober.cpp index f6f057c..744c66d 100644 --- a/src/nsUTF8Prober.cpp +++ b/src/nsUTF8Prober.cpp @@ -88,14 +88,14 @@ nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen, } if (mState == eDetecting) - if (GetConfidence() > SHORTCUT_THRESHOLD) + if (GetConfidence(0) > SHORTCUT_THRESHOLD) mState = eFoundIt; return mState; } #define ONE_CHAR_PROB (float)0.50 -float nsUTF8Prober::GetConfidence(void) +float nsUTF8Prober::GetConfidence(int candidate) { float unlike = (float)0.99; diff --git a/src/nsUTF8Prober.h b/src/nsUTF8Prober.h index 
180559a..311edfa 100644 --- a/src/nsUTF8Prober.h +++ b/src/nsUTF8Prober.h @@ -52,11 +52,12 @@ public: nsProbingState HandleData(const char* aBuf, PRUint32 aLen, int** codePointBuffer, int* codePointBufferIdx); - const char* GetCharSetName() {return "UTF-8";} - const char* GetLanguage() {return NULL;} + virtual int GetCandidates() { return 1; } + const char* GetCharSetName(int) {return "UTF-8";} + const char* GetLanguage(int) {return NULL;} nsProbingState GetState(void) {return mState;} void Reset(void); - float GetConfidence(void); + float GetConfidence(int candidate); void SetOpion() {} virtual bool DecodeToUnicode() {return true;} diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp index 354b253..184a114 100644 --- a/src/nsUniversalDetector.cpp +++ b/src/nsUniversalDetector.cpp @@ -253,8 +253,8 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) if (st == eFoundIt) { mDone = PR_TRUE; - mDetectedCharset = mEscCharSetProber->GetCharSetName(); - mDetectedConfidence = mEscCharSetProber->GetConfidence(); + mDetectedCharset = mEscCharSetProber->GetCharSetName(0); + mDetectedConfidence = mEscCharSetProber->GetConfidence(0); } break; case eHighbyte: @@ -266,9 +266,9 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen) if (st == eFoundIt) { mDone = PR_TRUE; - mDetectedCharset = mCharSetProbers[i]->GetCharSetName(); - mDetectedLanguage = mCharSetProbers[i]->GetLanguage(); - mDetectedConfidence = mCharSetProbers[i]->GetConfidence(); + mDetectedCharset = mCharSetProbers[i]->GetCharSetName(0); + mDetectedLanguage = mCharSetProbers[i]->GetLanguage(0); + mDetectedConfidence = mCharSetProbers[i]->GetConfidence(0); return NS_OK; } } @@ -335,12 +335,12 @@ void nsUniversalDetector::DataEnd() { if (mCharSetProbers[i]) { - proberConfidence = mCharSetProbers[i]->GetConfidence(); + proberConfidence = mCharSetProbers[i]->GetConfidence(0); if (proberConfidence > MINIMUM_THRESHOLD) /* Only report what we are confident in. 
*/ - Report(mCharSetProbers[i]->GetCharSetName(), - mCharSetProbers[i]->GetLanguage(), + Report(mCharSetProbers[i]->GetCharSetName(0), + mCharSetProbers[i]->GetLanguage(0), proberConfidence); } } |