summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jehan <jehan@girinstud.io> 2021-03-17 13:23:33 +0100
committer Jehan <jehan@girinstud.io> 2021-03-17 13:29:13 +0100
commit 49ed0e6f451c60a51e920a254e47a1bb38ed595d (patch)
tree 1146ce1c8c1af036118a443a2847a1f53df7b096
parent 41fc0f235ba2d05e148e33c8c8fe37a74e1482b3 (diff)
src: allow for nsCharSetProber to return several candidates.
No functional change yet, because all probers still return a single candidate. This commit adds a GetCandidates() method which returns the number of candidates. GetCharSetName(), GetLanguage() and GetConfidence() now take a parameter, the candidate index, which must be less than the value returned by GetCandidates(). We can now consider that nsCharSetProber computes a (charset, language) pair and that the confidence applies to this specific pair, not just to charset detection.
-rw-r--r-- src/nsBig5Prober.cpp 4
-rw-r--r-- src/nsBig5Prober.h 7
-rw-r--r-- src/nsCharSetProber.h 7
-rw-r--r-- src/nsEUCJPProber.cpp 4
-rw-r--r-- src/nsEUCJPProber.h 7
-rw-r--r-- src/nsEUCKRProber.cpp 4
-rw-r--r-- src/nsEUCKRProber.h 7
-rw-r--r-- src/nsEUCTWProber.cpp 4
-rw-r--r-- src/nsEUCTWProber.h 7
-rw-r--r-- src/nsEscCharsetProber.h 7
-rw-r--r-- src/nsGB2312Prober.cpp 4
-rw-r--r-- src/nsGB2312Prober.h 7
-rw-r--r-- src/nsHebrewProber.cpp 4
-rw-r--r-- src/nsHebrewProber.h 7
-rw-r--r-- src/nsLatin1Prober.cpp 4
-rw-r--r-- src/nsLatin1Prober.h 7
-rw-r--r-- src/nsMBCSGroupProber.cpp 20
-rw-r--r-- src/nsMBCSGroupProber.h 7
-rw-r--r-- src/nsSBCSGroupProber.cpp 22
-rw-r--r-- src/nsSBCSGroupProber.h 7
-rw-r--r-- src/nsSBCharSetProber.cpp 14
-rw-r--r-- src/nsSBCharSetProber.h 7
-rw-r--r-- src/nsSJISProber.cpp 4
-rw-r--r-- src/nsSJISProber.h 7
-rw-r--r-- src/nsUTF8Prober.cpp 4
-rw-r--r-- src/nsUTF8Prober.h 7
-rw-r--r-- src/nsUniversalDetector.cpp 16
27 files changed, 110 insertions, 96 deletions
diff --git a/src/nsBig5Prober.cpp b/src/nsBig5Prober.cpp
index 46aea0f..0a9b585 100644
--- a/src/nsBig5Prober.cpp
+++ b/src/nsBig5Prober.cpp
@@ -75,13 +75,13 @@ nsProbingState nsBig5Prober::HandleData(const char* aBuf, PRUint32 aLen,
mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting)
- if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+ if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
return mState;
}
-float nsBig5Prober::GetConfidence(void)
+float nsBig5Prober::GetConfidence(int candidate)
{
float distribCf = mDistributionAnalyser.GetConfidence();
diff --git a/src/nsBig5Prober.h b/src/nsBig5Prober.h
index 61726af..a9bc918 100644
--- a/src/nsBig5Prober.h
+++ b/src/nsBig5Prober.h
@@ -52,11 +52,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "BIG5";}
- const char* GetLanguage() {return "zh";}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "BIG5";}
+ const char* GetLanguage(int) {return "zh";}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int);
void SetOpion() {}
protected:
diff --git a/src/nsCharSetProber.h b/src/nsCharSetProber.h
index 1aa7dbc..6228604 100644
--- a/src/nsCharSetProber.h
+++ b/src/nsCharSetProber.h
@@ -53,15 +53,16 @@ typedef enum {
class nsCharSetProber {
public:
virtual ~nsCharSetProber() {}
- virtual const char* GetCharSetName() = 0;
- virtual const char* GetLanguage() = 0;
+ virtual int GetCandidates() = 0;
+ virtual const char* GetCharSetName(int candidate) = 0;
+ virtual const char* GetLanguage(int candidate) = 0;
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx) = 0;
virtual bool DecodeToUnicode() {return false;}
virtual nsProbingState GetState(void) = 0;
virtual void Reset(void) = 0;
- virtual float GetConfidence(void) = 0;
+ virtual float GetConfidence(int candidate) = 0;
virtual void SetOpion() = 0;
#ifdef DEBUG_chardet
diff --git a/src/nsEUCJPProber.cpp b/src/nsEUCJPProber.cpp
index 12c82a1..1c54029 100644
--- a/src/nsEUCJPProber.cpp
+++ b/src/nsEUCJPProber.cpp
@@ -85,13 +85,13 @@ nsProbingState nsEUCJPProber::HandleData(const char* aBuf, PRUint32 aLen,
mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting)
- if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+ if (mContextAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
return mState;
}
-float nsEUCJPProber::GetConfidence(void)
+float nsEUCJPProber::GetConfidence(int candidate)
{
float contxtCf = mContextAnalyser.GetConfidence();
float distribCf = mDistributionAnalyser.GetConfidence();
diff --git a/src/nsEUCJPProber.h b/src/nsEUCJPProber.h
index 16fe558..5468123 100644
--- a/src/nsEUCJPProber.h
+++ b/src/nsEUCJPProber.h
@@ -58,11 +58,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "EUC-JP";}
- const char* GetLanguage() {return "ja";}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "EUC-JP";}
+ const char* GetLanguage(int) {return "ja";}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int);
void SetOpion() {}
protected:
diff --git a/src/nsEUCKRProber.cpp b/src/nsEUCKRProber.cpp
index eff70ef..f9f1110 100644
--- a/src/nsEUCKRProber.cpp
+++ b/src/nsEUCKRProber.cpp
@@ -76,7 +76,7 @@ nsProbingState nsEUCKRProber::HandleData(const char* aBuf, PRUint32 aLen,
mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting)
- if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+ if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
// else
// mDistributionAnalyser.HandleData(aBuf, aLen);
@@ -84,7 +84,7 @@ nsProbingState nsEUCKRProber::HandleData(const char* aBuf, PRUint32 aLen,
return mState;
}
-float nsEUCKRProber::GetConfidence(void)
+float nsEUCKRProber::GetConfidence(int candidate)
{
float distribCf = mDistributionAnalyser.GetConfidence();
diff --git a/src/nsEUCKRProber.h b/src/nsEUCKRProber.h
index d41234f..7f61b53 100644
--- a/src/nsEUCKRProber.h
+++ b/src/nsEUCKRProber.h
@@ -53,16 +53,17 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
+ virtual int GetCandidates() { return 1; }
/* "Unified Hangul Code", also called "CP949" or "Windows-949" is a
* superset of EUC-KR. Though not fully ok to return UHC here (a
* separate prober would be better), it is acceptable, since many
* Korean documents are actually created with this character set.
*/
- const char* GetCharSetName() {return "UHC";}
- const char* GetLanguage() {return "ko";}
+ const char* GetCharSetName(int) {return "UHC";}
+ const char* GetLanguage(int) {return "ko";}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int);
void SetOpion() {}
protected:
diff --git a/src/nsEUCTWProber.cpp b/src/nsEUCTWProber.cpp
index a11b81a..c7f6f05 100644
--- a/src/nsEUCTWProber.cpp
+++ b/src/nsEUCTWProber.cpp
@@ -76,7 +76,7 @@ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen,
mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting)
- if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+ if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
// else
// mDistributionAnalyser.HandleData(aBuf, aLen);
@@ -84,7 +84,7 @@ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen,
return mState;
}
-float nsEUCTWProber::GetConfidence(void)
+float nsEUCTWProber::GetConfidence(int candidate)
{
float distribCf = mDistributionAnalyser.GetConfidence();
diff --git a/src/nsEUCTWProber.h b/src/nsEUCTWProber.h
index 7e7faf3..012169b 100644
--- a/src/nsEUCTWProber.h
+++ b/src/nsEUCTWProber.h
@@ -52,11 +52,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "EUC-TW";}
- const char* GetLanguage() {return "zh";}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "EUC-TW";}
+ const char* GetLanguage(int) {return "zh";}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int);
void SetOpion() {}
protected:
diff --git a/src/nsEscCharsetProber.h b/src/nsEscCharsetProber.h
index 116153e..e3167da 100644
--- a/src/nsEscCharsetProber.h
+++ b/src/nsEscCharsetProber.h
@@ -52,11 +52,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return mDetectedCharset;}
- const char* GetLanguage() {return NULL;}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return mDetectedCharset;}
+ const char* GetLanguage(int) {return NULL;}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void){return (float)0.99;}
+ float GetConfidence(int){return (float)0.99;}
void SetOpion() {}
protected:
diff --git a/src/nsGB2312Prober.cpp b/src/nsGB2312Prober.cpp
index 193358d..4383c97 100644
--- a/src/nsGB2312Prober.cpp
+++ b/src/nsGB2312Prober.cpp
@@ -81,7 +81,7 @@ nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen,
mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting)
- if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+ if (mDistributionAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
// else
// mDistributionAnalyser.HandleData(aBuf, aLen);
@@ -89,7 +89,7 @@ nsProbingState nsGB18030Prober::HandleData(const char* aBuf, PRUint32 aLen,
return mState;
}
-float nsGB18030Prober::GetConfidence(void)
+float nsGB18030Prober::GetConfidence(int candidate)
{
float distribCf = mDistributionAnalyser.GetConfidence();
diff --git a/src/nsGB2312Prober.h b/src/nsGB2312Prober.h
index 5290bd1..2566fb5 100644
--- a/src/nsGB2312Prober.h
+++ b/src/nsGB2312Prober.h
@@ -54,11 +54,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "GB18030";}
- const char* GetLanguage() {return "zh";}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "GB18030";}
+ const char* GetLanguage(int) {return "zh";}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int candidate);
void SetOpion() {}
protected:
diff --git a/src/nsHebrewProber.cpp b/src/nsHebrewProber.cpp
index ac5a9af..e4d2a9f 100644
--- a/src/nsHebrewProber.cpp
+++ b/src/nsHebrewProber.cpp
@@ -144,7 +144,7 @@ nsProbingState nsHebrewProber::HandleData(const char* aBuf, PRUint32 aLen,
}
// Make the decision: is it Logical or Visual?
-const char* nsHebrewProber::GetCharSetName()
+const char* nsHebrewProber::GetCharSetName(int candidate)
{
// If the final letter score distance is dominant enough, rely on it.
PRInt32 finalsub = mFinalCharLogicalScore - mFinalCharVisualScore;
@@ -154,7 +154,7 @@ const char* nsHebrewProber::GetCharSetName()
return VISUAL_HEBREW_NAME;
// It's not dominant enough, try to rely on the model scores instead.
- float modelsub = mLogicalProb->GetConfidence() - mVisualProb->GetConfidence();
+ float modelsub = mLogicalProb->GetConfidence(0) - mVisualProb->GetConfidence(0);
if (modelsub > MIN_MODEL_DISTANCE)
return LOGICAL_HEBREW_NAME;
if (modelsub < -(MIN_MODEL_DISTANCE))
diff --git a/src/nsHebrewProber.h b/src/nsHebrewProber.h
index 421a7aa..0b30d29 100644
--- a/src/nsHebrewProber.h
+++ b/src/nsHebrewProber.h
@@ -51,13 +51,14 @@ public:
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- virtual const char *GetCharSetName();
- virtual const char *GetLanguage(void) { return "he"; }
+ virtual int GetCandidates() { return 1; }
+ virtual const char *GetCharSetName(int candidate);
+ virtual const char *GetLanguage(int) { return "he"; }
virtual void Reset(void);
virtual nsProbingState GetState(void);
- virtual float GetConfidence(void) { return (float)0.0; }
+ virtual float GetConfidence(int) { return (float)0.0; }
virtual void SetOpion() {}
void SetModelProbers(nsCharSetProber *logicalPrb, nsCharSetProber *visualPrb)
diff --git a/src/nsLatin1Prober.cpp b/src/nsLatin1Prober.cpp
index cffb391..9b33d20 100644
--- a/src/nsLatin1Prober.cpp
+++ b/src/nsLatin1Prober.cpp
@@ -146,7 +146,7 @@ nsProbingState nsLatin1Prober::HandleData(const char* aBuf, PRUint32 aLen,
return mState;
}
-float nsLatin1Prober::GetConfidence(void)
+float nsLatin1Prober::GetConfidence(int candidate)
{
if (mState == eNotMe)
return 0.01f;
@@ -177,7 +177,7 @@ float nsLatin1Prober::GetConfidence(void)
#ifdef DEBUG_chardet
void nsLatin1Prober::DumpStatus()
{
- printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
+ printf(" Latin1Prober: %1.3f [%s]\r\n", GetConfidence(0), GetCharSetName(0));
}
#endif
diff --git a/src/nsLatin1Prober.h b/src/nsLatin1Prober.h
index 77ff331..e9615b9 100644
--- a/src/nsLatin1Prober.h
+++ b/src/nsLatin1Prober.h
@@ -52,11 +52,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "WINDOWS-1252";}
- const char* GetLanguage() {return NULL;}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "WINDOWS-1252";}
+ const char* GetLanguage(int) {return NULL;}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int candidate);
void SetOpion() {}
#ifdef DEBUG_chardet
diff --git a/src/nsMBCSGroupProber.cpp b/src/nsMBCSGroupProber.cpp
index ea2f88f..3b21530 100644
--- a/src/nsMBCSGroupProber.cpp
+++ b/src/nsMBCSGroupProber.cpp
@@ -138,18 +138,18 @@ nsMBCSGroupProber::~nsMBCSGroupProber()
}
}
-const char* nsMBCSGroupProber::GetCharSetName()
+const char* nsMBCSGroupProber::GetCharSetName(int candidate)
{
if (mBestGuess == -1)
{
- GetConfidence();
+ GetConfidence(0);
if (mBestGuess == -1)
mBestGuess = 0;
}
- return mProbers[mBestGuess]->GetCharSetName();
+ return mProbers[mBestGuess]->GetCharSetName(0);
}
-const char* nsMBCSGroupProber::GetLanguage(void)
+const char* nsMBCSGroupProber::GetLanguage(int candidate)
{
const char* maxLang = NULL;
int maxLangIdx = -1;
@@ -158,7 +158,7 @@ const char* nsMBCSGroupProber::GetLanguage(void)
if (mBestGuess == -1)
return NULL;
else
- maxLang = mProbers[mBestGuess]->GetLanguage();
+ maxLang = mProbers[mBestGuess]->GetLanguage(0);
if (maxLang == NULL && mProbers[mBestGuess]->DecodeToUnicode())
{
@@ -299,7 +299,7 @@ nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, PRUint32 aLen,
return mState;
}
-float nsMBCSGroupProber::GetConfidence(void)
+float nsMBCSGroupProber::GetConfidence(int candidate)
{
PRUint32 i;
float bestConf = 0.0, cf;
@@ -316,7 +316,7 @@ float nsMBCSGroupProber::GetConfidence(void)
if (!mIsActive[i])
continue;
- cf = mProbers[i]->GetConfidence();
+ cf = mProbers[i]->GetConfidence(0);
if (mProbers[i]->DecodeToUnicode())
{
@@ -346,14 +346,14 @@ void nsMBCSGroupProber::DumpStatus()
PRUint32 i;
float cf;
- GetConfidence();
+ GetConfidence(0);
for (i = 0; i < NUM_OF_PROBERS; i++)
{
if (!mIsActive[i])
printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]);
else
{
- cf = mProbers[i]->GetConfidence();
+ cf = mProbers[i]->GetConfidence(0);
printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]);
}
}
@@ -366,7 +366,7 @@ void nsMBCSGroupProber::GetDetectorState(nsUniversalDetector::DetectorState (&st
for (PRUint32 i = 0; i < NUM_OF_PROBERS; ++i) {
states[offset].name = ProberName[i];
states[offset].isActive = mIsActive[i];
- states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence() : 0.0;
+ states[offset].confidence = mIsActive[i] ? mProbers[i]->GetConfidence(0) : 0.0;
++offset;
}
}
diff --git a/src/nsMBCSGroupProber.h b/src/nsMBCSGroupProber.h
index 2ed028e..2da8d79 100644
--- a/src/nsMBCSGroupProber.h
+++ b/src/nsMBCSGroupProber.h
@@ -57,11 +57,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName();
- const char* GetLanguage();
+ int GetCandidates() { return 1; }
+ const char* GetCharSetName(int candidate);
+ const char* GetLanguage(int candidate);
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int candidate);
void SetOpion() {}
#ifdef DEBUG_chardet
diff --git a/src/nsSBCSGroupProber.cpp b/src/nsSBCSGroupProber.cpp
index 8809576..a41768b 100644
--- a/src/nsSBCSGroupProber.cpp
+++ b/src/nsSBCSGroupProber.cpp
@@ -203,29 +203,29 @@ nsSBCSGroupProber::~nsSBCSGroupProber()
}
-const char* nsSBCSGroupProber::GetCharSetName()
+const char* nsSBCSGroupProber::GetCharSetName(int candidate)
{
//if we have no answer yet
if (mBestGuess == -1)
{
- GetConfidence();
+ GetConfidence(0);
//no charset seems positive
if (mBestGuess == -1)
//we will use default.
mBestGuess = 0;
}
- return mProbers[mBestGuess]->GetCharSetName();
+ return mProbers[mBestGuess]->GetCharSetName(0);
}
-const char* nsSBCSGroupProber::GetLanguage()
+const char* nsSBCSGroupProber::GetLanguage(int candidate)
{
if (mBestGuess == -1)
{
- GetConfidence();
+ GetConfidence(0);
if (mBestGuess == -1)
mBestGuess = 0;
}
- return mProbers[mBestGuess]->GetLanguage();
+ return mProbers[mBestGuess]->GetLanguage(0);
}
void nsSBCSGroupProber::Reset(void)
@@ -297,7 +297,7 @@ done:
return mState;
}
-float nsSBCSGroupProber::GetConfidence(void)
+float nsSBCSGroupProber::GetConfidence(int candidate)
{
PRUint32 i;
float bestConf = 0.0, cf;
@@ -313,7 +313,7 @@ float nsSBCSGroupProber::GetConfidence(void)
{
if (!mIsActive[i])
continue;
- cf = mProbers[i]->GetConfidence();
+ cf = mProbers[i]->GetConfidence(0);
if (bestConf < cf)
{
bestConf = cf;
@@ -330,16 +330,16 @@ void nsSBCSGroupProber::DumpStatus()
PRUint32 i;
float cf;
- cf = GetConfidence();
+ cf = GetConfidence(0);
printf(" SBCS Group Prober --------begin status \r\n");
for (i = 0; i < NUM_OF_SBCS_PROBERS; i++)
{
if (!mIsActive[i])
- printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName());
+ printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName(0));
else
mProbers[i]->DumpStatus();
}
printf(" SBCS Group found best match [%s] confidence %f.\r\n",
- mProbers[mBestGuess]->GetCharSetName(), cf);
+ mProbers[mBestGuess]->GetCharSetName(0), cf);
}
#endif
diff --git a/src/nsSBCSGroupProber.h b/src/nsSBCSGroupProber.h
index 51a8930..56b15ec 100644
--- a/src/nsSBCSGroupProber.h
+++ b/src/nsSBCSGroupProber.h
@@ -50,11 +50,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName();
- const char* GetLanguage();
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int);
+ const char* GetLanguage(int);
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int);
void SetOpion() {}
#ifdef DEBUG_chardet
diff --git a/src/nsSBCharSetProber.cpp b/src/nsSBCharSetProber.cpp
index 71f6be3..fe6fba1 100644
--- a/src/nsSBCharSetProber.cpp
+++ b/src/nsSBCharSetProber.cpp
@@ -82,7 +82,7 @@ nsProbingState nsSingleByteCharSetProber::HandleData(const char* aBuf, PRUint32
if (mState == eDetecting)
if (mTotalSeqs > SB_ENOUGH_REL_THRESHOLD)
{
- float cf = GetConfidence();
+ float cf = GetConfidence(0);
if (cf > POSITIVE_SHORTCUT_THRESHOLD)
mState = eFoundIt;
else if (cf < NEGATIVE_SHORTCUT_THRESHOLD)
@@ -106,7 +106,7 @@ void nsSingleByteCharSetProber::Reset(void)
//#define NEGATIVE_APPROACH 1
-float nsSingleByteCharSetProber::GetConfidence(void)
+float nsSingleByteCharSetProber::GetConfidence(int candidate)
{
#ifdef NEGATIVE_APPROACH
if (mTotalSeqs > 0)
@@ -140,23 +140,23 @@ float nsSingleByteCharSetProber::GetConfidence(void)
#endif
}
-const char* nsSingleByteCharSetProber::GetCharSetName()
+const char* nsSingleByteCharSetProber::GetCharSetName(int candidate)
{
if (!mNameProber)
return mModel->charsetName;
- return mNameProber->GetCharSetName();
+ return mNameProber->GetCharSetName(0);
}
-const char* nsSingleByteCharSetProber::GetLanguage()
+const char* nsSingleByteCharSetProber::GetLanguage(int candidate)
{
if (!mNameProber)
return mModel->langName;
- return mNameProber->GetLanguage();
+ return mNameProber->GetLanguage(0);
}
#ifdef DEBUG_chardet
void nsSingleByteCharSetProber::DumpStatus()
{
- printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(), GetCharSetName());
+ printf(" SBCS: %1.3f [%s]\r\n", GetConfidence(0), GetCharSetName(0));
}
#endif
diff --git a/src/nsSBCharSetProber.h b/src/nsSBCharSetProber.h
index 735e3aa..05b8193 100644
--- a/src/nsSBCharSetProber.h
+++ b/src/nsSBCharSetProber.h
@@ -86,14 +86,15 @@ public:
nsSingleByteCharSetProber(const SequenceModel *model, PRBool reversed, nsCharSetProber* nameProber)
:mModel(model), mReversed(reversed), mNameProber(nameProber) { Reset(); }
- virtual const char* GetCharSetName();
- virtual const char* GetLanguage();
+ virtual int GetCandidates() { return 1; }
+ virtual const char* GetCharSetName(int candidate);
+ virtual const char* GetLanguage(int candidate);
virtual nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
virtual nsProbingState GetState(void) {return mState;}
virtual void Reset(void);
- virtual float GetConfidence(void);
+ virtual float GetConfidence(int candidate);
virtual void SetOpion() {}
// This feature is not implemented yet. any current language model
diff --git a/src/nsSJISProber.cpp b/src/nsSJISProber.cpp
index 82b771e..5a82fe0 100644
--- a/src/nsSJISProber.cpp
+++ b/src/nsSJISProber.cpp
@@ -84,13 +84,13 @@ nsProbingState nsSJISProber::HandleData(const char* aBuf, PRUint32 aLen,
mLastChar[0] = aBuf[aLen-1];
if (mState == eDetecting)
- if (mContextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
+ if (mContextAnalyser.GotEnoughData() && GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
return mState;
}
-float nsSJISProber::GetConfidence(void)
+float nsSJISProber::GetConfidence(int candidate)
{
float contxtCf = mContextAnalyser.GetConfidence();
float distribCf = mDistributionAnalyser.GetConfidence();
diff --git a/src/nsSJISProber.h b/src/nsSJISProber.h
index d211412..1d561a4 100644
--- a/src/nsSJISProber.h
+++ b/src/nsSJISProber.h
@@ -59,11 +59,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "SHIFT_JIS";}
- const char* GetLanguage() {return "ja";}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "SHIFT_JIS";}
+ const char* GetLanguage(int) {return "ja";}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int candidate);
void SetOpion() {}
protected:
diff --git a/src/nsUTF8Prober.cpp b/src/nsUTF8Prober.cpp
index f6f057c..744c66d 100644
--- a/src/nsUTF8Prober.cpp
+++ b/src/nsUTF8Prober.cpp
@@ -88,14 +88,14 @@ nsProbingState nsUTF8Prober::HandleData(const char* aBuf, PRUint32 aLen,
}
if (mState == eDetecting)
- if (GetConfidence() > SHORTCUT_THRESHOLD)
+ if (GetConfidence(0) > SHORTCUT_THRESHOLD)
mState = eFoundIt;
return mState;
}
#define ONE_CHAR_PROB (float)0.50
-float nsUTF8Prober::GetConfidence(void)
+float nsUTF8Prober::GetConfidence(int candidate)
{
float unlike = (float)0.99;
diff --git a/src/nsUTF8Prober.h b/src/nsUTF8Prober.h
index 180559a..311edfa 100644
--- a/src/nsUTF8Prober.h
+++ b/src/nsUTF8Prober.h
@@ -52,11 +52,12 @@ public:
nsProbingState HandleData(const char* aBuf, PRUint32 aLen,
int** codePointBuffer,
int* codePointBufferIdx);
- const char* GetCharSetName() {return "UTF-8";}
- const char* GetLanguage() {return NULL;}
+ virtual int GetCandidates() { return 1; }
+ const char* GetCharSetName(int) {return "UTF-8";}
+ const char* GetLanguage(int) {return NULL;}
nsProbingState GetState(void) {return mState;}
void Reset(void);
- float GetConfidence(void);
+ float GetConfidence(int candidate);
void SetOpion() {}
virtual bool DecodeToUnicode() {return true;}
diff --git a/src/nsUniversalDetector.cpp b/src/nsUniversalDetector.cpp
index 354b253..184a114 100644
--- a/src/nsUniversalDetector.cpp
+++ b/src/nsUniversalDetector.cpp
@@ -253,8 +253,8 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
if (st == eFoundIt)
{
mDone = PR_TRUE;
- mDetectedCharset = mEscCharSetProber->GetCharSetName();
- mDetectedConfidence = mEscCharSetProber->GetConfidence();
+ mDetectedCharset = mEscCharSetProber->GetCharSetName(0);
+ mDetectedConfidence = mEscCharSetProber->GetConfidence(0);
}
break;
case eHighbyte:
@@ -266,9 +266,9 @@ nsresult nsUniversalDetector::HandleData(const char* aBuf, PRUint32 aLen)
if (st == eFoundIt)
{
mDone = PR_TRUE;
- mDetectedCharset = mCharSetProbers[i]->GetCharSetName();
- mDetectedLanguage = mCharSetProbers[i]->GetLanguage();
- mDetectedConfidence = mCharSetProbers[i]->GetConfidence();
+ mDetectedCharset = mCharSetProbers[i]->GetCharSetName(0);
+ mDetectedLanguage = mCharSetProbers[i]->GetLanguage(0);
+ mDetectedConfidence = mCharSetProbers[i]->GetConfidence(0);
return NS_OK;
}
}
@@ -335,12 +335,12 @@ void nsUniversalDetector::DataEnd()
{
if (mCharSetProbers[i])
{
- proberConfidence = mCharSetProbers[i]->GetConfidence();
+ proberConfidence = mCharSetProbers[i]->GetConfidence(0);
if (proberConfidence > MINIMUM_THRESHOLD)
/* Only report what we are confident in. */
- Report(mCharSetProbers[i]->GetCharSetName(),
- mCharSetProbers[i]->GetLanguage(),
+ Report(mCharSetProbers[i]->GetCharSetName(0),
+ mCharSetProbers[i]->GetLanguage(0),
proberConfidence);
}
}