summaryrefslogtreecommitdiff
path: root/src/hb-ot-shape-complex-khmer.cc
diff options
context:
space:
mode:
authorBehdad Esfahbod <behdad@behdad.org>2018-01-05 16:01:17 +0000
committerBehdad Esfahbod <behdad@behdad.org>2018-01-05 16:02:55 +0000
commitc135324af107d3e33420cd0dca4d5bb12fd3f4a1 (patch)
treeb450e98cdecc19c35f4b66c8a5134d41a354a3a7 /src/hb-ot-shape-complex-khmer.cc
parentffcd6c7efd21bf89b41ddbf148b05672a42ecadd (diff)
[khmer] Remove indic_config->reph_pos
Diffstat (limited to 'src/hb-ot-shape-complex-khmer.cc')
-rw-r--r--src/hb-ot-shape-complex-khmer.cc333
1 files changed, 10 insertions, 323 deletions
diff --git a/src/hb-ot-shape-complex-khmer.cc b/src/hb-ot-shape-complex-khmer.cc
index 3437f1e0..586bb3f2 100644
--- a/src/hb-ot-shape-complex-khmer.cc
+++ b/src/hb-ot-shape-complex-khmer.cc
@@ -110,36 +110,6 @@ matra_position (hb_codepoint_t u, khmer_position_t side)
return side;
}
-/* XXX
- * This is a hack for now. We should move this data into the main Indic table.
- * Or completely remove it and just check in the tables.
- */
-static const hb_codepoint_t ra_chars[] = {
- 0x0930u, /* Devanagari */
- 0x09B0u, /* Bengali */
- 0x09F0u, /* Bengali */
- 0x0A30u, /* Gurmukhi */ /* No Reph */
- 0x0AB0u, /* Gujarati */
- 0x0B30u, /* Oriya */
- 0x0BB0u, /* Tamil */ /* No Reph */
- 0x0C30u, /* Telugu */ /* Reph formed only with ZWJ */
- 0x0CB0u, /* Kannada */
- 0x0D30u, /* Malayalam */ /* No Reph, Logical Repha */
-
- 0x0DBBu, /* Sinhala */ /* Reph formed only with ZWJ */
-
- 0x179Au, /* Khmer */ /* No Reph, Visual Repha */
-};
-
-static inline bool
-is_ra (hb_codepoint_t u)
-{
- for (unsigned int i = 0; i < ARRAY_LENGTH (ra_chars); i++)
- if (u == ra_chars[i])
- return true;
- return false;
-}
-
static inline bool
is_one_of (const hb_glyph_info_t &info, unsigned int flags)
{
@@ -240,7 +210,7 @@ set_khmer_properties (hb_glyph_info_t &info)
if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
{
pos = POS_BASE_C;
- if (is_ra (u))
+ if (u == 0x179Au)
cat = OT_Ra;
}
else if (cat == OT_M)
@@ -273,14 +243,6 @@ set_khmer_properties (hb_glyph_info_t &info)
* instead of adding a new flag in these structs.
*/
-enum reph_position_t {
- REPH_POS_AFTER_MAIN = POS_AFTER_MAIN,
- REPH_POS_BEFORE_SUB = POS_BEFORE_SUB,
- REPH_POS_AFTER_SUB = POS_AFTER_SUB,
- REPH_POS_BEFORE_POST = POS_BEFORE_POST,
- REPH_POS_AFTER_POST = POS_AFTER_POST,
- REPH_POS_DONT_CARE = POS_RA_TO_BECOME_REPH
-};
enum reph_mode_t {
REPH_MODE_IMPLICIT, /* Reph formed out of initial Ra,H sequence. */
REPH_MODE_EXPLICIT, /* Reph formed out of initial Ra,H,ZWJ sequence. */
@@ -293,14 +255,13 @@ enum blwf_mode_t {
};
struct indic_config_t
{
- reph_position_t reph_pos;
reph_mode_t reph_mode;
blwf_mode_t blwf_mode;
};
static const indic_config_t indic_configs[] =
{
- {REPH_POS_DONT_CARE, REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST},
+ {REPH_MODE_VIS_REPHA,BLWF_MODE_PRE_AND_POST},
};
@@ -590,119 +551,16 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
const khmer_shape_plan_t *khmer_plan = (const khmer_shape_plan_t *) plan->data;
hb_glyph_info_t *info = buffer->info;
- /* 1. Find base consonant:
- *
- * The shaping engine finds the base consonant of the syllable, using the
- * following algorithm: starting from the end of the syllable, move backwards
- * until a consonant is found that does not have a below-base or post-base
- * form (post-base forms have to follow below-base forms), or that is not a
- * pre-base-reordering Ra, or arrive at the first consonant. The consonant
- * stopped at will be the base.
- *
- * o If the syllable starts with Ra + Halant (in a script that has Reph)
- * and has more than one consonant, Ra is excluded from candidates for
- * base consonants.
- */
-
- unsigned int base = end;
- bool has_reph = false;
-
- {
- /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
- * and has more than one consonant, Ra is excluded from candidates for
- * base consonants. */
- unsigned int limit = start;
- if (khmer_plan->config->reph_pos != REPH_POS_DONT_CARE &&
- khmer_plan->mask_array[RPHF] &&
- start + 3 <= end &&
- (
- (khmer_plan->config->reph_mode == REPH_MODE_IMPLICIT && !is_joiner (info[start + 2])) ||
- (khmer_plan->config->reph_mode == REPH_MODE_EXPLICIT && info[start + 2].khmer_category() == OT_ZWJ)
- ))
- {
- /* See if it matches the 'rphf' feature. */
- hb_codepoint_t glyphs[3] = {info[start].codepoint,
- info[start + 1].codepoint,
- khmer_plan->config->reph_mode == REPH_MODE_EXPLICIT ?
- info[start + 2].codepoint : 0};
- if (khmer_plan->rphf.would_substitute (glyphs, 2, face) ||
- (khmer_plan->config->reph_mode == REPH_MODE_EXPLICIT &&
- khmer_plan->rphf.would_substitute (glyphs, 3, face)))
- {
- limit += 2;
- while (limit < end && is_joiner (info[limit]))
- limit++;
- base = start;
- has_reph = true;
- }
- } else if (khmer_plan->config->reph_mode == REPH_MODE_LOG_REPHA && info[start].khmer_category() == OT_Repha)
- {
- limit += 1;
- while (limit < end && is_joiner (info[limit]))
- limit++;
- base = start;
- has_reph = true;
- }
-
- /* The first consonant is always the base. */
- base = start;
-
- /* Mark all subsequent consonants as below. */
- for (unsigned int i = base + 1; i < end; i++)
- if (is_consonant (info[i]))
- info[i].khmer_position() = POS_BELOW_C;
-
- /* -> If the syllable starts with Ra + Halant (in a script that has Reph)
- * and has more than one consonant, Ra is excluded from candidates for
- * base consonants.
- *
- * Only do this for unforced Reph. (ie. not for Ra,H,ZWJ. */
- if (has_reph && base == start && limit - base <= 2) {
- /* Have no other consonant, so Reph is not formed and Ra becomes base. */
- has_reph = false;
- }
- }
-
-
- /* 2. Decompose and reorder Matras:
- *
- * Each matra and any syllable modifier sign in the syllable are moved to the
- * appropriate position relative to the consonant(s) in the syllable. The
- * shaping engine decomposes two- or three-part matras into their constituent
- * parts before any repositioning. Matra characters are classified by which
- * consonant in a conjunct they have affinity for and are reordered to the
- * following positions:
- *
- * o Before first half form in the syllable
- * o After subjoined consonants
- * o After post-form consonant
- * o After main consonant (for above marks)
- *
- * IMPLEMENTATION NOTES:
- *
- * The normalize() routine has already decomposed matras for us, so we don't
- * need to worry about that.
- */
+ /* 1. Khmer shaping assumes that a syllable will begin with a Cons, IndV, or Number. */
+ /* The first consonant is always the base. */
+ unsigned int base = start;
+ info[base].khmer_position() = POS_BASE_C;
- /* 3. Reorder marks to canonical order:
- *
- * Adjacent nukta and halant or nukta and vedic sign are always repositioned
- * if necessary, so that the nukta is first.
- *
- * IMPLEMENTATION NOTES:
- *
- * We don't need to do this: the normalize() routine already did this for us.
- */
-
-
- /* Reorder characters */
-
- for (unsigned int i = start; i < base; i++)
- info[i].khmer_position() = MIN (POS_PRE_C, (khmer_position_t) info[i].khmer_position());
-
- if (base < end)
- info[base].khmer_position() = POS_BASE_C;
+ /* Mark all subsequent consonants as below. */
+ for (unsigned int i = base + 1; i < end; i++)
+ if (is_consonant (info[i]))
+ info[i].khmer_position() = POS_BELOW_C;
/* Mark final consonants. A final consonant is one appearing after a matra,
* like in Khmer. */
@@ -716,10 +574,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
break;
}
- /* Handle beginning Ra */
- if (has_reph)
- info[start].khmer_position() = POS_RA_TO_BECOME_REPH;
-
/* Attach misc marks to previous char to move with them. */
{
khmer_position_t last_pos = POS_START;
@@ -766,7 +620,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
last = i;
}
-
{
/* Use syllable() for sort accounting temporarily. */
unsigned int syllable = info[start].syllable();
@@ -811,20 +664,6 @@ initial_reordering_consonant_syllable (const hb_ot_shape_plan_t *plan,
{
hb_mask_t mask;
- /* Reph */
- for (unsigned int i = start; i < end && info[i].khmer_position() == POS_RA_TO_BECOME_REPH; i++)
- info[i].mask |= khmer_plan->mask_array[RPHF];
-
- /* Pre-base */
- mask = khmer_plan->mask_array[HALF];
- if (khmer_plan->config->blwf_mode == BLWF_MODE_PRE_AND_POST)
- mask |= khmer_plan->mask_array[BLWF];
- for (unsigned int i = start; i < base; i++)
- info[i].mask |= mask;
- /* Base */
- mask = 0;
- if (base < end)
- info[base].mask |= mask;
/* Post-base */
mask = khmer_plan->mask_array[BLWF] | khmer_plan->mask_array[ABVF] | khmer_plan->mask_array[PSTF];
for (unsigned int i = base + 1; i < end; i++)
@@ -1138,158 +977,6 @@ final_reordering_syllable (const hb_ot_shape_plan_t *plan,
}
- /* o Reorder reph:
- *
- * Reph’s original position is always at the beginning of the syllable,
- * (i.e. it is not reordered at the character reordering stage). However,
- * it will be reordered according to the basic-forms shaping results.
- * Possible positions for reph, depending on the script, are; after main,
- * before post-base consonant forms, and after post-base consonant forms.
- */
-
- /* Two cases:
- *
- * - If repha is encoded as a sequence of characters (Ra,H or Ra,H,ZWJ), then
- * we should only move it if the sequence ligated to the repha form.
- *
- * - If repha is encoded separately and in the logical position, we should only
- * move it if it did NOT ligate. If it ligated, it's probably the font trying
- * to make it work without the reordering.
- */
- if (start + 1 < end &&
- info[start].khmer_position() == POS_RA_TO_BECOME_REPH &&
- ((info[start].khmer_category() == OT_Repha) ^
- _hb_glyph_info_ligated_and_didnt_multiply (&info[start])))
- {
- unsigned int new_reph_pos;
- reph_position_t reph_pos = khmer_plan->config->reph_pos;
-
- assert (reph_pos != REPH_POS_DONT_CARE);
-
- /* 1. If reph should be positioned after post-base consonant forms,
- * proceed to step 5.
- */
- if (reph_pos == REPH_POS_AFTER_POST)
- {
- goto reph_step_5;
- }
-
- /* 2. If the reph repositioning class is not after post-base: target
- * position is after the first explicit halant glyph between the
- * first post-reph consonant and last main consonant. If ZWJ or ZWNJ
- * are following this halant, position is moved after it. If such
- * position is found, this is the target position. Otherwise,
- * proceed to the next step.
- *
- * Note: in old-implementation fonts, where classifications were
- * fixed in shaping engine, there was no case where reph position
- * will be found on this step.
- */
- {
- new_reph_pos = start + 1;
- while (new_reph_pos < base && !is_coeng (info[new_reph_pos]))
- new_reph_pos++;
-
- if (new_reph_pos < base && is_coeng (info[new_reph_pos]))
- {
- /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
- if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
- new_reph_pos++;
- goto reph_move;
- }
- }
-
- /* 3. If reph should be repositioned after the main consonant: find the
- * first consonant not ligated with main, or find the first
- * consonant that is not a potential pre-base-reordering Ra.
- */
- if (reph_pos == REPH_POS_AFTER_MAIN)
- {
- new_reph_pos = base;
- while (new_reph_pos + 1 < end && info[new_reph_pos + 1].khmer_position() <= POS_AFTER_MAIN)
- new_reph_pos++;
- if (new_reph_pos < end)
- goto reph_move;
- }
-
- /* 4. If reph should be positioned before post-base consonant, find
- * first post-base classified consonant not ligated with main. If no
- * consonant is found, the target position should be before the
- * first matra, syllable modifier sign or vedic sign.
- */
- /* This is our take on what step 4 is trying to say (and failing, BADLY). */
- if (reph_pos == REPH_POS_AFTER_SUB)
- {
- new_reph_pos = base;
- while (new_reph_pos + 1 < end &&
- !( FLAG_UNSAFE (info[new_reph_pos + 1].khmer_position()) & (FLAG (POS_POST_C) | FLAG (POS_AFTER_POST) | FLAG (POS_SMVD))))
- new_reph_pos++;
- if (new_reph_pos < end)
- goto reph_move;
- }
-
- /* 5. If no consonant is found in steps 3 or 4, move reph to a position
- * immediately before the first post-base matra, syllable modifier
- * sign or vedic sign that has a reordering class after the intended
- * reph position. For example, if the reordering position for reph
- * is post-main, it will skip above-base matras that also have a
- * post-main position.
- */
- reph_step_5:
- {
- /* Copied from step 2. */
- new_reph_pos = start + 1;
- while (new_reph_pos < base && !is_coeng (info[new_reph_pos]))
- new_reph_pos++;
-
- if (new_reph_pos < base && is_coeng (info[new_reph_pos]))
- {
- /* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
- if (new_reph_pos + 1 < base && is_joiner (info[new_reph_pos + 1]))
- new_reph_pos++;
- goto reph_move;
- }
- }
-
- /* 6. Otherwise, reorder reph to the end of the syllable.
- */
- {
- new_reph_pos = end - 1;
- while (new_reph_pos > start && info[new_reph_pos].khmer_position() == POS_SMVD)
- new_reph_pos--;
-
- /*
- * If the Reph is to be ending up after a Matra,Halant sequence,
- * position it before that Halant so it can interact with the Matra.
- * However, if it's a plain Consonant,Halant we shouldn't do that.
- * Uniscribe doesn't do this.
- * TEST: U+0930,U+094D,U+0915,U+094B,U+094D
- */
- if (!hb_options ().uniscribe_bug_compatible &&
- unlikely (is_coeng (info[new_reph_pos]))) {
- for (unsigned int i = base + 1; i < new_reph_pos; i++)
- if (info[i].khmer_category() == OT_M) {
- /* Ok, got it. */
- new_reph_pos--;
- }
- }
- goto reph_move;
- }
-
- reph_move:
- {
- /* Move */
- buffer->merge_clusters (start, new_reph_pos + 1);
- hb_glyph_info_t reph = info[start];
- memmove (&info[start], &info[start + 1], (new_reph_pos - start) * sizeof (info[0]));
- info[new_reph_pos] = reph;
-
- if (start < base && base <= new_reph_pos)
- base--;
- }
- }
-
-
/* o Reorder pre-base-reordering consonants:
*
* If a pre-base-reordering consonant is found, reorder it according to