Improve engine: tone placement, backtrack limit, grab default
- Fix tone placement for uâ, uê, uơ → tone on second vowel (e.g. tuấn, thuế, phương)
- Limit flexible backward scan to 3 chars to prevent cross-syllable modification
- Set grab = true as default config
- Add 9 new engine tests for tone placement and backtrack limit
This commit is contained in:
parent
44a4b032a6
commit
cdb5eb4812
4 changed files with 99 additions and 13 deletions
|
|
@ -2,6 +2,12 @@ use crate::engine::EngineEvent;
|
|||
|
||||
const VOWELS: &[char] = &['a', 'e', 'i', 'o', 'u', 'y', 'ă', 'â', 'ê', 'ô', 'ơ', 'ư'];
|
||||
|
||||
/// Maximum number of characters to scan backward during flexible placement.
|
||||
/// Vietnamese vowel clusters are at most 3 characters; limiting the scan
|
||||
/// prevents modifying vowels in a different syllable (e.g. `dangd` + `a`
|
||||
/// should not change the `a` in `dang`).
|
||||
const MAX_FLEXIBLE_BACKTRACK: usize = 3;
|
||||
|
||||
fn is_vowel(c: char) -> bool {
|
||||
VOWELS.contains(&c)
|
||||
}
|
||||
|
|
@ -152,11 +158,12 @@ impl TelexEngine {
|
|||
if i > 0 && is_vowel(chars[i - 1]) {
|
||||
let first = chars[i - 1];
|
||||
let second = chars[i];
|
||||
// For oa, oe, uy → tone on second vowel (already at position i)
|
||||
// For others → tone on first vowel
|
||||
// For oa, oe, uâ, uê, uơ, uy, iê, yê → tone on second vowel
|
||||
let tone_on_second = matches!(
|
||||
(first, second),
|
||||
('o', 'a') | ('o', 'e') | ('u', 'y') | ('i', 'ê') | ('y', 'ê')
|
||||
('o', 'a') | ('o', 'e')
|
||||
| ('u', 'â') | ('u', 'ê') | ('u', 'ơ') | ('u', 'y')
|
||||
| ('i', 'ê') | ('y', 'ê')
|
||||
);
|
||||
if !tone_on_second {
|
||||
// Apply tone to first vowel
|
||||
|
|
@ -212,13 +219,15 @@ impl TelexEngine {
|
|||
}
|
||||
}
|
||||
|
||||
// Flexible placement: if last char is not a vowel, scan backward
|
||||
// for a matching vowel to form a double-vowel pair.
|
||||
// Flexible placement: if last char is not a vowel, scan the last
|
||||
// N chars for a matching vowel to form a double-vowel pair.
|
||||
// Limited backtrack prevents modifying vowels in a different syllable.
|
||||
if matches!(ch, 'a' | 'e' | 'o') {
|
||||
if let Some(last_ch) = self.buffer.chars().last() {
|
||||
if !is_vowel(last_ch) {
|
||||
let chars: Vec<char> = self.buffer.chars().collect();
|
||||
for i in (0..chars.len()).rev() {
|
||||
let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK);
|
||||
for i in (start..chars.len()).rev() {
|
||||
if chars[i] == ch {
|
||||
let replacement = match ch {
|
||||
'a' => 'â',
|
||||
|
|
@ -255,12 +264,13 @@ impl TelexEngine {
|
|||
}
|
||||
}
|
||||
|
||||
// Flexible placement: if last char is not a vowel, scan backward
|
||||
// for a vowel to apply the w modifier.
|
||||
// Flexible placement: if last char is not a vowel, scan the last
|
||||
// N chars for a vowel to apply the w modifier.
|
||||
if let Some(last_ch) = self.buffer.chars().last() {
|
||||
if !is_vowel(last_ch) {
|
||||
let chars: Vec<char> = self.buffer.chars().collect();
|
||||
for i in (0..chars.len()).rev() {
|
||||
let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK);
|
||||
for i in (start..chars.len()).rev() {
|
||||
if is_vowel(chars[i]) {
|
||||
if let Some(modified) = apply_w_to_vowel(chars[i]) {
|
||||
self.buffer = chars[..i].iter().collect::<String>();
|
||||
|
|
|
|||
|
|
@ -279,10 +279,69 @@ mod tests {
|
|||
#[test]
|
||||
fn telex_uy_tone() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// Engine applies tone to first vowel in "uy": uý
|
||||
// Engine applies tone to second vowel (y) in "uy": uý
|
||||
assert_eq!(get_display(&process_input(&mut e, "uys")), "uý");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telex_ua_tone_on_first_vowel() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "ua" → tone on first vowel (u): mùa → "ùa"
|
||||
assert_eq!(get_display(&process_input(&mut e, "uaf")), "ùa");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telex_uâ_tone_on_second_vowel() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "uâ" → tone on second vowel (â): tuấn
|
||||
assert_eq!(get_display(&process_input(&mut e, "tuana")), "tuân");
|
||||
assert_eq!(get_display(&process_input(&mut e, "tuanas")), "tuấn");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telex_uê_tone_on_second_vowel() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "uê" → tone on second vowel (ê): thuế
|
||||
assert_eq!(get_display(&process_input(&mut e, "thuee")), "thuê");
|
||||
assert_eq!(get_display(&process_input(&mut e, "thuees")), "thuế");
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
// Telex: Flexible backtrack limit
|
||||
// ================================================================
|
||||
|
||||
#[test]
|
||||
fn telex_flexible_backtrack_limit() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "dangd" + "a" should NOT modify the 'a' in "dang"
|
||||
// (too far back, crosses a syllable boundary).
|
||||
// The last 3 chars are "ngd" → no vowel → 'a' is appended normally.
|
||||
assert_eq!(get_display(&process_input(&mut e, "dangda")), "dangda");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telex_flexible_backtrack_still_works_near() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "tran" + "a" → last 3: "ran" → 'a' found at window index 1 (buffer index 2) → "trân"
|
||||
assert_eq!(get_display(&process_input(&mut e, "trana")), "trân");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telex_flexible_backtrack_w_limit() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "dangd" + "w" should NOT modify 'a' in "dang".
|
||||
// w becomes a pending modifier (no vowel found within backtrack)
|
||||
// On flush, pending w is consumed without modifying anything.
|
||||
assert_eq!(get_display(&process_input(&mut e, "dangdw")), "dangd");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn telex_flexible_backtrack_w_still_works_near() {
|
||||
let mut e = Engine::new(InputMethod::Telex);
|
||||
// "ngon" + "w" → last 3: "gon" → 'o' found at window index 1 (buffer index 2) → "ngơn"
|
||||
assert_eq!(get_display(&process_input(&mut e, "ngonw")), "ngơn");
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
// Telex: Digraph dd
|
||||
// ================================================================
|
||||
|
|
@ -624,6 +683,20 @@ mod tests {
|
|||
assert_eq!(get_display(&process_input(&mut e, "1")), "1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vni_flexible_backtrack_limit() {
|
||||
let mut e = Engine::new(InputMethod::Vni);
|
||||
// "dangd" + "6" should NOT modify 'a' in "dang"
|
||||
assert_eq!(get_display(&process_input(&mut e, "dangd6")), "dangd6");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vni_flexible_backtrack_still_works_near() {
|
||||
let mut e = Engine::new(InputMethod::Vni);
|
||||
// "tran" + "6" → "trân" (within backtrack limit)
|
||||
assert_eq!(get_display(&process_input(&mut e, "tran6")), "trân");
|
||||
}
|
||||
|
||||
// ================================================================
|
||||
// VNI: Tones
|
||||
// ================================================================
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ fn is_vowel(c: char) -> bool {
|
|||
VOWELS.contains(&c)
|
||||
}
|
||||
|
||||
/// Maximum number of trailing buffer characters scanned backward during
/// flexible placement. Bounding the scan keeps a modifier key from altering a
/// vowel in an earlier syllable (e.g. `dangd` + `6` leaves `dang` untouched).
const MAX_FLEXIBLE_BACKTRACK: usize = 3;
|
||||
|
||||
fn apply_tone_to_vowel(vowel: char, digit: char) -> Option<char> {
|
||||
// VNI: 1=sắc, 2=huyền, 3=hỏi, 4=ngã, 5=nặng
|
||||
let table: &[(char, char, char)] = &[
|
||||
|
|
@ -132,11 +134,12 @@ impl VniEngine {
|
|||
}
|
||||
}
|
||||
|
||||
// Flexible placement: last char not a vowel, scan backward
|
||||
// Flexible placement: last char not a vowel, scan the last N chars
|
||||
if let Some(last_ch) = self.buffer.chars().last() {
|
||||
if !is_vowel(last_ch) {
|
||||
let chars: Vec<char> = self.buffer.chars().collect();
|
||||
for i in (0..chars.len()).rev() {
|
||||
let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK);
|
||||
for i in (start..chars.len()).rev() {
|
||||
if is_vowel(chars[i]) {
|
||||
// Try tone first (1-5)
|
||||
if let Some(modified) = apply_tone_to_vowel(chars[i], digit) {
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
input_method = "telex"
|
||||
toggle_key = "space"
|
||||
start_enabled = true
|
||||
grab = false
|
||||
grab = true
|
||||
|
||||
[auto_restore]
|
||||
enabled = true
|
||||
|
|
|
|||
Loading…
Reference in a new issue