From cdb5eb4812b2d84370245502e05d363e35d72974 Mon Sep 17 00:00:00 2001 From: vndangkhoa Date: Wed, 24 Jun 2026 17:57:24 +0700 Subject: [PATCH] Improve engine: tone placement, backtrack limit, grab default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix tone placement for uâ, uê, uơ → tone on second vowel (e.g. tuấn, thuế, phương) - Limit flexible backward scan to 3 chars to prevent cross-syllable modification - Set grab = true as default config - Add 9 new engine tests for tone placement and backtrack limit --- engine/src/telex.rs | 28 +++++++++++------ engine/src/tests.rs | 75 ++++++++++++++++++++++++++++++++++++++++++++- engine/src/vni.rs | 7 +++-- vietc.toml | 2 +- 4 files changed, 99 insertions(+), 13 deletions(-) diff --git a/engine/src/telex.rs b/engine/src/telex.rs index 48fc64f..36e298a 100644 --- a/engine/src/telex.rs +++ b/engine/src/telex.rs @@ -2,6 +2,12 @@ use crate::engine::EngineEvent; const VOWELS: &[char] = &['a', 'e', 'i', 'o', 'u', 'y', 'ă', 'â', 'ê', 'ô', 'ơ', 'ư']; +/// Maximum number of characters to scan backward during flexible placement. +/// Vietnamese vowel clusters are at most 3 characters; limiting the scan +/// prevents modifying vowels in a different syllable (e.g. `dang d` + `a` +/// should not change the `a` in `dang`). 
+const MAX_FLEXIBLE_BACKTRACK: usize = 3; + fn is_vowel(c: char) -> bool { VOWELS.contains(&c) } @@ -152,11 +158,12 @@ impl TelexEngine { if i > 0 && is_vowel(chars[i - 1]) { let first = chars[i - 1]; let second = chars[i]; - // For oa, oe, uy → tone on second vowel (already at position i) - // For others → tone on first vowel + // For oa, oe, uâ, uê, uơ, uy, iê, yê → tone on second vowel let tone_on_second = matches!( (first, second), - ('o', 'a') | ('o', 'e') | ('u', 'y') | ('i', 'ê') | ('y', 'ê') + ('o', 'a') | ('o', 'e') + | ('u', 'â') | ('u', 'ê') | ('u', 'ơ') | ('u', 'y') + | ('i', 'ê') | ('y', 'ê') ); if !tone_on_second { // Apply tone to first vowel @@ -212,13 +219,15 @@ impl TelexEngine { } } - // Flexible placement: if last char is not a vowel, scan backward - // for a matching vowel to form a double-vowel pair. + // Flexible placement: if last char is not a vowel, scan the last + // N chars for a matching vowel to form a double-vowel pair. + // Limited backtrack prevents modifying vowels in a different syllable. if matches!(ch, 'a' | 'e' | 'o') { if let Some(last_ch) = self.buffer.chars().last() { if !is_vowel(last_ch) { let chars: Vec<char> = self.buffer.chars().collect(); - for i in (0..chars.len()).rev() { + let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK); + for i in (start..chars.len()).rev() { if chars[i] == ch { let replacement = match ch { 'a' => 'â', @@ -255,12 +264,13 @@ impl TelexEngine { } } - // Flexible placement: if last char is not a vowel, scan backward - // for a vowel to apply the w modifier. + // Flexible placement: if last char is not a vowel, scan the last + // N chars for a vowel to apply the w modifier. 
if let Some(last_ch) = self.buffer.chars().last() { if !is_vowel(last_ch) { let chars: Vec<char> = self.buffer.chars().collect(); - for i in (0..chars.len()).rev() { + let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK); + for i in (start..chars.len()).rev() { if is_vowel(chars[i]) { if let Some(modified) = apply_w_to_vowel(chars[i]) { self.buffer = chars[..i].iter().collect::<String>(); diff --git a/engine/src/tests.rs b/engine/src/tests.rs index 25ade3b..7a8f4f8 100644 --- a/engine/src/tests.rs +++ b/engine/src/tests.rs @@ -279,10 +279,69 @@ mod tests { #[test] fn telex_uy_tone() { let mut e = Engine::new(InputMethod::Telex); - // Engine applies tone to first vowel in "uy": uý + // Engine applies tone to second vowel (y) in "uy": uý assert_eq!(get_display(&process_input(&mut e, "uys")), "uý"); } + #[test] + fn telex_ua_tone_on_first_vowel() { + let mut e = Engine::new(InputMethod::Telex); + // "ua" → tone on first vowel (u): mùa → "ùa" + assert_eq!(get_display(&process_input(&mut e, "uaf")), "ùa"); + } + + #[test] + fn telex_uâ_tone_on_second_vowel() { + let mut e = Engine::new(InputMethod::Telex); + // "uâ" → tone on second vowel (â): tuấn + assert_eq!(get_display(&process_input(&mut e, "tuana")), "tuân"); + assert_eq!(get_display(&process_input(&mut e, "tuanas")), "tuấn"); + } + + #[test] + fn telex_uê_tone_on_second_vowel() { + let mut e = Engine::new(InputMethod::Telex); + // "uê" → tone on second vowel (ê): thuế + assert_eq!(get_display(&process_input(&mut e, "thuee")), "thuê"); + assert_eq!(get_display(&process_input(&mut e, "thuees")), "thuế"); + } + + // ================================================================ + // Telex: Flexible backtrack limit + // ================================================================ + + #[test] + fn telex_flexible_backtrack_limit() { + let mut e = Engine::new(InputMethod::Telex); + // "dangd" + "a" should NOT modify the 'a' in "dang" + // (too far back, crosses a syllable boundary). 
+ // The last 3 chars are "ngd" → no vowel → 'a' is appended normally. + assert_eq!(get_display(&process_input(&mut e, "dangda")), "dangda"); + } + + #[test] + fn telex_flexible_backtrack_still_works_near() { + let mut e = Engine::new(InputMethod::Telex); + // "tran" + "a" → last 3: "ran" → 'a' found at buffer index 2 → "trân" + assert_eq!(get_display(&process_input(&mut e, "trana")), "trân"); + } + + #[test] + fn telex_flexible_backtrack_w_limit() { + let mut e = Engine::new(InputMethod::Telex); + // "dangd" + "w" should NOT modify 'a' in "dang". + // w becomes a pending modifier (no vowel found within backtrack). + // On flush, pending w is consumed without modifying anything. + assert_eq!(get_display(&process_input(&mut e, "dangdw")), "dangd"); + } + + #[test] + fn telex_flexible_backtrack_w_still_works_near() { + let mut e = Engine::new(InputMethod::Telex); + // "ngon" + "w" → last 3: "gon" → 'o' found at buffer index 2 → "ngơn" + assert_eq!(get_display(&process_input(&mut e, "ngonw")), "ngơn"); + } + // ================================================================ // Telex: Digraph dd // ================================================================ @@ -624,6 +683,20 @@ mod tests { assert_eq!(get_display(&process_input(&mut e, "1")), "1"); } + #[test] + fn vni_flexible_backtrack_limit() { + let mut e = Engine::new(InputMethod::Vni); + // "dangd" + "6" should NOT modify 'a' in "dang" + assert_eq!(get_display(&process_input(&mut e, "dangd6")), "dangd6"); + } + + #[test] + fn vni_flexible_backtrack_still_works_near() { + let mut e = Engine::new(InputMethod::Vni); + // "tran" + "6" → "trân" (within backtrack limit) + assert_eq!(get_display(&process_input(&mut e, "tran6")), "trân"); + } + // ================================================================ // VNI: Tones // ================================================================ diff --git a/engine/src/vni.rs b/engine/src/vni.rs index 2b6799a..9ca14b1 100644 --- a/engine/src/vni.rs +++ b/engine/src/vni.rs @@ -6,6 +6,8 
@@ fn is_vowel(c: char) -> bool { VOWELS.contains(&c) } +const MAX_FLEXIBLE_BACKTRACK: usize = 3; + fn apply_tone_to_vowel(vowel: char, digit: char) -> Option<char> { // VNI: 1=sắc, 2=huyền, 3=hỏi, 4=ngã, 5=nặng let table: &[(char, char, char)] = &[ @@ -132,11 +134,12 @@ impl VniEngine { } } - // Flexible placement: last char not a vowel, scan backward + // Flexible placement: last char not a vowel, scan the last N chars if let Some(last_ch) = self.buffer.chars().last() { if !is_vowel(last_ch) { let chars: Vec<char> = self.buffer.chars().collect(); - for i in (0..chars.len()).rev() { + let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK); + for i in (start..chars.len()).rev() { if is_vowel(chars[i]) { // Try tone first (1-5) if let Some(modified) = apply_tone_to_vowel(chars[i], digit) { diff --git a/vietc.toml b/vietc.toml index f06537a..f603a54 100644 --- a/vietc.toml +++ b/vietc.toml @@ -3,7 +3,7 @@ input_method = "telex" toggle_key = "space" start_enabled = true -grab = false +grab = true [auto_restore] enabled = true