Improve engine: tone placement, backtrack limit, grab default

- Fix tone placement for uâ, uê, uơ → tone on second vowel (e.g. tuấn, thuế, phương)
- Limit flexible backward scan to 3 chars to prevent cross-syllable modification
- Set grab = true as default config
- Add 9 new engine tests for tone placement and backtrack limit
This commit is contained in:
vndangkhoa 2026-06-24 17:57:24 +07:00
parent 44a4b032a6
commit cdb5eb4812
4 changed files with 99 additions and 13 deletions

View file

@ -2,6 +2,12 @@ use crate::engine::EngineEvent;
const VOWELS: &[char] = &['a', 'e', 'i', 'o', 'u', 'y', 'ă', 'â', 'ê', 'ô', 'ơ', 'ư']; const VOWELS: &[char] = &['a', 'e', 'i', 'o', 'u', 'y', 'ă', 'â', 'ê', 'ô', 'ơ', 'ư'];
/// Maximum number of trailing buffer characters examined during flexible placement.
///
/// The scan window is the last `MAX_FLEXIBLE_BACKTRACK` characters of the
/// buffer, consonants included. A vowel followed by up to two consonants is
/// therefore still reachable (`tran` + `a` → `trân`), while a vowel that sits
/// in an earlier syllable is not (`dangd` + `a` must not change the `a` in
/// `dang`).
const MAX_FLEXIBLE_BACKTRACK: usize = 3;
fn is_vowel(c: char) -> bool { fn is_vowel(c: char) -> bool {
VOWELS.contains(&c) VOWELS.contains(&c)
} }
@ -152,11 +158,12 @@ impl TelexEngine {
if i > 0 && is_vowel(chars[i - 1]) { if i > 0 && is_vowel(chars[i - 1]) {
let first = chars[i - 1]; let first = chars[i - 1];
let second = chars[i]; let second = chars[i];
// For oa, oe, uy → tone on second vowel (already at position i) // For oa, oe, uâ, uê, uơ, uy, iê, yê → tone on second vowel
// For others → tone on first vowel
let tone_on_second = matches!( let tone_on_second = matches!(
(first, second), (first, second),
('o', 'a') | ('o', 'e') | ('u', 'y') | ('i', 'ê') | ('y', 'ê') ('o', 'a') | ('o', 'e')
| ('u', 'â') | ('u', 'ê') | ('u', 'ơ') | ('u', 'y')
| ('i', 'ê') | ('y', 'ê')
); );
if !tone_on_second { if !tone_on_second {
// Apply tone to first vowel // Apply tone to first vowel
@ -212,13 +219,15 @@ impl TelexEngine {
} }
} }
// Flexible placement: if last char is not a vowel, scan backward // Flexible placement: if last char is not a vowel, scan the last
// for a matching vowel to form a double-vowel pair. // N chars for a matching vowel to form a double-vowel pair.
// Limited backtrack prevents modifying vowels in a different syllable.
if matches!(ch, 'a' | 'e' | 'o') { if matches!(ch, 'a' | 'e' | 'o') {
if let Some(last_ch) = self.buffer.chars().last() { if let Some(last_ch) = self.buffer.chars().last() {
if !is_vowel(last_ch) { if !is_vowel(last_ch) {
let chars: Vec<char> = self.buffer.chars().collect(); let chars: Vec<char> = self.buffer.chars().collect();
for i in (0..chars.len()).rev() { let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK);
for i in (start..chars.len()).rev() {
if chars[i] == ch { if chars[i] == ch {
let replacement = match ch { let replacement = match ch {
'a' => 'â', 'a' => 'â',
@ -255,12 +264,13 @@ impl TelexEngine {
} }
} }
// Flexible placement: if last char is not a vowel, scan backward // Flexible placement: if last char is not a vowel, scan the last
// for a vowel to apply the w modifier. // N chars for a vowel to apply the w modifier.
if let Some(last_ch) = self.buffer.chars().last() { if let Some(last_ch) = self.buffer.chars().last() {
if !is_vowel(last_ch) { if !is_vowel(last_ch) {
let chars: Vec<char> = self.buffer.chars().collect(); let chars: Vec<char> = self.buffer.chars().collect();
for i in (0..chars.len()).rev() { let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK);
for i in (start..chars.len()).rev() {
if is_vowel(chars[i]) { if is_vowel(chars[i]) {
if let Some(modified) = apply_w_to_vowel(chars[i]) { if let Some(modified) = apply_w_to_vowel(chars[i]) {
self.buffer = chars[..i].iter().collect::<String>(); self.buffer = chars[..i].iter().collect::<String>();

View file

@ -279,10 +279,69 @@ mod tests {
#[test] #[test]
fn telex_uy_tone() { fn telex_uy_tone() {
let mut e = Engine::new(InputMethod::Telex); let mut e = Engine::new(InputMethod::Telex);
// Engine applies tone to first vowel in "uy": uý // Engine applies tone to second vowel (y) in "uy": uý
assert_eq!(get_display(&process_input(&mut e, "uys")), ""); assert_eq!(get_display(&process_input(&mut e, "uys")), "");
} }
#[test]
fn telex_ua_tone_on_first_vowel() {
    // The pair "ua" keeps its tone mark on the leading vowel (u), as in
    // "mùa": typing `uaf` must display "ùa".
    let mut engine = Engine::new(InputMethod::Telex);
    let events = process_input(&mut engine, "uaf");
    assert_eq!(get_display(&events), "ùa");
}
#[test]
fn telex_uâ_tone_on_second_vowel() {
    // In "uâ" the tone mark belongs on the second vowel (â), as in "tuấn".
    // NOTE(review): both inputs are fed to the same engine instance, exactly
    // as before; this presumably relies on `process_input` starting each call
    // from a clean state — confirm.
    let mut engine = Engine::new(InputMethod::Telex);
    for (keys, expected) in [("tuana", "tuân"), ("tuanas", "tuấn")] {
        assert_eq!(get_display(&process_input(&mut engine, keys)), expected);
    }
}
#[test]
fn telex_uê_tone_on_second_vowel() {
    // In "uê" the tone mark belongs on the second vowel (ê), as in "thuế".
    // NOTE(review): both inputs are fed to the same engine instance, exactly
    // as before; this presumably relies on `process_input` starting each call
    // from a clean state — confirm.
    let mut engine = Engine::new(InputMethod::Telex);
    for (keys, expected) in [("thuee", "thuê"), ("thuees", "thuế")] {
        assert_eq!(get_display(&process_input(&mut engine, keys)), expected);
    }
}
// ================================================================
// Telex: Flexible backtrack limit
// ================================================================
#[test]
fn telex_flexible_backtrack_limit() {
    // After "dangd" the three-character scan window is "ngd", which holds no
    // `a`. The trailing `a` must therefore be appended as-is instead of
    // reaching back across the syllable boundary to rewrite "dang".
    let mut engine = Engine::new(InputMethod::Telex);
    let events = process_input(&mut engine, "dangda");
    assert_eq!(get_display(&events), "dangda");
}
#[test]
fn telex_flexible_backtrack_still_works_near() {
    // After "tran" the scan window is "ran"; the `a` is inside it, so a
    // second `a` still upgrades it and the display becomes "trân".
    let mut engine = Engine::new(InputMethod::Telex);
    let events = process_input(&mut engine, "trana");
    assert_eq!(get_display(&events), "trân");
}
#[test]
fn telex_flexible_backtrack_w_limit() {
    // After "dangd" the scan window ("ngd") contains no vowel, so `w` must
    // leave the `a` in "dang" untouched.
    // Per the original author's note, the unmatched `w` is kept as a pending
    // modifier and consumed on flush without changing anything; that path is
    // not visible from this test alone.
    // NOTE(review): this pins the `w` keystroke disappearing from the output
    // ("dangd", not "dangdw") — confirm that dropping it is intended.
    let mut engine = Engine::new(InputMethod::Telex);
    let events = process_input(&mut engine, "dangdw");
    assert_eq!(get_display(&events), "dangd");
}
#[test]
fn telex_flexible_backtrack_w_still_works_near() {
    // After "ngon" the scan window is "gon"; the `o` is inside it, so `w`
    // still applies and the display becomes "ngơn".
    let mut engine = Engine::new(InputMethod::Telex);
    let events = process_input(&mut engine, "ngonw");
    assert_eq!(get_display(&events), "ngơn");
}
// ================================================================ // ================================================================
// Telex: Digraph dd // Telex: Digraph dd
// ================================================================ // ================================================================
@ -624,6 +683,20 @@ mod tests {
assert_eq!(get_display(&process_input(&mut e, "1")), "1"); assert_eq!(get_display(&process_input(&mut e, "1")), "1");
} }
#[test]
fn vni_flexible_backtrack_limit() {
    // VNI counterpart of the backtrack limit: after "dangd" the digit `6`
    // must not reach back to the `a` in "dang", so the display stays
    // "dangd6".
    let mut engine = Engine::new(InputMethod::Vni);
    let events = process_input(&mut engine, "dangd6");
    assert_eq!(get_display(&events), "dangd6");
}
#[test]
fn vni_flexible_backtrack_still_works_near() {
    // The `a` in "tran" is within the backtrack window, so the digit `6`
    // still modifies it and the display becomes "trân".
    let mut engine = Engine::new(InputMethod::Vni);
    let events = process_input(&mut engine, "tran6");
    assert_eq!(get_display(&events), "trân");
}
// ================================================================ // ================================================================
// VNI: Tones // VNI: Tones
// ================================================================ // ================================================================

View file

@ -6,6 +6,8 @@ fn is_vowel(c: char) -> bool {
VOWELS.contains(&c) VOWELS.contains(&c)
} }
/// Maximum number of trailing buffer characters examined when flexible
/// placement scans backward for a vowel to modify.
///
/// Keeping the window this short stops a modifier digit from altering a vowel
/// in an earlier syllable (`dangd` + `6` leaves `dang` untouched) while still
/// reaching a vowel that is followed by a short consonant tail
/// (`tran` + `6` → `trân`).
// NOTE(review): the Telex engine declares a constant with the same name and
// value; presumably the two are meant to stay in sync — confirm.
const MAX_FLEXIBLE_BACKTRACK: usize = 3;
fn apply_tone_to_vowel(vowel: char, digit: char) -> Option<char> { fn apply_tone_to_vowel(vowel: char, digit: char) -> Option<char> {
// VNI: 1=sắc, 2=huyền, 3=hỏi, 4=ngã, 5=nặng // VNI: 1=sắc, 2=huyền, 3=hỏi, 4=ngã, 5=nặng
let table: &[(char, char, char)] = &[ let table: &[(char, char, char)] = &[
@ -132,11 +134,12 @@ impl VniEngine {
} }
} }
// Flexible placement: last char not a vowel, scan backward // Flexible placement: last char not a vowel, scan the last N chars
if let Some(last_ch) = self.buffer.chars().last() { if let Some(last_ch) = self.buffer.chars().last() {
if !is_vowel(last_ch) { if !is_vowel(last_ch) {
let chars: Vec<char> = self.buffer.chars().collect(); let chars: Vec<char> = self.buffer.chars().collect();
for i in (0..chars.len()).rev() { let start = chars.len().saturating_sub(MAX_FLEXIBLE_BACKTRACK);
for i in (start..chars.len()).rev() {
if is_vowel(chars[i]) { if is_vowel(chars[i]) {
// Try tone first (1-5) // Try tone first (1-5)
if let Some(modified) = apply_tone_to_vowel(chars[i], digit) { if let Some(modified) = apply_tone_to_vowel(chars[i], digit) {

View file

@ -3,7 +3,7 @@
input_method = "telex" input_method = "telex"
toggle_key = "space" toggle_key = "space"
start_enabled = true start_enabled = true
grab = false grab = true
[auto_restore] [auto_restore]
enabled = true enabled = true