From 6e48d8b2fbc14d7f85da36ead9e89526945c5d34 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 26 Jun 2026 10:10:42 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20correct=20tone=20placement=20for=20qu/gi?= =?UTF-8?q?=20onset=20glides=20and=20u=C3=AA/u=C6=A1=20clusters?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: vndangkhoa --- engine/src/bamboo.rs | 15 +++++++++- engine/tests/tone_placement.rs | 53 ++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 engine/tests/tone_placement.rs diff --git a/engine/src/bamboo.rs b/engine/src/bamboo.rs index dcb02be..4406c37 100644 --- a/engine/src/bamboo.rs +++ b/engine/src/bamboo.rs @@ -288,6 +288,7 @@ impl BambooEngine { } fn find_tone_position(&self, range: std::ops::Range<usize>) -> usize { + let start = range.start; let mut vowels: Vec<usize> = Vec::new(); for i in range { @@ -301,6 +302,18 @@ impl BambooEngine { return self.composition.len().saturating_sub(1); } + // Exclude onset glides: in "qu…" the u and in "gi…" the i belong to the + // initial consonant, not the vowel nucleus — so they must never carry the + // tone (e.g. "quả" not "qủa", "giờ" not "gìơ"). Only strip the glide when + // another vowel follows it; bare "gì"/"qu" keep the letter as the nucleus. 
+ if vowels.len() >= 2 && vowels[0] == start + 1 { + let onset = self.composition[start].base_char.to_ascii_lowercase(); + let glide = self.composition[start + 1].base_char.to_ascii_lowercase(); + if (onset == 'q' && glide == 'u') || (onset == 'g' && glide == 'i') { + vowels.remove(0); + } + } + if vowels.len() == 1 { return vowels[0]; } @@ -322,7 +335,7 @@ impl BambooEngine { let tone_on_second = matches!((cv1, cv2), ('o', 'a') | ('o', 'e') | ('u', 'y') | ('i', 'ê') | ('y', 'ê') | ('u', 'ô') | ('ư', 'ơ') | - ('i', 'o') | ('u', 'â') + ('i', 'o') | ('u', 'â') | ('u', 'ê') | ('u', 'ơ') ); if tone_on_second { diff --git a/engine/tests/tone_placement.rs b/engine/tests/tone_placement.rs new file mode 100644 index 0000000..fe594d8 --- /dev/null +++ b/engine/tests/tone_placement.rs @@ -0,0 +1,53 @@ +//! Regression tests for tone placement on syllables whose onset contains a +//! glide letter ("qu", "gi") and on the "uê"/"uơ" vowel clusters. + +use std::collections::HashMap; +use vietc_engine::{Engine, InputMethod}; + +fn telex(keys: &str) -> String { + Engine::replay_keystrokes(InputMethod::Telex, &HashMap::new(), &keys.chars().collect::<Vec<_>>()).0 +} + +fn vni(keys: &str) -> String { + Engine::replay_keystrokes(InputMethod::Vni, &HashMap::new(), &keys.chars().collect::<Vec<_>>()).0 +} + +/// (telex keystrokes, vni keystrokes, expected word) +const CASES: &[(&str, &str, &str)] = &[ + // "qu" onset: the u is part of the consonant, tone stays on the nucleus. + ("quar", "qua3", "quả"), + ("quaf", "qua2", "quà"), + ("quas", "qua1", "quá"), + ("quaj", "qua5", "quạ"), + // "gi" onset: the i is part of the consonant, tone stays on the nucleus. + ("gias", "gia1", "giá"), + ("giof", "gio2", "giò"), + ("giowf", "gio72", "giờ"), + ("giups", "giu1p", "giúp"), + ("gieets", "gie61t", "giết"), + ("giuwowngf", "giuo7ng2", "giường"), + // "uê"/"uơ" clusters: tone belongs on the second vowel. 
+ ("thuees", "thue61", "thuế"), + ("hueej", "hue65", "huệ"), + // Controls that must keep working: bare "gì", "uy", "uâ", "uô". + ("gif", "gi2", "gì"), + ("quys", "quy1", "quý"), + ("quaanf", "qua62n", "quần"), + ("muoons", "muo61n", "muốn"), +]; + +#[test] +fn onset_glide_and_cluster_tone_placement() { + let mut fails = Vec::new(); + for &(tk, vk, want) in CASES { + let gt = telex(tk); + if gt != want { + fails.push(format!("TELEX {tk:>10} -> {gt:>8} want {want}")); + } + let gv = vni(vk); + if gv != want { + fails.push(format!("VNI {vk:>10} -> {gv:>8} want {want}")); + } + } + assert!(fails.is_empty(), "tone placement mismatches:\n{}", fails.join("\n")); +}