Merge pull request #1 from vndangkhoa/devin/1782468642-tone-placement

Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Co-authored-by: vndangkhoa <vonguyendangkhoa@gmail.com>
This commit is contained in:
vndangkhoa 2026-06-26 17:13:05 +07:00 committed by GitHub
commit a4c83e06b9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 67 additions and 1 deletions

View file

@ -288,6 +288,7 @@ impl BambooEngine {
}
fn find_tone_position(&self, range: std::ops::Range<usize>) -> usize {
let start = range.start;
let mut vowels: Vec<usize> = Vec::new();
for i in range {
@ -301,6 +302,18 @@ impl BambooEngine {
return self.composition.len().saturating_sub(1);
}
// Exclude onset glides: in "qu…" the u and in "gi…" the i belong to the
// initial consonant, not the vowel nucleus — so they must never carry the
// tone (e.g. "quả" not "qủa", "giờ" not "gìơ"). Only strip the glide when
// another vowel follows it; bare "gì"/"qu" keep the letter as the nucleus.
if vowels.len() >= 2 && vowels[0] == start + 1 {
let onset = self.composition[start].base_char.to_ascii_lowercase();
let glide = self.composition[start + 1].base_char.to_ascii_lowercase();
if (onset == 'q' && glide == 'u') || (onset == 'g' && glide == 'i') {
vowels.remove(0);
}
}
if vowels.len() == 1 {
return vowels[0];
}
@ -322,7 +335,7 @@ impl BambooEngine {
let tone_on_second = matches!((cv1, cv2),
('o', 'a') | ('o', 'e') | ('u', 'y') |
('i', 'ê') | ('y', 'ê') | ('u', 'ô') | ('ư', 'ơ') |
('i', 'o') | ('u', 'â')
('i', 'o') | ('u', 'â') | ('u', 'ê') | ('u', 'ơ')
);
if tone_on_second {

View file

@ -0,0 +1,53 @@
//! Regression tests for tone placement on syllables whose onset contains a
//! glide letter ("qu", "gi") and on the "uê"/"uơ" vowel clusters.
use std::collections::HashMap;
use vietc_engine::{Engine, InputMethod};
/// Replays `keys`, one keystroke per character, through
/// `Engine::replay_keystrokes` with `InputMethod::Telex` and an empty map.
///
/// Returns the first element of the engine's result tuple (presumably the
/// composed text — confirm against the engine API).
fn telex(keys: &str) -> String {
    let keystrokes: Vec<char> = keys.chars().collect();
    let empty_map = HashMap::new();
    let outcome = Engine::replay_keystrokes(InputMethod::Telex, &empty_map, &keystrokes);
    outcome.0
}
/// Replays `keys`, one keystroke per character, through
/// `Engine::replay_keystrokes` with `InputMethod::Vni` and an empty map.
///
/// Returns the first element of the engine's result tuple (presumably the
/// composed text — confirm against the engine API).
fn vni(keys: &str) -> String {
    let keystrokes: Vec<char> = keys.chars().collect();
    let empty_map = HashMap::new();
    let outcome = Engine::replay_keystrokes(InputMethod::Vni, &empty_map, &keystrokes);
    outcome.0
}
/// Regression table: `(telex keystrokes, vni keystrokes, expected word)`.
///
/// Each row gives the same word typed with both input methods; the test
/// replays both keystroke strings and compares each result with the third
/// field. Every expected word must be non-empty.
const CASES: &[(&str, &str, &str)] = &[
    // "qu" onset: the u is part of the consonant, tone stays on the nucleus.
    ("quar", "qua3", "quả"),
    ("quaf", "qua2", "quà"),
    ("quas", "qua1", "quá"),
    ("quaj", "qua5", "quạ"),
    // "gi" onset: the i is part of the consonant, tone stays on the nucleus.
    ("gias", "gia1", "giá"),
    ("giof", "gio2", "giò"),
    ("giowf", "gio72", "giờ"),
    ("giups", "giu1p", "giúp"),
    ("gieets", "gie61t", "giết"),
    ("giuwowngf", "giuo7ng2", "giường"),
    // "uê"/"uơ" clusters: tone belongs on the second vowel.
    ("thuees", "thue61", "thuế"),
    ("hueej", "hue65", "huệ"),
    // Controls that must keep working: bare "gì", "uy", "uâ", "uô".
    // In bare "gì" the i is the only vowel, so it carries the tone itself.
    ("gif", "gi2", "gì"),
    ("quys", "quy1", "quý"),
    ("quaanf", "qua62n", "quần"),
    ("muoons", "muo61n", "muốn"),
];
/// Replays every row of `CASES` through both `telex` and `vni` and collects
/// all mismatches before asserting, so one run reports every failing row
/// instead of stopping at the first.
#[test]
fn onset_glide_and_cluster_tone_placement() {
    let fails: Vec<String> = CASES
        .iter()
        .flat_map(|&(tk, vk, want)| {
            // Telex first, then VNI, so messages keep the per-row ordering.
            let gt = telex(tk);
            let telex_miss =
                (gt != want).then(|| format!("TELEX {tk:>10} -> {gt:>8} want {want}"));
            let gv = vni(vk);
            let vni_miss =
                (gv != want).then(|| format!("VNI {vk:>10} -> {gv:>8} want {want}"));
            [telex_miss, vni_miss]
        })
        .flatten()
        .collect();
    assert!(fails.is_empty(), "tone placement mismatches:\n{}", fails.join("\n"));
}