Merge pull request #1 from vndangkhoa/devin/1782468642-tone-placement
Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: vndangkhoa <vonguyendangkhoa@gmail.com>
This commit is contained in:
commit
a4c83e06b9
2 changed files with 67 additions and 1 deletions
|
|
@ -288,6 +288,7 @@ impl BambooEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_tone_position(&self, range: std::ops::Range<usize>) -> usize {
|
fn find_tone_position(&self, range: std::ops::Range<usize>) -> usize {
|
||||||
|
let start = range.start;
|
||||||
let mut vowels: Vec<usize> = Vec::new();
|
let mut vowels: Vec<usize> = Vec::new();
|
||||||
|
|
||||||
for i in range {
|
for i in range {
|
||||||
|
|
@ -301,6 +302,18 @@ impl BambooEngine {
|
||||||
return self.composition.len().saturating_sub(1);
|
return self.composition.len().saturating_sub(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exclude onset glides: in "qu…" the u and in "gi…" the i belong to the
|
||||||
|
// initial consonant, not the vowel nucleus — so they must never carry the
|
||||||
|
// tone (e.g. "quả" not "qủa", "giờ" not "gìơ"). Only strip the glide when
|
||||||
|
// another vowel follows it; bare "gì"/"qu" keep the letter as the nucleus.
|
||||||
|
if vowels.len() >= 2 && vowels[0] == start + 1 {
|
||||||
|
let onset = self.composition[start].base_char.to_ascii_lowercase();
|
||||||
|
let glide = self.composition[start + 1].base_char.to_ascii_lowercase();
|
||||||
|
if (onset == 'q' && glide == 'u') || (onset == 'g' && glide == 'i') {
|
||||||
|
vowels.remove(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if vowels.len() == 1 {
|
if vowels.len() == 1 {
|
||||||
return vowels[0];
|
return vowels[0];
|
||||||
}
|
}
|
||||||
|
|
@ -322,7 +335,7 @@ impl BambooEngine {
|
||||||
let tone_on_second = matches!((cv1, cv2),
|
let tone_on_second = matches!((cv1, cv2),
|
||||||
('o', 'a') | ('o', 'e') | ('u', 'y') |
|
('o', 'a') | ('o', 'e') | ('u', 'y') |
|
||||||
('i', 'ê') | ('y', 'ê') | ('u', 'ô') | ('ư', 'ơ') |
|
('i', 'ê') | ('y', 'ê') | ('u', 'ô') | ('ư', 'ơ') |
|
||||||
('i', 'o') | ('u', 'â')
|
('i', 'o') | ('u', 'â') | ('u', 'ê') | ('u', 'ơ')
|
||||||
);
|
);
|
||||||
|
|
||||||
if tone_on_second {
|
if tone_on_second {
|
||||||
|
|
|
||||||
53
engine/tests/tone_placement.rs
Normal file
53
engine/tests/tone_placement.rs
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
//! Regression tests for tone placement on syllables whose onset contains a
|
||||||
|
//! glide letter ("qu", "gi") and on the "uê"/"uơ" vowel clusters.
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use vietc_engine::{Engine, InputMethod};
|
||||||
|
|
||||||
|
fn telex(keys: &str) -> String {
|
||||||
|
Engine::replay_keystrokes(InputMethod::Telex, &HashMap::new(), &keys.chars().collect::<Vec<_>>()).0
|
||||||
|
}
|
||||||
|
|
||||||
|
fn vni(keys: &str) -> String {
|
||||||
|
Engine::replay_keystrokes(InputMethod::Vni, &HashMap::new(), &keys.chars().collect::<Vec<_>>()).0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tone-placement fixtures, one tuple per word:
/// `(Telex keystrokes, VNI keystrokes, expected composed word)`.
///
/// Both keystroke sequences of a tuple are expected to compose to the same
/// word.
const CASES: &[(&str, &str, &str)] = &[
    // Onset "qu": the `u` belongs to the initial consonant, so the tone mark
    // lands on the vowel that follows it.
    ("quar", "qua3", "quả"),
    ("quaf", "qua2", "quà"),
    ("quas", "qua1", "quá"),
    ("quaj", "qua5", "quạ"),
    // Onset "gi": the `i` belongs to the initial consonant, so the tone mark
    // lands on the vowel that follows it.
    ("gias", "gia1", "giá"),
    ("giof", "gio2", "giò"),
    ("giowf", "gio72", "giờ"),
    ("giups", "giu1p", "giúp"),
    ("gieets", "gie61t", "giết"),
    ("giuwowngf", "giuo7ng2", "giường"),
    // "uê"/"uơ" clusters: the tone mark goes on the second vowel.
    // NOTE(review): only "uê" words are listed here; there is no "uơ" fixture
    // in this table — consider adding one once the expected output is confirmed.
    ("thuees", "thue61", "thuế"),
    ("hueej", "hue65", "huệ"),
    // Controls that must not regress: bare "gì", and the "uy", "uâ", "uô"
    // clusters.
    ("gif", "gi2", "gì"),
    ("quys", "quy1", "quý"),
    ("quaanf", "qua62n", "quần"),
    ("muoons", "muo61n", "muốn"),
];
|
||||||
|
|
||||||
|
/// Runs every `CASES` entry through both the Telex and the VNI helper and
/// fails once, at the end, with a report listing every mismatch (Telex line
/// before VNI line for each entry, entries in table order).
#[test]
fn onset_glide_and_cluster_tone_placement() {
    let mismatches: Vec<String> = CASES
        .iter()
        .flat_map(|&(tk, vk, want)| {
            // Same call order as a plain loop: Telex first, then VNI.
            let gt = telex(tk);
            let telex_report =
                (gt != want).then(|| format!("TELEX {tk:>10} -> {gt:>8} want {want}"));

            let gv = vni(vk);
            let vni_report =
                (gv != want).then(|| format!("VNI {vk:>10} -> {gv:>8} want {want}"));

            // Each Option contributes zero or one line to the report.
            telex_report.into_iter().chain(vni_report)
        })
        .collect();

    // Collect everything before asserting so one run shows all failures.
    assert!(
        mismatches.is_empty(),
        "tone placement mismatches:\n{}",
        mismatches.join("\n")
    );
}
|
||||||
Loading…
Reference in a new issue