From 7569e7e2184253c9353e9d6925c42584fe098caa Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 26 Jun 2026 10:31:37 +0000 Subject: [PATCH] feat: auto-restore English words and invalid Vietnamese syllables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Vietnamese mode is on, the engine transformed every word including English (test->tét, cargo->cảgo, status->státu). This wires up the previously-dead english.rs dictionary and spelling.rs validator so that on word commit, words that are clearly English or not phonologically valid Vietnamese are reverted to the raw keystrokes typed. Genuine Vietnamese (tiếng, việt, quả) is kept. Gated by the existing [auto_restore] enabled config (default on). Co-Authored-By: vndangkhoa --- daemon/src/main.rs | 20 ++++++- engine/src/engine.rs | 49 +++++++++++++++++ engine/src/english.rs | 5 ++ engine/src/lib.rs | 1 + engine/tests/auto_restore.rs | 101 +++++++++++++++++++++++++++++++++++ 5 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 engine/tests/auto_restore.rs diff --git a/daemon/src/main.rs b/daemon/src/main.rs index 44ded91..e5ec156 100644 --- a/daemon/src/main.rs +++ b/daemon/src/main.rs @@ -127,6 +127,7 @@ impl Daemon { }; let mut engine = Engine::new(method); engine.set_enabled(config.start_enabled); + engine.set_auto_restore(config.auto_restore.enabled); engine_enabled.store(config.start_enabled, Ordering::SeqCst); for (shortcut, expansion) in &config.macros { @@ -197,6 +198,8 @@ impl Daemon { _ => InputMethod::Telex, }; self.engine.set_method(method); + self.engine + .set_auto_restore(new_config.auto_restore.enabled); self.engine.clear_macros(); for (shortcut, expansion) in &new_config.macros { @@ -287,7 +290,7 @@ impl Daemon { if !self.screen_output.is_empty() { let backspaces = self.screen_output.chars().count(); commands.push(OutputCommand::Backspace(backspaces)); - 
commands.push(OutputCommand::Type(self.screen_output.clone())); + commands.push(OutputCommand::Type(self.word_to_commit())); } // Type the flush character itself commands.push(OutputCommand::Type(ch.to_string())); @@ -317,7 +320,7 @@ impl Daemon { if !self.screen_output.is_empty() { let backspaces = self.screen_output.chars().count(); commands.push(OutputCommand::Backspace(backspaces)); - commands.push(OutputCommand::Type(self.screen_output.clone())); + commands.push(OutputCommand::Type(self.word_to_commit())); } self.keystroke_history.clear(); self.screen_output.clear(); @@ -379,6 +382,19 @@ impl Daemon { commands } + /// Decide what to type when committing the current word: the Vietnamese + /// composition normally, or — when smart auto-restore is enabled and the + /// word is English / not valid Vietnamese — the raw keystrokes typed. + fn word_to_commit(&self) -> String { + if self.config.auto_restore.enabled { + let raw: String = self.keystroke_history.iter().collect(); + if Engine::should_restore_word(&self.screen_output, &raw) { + return raw; + } + } + self.screen_output.clone() + } + /// Reset the replay state (on flush, focus loss, modifier key, etc.) 
fn replay_reset(&mut self) { self.keystroke_history.clear(); diff --git a/engine/src/engine.rs b/engine/src/engine.rs index 13edb27..37e1c4e 100644 --- a/engine/src/engine.rs +++ b/engine/src/engine.rs @@ -1,6 +1,13 @@ use crate::bamboo::BambooEngine; +use crate::english::EnglishDict; use crate::input_method::InputMethod; use std::collections::HashMap; +use std::sync::OnceLock; + +fn english_dict() -> &'static EnglishDict { + static DICT: OnceLock<EnglishDict> = OnceLock::new(); + DICT.get_or_init(EnglishDict::new) +} #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] pub enum EngineEvent { @@ -17,6 +24,7 @@ pub struct Engine { macros: HashMap, raw_buffer: String, paste_mode: bool, + auto_restore: bool, } impl Engine { @@ -26,9 +34,41 @@ impl Engine { macros: HashMap::new(), raw_buffer: String::new(), paste_mode: false, + auto_restore: true, } } + pub fn set_auto_restore(&mut self, enabled: bool) { + self.auto_restore = enabled; + } + + /// Decide whether a committed word should be reverted to the raw keystrokes + /// the user typed instead of the Vietnamese transformation. Returns true for + /// words that are clearly English / non-Vietnamese: a known English word, a + /// result that isn't a phonologically valid Vietnamese syllable, or one that + /// contains letters foreign to Vietnamese. `composed` is the transformed + /// output; `raw` is the literal keystrokes typed. + pub fn should_restore_word(composed: &str, raw: &str) -> bool { + // No transformation happened — English already passed through untouched. + if composed == raw { + return false; + } + + let dict = english_dict(); + let raw_lower = raw.to_lowercase(); + let composed_lower = composed.to_lowercase(); + + // Genuine Vietnamese words that happen to look like English stay as-is. 
+ if dict.is_vietnamese_override(&composed_lower) { + return false; + } + if dict.is_english_word(&raw_lower) { + return true; + } + + !crate::spelling::is_valid_vietnamese_syllable(composed) + } + pub fn set_enabled(&mut self, enabled: bool) { self.bamboo.set_enabled(enabled); if !enabled { @@ -171,8 +211,17 @@ impl Engine { }); } + let raw = self.raw_buffer.clone(); self.reset(); if prev_len > 0 { + // Auto-restore: if the committed word is English / not valid + // Vietnamese, revert to the raw keystrokes the user typed. + if self.auto_restore && Engine::should_restore_word(&previous, &raw) { + return Some(EngineEvent::Replace { + backspaces: prev_len, + insert: raw, + }); + } // Don't include flush char in insert — daemon forwards it separately return Some(EngineEvent::Replace { backspaces: prev_len, diff --git a/engine/src/english.rs b/engine/src/english.rs index a33e4fc..9099724 100644 --- a/engine/src/english.rs +++ b/engine/src/english.rs @@ -364,6 +364,11 @@ impl EnglishDict { self.words.contains(word) } + pub fn is_vietnamese_override(&self, word: &str) -> bool { + self.vietnamese_overrides.contains(word) + } + + #[allow(dead_code)] pub fn should_restore(&self, word: &str) -> bool { if self.vietnamese_overrides.contains(word) { return false; diff --git a/engine/src/lib.rs b/engine/src/lib.rs index 034e133..a1ccbdf 100644 --- a/engine/src/lib.rs +++ b/engine/src/lib.rs @@ -1,5 +1,6 @@ mod bamboo; mod engine; +mod english; mod input_method; pub mod spelling; diff --git a/engine/tests/auto_restore.rs b/engine/tests/auto_restore.rs new file mode 100644 index 0000000..64ee2de --- /dev/null +++ b/engine/tests/auto_restore.rs @@ -0,0 +1,101 @@ +//! Tests for smart English auto-restore: when Vietnamese mode is on, words that +//! are clearly English / not valid Vietnamese revert to the raw keystrokes the +//! user typed, while genuine Vietnamese is kept. 
+ +use std::collections::HashMap; +use vietc_engine::{Engine, InputMethod}; + +fn telex(keys: &str) -> String { + Engine::replay_keystrokes(InputMethod::Telex, &HashMap::new(), &keys.chars().collect::<Vec<_>>()).0 +} + +/// Resolve what would actually be committed for a Telex keystroke sequence, +/// applying the auto-restore decision the daemon makes on word commit. +fn committed(keys: &str) -> String { + let composed = telex(keys); + let raw: String = keys.chars().collect(); + if Engine::should_restore_word(&composed, &raw) { + raw + } else { + composed + } +} + +#[test] +fn english_words_are_restored() { + // (telex keystrokes, expected committed word) + let cases = [ + ("fix", "fix"), // foreign letter f + ("cargo", "cargo"), // invalid onset/coda + ("status", "status"), // invalid cluster + ("world", "world"), // invalid coda + ("english", "english"), + ("sweet", "sweet"), // invalid onset "sw" + ]; + for (keys, want) in cases { + assert_eq!(committed(keys), want, "expected {keys} to restore to {want}"); + } +} + +#[test] +fn vietnamese_words_are_kept() { + let cases = [ + ("tieengs", "tiếng"), + ("vieejt", "việt"), + ("quar", "quả"), + ("gif", "gì"), + ("khoong", "không"), + ("tooi", "tôi"), + ("banhf", "bành"), + ("ddi", "đi"), + ]; + for (keys, want) in cases { + assert_eq!(committed(keys), want, "expected {keys} to stay {want}"); + } +} + +#[test] +fn untransformed_english_passes_through() { + // Words with no tone/mark letters never transform, so nothing to restore. + for keys in ["type", "code", "hello", "the", "and"] { + assert_eq!(committed(keys), keys); + assert!(!Engine::should_restore_word(&telex(keys), keys)); + } +} + +#[test] +fn process_key_restores_on_flush() { + // Drive the per-keystroke engine API and confirm the flush commits English. + let mut engine = Engine::new(InputMethod::Telex); + engine.set_enabled(true); + for ch in "cargo".chars() { + engine.process_key(ch); + } + // Mid-word the buffer is the Vietnamese composition. 
+ assert_eq!(engine.buffer(), "cảgo"); + // On flush the engine should emit a Replace back to the raw English word. + let event = engine.process_key(' '); + match event { + Some(vietc_engine::EngineEvent::Replace { insert, .. }) => { + assert_eq!(insert, "cargo"); + } + other => panic!("expected Replace to 'cargo', got {other:?}"), + } +} + +#[test] +fn auto_restore_can_be_disabled() { + let mut engine = Engine::new(InputMethod::Telex); + engine.set_enabled(true); + engine.set_auto_restore(false); + for ch in "cargo".chars() { + engine.process_key(ch); + } + let event = engine.process_key(' '); + match event { + Some(vietc_engine::EngineEvent::Replace { insert, .. }) => { + assert_eq!(insert, "cảgo", "with auto-restore off the VN form is kept"); + } + other => panic!("expected Replace to 'cảgo', got {other:?}"), + } +}