diff --git a/daemon/src/main.rs b/daemon/src/main.rs index 2c1b02c..c49ec33 100644 --- a/daemon/src/main.rs +++ b/daemon/src/main.rs @@ -127,6 +127,7 @@ impl Daemon { }; let mut engine = Engine::new(method); engine.set_enabled(config.start_enabled); + engine.set_auto_restore(config.auto_restore.enabled); engine_enabled.store(config.start_enabled, Ordering::SeqCst); for (shortcut, expansion) in &config.macros { @@ -197,6 +198,8 @@ impl Daemon { _ => InputMethod::Telex, }; self.engine.set_method(method); + self.engine + .set_auto_restore(new_config.auto_restore.enabled); self.engine.clear_macros(); for (shortcut, expansion) in &new_config.macros { @@ -287,6 +290,12 @@ impl Daemon { // backspace and retype it — doing so eats the spacing and shifts the // finished word left. Just type the flush char and clear state. if is_flush_char(ch) { + if !self.screen_output.is_empty() { + let backspaces = self.screen_output.chars().count(); + commands.push(OutputCommand::Backspace(backspaces)); + commands.push(OutputCommand::Type(self.word_to_commit())); + } + // Type the flush character itself commands.push(OutputCommand::Type(ch.to_string())); self.keystroke_history.clear(); self.screen_output.clear(); @@ -311,6 +320,14 @@ impl Daemon { // Engine flushed a word — it is already correctly on screen, so // just clear state without backspacing/retyping it (retyping eats // spacing and shifts the finished word left). + // Engine flushed a word — commit it and clear state + // The flush char (space/period/etc) was NOT in history, so we need to + // type whatever was on screen + the flush char + if !self.screen_output.is_empty() { + let backspaces = self.screen_output.chars().count(); + commands.push(OutputCommand::Backspace(backspaces)); + commands.push(OutputCommand::Type(self.word_to_commit())); + } self.keystroke_history.clear(); self.screen_output.clear(); return commands; @@ -371,6 +388,19 @@ impl Daemon { commands } + /// Decide what to type when committing the current word: the Vietnamese + /// composition normally, or — when smart auto-restore is enabled and the + /// word is English / not valid Vietnamese — the raw keystrokes typed. + fn word_to_commit(&self) -> String { + if self.config.auto_restore.enabled { + let raw: String = self.keystroke_history.iter().collect(); + if Engine::should_restore_word(&self.screen_output, &raw) { + return raw; + } + } + self.screen_output.clone() + } + /// Reset the replay state (on flush, focus loss, modifier key, etc.) fn replay_reset(&mut self) { self.keystroke_history.clear(); diff --git a/engine/src/engine.rs b/engine/src/engine.rs index 126af0c..9739614 100644 --- a/engine/src/engine.rs +++ b/engine/src/engine.rs @@ -1,6 +1,13 @@ use crate::bamboo::BambooEngine; +use crate::english::EnglishDict; use crate::input_method::InputMethod; use std::collections::HashMap; +use std::sync::OnceLock; + +fn english_dict() -> &'static EnglishDict { + static DICT: OnceLock = OnceLock::new(); + DICT.get_or_init(EnglishDict::new) +} #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)] pub enum EngineEvent { @@ -17,6 +24,7 @@ pub struct Engine { macros: HashMap, raw_buffer: String, paste_mode: bool, + auto_restore: bool, } impl Engine { @@ -26,9 +34,41 @@ impl Engine { macros: HashMap::new(), raw_buffer: String::new(), paste_mode: false, + auto_restore: true, } } + pub fn set_auto_restore(&mut self, enabled: bool) { + self.auto_restore = enabled; + } + + /// Decide whether a committed word should be reverted to the raw keystrokes + /// the user typed instead of the Vietnamese transformation. Returns true for + /// words that are clearly English / non-Vietnamese: a known English word, a + /// result that isn't a phonologically valid Vietnamese syllable, or one that + /// contains letters foreign to Vietnamese. `composed` is the transformed + /// output; `raw` is the literal keystrokes typed. + pub fn should_restore_word(composed: &str, raw: &str) -> bool { + // No transformation happened — English already passed through untouched. + if composed == raw { + return false; + } + + let dict = english_dict(); + let raw_lower = raw.to_lowercase(); + let composed_lower = composed.to_lowercase(); + + // Genuine Vietnamese words that happen to look like English stay as-is. + if dict.is_vietnamese_override(&composed_lower) { + return false; + } + if dict.is_english_word(&raw_lower) { + return true; + } + + !crate::spelling::is_valid_vietnamese_syllable(composed) + } + pub fn set_enabled(&mut self, enabled: bool) { self.bamboo.set_enabled(enabled); if !enabled { @@ -171,12 +211,28 @@ impl Engine { }); } + let raw = self.raw_buffer.clone(); self.reset(); // The composed word is already correctly on screen — re-typing it // here would trigger a redundant backspace + clipboard-paste cycle // that races against the separately-forwarded flush char, eating // spaces and merging words. Just finalize and let the flush char // through untouched. + if prev_len > 0 { + // Auto-restore: if the committed word is English / not valid + // Vietnamese, revert to the raw keystrokes the user typed. + if self.auto_restore && Engine::should_restore_word(&previous, &raw) { + return Some(EngineEvent::Replace { + backspaces: prev_len, + insert: raw, + }); + } + // Don't include flush char in insert — daemon forwards it separately + return Some(EngineEvent::Replace { + backspaces: prev_len, + insert: previous, + }); + } return None; } diff --git a/engine/src/english.rs b/engine/src/english.rs index a33e4fc..9099724 100644 --- a/engine/src/english.rs +++ b/engine/src/english.rs @@ -364,6 +364,11 @@ impl EnglishDict { self.words.contains(word) } + pub fn is_vietnamese_override(&self, word: &str) -> bool { + self.vietnamese_overrides.contains(word) + } + + #[allow(dead_code)] pub fn should_restore(&self, word: &str) -> bool { if self.vietnamese_overrides.contains(word) { return false; diff --git a/engine/src/lib.rs b/engine/src/lib.rs index 034e133..a1ccbdf 100644 --- a/engine/src/lib.rs +++ b/engine/src/lib.rs @@ -1,5 +1,6 @@ mod bamboo; mod engine; +mod english; mod input_method; pub mod spelling; diff --git a/engine/tests/auto_restore.rs b/engine/tests/auto_restore.rs new file mode 100644 index 0000000..64ee2de --- /dev/null +++ b/engine/tests/auto_restore.rs @@ -0,0 +1,101 @@ +//! Tests for smart English auto-restore: when Vietnamese mode is on, words that +//! are clearly English / not valid Vietnamese revert to the raw keystrokes the +//! user typed, while genuine Vietnamese is kept. + +use std::collections::HashMap; +use vietc_engine::{Engine, InputMethod}; + +fn telex(keys: &str) -> String { + Engine::replay_keystrokes(InputMethod::Telex, &HashMap::new(), &keys.chars().collect::>()).0 +} + +/// Resolve what would actually be committed for a Telex keystroke sequence, +/// applying the auto-restore decision the daemon makes on word commit. +fn committed(keys: &str) -> String { + let composed = telex(keys); + let raw: String = keys.chars().collect(); + if Engine::should_restore_word(&composed, &raw) { + raw + } else { + composed + } +} + +#[test] +fn english_words_are_restored() { + // (telex keystrokes, expected committed word) + let cases = [ + ("fix", "fix"), // foreign letter f + ("cargo", "cargo"), // invalid onset/coda + ("status", "status"), // invalid cluster + ("world", "world"), // invalid coda + ("english", "english"), + ("sweet", "sweet"), // invalid onset "sw" + ]; + for (keys, want) in cases { + assert_eq!(committed(keys), want, "expected {keys} to restore to {want}"); + } +} + +#[test] +fn vietnamese_words_are_kept() { + let cases = [ + ("tieengs", "tiếng"), + ("vieejt", "việt"), + ("quar", "quả"), + ("gif", "gì"), + ("khoong", "không"), + ("tooi", "tôi"), + ("banhf", "bành"), + ("ddi", "đi"), + ]; + for (keys, want) in cases { + assert_eq!(committed(keys), want, "expected {keys} to stay {want}"); + } +} + +#[test] +fn untransformed_english_passes_through() { + // Words with no tone/mark letters never transform, so nothing to restore. + for keys in ["type", "code", "hello", "the", "and"] { + assert_eq!(committed(keys), keys); + assert!(!Engine::should_restore_word(&telex(keys), keys)); + } +} + +#[test] +fn process_key_restores_on_flush() { + // Drive the per-keystroke engine API and confirm the flush commits English. + let mut engine = Engine::new(InputMethod::Telex); + engine.set_enabled(true); + for ch in "cargo".chars() { + engine.process_key(ch); + } + // Mid-word the buffer is the Vietnamese composition. + assert_eq!(engine.buffer(), "cảgo"); + // On flush the engine should emit a Replace back to the raw English word. + let event = engine.process_key(' '); + match event { + Some(vietc_engine::EngineEvent::Replace { insert, .. }) => { + assert_eq!(insert, "cargo"); + } + other => panic!("expected Replace to 'cargo', got {other:?}"), + } +} + +#[test] +fn auto_restore_can_be_disabled() { + let mut engine = Engine::new(InputMethod::Telex); + engine.set_enabled(true); + engine.set_auto_restore(false); + for ch in "cargo".chars() { + engine.process_key(ch); + } + let event = engine.process_key(' '); + match event { + Some(vietc_engine::EngineEvent::Replace { insert, .. }) => { + assert_eq!(insert, "cảgo", "with auto-restore off the VN form is kept"); + } + other => panic!("expected Replace to 'cảgo', got {other:?}"), + } +}