Merge pull request #2 from vndangkhoa/devin/1782469883-auto-restore-english
When Vietnamese mode is on, the engine transformed every word including English (test->tét, cargo->cảgo, status->státu). This wires up the previously-dead english.rs dictionary and spelling.rs validator so that on word commit, words that are clearly English or not phonologically valid Vietnamese are reverted to the raw keystrokes typed. Genuine Vietnamese (tiếng, việt, quả) is kept. Gated by the existing [auto_restore] enabled config (default on). Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: vndangkhoa <vonguyendangkhoa@gmail.com>
This commit is contained in:
commit
141df163e5
5 changed files with 174 additions and 2 deletions
|
|
@ -127,6 +127,7 @@ impl Daemon {
|
|||
};
|
||||
let mut engine = Engine::new(method);
|
||||
engine.set_enabled(config.start_enabled);
|
||||
engine.set_auto_restore(config.auto_restore.enabled);
|
||||
engine_enabled.store(config.start_enabled, Ordering::SeqCst);
|
||||
|
||||
for (shortcut, expansion) in &config.macros {
|
||||
|
|
@ -197,6 +198,8 @@ impl Daemon {
|
|||
_ => InputMethod::Telex,
|
||||
};
|
||||
self.engine.set_method(method);
|
||||
self.engine
|
||||
.set_auto_restore(new_config.auto_restore.enabled);
|
||||
|
||||
self.engine.clear_macros();
|
||||
for (shortcut, expansion) in &new_config.macros {
|
||||
|
|
@ -287,7 +290,7 @@ impl Daemon {
|
|||
if !self.screen_output.is_empty() {
|
||||
let backspaces = self.screen_output.chars().count();
|
||||
commands.push(OutputCommand::Backspace(backspaces));
|
||||
commands.push(OutputCommand::Type(self.screen_output.clone()));
|
||||
commands.push(OutputCommand::Type(self.word_to_commit()));
|
||||
}
|
||||
// Type the flush character itself
|
||||
commands.push(OutputCommand::Type(ch.to_string()));
|
||||
|
|
@ -317,7 +320,7 @@ impl Daemon {
|
|||
if !self.screen_output.is_empty() {
|
||||
let backspaces = self.screen_output.chars().count();
|
||||
commands.push(OutputCommand::Backspace(backspaces));
|
||||
commands.push(OutputCommand::Type(self.screen_output.clone()));
|
||||
commands.push(OutputCommand::Type(self.word_to_commit()));
|
||||
}
|
||||
self.keystroke_history.clear();
|
||||
self.screen_output.clear();
|
||||
|
|
@ -379,6 +382,19 @@ impl Daemon {
|
|||
commands
|
||||
}
|
||||
|
||||
/// Decide what to type when committing the current word: the Vietnamese
|
||||
/// composition normally, or — when smart auto-restore is enabled and the
|
||||
/// word is English / not valid Vietnamese — the raw keystrokes typed.
|
||||
fn word_to_commit(&self) -> String {
|
||||
if self.config.auto_restore.enabled {
|
||||
let raw: String = self.keystroke_history.iter().collect();
|
||||
if Engine::should_restore_word(&self.screen_output, &raw) {
|
||||
return raw;
|
||||
}
|
||||
}
|
||||
self.screen_output.clone()
|
||||
}
|
||||
|
||||
/// Reset the replay state (on flush, focus loss, modifier key, etc.)
|
||||
fn replay_reset(&mut self) {
|
||||
self.keystroke_history.clear();
|
||||
|
|
|
|||
|
|
@ -1,6 +1,13 @@
|
|||
use crate::bamboo::BambooEngine;
|
||||
use crate::english::EnglishDict;
|
||||
use crate::input_method::InputMethod;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
fn english_dict() -> &'static EnglishDict {
|
||||
static DICT: OnceLock<EnglishDict> = OnceLock::new();
|
||||
DICT.get_or_init(EnglishDict::new)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
|
||||
pub enum EngineEvent {
|
||||
|
|
@ -17,6 +24,7 @@ pub struct Engine {
|
|||
macros: HashMap<String, String>,
|
||||
raw_buffer: String,
|
||||
paste_mode: bool,
|
||||
auto_restore: bool,
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
|
|
@ -26,9 +34,41 @@ impl Engine {
|
|||
macros: HashMap::new(),
|
||||
raw_buffer: String::new(),
|
||||
paste_mode: false,
|
||||
auto_restore: true,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_auto_restore(&mut self, enabled: bool) {
|
||||
self.auto_restore = enabled;
|
||||
}
|
||||
|
||||
/// Decide whether a committed word should be reverted to the raw keystrokes
|
||||
/// the user typed instead of the Vietnamese transformation. Returns true for
|
||||
/// words that are clearly English / non-Vietnamese: a known English word, a
|
||||
/// result that isn't a phonologically valid Vietnamese syllable, or one that
|
||||
/// contains letters foreign to Vietnamese. `composed` is the transformed
|
||||
/// output; `raw` is the literal keystrokes typed.
|
||||
pub fn should_restore_word(composed: &str, raw: &str) -> bool {
|
||||
// No transformation happened — English already passed through untouched.
|
||||
if composed == raw {
|
||||
return false;
|
||||
}
|
||||
|
||||
let dict = english_dict();
|
||||
let raw_lower = raw.to_lowercase();
|
||||
let composed_lower = composed.to_lowercase();
|
||||
|
||||
// Genuine Vietnamese words that happen to look like English stay as-is.
|
||||
if dict.is_vietnamese_override(&composed_lower) {
|
||||
return false;
|
||||
}
|
||||
if dict.is_english_word(&raw_lower) {
|
||||
return true;
|
||||
}
|
||||
|
||||
!crate::spelling::is_valid_vietnamese_syllable(composed)
|
||||
}
|
||||
|
||||
pub fn set_enabled(&mut self, enabled: bool) {
|
||||
self.bamboo.set_enabled(enabled);
|
||||
if !enabled {
|
||||
|
|
@ -171,8 +211,17 @@ impl Engine {
|
|||
});
|
||||
}
|
||||
|
||||
let raw = self.raw_buffer.clone();
|
||||
self.reset();
|
||||
if prev_len > 0 {
|
||||
// Auto-restore: if the committed word is English / not valid
|
||||
// Vietnamese, revert to the raw keystrokes the user typed.
|
||||
if self.auto_restore && Engine::should_restore_word(&previous, &raw) {
|
||||
return Some(EngineEvent::Replace {
|
||||
backspaces: prev_len,
|
||||
insert: raw,
|
||||
});
|
||||
}
|
||||
// Don't include flush char in insert — daemon forwards it separately
|
||||
return Some(EngineEvent::Replace {
|
||||
backspaces: prev_len,
|
||||
|
|
|
|||
|
|
@ -364,6 +364,11 @@ impl EnglishDict {
|
|||
self.words.contains(word)
|
||||
}
|
||||
|
||||
pub fn is_vietnamese_override(&self, word: &str) -> bool {
|
||||
self.vietnamese_overrides.contains(word)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn should_restore(&self, word: &str) -> bool {
|
||||
if self.vietnamese_overrides.contains(word) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
mod bamboo;
|
||||
mod engine;
|
||||
mod english;
|
||||
mod input_method;
|
||||
pub mod spelling;
|
||||
|
||||
|
|
|
|||
101
engine/tests/auto_restore.rs
Normal file
101
engine/tests/auto_restore.rs
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
//! Tests for smart English auto-restore: when Vietnamese mode is on, words that
|
||||
//! are clearly English / not valid Vietnamese revert to the raw keystrokes the
|
||||
//! user typed, while genuine Vietnamese is kept.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use vietc_engine::{Engine, InputMethod};
|
||||
|
||||
fn telex(keys: &str) -> String {
|
||||
Engine::replay_keystrokes(InputMethod::Telex, &HashMap::new(), &keys.chars().collect::<Vec<_>>()).0
|
||||
}
|
||||
|
||||
/// Resolve what would actually be committed for a Telex keystroke sequence,
|
||||
/// applying the auto-restore decision the daemon makes on word commit.
|
||||
fn committed(keys: &str) -> String {
|
||||
let composed = telex(keys);
|
||||
let raw: String = keys.chars().collect();
|
||||
if Engine::should_restore_word(&composed, &raw) {
|
||||
raw
|
||||
} else {
|
||||
composed
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn english_words_are_restored() {
    // Each entry pairs the Telex keystrokes with the word expected on commit.
    const CASES: [(&str, &str); 6] = [
        ("fix", "fix"),       // foreign letter f
        ("cargo", "cargo"),   // invalid onset/coda
        ("status", "status"), // invalid cluster
        ("world", "world"),   // invalid coda
        ("english", "english"),
        ("sweet", "sweet"), // invalid onset "sw"
    ];

    for &(keys, want) in CASES.iter() {
        let got = committed(keys);
        assert_eq!(got, want, "expected {keys} to restore to {want}");
    }
}
|
||||
|
||||
#[test]
fn vietnamese_words_are_kept() {
    // Each entry pairs the Telex keystrokes with the Vietnamese word that
    // must survive the commit untouched.
    const CASES: [(&str, &str); 8] = [
        ("tieengs", "tiếng"),
        ("vieejt", "việt"),
        ("quar", "quả"),
        ("gif", "gì"),
        ("khoong", "không"),
        ("tooi", "tôi"),
        ("banhf", "bành"),
        ("ddi", "đi"),
    ];

    for &(keys, want) in CASES.iter() {
        let got = committed(keys);
        assert_eq!(got, want, "expected {keys} to stay {want}");
    }
}
|
||||
|
||||
#[test]
fn untransformed_english_passes_through() {
    // These words are expected to come out of the engine unchanged, so the
    // commit equals the input and no restore is requested.
    const PLAIN_WORDS: [&str; 5] = ["type", "code", "hello", "the", "and"];

    for &keys in PLAIN_WORDS.iter() {
        assert_eq!(committed(keys), keys);
        assert!(!Engine::should_restore_word(&telex(keys), keys));
    }
}
|
||||
|
||||
#[test]
fn process_key_restores_on_flush() {
    use vietc_engine::EngineEvent;

    // Exercise the per-keystroke API rather than the replay helper.
    let mut ime = Engine::new(InputMethod::Telex);
    ime.set_enabled(true);

    // Feed the word one key at a time; the per-key events are not inspected.
    "cargo".chars().for_each(|key| {
        ime.process_key(key);
    });

    // Before the flush the buffer still holds the Vietnamese composition.
    assert_eq!(ime.buffer(), "cảgo");

    // The flush key must produce a Replace carrying the raw English word.
    match ime.process_key(' ') {
        Some(EngineEvent::Replace { insert, .. }) => assert_eq!(insert, "cargo"),
        other => panic!("expected Replace to 'cargo', got {other:?}"),
    }
}
|
||||
|
||||
#[test]
fn auto_restore_can_be_disabled() {
    use vietc_engine::EngineEvent;

    let mut ime = Engine::new(InputMethod::Telex);
    ime.set_enabled(true);
    ime.set_auto_restore(false);

    // Feed the word one key at a time; the per-key events are not inspected.
    "cargo".chars().for_each(|key| {
        ime.process_key(key);
    });

    // With the feature off, the flush must keep the Vietnamese composition.
    match ime.process_key(' ') {
        Some(EngineEvent::Replace { insert, .. }) => {
            assert_eq!(insert, "cảgo", "with auto-restore off the VN form is kept");
        }
        other => panic!("expected Replace to 'cảgo', got {other:?}"),
    }
}
|
||||
Loading…
Reference in a new issue