diff --git a/pd/internal/desktop/input.go b/pd/internal/desktop/input.go
index c06f422..7191a33 100644
--- a/pd/internal/desktop/input.go
+++ b/pd/internal/desktop/input.go
@@ -109,18 +109,121 @@ func (d *Desktop) Type(text string) error {
 }
 
 // Key sends a key combination, clearing any active modifiers first.
+// The combo is normalized so that LLM-style names like "CTRL+L" are
+// translated to xdotool keysyms like "ctrl+l".
 func (d *Desktop) Key(combo string) error {
-	return d.runTool("xdotool", []string{"key", "--clearmodifiers", combo})
+	return d.runTool("xdotool", []string{"key", "--clearmodifiers", normalizeCombo(combo)})
 }
 
 // KeyDown presses a key without releasing it.
+// The key name is normalized by normalizeKeyName before it is sent.
 func (d *Desktop) KeyDown(key string) error {
-	return d.runTool("xdotool", []string{"keydown", key})
+	return d.runTool("xdotool", []string{"keydown", normalizeKeyName(key)})
 }
 
 // KeyUp releases a previously pressed key.
+// The key name is normalized by normalizeKeyName before it is sent.
 func (d *Desktop) KeyUp(key string) error {
-	return d.runTool("xdotool", []string{"keyup", key})
+	return d.runTool("xdotool", []string{"keyup", normalizeKeyName(key)})
+}
+
+// keyNameMap maps case-folded key names that LLMs commonly produce to
+// the X11 keysym strings that xdotool expects. Entries are keyed by
+// the lowercase form so lookups can be case-insensitive.
+var keyNameMap = map[string]string{
+	// Modifiers.
+	"ctrl":    "ctrl",
+	"control": "ctrl",
+	"alt":     "alt",
+	"option":  "alt",
+	"shift":   "shift",
+	"super":   "super",
+	"meta":    "super",
+	"command": "super",
+	"cmd":     "super",
+	"win":     "super",
+	"windows": "super",
+
+	// Common special keys.
+	"return":    "Return",
+	"enter":     "Return",
+	"backspace": "BackSpace",
+	"tab":       "Tab",
+	"escape":    "Escape",
+	"esc":       "Escape",
+	"space":     "space",
+	"delete":    "Delete",
+	"del":       "Delete",
+	"insert":    "Insert",
+	"home":      "Home",
+	"end":       "End",
+	"pageup":    "Prior",
+	"page_up":   "Prior",
+	"pgup":      "Prior",
+	"pagedown":  "Next",
+	"page_down": "Next",
+	"pgdn":      "Next",
+
+	// Arrow keys.
+	"up":    "Up",
+	"down":  "Down",
+	"left":  "Left",
+	"right": "Right",
+
+	// Function keys.
+	"f1":  "F1",
+	"f2":  "F2",
+	"f3":  "F3",
+	"f4":  "F4",
+	"f5":  "F5",
+	"f6":  "F6",
+	"f7":  "F7",
+	"f8":  "F8",
+	"f9":  "F9",
+	"f10": "F10",
+	"f11": "F11",
+	"f12": "F12",
+}
+
+// normalizeKeyName translates a single key name into the X11 keysym
+// that xdotool expects. LLMs frequently send names like "CTRL",
+// "ENTER", or "F4" in varying cases that xdotool does not recognise.
+// Surrounding whitespace is trimmed first so that the pieces of a
+// spaced combo such as "Ctrl + L" still resolve. Known names are
+// resolved through keyNameMap. Single ASCII letters are lowercased
+// because xdotool treats an uppercase letter as shift+<letter>.
+// Anything else is returned trimmed but otherwise unchanged so that
+// already-valid keysyms are not broken; empty or whitespace-only
+// input is returned exactly as given.
+func normalizeKeyName(name string) string {
+	trimmed := strings.TrimSpace(name)
+	if trimmed == "" {
+		// Nothing to translate: hand empty or whitespace-only input
+		// back untouched rather than inventing a key.
+		return name
+	}
+	if mapped, ok := keyNameMap[strings.ToLower(trimmed)]; ok {
+		return mapped
+	}
+	// A bare uppercase ASCII letter should be lowered; xdotool
+	// interprets "L" as shift+l which is not the intended behaviour
+	// when an LLM sends "CTRL+L".
+	if len(trimmed) == 1 && trimmed[0] >= 'A' && trimmed[0] <= 'Z' {
+		return strings.ToLower(trimmed)
+	}
+	return trimmed
+}
+
+// normalizeCombo normalises a "+"-separated key combination string.
+// Each component is passed through normalizeKeyName so that
+// "CTRL+L" becomes "ctrl+l", "ALT+F4" becomes "alt+F4" and the spaced
+// form "Ctrl + L" also becomes "ctrl+l".
+func normalizeCombo(combo string) string {
+	parts := strings.Split(combo, "+")
+	for i, p := range parts {
+		parts[i] = normalizeKeyName(p)
+	}
+	return strings.Join(parts, "+")
 }
 
 // buttonToNumber maps a button name to its X11 button number.
diff --git a/pd/internal/desktop/input_test.go b/pd/internal/desktop/input_test.go
new file mode 100644
index 0000000..a845014
--- /dev/null
+++ b/pd/internal/desktop/input_test.go
@@ -0,0 +1,129 @@
+package desktop
+
+import "testing"
+
+// keyNameCase pairs a raw key string with the normalized form expected back.
+type keyNameCase struct {
+	input string
+	want  string
+}
+
+// assertNormalized runs normalize over every case and reports each
+// mismatch, using fnName to label the failing call in the message.
+func assertNormalized(t *testing.T, fnName string, normalize func(string) string, cases []keyNameCase) {
+	t.Helper()
+	for _, c := range cases {
+		got := normalize(c.input)
+		if got == c.want {
+			continue
+		}
+		t.Errorf("%s(%q) = %q, want %q", fnName, c.input, got, c.want)
+	}
+}
+
+// TestNormalizeKeyName_Modifiers checks that modifier aliases map to the
+// lowercase modifier names regardless of the case they arrive in.
+func TestNormalizeKeyName_Modifiers(t *testing.T) {
+	assertNormalized(t, "normalizeKeyName", normalizeKeyName, []keyNameCase{
+		{"CTRL", "ctrl"},
+		{"ctrl", "ctrl"},
+		{"Ctrl", "ctrl"},
+		{"CONTROL", "ctrl"},
+		{"control", "ctrl"},
+		{"ALT", "alt"},
+		{"alt", "alt"},
+		{"SHIFT", "shift"},
+		{"shift", "shift"},
+		{"SUPER", "super"},
+		{"META", "super"},
+		{"COMMAND", "super"},
+		{"CMD", "super"},
+		{"cmd", "super"},
+	})
+}
+
+// TestNormalizeKeyName_SpecialKeys checks editing, navigation and arrow
+// key names, including aliases such as ENTER, ESC and DEL.
+func TestNormalizeKeyName_SpecialKeys(t *testing.T) {
+	assertNormalized(t, "normalizeKeyName", normalizeKeyName, []keyNameCase{
+		{"RETURN", "Return"},
+		{"Return", "Return"},
+		{"ENTER", "Return"},
+		{"enter", "Return"},
+		{"BACKSPACE", "BackSpace"},
+		{"TAB", "Tab"},
+		{"ESCAPE", "Escape"},
+		{"ESC", "Escape"},
+		{"SPACE", "space"},
+		{"DELETE", "Delete"},
+		{"DEL", "Delete"},
+		{"INSERT", "Insert"},
+		{"HOME", "Home"},
+		{"END", "End"},
+		{"PAGEUP", "Prior"},
+		{"PAGE_UP", "Prior"},
+		{"PAGEDOWN", "Next"},
+		{"PAGE_DOWN", "Next"},
+		{"UP", "Up"},
+		{"DOWN", "Down"},
+		{"LEFT", "Left"},
+		{"RIGHT", "Right"},
+	})
+}
+
+// TestNormalizeKeyName_FunctionKeys checks that function keys come back
+// with an uppercase F whichever case was supplied.
+func TestNormalizeKeyName_FunctionKeys(t *testing.T) {
+	assertNormalized(t, "normalizeKeyName", normalizeKeyName, []keyNameCase{
+		{"f1", "F1"},
+		{"F1", "F1"},
+		{"f4", "F4"},
+		{"F4", "F4"},
+		{"f12", "F12"},
+		{"F12", "F12"},
+	})
+}
+
+// TestNormalizeKeyName_SingleLetters checks the handling of one-letter
+// key names.
+func TestNormalizeKeyName_SingleLetters(t *testing.T) {
+	assertNormalized(t, "normalizeKeyName", normalizeKeyName, []keyNameCase{
+		// Uppercase single ASCII letters are lowered.
+		{"L", "l"},
+		{"A", "a"},
+		{"Z", "z"},
+		// Already-lowercase letters pass through.
+		{"l", "l"},
+		{"a", "a"},
+	})
+}
+
+// TestNormalizeKeyName_Passthrough checks that already-valid X11 keysyms
+// and unknown strings are returned exactly as given.
+func TestNormalizeKeyName_Passthrough(t *testing.T) {
+	for _, keysym := range []string{"Return", "BackSpace", "XF86AudioPlay", "1", "semicolon"} {
+		if got := normalizeKeyName(keysym); got != keysym {
+			t.Errorf("normalizeKeyName(%q) = %q, want passthrough", keysym, got)
+		}
+	}
+}
+
+// TestNormalizeCombo checks whole "+"-separated combinations.
+func TestNormalizeCombo(t *testing.T) {
+	assertNormalized(t, "normalizeCombo", normalizeCombo, []keyNameCase{
+		// Typical LLM-produced combos.
+		{"CTRL+L", "ctrl+l"},
+		{"ALT+F4", "alt+F4"},
+		{"SHIFT+TAB", "shift+Tab"},
+		{"CTRL+SHIFT+T", "ctrl+shift+t"},
+		{"SUPER+D", "super+d"},
+		{"CMD+A", "super+a"},
+		// Already-correct xdotool combos are unchanged.
+		{"ctrl+l", "ctrl+l"},
+		{"alt+F4", "alt+F4"},
+		{"shift+Tab", "shift+Tab"},
+		// Single key (no +) still normalised.
+		{"RETURN", "Return"},
+		{"a", "a"},
+	})
+}