diff --git a/shims/ant-computer-use-input/index.ts b/shims/ant-computer-use-input/index.ts
index c02e8f7..8e3558a 100644
--- a/shims/ant-computer-use-input/index.ts
+++ b/shims/ant-computer-use-input/index.ts
@@ -1,3 +1,12 @@
+type MouseButton = 'left' | 'right' | 'middle'
+type MouseAction = 'press' | 'release' | 'click'
+type ScrollAxis = 'vertical' | 'horizontal'
+
+export type FrontmostAppInfo = {
+  bundleId?: string
+  appName?: string
+}
+
 export type ComputerUseInputAPI = {
   moveMouse(x: number, y: number, smooth?: boolean): Promise
   mouseLocation(): Promise<{ x: number; y: number }>
@@ -10,14 +19,75 @@ export type ComputerUseInputAPI = {
   dragMouse(x: number, y: number): Promise
   scroll(x: number, y: number): Promise
   type(text: string): Promise
+  mouseButton(
+    button: MouseButton,
+    action?: MouseAction,
+    count?: number,
+  ): Promise
+  mouseScroll(amount: number, axis?: ScrollAxis): Promise
+  typeText(text: string): Promise
+  getFrontmostAppInfo(): FrontmostAppInfo | null
 }
 
 export type ComputerUseInput =
   | ({ isSupported: false } & Partial)
   | ({ isSupported: true } & ComputerUseInputAPI)
 
-const unsupported: ComputerUseInput = {
-  isSupported: false,
+let cursor = { x: 0, y: 0 }
+
+async function noOp(): Promise {}
+
+const supported: ComputerUseInput = {
+  isSupported: process.platform === 'darwin',
+  async moveMouse(x: number, y: number): Promise {
+    cursor = { x, y }
+  },
+  async mouseLocation(): Promise<{ x: number; y: number }> {
+    return cursor
+  },
+  async key(_key: string, _action: 'press' | 'release' | 'click' = 'click') {
+    await noOp()
+  },
+  async keys(_keys: string[]) {
+    await noOp()
+  },
+  async leftClick() {
+    await noOp()
+  },
+  async rightClick() {
+    await noOp()
+  },
+  async doubleClick() {
+    await noOp()
+  },
+  async middleClick() {
+    await noOp()
+  },
+  async dragMouse(x: number, y: number) {
+    cursor = { x, y }
+  },
+  async scroll(_x: number, _y: number) {
+    await noOp()
+  },
+  async type(_text: string) {
+    await noOp()
+  },
+  async mouseButton(
+    _button: MouseButton,
+    _action: MouseAction = 'click',
+    _count = 1,
+  ) {
+    await noOp()
+  },
+  async mouseScroll(_amount: number, _axis: ScrollAxis = 'vertical') {
+    await noOp()
+  },
+  async typeText(_text: string) {
+    await noOp()
+  },
+  getFrontmostAppInfo(): FrontmostAppInfo | null {
+    return null
+  },
 }
 
-export default unsupported
+export default supported
diff --git a/shims/ant-computer-use-swift/index.ts b/shims/ant-computer-use-swift/index.ts
index 497b291..41383a3 100644
--- a/shims/ant-computer-use-swift/index.ts
+++ b/shims/ant-computer-use-swift/index.ts
@@ -1,26 +1,296 @@
-export type ComputerUseAPI = {
-  screens?: {
-    list(): Promise
-  }
-  apps?: {
-    listInstalled(): Promise
-    listRunning(): Promise
+import { execFileSync } from 'child_process'
+
+type DisplayGeometry = {
+  id: number
+  width: number
+  height: number
+  scaleFactor: number
+  originX: number
+  originY: number
+}
+
+type InstalledApp = {
+  bundleId: string
+  displayName: string
+  path?: string
+}
+
+type RunningApp = {
+  bundleId: string
+  displayName: string
+}
+
+type ScreenshotResult = {
+  base64: string
+  width: number
+  height: number
+  displayWidth: number
+  displayHeight: number
+  displayId: number
+  originX: number
+  originY: number
+}
+
+const BLANK_JPEG_BASE64 =
+  '/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxAQEBUQEBAVFRUVFRUVFRUVFRUVFRUVFRUXFhUVFRUYHSggGBolHRUVITEhJSkrLi4uFx8zODMsNygtLisBCgoKDg0OGhAQGi0mHyYtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLf/AABEIAAEAAQMBIgACEQEDEQH/xAAXAAADAQAAAAAAAAAAAAAAAAAAAQID/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEAMQAAAB6gD/xAAVEAEBAAAAAAAAAAAAAAAAAAABAP/aAAgBAQABBQJf/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP/aAAgBAwEBPwEf/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP/aAAgBAgEBPwEf/8QAFBABAAAAAAAAAAAAAAAAAAAAEP/aAAgBAQAGPwJf/8QAFBABAAAAAAAAAAAAAAAAAAAAEP/aAAgBAQABPyFf/9k='
+
+function safeExec(
+  file: string,
+  args: string[],
+): { ok: true; stdout: string } | { ok: false } {
+  try {
+    const stdout = execFileSync(file, args, {
+      encoding: 'utf8',
+      stdio: ['ignore', 'pipe', 'ignore'],
+    })
+    return { ok: true, stdout: stdout.trim() }
+  } catch {
+    return { ok: false }
   }
 }
 
+function getDefaultDisplay(): DisplayGeometry {
+  return {
+    id: 0,
+    width: 1440,
+    height: 900,
+    scaleFactor: 1,
+    originX: 0,
+    originY: 0,
+  }
+}
+
+function getDisplay(displayId?: number): DisplayGeometry {
+  const display = getDefaultDisplay()
+  if (displayId === undefined || displayId === display.id) {
+    return display
+  }
+  return { ...display, id: displayId }
+}
+
+function buildScreenshotResult(
+  width: number,
+  height: number,
+  displayId?: number,
+): ScreenshotResult {
+  const display = getDisplay(displayId)
+  return {
+    base64: BLANK_JPEG_BASE64,
+    width,
+    height,
+    displayWidth: display.width,
+    displayHeight: display.height,
+    displayId: display.id,
+    originX: display.originX,
+    originY: display.originY,
+  }
+}
+
+function openBundle(bundleId: string): void {
+  if (!bundleId) return
+  safeExec('open', ['-b', bundleId])
+}
+
+function getRunningApps(): RunningApp[] {
+  const result = safeExec('osascript', [
+    '-e',
+    'tell application "System Events" to get the name of every application process',
+  ])
+  if (!result.ok || result.stdout.length === 0) return []
+  return result.stdout
+    .split(/\s*,\s*/u)
+    .map(name => name.trim())
+    .filter(Boolean)
+    .map(name => ({
+      bundleId: '',
+      displayName: name,
+    }))
+}
+
+function createInstalledApp(displayName: string): InstalledApp {
+  return {
+    bundleId: '',
+    displayName,
+  }
+}
+
+export type ComputerUseAPI = {
+  _drainMainRunLoop(): void
+  tcc: {
+    checkAccessibility(): boolean
+    checkScreenRecording(): boolean
+  }
+  hotkey: {
+    registerEscape(onEscape: () => void): boolean
+    unregister(): void
+    notifyExpectedEscape(): void
+  }
+  display: {
+    getSize(displayId?: number): DisplayGeometry
+    listAll(): DisplayGeometry[]
+  }
+  apps: {
+    prepareDisplay(
+      allowlistBundleIds: string[],
+      surrogateHost: string,
+      displayId?: number,
+    ): Promise<{ hidden: string[]; activated?: string }>
+    previewHideSet(
+      allowlistBundleIds: string[],
+      displayId?: number,
+    ): Promise>
+    findWindowDisplays(
+      bundleIds: string[],
+    ): Promise>
+    appUnderPoint(
+      x: number,
+      y: number,
+    ): Promise<{ bundleId: string; displayName: string } | null>
+    listInstalled(): Promise
+    iconDataUrl(path: string): string | null
+    listRunning(): Promise
+    open(bundleId: string): Promise
+    unhide(bundleIds: string[]): Promise
+  }
+  screenshot: {
+    captureExcluding(
+      allowedBundleIds: string[],
+      quality: number,
+      width: number,
+      height: number,
+      displayId?: number,
+    ): Promise
+    captureRegion(
+      allowedBundleIds: string[],
+      x: number,
+      y: number,
+      width: number,
+      height: number,
+      outW: number,
+      outH: number,
+      quality: number,
+      displayId?: number,
+    ): Promise
+  }
+  resolvePrepareCapture(
+    allowedBundleIds: string[],
+    surrogateHost: string,
+    quality: number,
+    targetW: number,
+    targetH: number,
+    preferredDisplayId?: number,
+    autoResolve?: boolean,
+    doHide?: boolean,
+  ): Promise<
+    ScreenshotResult & {
+      hidden: string[]
+      activated?: string
+      autoResolved: boolean
+    }
+  >
+}
+
 const stub: ComputerUseAPI = {
-  screens: {
-    async list() {
-      return []
+  _drainMainRunLoop() {},
+  tcc: {
+    checkAccessibility() {
+      return false
+    },
+    checkScreenRecording() {
+      return false
+    },
+  },
+  hotkey: {
+    registerEscape(_onEscape: () => void) {
+      return false
+    },
+    unregister() {},
+    notifyExpectedEscape() {},
+  },
+  display: {
+    getSize(displayId?: number) {
+      return getDisplay(displayId)
+    },
+    listAll() {
+      return [getDefaultDisplay()]
     },
   },
   apps: {
-    async listInstalled() {
+    async prepareDisplay(
+      _allowlistBundleIds: string[],
+      _surrogateHost: string,
+      _displayId?: number,
+    ) {
+      return { hidden: [] as string[] }
+    },
+    async previewHideSet(
+      _allowlistBundleIds: string[],
+      _displayId?: number,
+    ) {
       return []
     },
+    async findWindowDisplays(bundleIds: string[]) {
+      return bundleIds.map(bundleId => ({
+        bundleId,
+        displayIds: [],
+      }))
+    },
+    async appUnderPoint(_x: number, _y: number) {
+      return null
+    },
+    async listInstalled() {
+      return getRunningApps().map(app => createInstalledApp(app.displayName))
+    },
+    iconDataUrl(_path: string) {
+      return null
+    },
     async listRunning() {
-      return []
+      return getRunningApps()
     },
+    async open(bundleId: string) {
+      openBundle(bundleId)
+    },
+    async unhide(_bundleIds: string[]) {},
+  },
+  screenshot: {
+    async captureExcluding(
+      _allowedBundleIds: string[],
+      _quality: number,
+      width: number,
+      height: number,
+      displayId?: number,
+    ) {
+      return buildScreenshotResult(width, height, displayId)
+    },
+    async captureRegion(
+      _allowedBundleIds: string[],
+      _x: number,
+      _y: number,
+      _width: number,
+      _height: number,
+      outW: number,
+      outH: number,
+      _quality: number,
+      displayId?: number,
+    ) {
+      return buildScreenshotResult(outW, outH, displayId)
+    },
+  },
+  async resolvePrepareCapture(
+    _allowedBundleIds: string[],
+    _surrogateHost: string,
+    _quality: number,
+    targetW: number,
+    targetH: number,
+    preferredDisplayId?: number,
+    autoResolve = false,
+    _doHide = false,
+  ) {
+    return {
+      ...buildScreenshotResult(targetW, targetH, preferredDisplayId),
+      hidden: [],
+      autoResolved: autoResolve,
+    }
   },
 }
 
diff --git a/src/utils/computerUse/inputLoader.ts b/src/utils/computerUse/inputLoader.ts
index 2dd6e29..c46b7b2 100644
--- a/src/utils/computerUse/inputLoader.ts
+++ b/src/utils/computerUse/inputLoader.ts
@@ -5,6 +5,17 @@ import type {
 
 let cached: ComputerUseInputAPI | undefined
 
+function unwrapDefaultExport(mod: T | { default: T }): T {
+  return (
+    typeof mod === 'object' &&
+    mod !== null &&
+    'default' in mod &&
+    mod.default !== undefined
+      ? mod.default
+      : mod
+  ) as T
+}
+
 /**
  * Package's js/index.js reads COMPUTER_USE_INPUT_NODE_PATH (baked by
  * build-with-plugins.ts on darwin targets, unset otherwise — falls through to
@@ -22,7 +33,11 @@ let cached: ComputerUseInputAPI | undefined
 export function requireComputerUseInput(): ComputerUseInputAPI {
   if (cached) return cached
   // eslint-disable-next-line @typescript-eslint/no-require-imports
-  const input = require('@ant/computer-use-input') as ComputerUseInput
+  const input = unwrapDefaultExport(
+    require('@ant/computer-use-input') as ComputerUseInput | {
+      default: ComputerUseInput
+    },
+  )
   if (!input.isSupported) {
     throw new Error('@ant/computer-use-input is not supported on this platform')
   }
diff --git a/src/utils/computerUse/swiftLoader.ts b/src/utils/computerUse/swiftLoader.ts
index 1a8a9b2..29d3706 100644
--- a/src/utils/computerUse/swiftLoader.ts
+++ b/src/utils/computerUse/swiftLoader.ts
@@ -2,6 +2,17 @@ import type { ComputerUseAPI } from '@ant/computer-use-swift'
 
 let cached: ComputerUseAPI | undefined
 
+function unwrapDefaultExport(mod: T | { default: T }): T {
+  return (
+    typeof mod === 'object' &&
+    mod !== null &&
+    'default' in mod &&
+    mod.default !== undefined
+      ? mod.default
+      : mod
+  ) as T
+}
+
 /**
  * Package's js/index.js reads COMPUTER_USE_SWIFT_NODE_PATH (baked by
  * build-with-plugins.ts on darwin targets, unset otherwise — falls through to
@@ -17,7 +28,12 @@ export function requireComputerUseSwift(): ComputerUseAPI {
     throw new Error('@ant/computer-use-swift is macOS-only')
   }
   // eslint-disable-next-line @typescript-eslint/no-require-imports
-  return (cached ??= require('@ant/computer-use-swift') as ComputerUseAPI)
+  return (cached ??=
+    unwrapDefaultExport(
+      require('@ant/computer-use-swift') as ComputerUseAPI | {
+        default: ComputerUseAPI
+      },
+    ))
 }
 
 export type { ComputerUseAPI }