fix: computer use

This commit is contained in:
oboard 2026-03-31 19:56:09 +08:00
parent 73b53870f6
commit 2c185b8470
4 changed files with 388 additions and 17 deletions

View File

@ -1,3 +1,12 @@
/** Button names accepted by `mouseButton`. */
type MouseButton =
  | 'left'
  | 'right'
  | 'middle'

/** Actions accepted by `mouseButton`; the same literals are accepted by `key`. */
type MouseAction =
  | 'press'
  | 'release'
  | 'click'

/** Axis names accepted by `mouseScroll`. */
type ScrollAxis =
  | 'vertical'
  | 'horizontal'
/**
 * Shape returned by `getFrontmostAppInfo`.
 *
 * Both fields are optional, so an empty object is a valid value.
 */
export type FrontmostAppInfo = {
  /** Bundle identifier of the application, when known. */
  bundleId?: string

  /** Name of the application, when known. */
  appName?: string
}
export type ComputerUseInputAPI = { export type ComputerUseInputAPI = {
moveMouse(x: number, y: number, smooth?: boolean): Promise<void> moveMouse(x: number, y: number, smooth?: boolean): Promise<void>
mouseLocation(): Promise<{ x: number; y: number }> mouseLocation(): Promise<{ x: number; y: number }>
@ -10,14 +19,75 @@ export type ComputerUseInputAPI = {
dragMouse(x: number, y: number): Promise<void> dragMouse(x: number, y: number): Promise<void>
scroll(x: number, y: number): Promise<void> scroll(x: number, y: number): Promise<void>
type(text: string): Promise<void> type(text: string): Promise<void>
mouseButton(
button: MouseButton,
action?: MouseAction,
count?: number,
): Promise<void>
mouseScroll(amount: number, axis?: ScrollAxis): Promise<void>
typeText(text: string): Promise<void>
getFrontmostAppInfo(): FrontmostAppInfo | null
} }
export type ComputerUseInput = export type ComputerUseInput =
| ({ isSupported: false } & Partial<ComputerUseInputAPI>) | ({ isSupported: false } & Partial<ComputerUseInputAPI>)
| ({ isSupported: true } & ComputerUseInputAPI) | ({ isSupported: true } & ComputerUseInputAPI)
const unsupported: ComputerUseInput = { let cursor = { x: 0, y: 0 }
isSupported: false,
/**
 * Placeholder used by the input methods that have no real implementation:
 * returns a promise that resolves to `undefined` and has no side effects.
 */
async function noOp(): Promise<void> {
  return
}
/**
 * Pure-JavaScript implementation of the input API that never touches real
 * input devices: pointer moves are only recorded in the module-level
 * `cursor`, and every other action resolves without doing anything.
 */
const supported: ComputerUseInput = {
// Reported as supported only when the process is running on macOS.
isSupported: process.platform === 'darwin',
// Stores the requested position in `cursor`; the optional `smooth` flag of
// the API type is not declared here and therefore has no effect.
async moveMouse(x: number, y: number): Promise<void> {
cursor = { x, y }
},
// Returns the last position stored by `moveMouse` / `dragMouse`
// (or the initial value of `cursor` if neither has been called).
async mouseLocation(): Promise<{ x: number; y: number }> {
return cursor
},
// Keyboard and click methods below are placeholders: they await `noOp`
// and ignore their arguments.
async key(_key: string, _action: 'press' | 'release' | 'click' = 'click') {
await noOp()
},
async keys(_keys: string[]) {
await noOp()
},
async leftClick() {
await noOp()
},
async rightClick() {
await noOp()
},
async doubleClick() {
await noOp()
},
async middleClick() {
await noOp()
},
// Like `moveMouse`, a drag only updates the recorded cursor position.
async dragMouse(x: number, y: number) {
cursor = { x, y }
},
async scroll(_x: number, _y: number) {
await noOp()
},
async type(_text: string) {
await noOp()
},
// Placeholder: `_action` defaults to 'click' and `_count` to 1, but none of
// the arguments are used.
async mouseButton(
_button: MouseButton,
_action: MouseAction = 'click',
_count = 1,
) {
await noOp()
},
// Placeholder: `_axis` defaults to 'vertical'; arguments are unused.
async mouseScroll(_amount: number, _axis: ScrollAxis = 'vertical') {
await noOp()
},
async typeText(_text: string) {
await noOp()
},
// Always reports that no frontmost-application information is available.
getFrontmostAppInfo(): FrontmostAppInfo | null {
return null
},
} }
export default unsupported export default supported

View File

@ -1,26 +1,296 @@
export type ComputerUseAPI = { import { execFileSync } from 'child_process'
screens?: {
list(): Promise<unknown[]> type DisplayGeometry = {
} id: number
apps?: { width: number
listInstalled(): Promise<unknown[]> height: number
listRunning(): Promise<unknown[]> scaleFactor: number
originX: number
originY: number
}
/** Entry of the list resolved by `apps.listInstalled()`. */
type InstalledApp = {
  /** Bundle identifier; `createInstalledApp` fills this with an empty string. */
  bundleId: string

  /** Name of the application. */
  displayName: string

  /** Optional path associated with the application. */
  path?: string
}
/** Entry of the list resolved by `apps.listRunning()`. */
type RunningApp = {
  /** Bundle identifier; `getRunningApps` fills this with an empty string. */
  bundleId: string

  /** Process name of the application. */
  displayName: string
}
/** Result shape shared by the screenshot methods and `resolvePrepareCapture`. */
type ScreenshotResult = {
  /** Base64-encoded image payload. */
  base64: string

  /** Width and height the caller requested for the output. */
  width: number
  height: number

  /** Width and height copied from the display's geometry. */
  displayWidth: number
  displayHeight: number

  /** Identifier of the display the result refers to. */
  displayId: number

  /** Origin copied from the display's geometry. */
  originX: number
  originY: number
}
/**
 * Base64 payload that `buildScreenshotResult` returns in place of a real
 * capture. The data starts with the JPEG/JFIF signature (`/9j/4AAQSkZJRg`).
 * NOTE(review): presumably a minimal blank image, as the name says — the
 * pixel content has not been verified here.
 */
const BLANK_JPEG_BASE64 =
'/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxAQEBUQEBAVFRUVFRUVFRUVFRUVFRUVFRUXFhUVFRUYHSggGBolHRUVITEhJSkrLi4uFx8zODMsNygtLisBCgoKDg0OGhAQGi0mHyYtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLf/AABEIAAEAAQMBIgACEQEDEQH/xAAXAAADAQAAAAAAAAAAAAAAAAAAAQID/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEAMQAAAB6gD/xAAVEAEBAAAAAAAAAAAAAAAAAAABAP/aAAgBAQABBQJf/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP/aAAgBAwEBPwEf/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP/aAAgBAgEBPwEf/8QAFBABAAAAAAAAAAAAAAAAAAAAEP/aAAgBAQAGPwJf/8QAFBABAAAAAAAAAAAAAAAAAAAAEP/aAAgBAQABPyFf/9k='
/**
 * Runs an executable synchronously and reports the outcome as a result
 * object instead of throwing.
 *
 * @param file - Executable to run.
 * @param args - Arguments passed to the executable.
 * @returns `{ ok: true, stdout }` with the trimmed UTF-8 output on success,
 *   or `{ ok: false }` when `execFileSync` throws; the error itself is
 *   discarded.
 */
function safeExec(
file: string,
args: string[],
): { ok: true; stdout: string } | { ok: false } {
try {
const stdout = execFileSync(file, args, {
encoding: 'utf8',
// stdin is not provided, stdout is captured, stderr is discarded.
stdio: ['ignore', 'pipe', 'ignore'],
})
return { ok: true, stdout: stdout.trim() }
} catch {
// Best-effort helper: any failure collapses to a plain "not ok" result.
return { ok: false }
} }
} }
/**
 * Builds the hard-coded geometry used for every display: id 0, 1440x900,
 * scale factor 1, positioned at the origin.
 *
 * @returns A fresh `DisplayGeometry` object on each call.
 */
function getDefaultDisplay(): DisplayGeometry {
  const origin = 0
  const fallback: DisplayGeometry = {
    id: 0,
    width: 1440,
    height: 900,
    scaleFactor: 1,
    originX: origin,
    originY: origin,
  }
  return fallback
}
/**
 * Returns the default display geometry, relabelled with `displayId` when one
 * is given. No lookup is performed: any id yields the same dimensions.
 *
 * @param displayId - Optional display id to stamp onto the result.
 * @returns The default geometry carrying the requested id (or the default id
 *   when `displayId` is omitted).
 */
function getDisplay(displayId?: number): DisplayGeometry {
  const base = getDefaultDisplay()
  const wantsOtherId = displayId !== undefined && displayId !== base.id
  if (wantsOtherId) {
    return { ...base, id: displayId }
  }
  return base
}
/**
 * Assembles a placeholder screenshot result: the image payload is always
 * `BLANK_JPEG_BASE64`, `width`/`height` echo the arguments, and the display
 * fields come from `getDisplay(displayId)`.
 *
 * @param width - Output width to report.
 * @param height - Output height to report.
 * @param displayId - Optional display id forwarded to `getDisplay`.
 * @returns The populated `ScreenshotResult`.
 */
function buildScreenshotResult(
  width: number,
  height: number,
  displayId?: number,
): ScreenshotResult {
  const {
    id,
    width: displayWidth,
    height: displayHeight,
    originX,
    originY,
  } = getDisplay(displayId)

  return {
    base64: BLANK_JPEG_BASE64,
    width,
    height,
    displayWidth,
    displayHeight,
    displayId: id,
    originX,
    originY,
  }
}
/**
 * Launches an application through `open -b <bundleId>`.
 *
 * Does nothing for an empty bundle id, and ignores the result of the
 * command, so failures are silent.
 *
 * @param bundleId - Bundle identifier handed to `open -b`.
 */
function openBundle(bundleId: string): void {
  if (bundleId) {
    safeExec('open', ['-b', bundleId])
  }
}
/**
 * Lists running application processes by asking System Events (through
 * `osascript`) for their names and splitting the comma-separated reply.
 *
 * @returns One entry per non-empty name, each with an empty `bundleId`;
 *   an empty array when the command fails or prints nothing.
 */
function getRunningApps(): RunningApp[] {
  const script =
    'tell application "System Events" to get the name of every application process'
  const outcome = safeExec('osascript', ['-e', script])
  if (!outcome.ok || outcome.stdout.length === 0) {
    return []
  }

  const apps: RunningApp[] = []
  for (const piece of outcome.stdout.split(/\s*,\s*/u)) {
    const displayName = piece.trim()
    // Skip blanks left behind by stray or doubled commas.
    if (displayName) {
      apps.push({ bundleId: '', displayName })
    }
  }
  return apps
}
/**
 * Wraps a display name in an `InstalledApp` record.
 *
 * @param displayName - Name to store on the record.
 * @returns A record with the given name and an empty `bundleId`; `path` is
 *   left unset.
 */
function createInstalledApp(displayName: string): InstalledApp {
  const app: InstalledApp = { bundleId: '', displayName }
  return app
}
/**
 * Surface of the `@ant/computer-use-swift` module as consumed by this
 * project. Groups are synchronous where they return plain values and
 * promise-based elsewhere.
 */
export type ComputerUseAPI = {
  /** Synchronous hook; takes no arguments and returns nothing. */
  _drainMainRunLoop(): void

  /**
   * Boolean permission probes.
   * NOTE(review): presumably the macOS privacy (TCC) checks — confirm.
   */
  tcc: {
    checkAccessibility(): boolean
    checkScreenRecording(): boolean
  }

  /** Escape-key hook registration. */
  hotkey: {
    /** Returns whether the callback was registered. */
    registerEscape(onEscape: () => void): boolean
    unregister(): void
    notifyExpectedEscape(): void
  }

  /** Display geometry queries. */
  display: {
    getSize(displayId?: number): DisplayGeometry
    listAll(): DisplayGeometry[]
  }

  /** Application discovery and visibility management. */
  apps: {
    prepareDisplay(
      allowlistBundleIds: string[],
      surrogateHost: string,
      displayId?: number,
    ): Promise<{ hidden: string[]; activated?: string }>

    previewHideSet(
      allowlistBundleIds: string[],
      displayId?: number,
    ): Promise<{ bundleId: string; displayName: string }[]>

    findWindowDisplays(
      bundleIds: string[],
    ): Promise<{ bundleId: string; displayIds: number[] }[]>

    /** Resolves to `null` when no application is reported for the point. */
    appUnderPoint(
      x: number,
      y: number,
    ): Promise<{ bundleId: string; displayName: string } | null>

    listInstalled(): Promise<InstalledApp[]>

    /** Synchronous; `null` when no icon data URL is available. */
    iconDataUrl(path: string): string | null

    listRunning(): Promise<RunningApp[]>
    open(bundleId: string): Promise<void>
    unhide(bundleIds: string[]): Promise<void>
  }

  /** Screen capture entry points. */
  screenshot: {
    captureExcluding(
      allowedBundleIds: string[],
      quality: number,
      width: number,
      height: number,
      displayId?: number,
    ): Promise<ScreenshotResult>

    captureRegion(
      allowedBundleIds: string[],
      x: number,
      y: number,
      width: number,
      height: number,
      outW: number,
      outH: number,
      quality: number,
      displayId?: number,
    ): Promise<ScreenshotResult>
  }

  /**
   * Combined call whose result carries both the screenshot fields and the
   * `hidden` / `activated` fields of `apps.prepareDisplay`, plus an
   * `autoResolved` flag.
   */
  resolvePrepareCapture(
    allowedBundleIds: string[],
    surrogateHost: string,
    quality: number,
    targetW: number,
    targetH: number,
    preferredDisplayId?: number,
    autoResolve?: boolean,
    doHide?: boolean,
  ): Promise<
    ScreenshotResult & {
      hidden: string[]
      activated?: string
      autoResolved: boolean
    }
  >
}
const stub: ComputerUseAPI = { const stub: ComputerUseAPI = {
screens: { _drainMainRunLoop() {},
async list() { tcc: {
return [] checkAccessibility() {
return false
},
checkScreenRecording() {
return false
},
},
hotkey: {
registerEscape(_onEscape: () => void) {
return false
},
unregister() {},
notifyExpectedEscape() {},
},
display: {
getSize(displayId?: number) {
return getDisplay(displayId)
},
listAll() {
return [getDefaultDisplay()]
}, },
}, },
apps: { apps: {
async listInstalled() { async prepareDisplay(
_allowlistBundleIds: string[],
_surrogateHost: string,
_displayId?: number,
) {
return { hidden: [] as string[] }
},
async previewHideSet(
_allowlistBundleIds: string[],
_displayId?: number,
) {
return [] return []
}, },
async findWindowDisplays(bundleIds: string[]) {
return bundleIds.map(bundleId => ({
bundleId,
displayIds: [],
}))
},
async appUnderPoint(_x: number, _y: number) {
return null
},
async listInstalled() {
return getRunningApps().map(app => createInstalledApp(app.displayName))
},
iconDataUrl(_path: string) {
return null
},
async listRunning() { async listRunning() {
return [] return getRunningApps()
}, },
async open(bundleId: string) {
openBundle(bundleId)
},
async unhide(_bundleIds: string[]) {},
},
screenshot: {
async captureExcluding(
_allowedBundleIds: string[],
_quality: number,
width: number,
height: number,
displayId?: number,
) {
return buildScreenshotResult(width, height, displayId)
},
async captureRegion(
_allowedBundleIds: string[],
_x: number,
_y: number,
_width: number,
_height: number,
outW: number,
outH: number,
_quality: number,
displayId?: number,
) {
return buildScreenshotResult(outW, outH, displayId)
},
},
async resolvePrepareCapture(
_allowedBundleIds: string[],
_surrogateHost: string,
_quality: number,
targetW: number,
targetH: number,
preferredDisplayId?: number,
autoResolve = false,
_doHide = false,
) {
return {
...buildScreenshotResult(targetW, targetH, preferredDisplayId),
hidden: [],
autoResolved: autoResolve,
}
}, },
} }

View File

@ -5,6 +5,17 @@ import type {
let cached: ComputerUseInputAPI | undefined let cached: ComputerUseInputAPI | undefined
/**
 * Normalises a `require` result that may be either the module value itself
 * or a wrapper of the form `{ default: value }`.
 *
 * @param mod - The loaded module, possibly wrapped in a `default` property.
 * @returns `mod.default` when `mod` is a non-null object with a `default`
 *   property that is not `undefined`; otherwise `mod` unchanged.
 */
function unwrapDefaultExport<T>(mod: T | { default: T }): T {
  // Primitives, null and objects without `default` are already unwrapped.
  if (typeof mod !== 'object' || mod === null || !('default' in mod)) {
    return mod as T
  }
  const inner = mod.default
  return (inner !== undefined ? inner : mod) as T
}
/** /**
* Package's js/index.js reads COMPUTER_USE_INPUT_NODE_PATH (baked by * Package's js/index.js reads COMPUTER_USE_INPUT_NODE_PATH (baked by
* build-with-plugins.ts on darwin targets, unset otherwise falls through to * build-with-plugins.ts on darwin targets, unset otherwise falls through to
@ -22,7 +33,11 @@ let cached: ComputerUseInputAPI | undefined
export function requireComputerUseInput(): ComputerUseInputAPI { export function requireComputerUseInput(): ComputerUseInputAPI {
if (cached) return cached if (cached) return cached
// eslint-disable-next-line @typescript-eslint/no-require-imports // eslint-disable-next-line @typescript-eslint/no-require-imports
const input = require('@ant/computer-use-input') as ComputerUseInput const input = unwrapDefaultExport(
require('@ant/computer-use-input') as ComputerUseInput | {
default: ComputerUseInput
},
)
if (!input.isSupported) { if (!input.isSupported) {
throw new Error('@ant/computer-use-input is not supported on this platform') throw new Error('@ant/computer-use-input is not supported on this platform')
} }

View File

@ -2,6 +2,17 @@ import type { ComputerUseAPI } from '@ant/computer-use-swift'
let cached: ComputerUseAPI | undefined let cached: ComputerUseAPI | undefined
/**
 * Accepts a module loaded with `require` and strips one level of
 * `{ default: ... }` wrapping if it is present.
 *
 * @param mod - The loaded module, possibly wrapped in a `default` property.
 * @returns The `default` property when `mod` is a non-null object whose
 *   `default` is not `undefined`; otherwise `mod` itself.
 */
function unwrapDefaultExport<T>(mod: T | { default: T }): T {
  if (typeof mod === 'object' && mod !== null && 'default' in mod) {
    const candidate = mod.default
    if (candidate !== undefined) {
      return candidate as T
    }
  }
  // Not wrapped (or wrapped around `undefined`): hand the value back as-is.
  return mod as T
}
/** /**
* Package's js/index.js reads COMPUTER_USE_SWIFT_NODE_PATH (baked by * Package's js/index.js reads COMPUTER_USE_SWIFT_NODE_PATH (baked by
* build-with-plugins.ts on darwin targets, unset otherwise falls through to * build-with-plugins.ts on darwin targets, unset otherwise falls through to
@ -17,7 +28,12 @@ export function requireComputerUseSwift(): ComputerUseAPI {
throw new Error('@ant/computer-use-swift is macOS-only') throw new Error('@ant/computer-use-swift is macOS-only')
} }
// eslint-disable-next-line @typescript-eslint/no-require-imports // eslint-disable-next-line @typescript-eslint/no-require-imports
return (cached ??= require('@ant/computer-use-swift') as ComputerUseAPI) return (cached ??=
unwrapDefaultExport(
require('@ant/computer-use-swift') as ComputerUseAPI | {
default: ComputerUseAPI
},
))
} }
export type { ComputerUseAPI } export type { ComputerUseAPI }