fix: computer use
This commit is contained in:
parent
73b53870f6
commit
2c185b8470
@ -1,3 +1,12 @@
|
||||
/** Mouse buttons accepted by `mouseButton`. */
type MouseButton = 'left' | 'right' | 'middle'

/** Action applied to a mouse button by `mouseButton`. */
type MouseAction = 'press' | 'release' | 'click'

/** Axis selected by the optional second argument of `mouseScroll`. */
type ScrollAxis = 'vertical' | 'horizontal'
|
||||
|
||||
/**
 * Application identity returned by `getFrontmostAppInfo`.
 * Both fields are optional, so an empty object is a valid value.
 */
export type FrontmostAppInfo = {
  bundleId?: string
  appName?: string
}
|
||||
|
||||
export type ComputerUseInputAPI = {
|
||||
moveMouse(x: number, y: number, smooth?: boolean): Promise<void>
|
||||
mouseLocation(): Promise<{ x: number; y: number }>
|
||||
@ -10,14 +19,75 @@ export type ComputerUseInputAPI = {
|
||||
dragMouse(x: number, y: number): Promise<void>
|
||||
scroll(x: number, y: number): Promise<void>
|
||||
type(text: string): Promise<void>
|
||||
mouseButton(
|
||||
button: MouseButton,
|
||||
action?: MouseAction,
|
||||
count?: number,
|
||||
): Promise<void>
|
||||
mouseScroll(amount: number, axis?: ScrollAxis): Promise<void>
|
||||
typeText(text: string): Promise<void>
|
||||
getFrontmostAppInfo(): FrontmostAppInfo | null
|
||||
}
|
||||
|
||||
/**
 * Type of this module's default export, discriminated on `isSupported`:
 * - `false`: any `ComputerUseInputAPI` member may be absent;
 * - `true`: the complete `ComputerUseInputAPI` is present.
 */
export type ComputerUseInput =
  | ({ isSupported: false } & Partial<ComputerUseInputAPI>)
  | ({ isSupported: true } & ComputerUseInputAPI)
|
||||
|
||||
const unsupported: ComputerUseInput = {
|
||||
isSupported: false,
|
||||
let cursor = { x: 0, y: 0 }
|
||||
|
||||
/** Placeholder for input actions with no implementation: resolves with no value. */
function noOp(): Promise<void> {
  return Promise.resolve()
}
|
||||
|
||||
/**
 * Default export of this module: an in-process stand-in for the input API.
 *
 * The pointer position is tracked in the module-level `cursor` variable
 * (updated by `moveMouse` and `dragMouse`, read by `mouseLocation`). Every
 * other action simply awaits `noOp()` and has no effect.
 */
const supported: ComputerUseInput = {
  // Only advertised as supported when running on macOS.
  isSupported: process.platform === 'darwin',

  /** Records the new pointer position; the optional `smooth` flag is not used. */
  async moveMouse(x: number, y: number): Promise<void> {
    cursor = { x, y }
  },

  /** Returns the last recorded pointer position. */
  async mouseLocation(): Promise<{ x: number; y: number }> {
    // Hand out a copy so callers cannot mutate the tracked position.
    return { ...cursor }
  },

  async key(_key: string, _action: 'press' | 'release' | 'click' = 'click') {
    await noOp()
  },

  async keys(_keys: string[]) {
    await noOp()
  },

  async leftClick() {
    await noOp()
  },

  async rightClick() {
    await noOp()
  },

  async doubleClick() {
    await noOp()
  },

  async middleClick() {
    await noOp()
  },

  /** Records the drag destination as the new pointer position. */
  async dragMouse(x: number, y: number) {
    cursor = { x, y }
  },

  async scroll(_x: number, _y: number) {
    await noOp()
  },

  async type(_text: string) {
    await noOp()
  },

  async mouseButton(
    _button: MouseButton,
    _action: MouseAction = 'click',
    _count = 1,
  ) {
    await noOp()
  },

  async mouseScroll(_amount: number, _axis: ScrollAxis = 'vertical') {
    await noOp()
  },

  async typeText(_text: string) {
    await noOp()
  },

  /** Always `null`: this implementation does not look up the frontmost app. */
  getFrontmostAppInfo(): FrontmostAppInfo | null {
    return null
  },
}
|
||||
|
||||
export default unsupported
|
||||
export default supported
|
||||
|
||||
@ -1,26 +1,296 @@
|
||||
export type ComputerUseAPI = {
|
||||
screens?: {
|
||||
list(): Promise<unknown[]>
|
||||
import { execFileSync } from 'child_process'
|
||||
|
||||
/**
 * Geometry of one display, as returned by `display.getSize` and
 * `display.listAll`.
 */
type DisplayGeometry = {
  id: number
  width: number
  height: number
  scaleFactor: number
  originX: number
  originY: number
}
|
||||
apps?: {
|
||||
listInstalled(): Promise<unknown[]>
|
||||
listRunning(): Promise<unknown[]>
|
||||
|
||||
/** Entry returned by `apps.listInstalled`; `path` is optional. */
type InstalledApp = {
  bundleId: string
  displayName: string
  path?: string
}
|
||||
|
||||
/** Entry returned by `apps.listRunning`. */
type RunningApp = {
  bundleId: string
  displayName: string
}
|
||||
|
||||
/**
 * Result of a screenshot call (`screenshot.captureExcluding`,
 * `screenshot.captureRegion`, and part of `resolvePrepareCapture`).
 *
 * `width`/`height` describe the image; the `display*` and `origin*` fields
 * describe the display the capture is associated with.
 */
type ScreenshotResult = {
  base64: string
  width: number
  height: number
  displayWidth: number
  displayHeight: number
  displayId: number
  originX: number
  originY: number
}
|
||||
|
||||
const BLANK_JPEG_BASE64 =
|
||||
'/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkGBxAQEBUQEBAVFRUVFRUVFRUVFRUVFRUVFRUXFhUVFRUYHSggGBolHRUVITEhJSkrLi4uFx8zODMsNygtLisBCgoKDg0OGhAQGi0mHyYtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLf/AABEIAAEAAQMBIgACEQEDEQH/xAAXAAADAQAAAAAAAAAAAAAAAAAAAQID/8QAFBABAAAAAAAAAAAAAAAAAAAAAP/aAAwDAQACEAMQAAAB6gD/xAAVEAEBAAAAAAAAAAAAAAAAAAABAP/aAAgBAQABBQJf/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP/aAAgBAwEBPwEf/8QAFBEBAAAAAAAAAAAAAAAAAAAAEP/aAAgBAgEBPwEf/8QAFBABAAAAAAAAAAAAAAAAAAAAEP/aAAgBAQAGPwJf/8QAFBABAAAAAAAAAAAAAAAAAAAAEP/aAAgBAQABPyFf/9k='
|
||||
|
||||
/**
 * Runs `file` with `args` synchronously and captures its stdout.
 *
 * Best-effort by design: any failure (binary not found, non-zero exit,
 * timeout) is reported as `{ ok: false }` rather than thrown. stdin and
 * stderr of the child are ignored.
 *
 * @param file - Executable to run.
 * @param args - Arguments passed to the executable.
 * @param timeoutMs - Maximum run time in milliseconds; once exceeded the
 *   child is killed and the call reports failure.
 * @returns `{ ok: true, stdout }` with surrounding whitespace trimmed, or
 *   `{ ok: false }` on any failure.
 */
function safeExec(
  file: string,
  args: string[],
  timeoutMs = 10000,
): { ok: true; stdout: string } | { ok: false } {
  try {
    const stdout = execFileSync(file, args, {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
      // execFileSync blocks the calling thread, so a child that never exits
      // must not be allowed to stall the process indefinitely.
      timeout: timeoutMs,
    })
    return { ok: true, stdout: stdout.trim() }
  } catch {
    return { ok: false }
  }
}
|
||||
|
||||
/**
 * Returns the hard-coded fallback display: id 0, 1440x900, scale factor 1,
 * positioned at the origin. A fresh object is created on every call.
 */
function getDefaultDisplay(): DisplayGeometry {
  return {
    id: 0,
    width: 1440,
    height: 900,
    scaleFactor: 1,
    originX: 0,
    originY: 0,
  }
}
|
||||
|
||||
/**
 * Resolves a display id to a geometry.
 *
 * No real display lookup is performed: the default geometry is returned for
 * every id, relabelled with the requested id when one is given.
 *
 * @param displayId - Requested display id; omitted means the default display.
 * @returns The default geometry carrying `displayId` (or the default id).
 */
function getDisplay(displayId?: number): DisplayGeometry {
  const display = getDefaultDisplay()
  if (displayId === undefined || displayId === display.id) {
    return display
  }
  return { ...display, id: displayId }
}
|
||||
|
||||
/**
 * Builds a placeholder screenshot result.
 *
 * The image payload is always `BLANK_JPEG_BASE64`; nothing is captured.
 * `width`/`height` are echoed back as given, while the display fields come
 * from `getDisplay(displayId)`.
 *
 * @param width - Value reported as the image width.
 * @param height - Value reported as the image height.
 * @param displayId - Display to attribute the result to; optional.
 */
function buildScreenshotResult(
  width: number,
  height: number,
  displayId?: number,
): ScreenshotResult {
  const display = getDisplay(displayId)
  return {
    base64: BLANK_JPEG_BASE64,
    width,
    height,
    displayWidth: display.width,
    displayHeight: display.height,
    displayId: display.id,
    originX: display.originX,
    originY: display.originY,
  }
}
|
||||
|
||||
/**
 * Launches an application by bundle id by running `open -b <bundleId>`.
 * Does nothing for an empty id; a failed launch is silently ignored because
 * the result of `safeExec` is discarded.
 */
function openBundle(bundleId: string): void {
  if (!bundleId) return
  safeExec('open', ['-b', bundleId])
}
|
||||
|
||||
/**
 * Lists running applications by asking System Events (via `osascript`) for
 * the name of every application process.
 *
 * The output is split on commas, so each comma-separated piece becomes one
 * entry. Bundle ids are not queried and are always the empty string.
 *
 * @returns One entry per non-empty name, or `[]` when the command fails or
 *   prints nothing.
 */
function getRunningApps(): RunningApp[] {
  const result = safeExec('osascript', [
    '-e',
    'tell application "System Events" to get the name of every application process',
  ])
  if (!result.ok || result.stdout.length === 0) return []

  return result.stdout
    .split(/\s*,\s*/u)
    .map(name => name.trim())
    .filter(Boolean)
    .map(name => ({
      bundleId: '',
      displayName: name,
    }))
}
|
||||
|
||||
/**
 * Wraps a display name in an `InstalledApp` record. The bundle id is left
 * empty and no `path` is set.
 */
function createInstalledApp(displayName: string): InstalledApp {
  return {
    bundleId: '',
    displayName,
  }
}
|
||||
|
||||
/**
 * Contract for the permission, hotkey, display, application and screenshot
 * operations implemented by `stub` in this file.
 *
 * Comments marked NOTE(review) are inferred from names only and should be
 * confirmed against the native implementation.
 */
export type ComputerUseAPI = {
  // NOTE(review): presumably pumps the macOS main run loop — confirm.
  _drainMainRunLoop(): void

  // Synchronous permission probes.
  // NOTE(review): "tcc" presumably refers to macOS Transparency, Consent and
  // Control — confirm.
  tcc: {
    checkAccessibility(): boolean
    checkScreenRecording(): boolean
  }

  // Escape-key hook; `registerEscape` reports success as a boolean.
  hotkey: {
    registerEscape(onEscape: () => void): boolean
    unregister(): void
    notifyExpectedEscape(): void
  }

  display: {
    getSize(displayId?: number): DisplayGeometry
    listAll(): DisplayGeometry[]
  }

  apps: {
    prepareDisplay(
      allowlistBundleIds: string[],
      surrogateHost: string,
      displayId?: number,
    ): Promise<{ hidden: string[]; activated?: string }>
    previewHideSet(
      allowlistBundleIds: string[],
      displayId?: number,
    ): Promise<Array<{ bundleId: string; displayName: string }>>
    findWindowDisplays(
      bundleIds: string[],
    ): Promise<Array<{ bundleId: string; displayIds: number[] }>>
    appUnderPoint(
      x: number,
      y: number,
    ): Promise<{ bundleId: string; displayName: string } | null>
    listInstalled(): Promise<InstalledApp[]>
    // Synchronous, unlike the rest of `apps`; `null` when no icon is available.
    iconDataUrl(path: string): string | null
    listRunning(): Promise<RunningApp[]>
    open(bundleId: string): Promise<void>
    unhide(bundleIds: string[]): Promise<void>
  }

  screenshot: {
    captureExcluding(
      allowedBundleIds: string[],
      quality: number,
      width: number,
      height: number,
      displayId?: number,
    ): Promise<ScreenshotResult>
    captureRegion(
      allowedBundleIds: string[],
      x: number,
      y: number,
      width: number,
      height: number,
      outW: number,
      outH: number,
      quality: number,
      displayId?: number,
    ): Promise<ScreenshotResult>
  }

  // Resolves to a screenshot result extended with `hidden`, an optional
  // `activated`, and an `autoResolved` flag.
  resolvePrepareCapture(
    allowedBundleIds: string[],
    surrogateHost: string,
    quality: number,
    targetW: number,
    targetH: number,
    preferredDisplayId?: number,
    autoResolve?: boolean,
    doHide?: boolean,
  ): Promise<
    ScreenshotResult & {
      hidden: string[]
      activated?: string
      autoResolved: boolean
    }
  >
}
|
||||
|
||||
/**
 * Placeholder implementation of `ComputerUseAPI`.
 *
 * - Permission checks and hotkey registration always report `false`.
 * - Display queries return the hard-coded geometry from `getDisplay` /
 *   `getDefaultDisplay`.
 * - Screenshot calls return a blank image via `buildScreenshotResult`.
 * - Hiding, previewing and window lookup return empty results.
 * - App listing and opening go through `getRunningApps` and `openBundle`.
 */
const stub: ComputerUseAPI = {
  _drainMainRunLoop() {},

  tcc: {
    checkAccessibility() {
      return false
    },
    checkScreenRecording() {
      return false
    },
  },

  hotkey: {
    // The callback is never retained or invoked.
    registerEscape(_onEscape: () => void) {
      return false
    },
    unregister() {},
    notifyExpectedEscape() {},
  },

  display: {
    getSize(displayId?: number) {
      return getDisplay(displayId)
    },
    listAll() {
      return [getDefaultDisplay()]
    },
  },

  apps: {
    async prepareDisplay(
      _allowlistBundleIds: string[],
      _surrogateHost: string,
      _displayId?: number,
    ) {
      return { hidden: [] as string[] }
    },
    async previewHideSet(
      _allowlistBundleIds: string[],
      _displayId?: number,
    ) {
      return []
    },
    // Every requested bundle id is echoed back with no displays.
    async findWindowDisplays(bundleIds: string[]) {
      return bundleIds.map(bundleId => ({
        bundleId,
        displayIds: [],
      }))
    },
    async appUnderPoint(_x: number, _y: number) {
      return null
    },
    // Installed apps are approximated by whatever is currently running.
    async listInstalled() {
      return getRunningApps().map(app => createInstalledApp(app.displayName))
    },
    iconDataUrl(_path: string) {
      return null
    },
    async listRunning() {
      return getRunningApps()
    },
    async open(bundleId: string) {
      openBundle(bundleId)
    },
    async unhide(_bundleIds: string[]) {},
  },

  screenshot: {
    async captureExcluding(
      _allowedBundleIds: string[],
      _quality: number,
      width: number,
      height: number,
      displayId?: number,
    ) {
      return buildScreenshotResult(width, height, displayId)
    },
    // The reported size is the requested output size (outW x outH); the
    // source region arguments are ignored.
    async captureRegion(
      _allowedBundleIds: string[],
      _x: number,
      _y: number,
      _width: number,
      _height: number,
      outW: number,
      outH: number,
      _quality: number,
      displayId?: number,
    ) {
      return buildScreenshotResult(outW, outH, displayId)
    },
  },

  // Nothing is hidden or activated; `autoResolved` echoes the `autoResolve`
  // argument.
  async resolvePrepareCapture(
    _allowedBundleIds: string[],
    _surrogateHost: string,
    _quality: number,
    targetW: number,
    targetH: number,
    preferredDisplayId?: number,
    autoResolve = false,
    _doHide = false,
  ) {
    return {
      ...buildScreenshotResult(targetW, targetH, preferredDisplayId),
      hidden: [],
      autoResolved: autoResolve,
    }
  },
}
|
||||
|
||||
|
||||
@ -5,6 +5,17 @@ import type {
|
||||
|
||||
let cached: ComputerUseInputAPI | undefined
|
||||
|
||||
/**
 * Normalises the result of `require()` for a module that may expose its
 * value either directly or under a `default` property.
 *
 * @param mod - The loaded module value.
 * @returns `mod.default` when `mod` is a non-null object whose `default`
 *   property is not `undefined`; otherwise `mod` itself.
 */
function unwrapDefaultExport<T>(mod: T | { default: T }): T {
  return (
    typeof mod === 'object' &&
    mod !== null &&
    'default' in mod &&
    mod.default !== undefined
      ? mod.default
      : mod
  ) as T
}
|
||||
|
||||
/**
|
||||
* Package's js/index.js reads COMPUTER_USE_INPUT_NODE_PATH (baked by
|
||||
* build-with-plugins.ts on darwin targets, unset otherwise — falls through to
|
||||
@ -22,7 +33,11 @@ let cached: ComputerUseInputAPI | undefined
|
||||
export function requireComputerUseInput(): ComputerUseInputAPI {
|
||||
if (cached) return cached
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
const input = require('@ant/computer-use-input') as ComputerUseInput
|
||||
const input = unwrapDefaultExport(
|
||||
require('@ant/computer-use-input') as ComputerUseInput | {
|
||||
default: ComputerUseInput
|
||||
},
|
||||
)
|
||||
if (!input.isSupported) {
|
||||
throw new Error('@ant/computer-use-input is not supported on this platform')
|
||||
}
|
||||
|
||||
@ -2,6 +2,17 @@ import type { ComputerUseAPI } from '@ant/computer-use-swift'
|
||||
|
||||
let cached: ComputerUseAPI | undefined
|
||||
|
||||
/**
 * Normalises the result of `require()` for a module that may expose its
 * value either directly or under a `default` property.
 *
 * @param mod - The loaded module value.
 * @returns `mod.default` when `mod` is a non-null object whose `default`
 *   property is not `undefined`; otherwise `mod` itself.
 */
function unwrapDefaultExport<T>(mod: T | { default: T }): T {
  return (
    typeof mod === 'object' &&
    mod !== null &&
    'default' in mod &&
    mod.default !== undefined
      ? mod.default
      : mod
  ) as T
}
|
||||
|
||||
/**
|
||||
* Package's js/index.js reads COMPUTER_USE_SWIFT_NODE_PATH (baked by
|
||||
* build-with-plugins.ts on darwin targets, unset otherwise — falls through to
|
||||
@ -17,7 +28,12 @@ export function requireComputerUseSwift(): ComputerUseAPI {
|
||||
throw new Error('@ant/computer-use-swift is macOS-only')
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/no-require-imports
|
||||
return (cached ??= require('@ant/computer-use-swift') as ComputerUseAPI)
|
||||
return (cached ??=
|
||||
unwrapDefaultExport(
|
||||
require('@ant/computer-use-swift') as ComputerUseAPI | {
|
||||
default: ComputerUseAPI
|
||||
},
|
||||
))
|
||||
}
|
||||
|
||||
export type { ComputerUseAPI }
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user