Skip to content

Commit a034369

Browse files
authored
feat(o-gui-agent): support ChromeUI gui operation on AIO sandbox (#1383)
1 parent 2a92348 commit a034369

File tree

22 files changed

+2221
-158
lines changed

22 files changed

+2221
-158
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Dumps directory
2+
dumps/
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# @gui-agent/operator-aio
2+
3+
AIO (All-in-One) operator for GUI Agent that provides comprehensive computer control capabilities.
4+
5+
## Features
6+
7+
- Remote computer control
8+
- Mouse operations (click, drag, scroll)
9+
- Keyboard input and hotkeys
10+
- Screenshot capture
11+
- Cross-platform support
12+
13+
## Installation
14+
15+
```bash
16+
npm install @gui-agent/operator-aio
17+
```
18+
19+
## Usage
20+
21+
```typescript
22+
import { AIOHybridOperator } from '@gui-agent/operator-aio';
23+
24+
// Create AIO operator instance
25+
const operator = await AIOHybridOperator.create({ baseURL: 'http://localhost:8080' });
26+
27+
// Take screenshot
28+
const screenshot = await operator.screenshot();
29+
30+
// Execute actions
31+
const result = await operator.execute({
32+
parsedPrediction: {
33+
action_type: 'click',
34+
action_inputs: {
35+
start_box: '[100, 100, 200, 200]'
36+
}
37+
},
38+
screenWidth: 1920,
39+
screenHeight: 1080,
40+
scaleFactor: 1
41+
});
42+
```
43+
44+
## License
45+
46+
Apache-2.0
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/*
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
import { AIOHybridOperator } from '../src/AIOHybridOperator';
7+
import { StatusEnum } from '@ui-tars/sdk';
8+
// @ts-ignore - Module resolution issue: TypeScript cannot resolve this module with current 'Bundler' moduleResolution setting
9+
// The type exists at '/node_modules/@ui-tars/sdk/dist/core.d.ts' but requires 'node16', 'nodenext', or 'bundler' moduleResolution
10+
import type { ExecuteParams } from '@ui-tars/sdk/core';
11+
import * as fs from 'fs';
12+
import * as path from 'path';
13+
14+
// Configure the real baseURL
15+
// Load variables from .env into process.env before CONFIG reads them
16+
import 'dotenv/config';
17+
18+
// Settings passed to AIOHybridOperator.create().
// baseURL is read from the AIO_BASE_URL environment variable and falls back
// to 'http://localhost:8080' when that variable is unset or empty.
// NOTE(review): timeout is presumably in milliseconds — confirm against the
// operator's option type.
const CONFIG = {
19+
baseURL: process.env.AIO_BASE_URL || 'http://localhost:8080', // your real URL
20+
timeout: 10000,
21+
};
22+
23+
/**
 * Manual smoke test for AIOHybridOperator.
 *
 * Steps, in order:
 *  1. Create an operator from CONFIG.
 *  2. Take a screenshot and, when it carries base64 data, save it as a PNG
 *     under a `dumps` directory next to this file.
 *  3. Run a fixed list of actions (click, type, hotkey, scroll, wait)
 *     through `operator.execute`, logging each result.
 *  4. Print `AIOHybridOperator.MANUAL.ACTION_SPACES`.
 *
 * A failure of an individual action is logged and the loop continues with
 * the next case. Any other error is logged and the process exits with
 * code 1.
 */
async function testAIOHybridOperator() {
24+
console.log('🚀 开始测试 AIOHybridOperator...');
25+
console.log('配置:', CONFIG);
26+
27+
try {
28+
// 1. Create the operator instance
29+
console.log('\n📦 创建 AIOHybridOperator 实例...');
30+
const operator = await AIOHybridOperator.create(CONFIG);
31+
console.log('✅ 实例创建成功');
32+
33+
// 2. Test the screenshot feature
34+
console.log('\n📸 测试截图功能...');
35+
const screenshot = await operator.screenshot();
36+
37+
// Create the dumps directory
38+
const dumpsDir = path.join(__dirname, 'dumps');
39+
if (!fs.existsSync(dumpsDir)) {
40+
fs.mkdirSync(dumpsDir, { recursive: true });
41+
}
42+
43+
// Save the screenshot
44+
if (screenshot.base64) {
45+
// ':' and '.' in the ISO timestamp are replaced with '-' for the file name
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
46+
const filename = `screenshot-${timestamp}.png`;
47+
const filepath = path.join(dumpsDir, filename);
48+
49+
// Convert the base64 payload to a Buffer and write it to disk
50+
// A leading PNG data-URL prefix is stripped first, if present
const base64Data = screenshot.base64.replace(/^data:image\/png;base64,/, '');
51+
const buffer = Buffer.from(base64Data, 'base64');
52+
fs.writeFileSync(filepath, buffer);
53+
54+
console.log('截图已保存:', filepath);
55+
}
56+
57+
console.log('截图结果:', {
58+
base64Length: screenshot.base64?.length || 0,
59+
scaleFactor: screenshot.scaleFactor,
60+
hasBase64: !!screenshot.base64,
61+
});
62+
console.log('✅ 截图功能正常');
63+
64+
// 3. Test execution of the various actions
65+
const testCases = [
66+
{
67+
name: '点击动作',
68+
params: {
69+
parsedPrediction: {
70+
action_type: 'click',
71+
action_inputs: {
72+
start_box: '[100, 200, 150, 250]',
73+
},
74+
reflection: '测试点击',
75+
thought: '执行点击操作',
76+
prediction: 'click action',
77+
factors: [1000, 1000],
78+
},
79+
screenWidth: 1920,
80+
screenHeight: 1080,
81+
scaleFactor: 1,
82+
prediction: 'click action',
83+
factors: [1000, 1000],
84+
} as ExecuteParams,
85+
},
86+
{
87+
name: '输入文本',
88+
params: {
89+
parsedPrediction: {
90+
action_type: 'type',
91+
action_inputs: {
92+
content: 'Hello World\n',
93+
},
94+
reflection: '测试输入',
95+
thought: '输入测试文本',
96+
prediction: 'type action',
97+
factors: [1000, 1000],
98+
},
99+
screenWidth: 1920,
100+
screenHeight: 1080,
101+
scaleFactor: 1,
102+
prediction: 'type action',
103+
factors: [1000, 1000],
104+
} as ExecuteParams,
105+
},
106+
{
107+
name: '快捷键',
108+
params: {
109+
parsedPrediction: {
110+
action_type: 'hotkey',
111+
action_inputs: {
112+
key: 'Ctrl+C',
113+
},
114+
reflection: '测试快捷键',
115+
thought: '执行复制快捷键',
116+
prediction: 'hotkey action',
117+
factors: [1000, 1000],
118+
},
119+
screenWidth: 1920,
120+
screenHeight: 1080,
121+
scaleFactor: 1,
122+
prediction: 'hotkey action',
123+
factors: [1000, 1000],
124+
} as ExecuteParams,
125+
},
126+
{
127+
name: '滚动操作',
128+
params: {
129+
parsedPrediction: {
130+
action_type: 'scroll',
131+
action_inputs: {
132+
start_box: '[500, 500, 600, 600]',
133+
direction: 'down',
134+
},
135+
reflection: '测试滚动',
136+
thought: '向下滚动',
137+
prediction: 'scroll action',
138+
factors: [1000, 1000],
139+
},
140+
screenWidth: 1920,
141+
screenHeight: 1080,
142+
scaleFactor: 1,
143+
prediction: 'scroll action',
144+
factors: [1000, 1000],
145+
} as ExecuteParams,
146+
},
147+
{
148+
name: '等待操作',
149+
params: {
150+
parsedPrediction: {
151+
action_type: 'wait',
152+
action_inputs: {},
153+
reflection: '测试等待',
154+
thought: '等待5秒',
155+
prediction: 'wait action',
156+
factors: [1000, 1000],
157+
},
158+
screenWidth: 1920,
159+
screenHeight: 1080,
160+
scaleFactor: 1,
161+
prediction: 'wait action',
162+
factors: [1000, 1000],
163+
} as ExecuteParams,
164+
},
165+
];
166+
167+
console.log('\n🎯 开始测试各种动作执行...');
168+
for (const testCase of testCases) {
169+
console.log(`\n测试: ${testCase.name}`);
170+
// A failing action is only logged; the loop moves on to the next case
try {
171+
const result = await operator.execute(testCase.params);
172+
console.log('执行结果:', JSON.stringify(result));
173+
console.log(`✅ ${testCase.name} 执行成功`);
174+
} catch (error) {
175+
const errorMessage = error instanceof Error ? error.message : String(error);
176+
console.error(`❌ ${testCase.name} 执行失败:`, errorMessage);
177+
}
178+
}
179+
180+
// 4. Verify the action space definition
181+
console.log('\n📋 验证动作空间定义...');
182+
const actionSpaces = AIOHybridOperator.MANUAL.ACTION_SPACES;
183+
console.log('动作空间数量:', actionSpaces.length);
184+
console.log('动作空间列表:');
185+
actionSpaces.forEach((action, index) => {
186+
console.log(` ${index + 1}. ${action}`);
187+
});
188+
console.log('✅ 动作空间验证完成');
189+
190+
console.log('\n🎉 所有测试完成!');
191+
} catch (error) {
192+
// Any error that escapes the steps above is logged and ends the process with exit code 1
console.error('❌ 测试过程中发生错误:', error);
193+
const errorStack = error instanceof Error ? error.stack : String(error);
194+
console.error('错误详情:', errorStack);
195+
process.exit(1);
196+
}
197+
}
198+
199+
// Run the tests
// Only runs when this file is the entry module (require.main === module).
// Exits with code 0 when the run resolves and with code 1 when it rejects.
200+
if (require.main === module) {
201+
testAIOHybridOperator()
202+
.then(() => {
203+
console.log('\n✨ 测试脚本执行完成');
204+
process.exit(0);
205+
})
206+
.catch((error) => {
207+
console.error('\n💥 测试脚本执行失败:', error);
208+
process.exit(1);
209+
});
210+
}
211+
212+
export { testAIOHybridOperator };
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
{
2+
"name": "@gui-agent/operator-aio",
3+
"description": "AIO (All-in-One) operator for GUI Agent",
4+
"version": "0.3.0-beta.7",
5+
"repository": {
6+
"type": "git",
7+
"url": "https://github.com/bytedance/UI-TARS-desktop"
8+
},
9+
"bugs": {
10+
"url": "https://github.com/bytedance/UI-TARS-desktop/issues"
11+
},
12+
"keywords": [
13+
"AI",
14+
"Core",
15+
"SDK",
16+
"Operator",
17+
"UI-TARS",
18+
"AIO"
19+
],
20+
"main": "./dist/index.js",
21+
"module": "./dist/index.mjs",
22+
"types": "./dist/index.d.ts",
23+
"scripts": {
24+
"dev": "rslib build --watch",
25+
"build": "rslib build",
26+
"build:watch": "rslib build --watch",
27+
"test": "vitest",
28+
"example": "npx tsx examples/test-runner.ts"
29+
},
30+
"license": "Apache-2.0",
31+
"files": [
32+
"dist"
33+
],
34+
"publishConfig": {
35+
"access": "public",
36+
"registry": "https://registry.npmjs.org"
37+
},
38+
"dependencies": {
39+
"@agent-infra/browser": "0.1.1",
40+
"@agent-infra/logger": "0.0.2-beta.2",
41+
"@agent-infra/sandbox": "0.0.2-beta7",
42+
"@agent-infra/media-utils": "0.1.5",
43+
"@ui-tars/sdk": "1.2.3",
44+
"@ui-tars/shared": "1.2.3"
45+
},
46+
"devDependencies": {
47+
"@rslib/core": "0.10.0",
48+
"dotenv": "^16.3.1",
49+
"ts-node": "^10.9.2",
50+
"tsx": "^4.19.2",
51+
"typescript": "^5.7.2",
52+
"vitest": "^3.0.2"
53+
}
54+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/**
2+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
import { defineConfig } from '@rslib/core';
6+
7+
// License header text; used as the `banner.js` value of both lib entries in
// the config exported from this file.
const BANNER = `/**
8+
* Copyright (c) 2025 Bytedance, Inc. and its affiliates.
9+
* SPDX-License-Identifier: Apache-2.0
10+
*/`;
11+
12+
// rslib build configuration: takes everything under src/ as the entry and
// emits two unbundled library outputs (ESM and CJS, both es2021 syntax, with
// type declarations and the license banner), targeting Node with source maps.
// NOTE(review): `autoExternal: false` is set on the ESM entry only, not on
// the CJS entry — confirm whether the asymmetry is intentional.
// NOTE(review): `cleanDistPath: false` is set, so the build is presumably not
// clearing dist/ beforehand — verify against the rslib docs.
export default defineConfig({
13+
source: {
14+
entry: {
15+
index: ['src/**'],
16+
// examples: ['examples/**'], // Prevent static files in examples/dumps from being bundled into dist, so this line is commented out
17+
},
18+
},
19+
lib: [
20+
{
21+
format: 'esm',
22+
syntax: 'es2021',
23+
bundle: false,
24+
autoExternal: false,
25+
dts: true,
26+
banner: { js: BANNER },
27+
},
28+
{
29+
format: 'cjs',
30+
syntax: 'es2021',
31+
bundle: false,
32+
dts: true,
33+
banner: { js: BANNER },
34+
},
35+
],
36+
output: {
37+
target: 'node',
38+
cleanDistPath: false,
39+
sourceMap: true,
40+
},
41+
});

0 commit comments

Comments
 (0)