-
Notifications
You must be signed in to change notification settings - Fork 58
feat(android): add UIAutomator hierarchy dump, parsing, and agent tool #251
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| UnknownAndroidDisplay, | ||
| ) | ||
| from askui.tools.android.android_agent_os_error import AndroidAgentOsError | ||
| from askui.tools.android.uiautomator_hierarchy import UIElementCollection | ||
| from askui.utils.annotated_image import AnnotatedImage | ||
|
|
||
|
|
||
|
|
@@ -34,6 +35,7 @@ class PpadbAgentOs(AndroidAgentOs): | |
| """ | ||
|
|
||
| _REPORTER_ROLE_NAME: str = "AndroidAgentOS" | ||
| _UIAUTOMATOR_DUMP_PATH: str = "/data/local/tmp/askui_window_dump.xml" | ||
|
|
||
| def __init__( | ||
| self, reporter: Reporter = NULL_REPORTER, device_identifier: str | int = 0 | ||
|
|
@@ -72,8 +74,8 @@ def connect(self) -> None: | |
| self.set_device_by_serial_number(self._device_identifier) | ||
| else: | ||
| self.set_device_by_index(self._device_identifier) | ||
| assert self._device is not None | ||
| self._device.wait_boot_complete() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| device.wait_boot_complete() | ||
|
|
||
| def disconnect(self) -> None: | ||
| self._client = None | ||
|
|
@@ -92,10 +94,9 @@ def _set_display(self, display: AndroidDisplay) -> None: | |
| ) | ||
|
|
||
| def get_connected_displays(self) -> list[AndroidDisplay]: | ||
| self._check_if_device_is_selected() | ||
| assert self._device is not None | ||
| device: AndroidDevice = self._get_selected_device() | ||
| displays: list[AndroidDisplay] = [] | ||
| output: str = self._device.shell( | ||
| output: str = device.shell( | ||
| "dumpsys SurfaceFlinger --display-id", | ||
| ) | ||
|
|
||
|
|
@@ -202,11 +203,10 @@ def set_device_by_serial_number(self, device_sn: str) -> None: | |
| raise AndroidAgentOsError(msg) | ||
|
|
||
| def _screenshot_without_reporting(self) -> Image.Image: | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| connection_to_device = self._device.create_connection() | ||
| connection_to_device = device.create_connection() | ||
| unique_display_id_flag = self._selected_display.get_display_unique_id_flag() | ||
| connection_to_device.send( | ||
| f"shell:/system/bin/screencap -p {unique_display_id_flag}" | ||
|
|
@@ -222,28 +222,26 @@ def screenshot(self) -> Image.Image: | |
| return screenshot | ||
|
|
||
| def shell(self, command: str) -> str: | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| response: str = self._device.shell(command) | ||
| response: str = device.shell(command) | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"shell(command='{command}') -> '{response}'", | ||
| ) | ||
| return response | ||
|
|
||
| def tap(self, x: int, y: int) -> None: | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| display_flag = self._selected_display.get_display_id_flag() | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"tap(x={x}, y={y})", | ||
| AnnotatedImage(self._screenshot_without_reporting, [(x, y)]), | ||
| ) | ||
| self._device.shell(f"input {display_flag} tap {x} {y}") | ||
| device.shell(f"input {display_flag} tap {x} {y}") | ||
| self._mouse_position = (x, y) | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
|
|
@@ -259,9 +257,8 @@ def swipe( | |
| y2: int, | ||
| duration_in_ms: int = 1000, | ||
| ) -> None: | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| display_flag = self._selected_display.get_display_id_flag() | ||
| self._reporter.add_message( | ||
|
|
@@ -272,9 +269,7 @@ def swipe( | |
| ), | ||
| AnnotatedImage(self._screenshot_without_reporting, [(x1, y1)]), | ||
| ) | ||
| self._device.shell( | ||
| f"input {display_flag} swipe {x1} {y1} {x2} {y2} {duration_in_ms}" | ||
| ) | ||
| device.shell(f"input {display_flag} swipe {x1} {y1} {x2} {y2} {duration_in_ms}") | ||
| self._mouse_position = (x2, y2) | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
|
|
@@ -290,9 +285,8 @@ def drag_and_drop( | |
| y2: int, | ||
| duration_in_ms: int = 1000, | ||
| ) -> None: | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| display_flag = self._selected_display.get_display_id_flag() | ||
| self._reporter.add_message( | ||
|
|
@@ -301,7 +295,7 @@ def drag_and_drop( | |
| AnnotatedImage(self._screenshot_without_reporting, [(x1, y1)]), | ||
| ) | ||
|
|
||
| self._device.shell( | ||
| device.shell( | ||
| f"input {display_flag} draganddrop {x1} {y1} {x2} {y2} {duration_in_ms}" | ||
| ) | ||
| self._mouse_position = (x2, y2) | ||
|
|
@@ -319,9 +313,8 @@ def type(self, text: str) -> None: | |
| + "or special characters which are not supported by the device" | ||
| ) | ||
| raise AndroidAgentOsError(error_msg_nonprintable) | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| display_flag = self._selected_display.get_display_id_flag() | ||
| escaped_text = shlex.quote(text) | ||
|
|
@@ -331,7 +324,7 @@ def type(self, text: str) -> None: | |
| f"Typing text: '{text}'", | ||
| AnnotatedImage(self._screenshot_without_reporting), | ||
| ) | ||
| self._device.shell(f"input {display_flag} text {shell_safe_text}") | ||
| device.shell(f"input {display_flag} text {shell_safe_text}") | ||
|
|
||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
|
|
@@ -343,17 +336,16 @@ def key_tap(self, key: ANDROID_KEY) -> None: | |
| if key not in get_args(ANDROID_KEY): | ||
| error_msg_invalid_key: str = f"Invalid key: {key}" | ||
| raise AndroidAgentOsError(error_msg_invalid_key) | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| display_flag = self._selected_display.get_display_id_flag() | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"Tapping key: '{key}'", | ||
| AnnotatedImage(self._screenshot_without_reporting), | ||
| ) | ||
| self._device.shell(f"input {display_flag} keyevent {key}") | ||
| device.shell(f"input {display_flag} keyevent {key}") | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"After tapping key: '{key}'", | ||
|
|
@@ -372,17 +364,16 @@ def key_combination( | |
| raise AndroidAgentOsError(error_msg_too_few) | ||
|
|
||
| keys_string = " ".join(keys) | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| display_flag = self._selected_display.get_display_id_flag() | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"Performing key combination: '{keys_string}'", | ||
| AnnotatedImage(self._screenshot_without_reporting), | ||
| ) | ||
| self._device.shell( | ||
| device.shell( | ||
| f"input {display_flag} keycombination -t {duration_in_ms} {keys_string}" | ||
| ) | ||
| self._reporter.add_message( | ||
|
|
@@ -391,7 +382,7 @@ def key_combination( | |
| AnnotatedImage(self._screenshot_without_reporting), | ||
| ) | ||
|
|
||
| def _check_if_device_is_selected(self) -> None: | ||
| def _get_selected_device(self) -> AndroidDevice: | ||
| devices: list[AndroidDevice] = self._get_connected_devices() | ||
|
|
||
| if not self._device: | ||
|
|
@@ -400,7 +391,7 @@ def _check_if_device_is_selected(self) -> None: | |
|
|
||
| for device in devices: | ||
| if device.serial == self._device.serial: | ||
| return | ||
| return self._device | ||
| msg = f"Device {self._device.serial} not found in connected devices" | ||
| raise AndroidAgentOsError(msg) | ||
|
|
||
|
|
@@ -441,30 +432,28 @@ def get_selected_device_infos(self) -> tuple[str, AndroidDisplay]: | |
| """ | ||
| Get the selected device infos. | ||
| """ | ||
| self._check_if_device_is_selected() | ||
| device: AndroidDevice = self._get_selected_device() | ||
| self._check_if_display_is_selected() | ||
| assert self._device is not None | ||
| assert self._selected_display is not None | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| ( | ||
| "get_selected_device_infos() -> " | ||
| f"Selected device serial number: '{self._device.serial}' " | ||
| f"Selected device serial number: '{device.serial}' " | ||
| f"and selected display: '{self._selected_display}'" | ||
| ), | ||
| ) | ||
| return (self._device.serial, self._selected_display) | ||
| return (device.serial, self._selected_display) | ||
|
|
||
| def push(self, local_path: str, remote_path: str) -> None: | ||
| """ | ||
| Push a file to the device. | ||
| """ | ||
| self._check_if_device_is_selected() | ||
| assert self._device is not None | ||
| device: AndroidDevice = self._get_selected_device() | ||
| if not Path.exists(Path(local_path)): | ||
| msg = f"Local path {local_path} does not exist" | ||
| raise FileNotFoundError(msg) | ||
| self._device.push(local_path, remote_path) | ||
| device.push(local_path, remote_path) | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"push(local_path='{local_path}', remote_path='{remote_path}')", | ||
|
|
@@ -474,11 +463,39 @@ def pull(self, remote_path: str, local_path: str) -> None: | |
| """ | ||
| Pull a file from the device. | ||
| """ | ||
| self._check_if_device_is_selected() | ||
| assert self._device is not None | ||
| device: AndroidDevice = self._get_selected_device() | ||
| Path.mkdir(Path.absolute(Path(local_path).parent), exist_ok=True) | ||
| self._device.pull(remote_path, local_path) | ||
| device.pull(remote_path, local_path) | ||
| self._reporter.add_message( | ||
| self._REPORTER_ROLE_NAME, | ||
| f"pull(remote_path='{remote_path}', local_path='{local_path}')", | ||
| ) | ||
|
|
||
| def get_ui_elements(self) -> UIElementCollection: | ||
| """ | ||
| Return UI elements from a `uiautomator dump` of the current screen. | ||
|
|
||
| Returns: | ||
| UIElementCollection: Parsed hierarchy from the dump, or empty if the dump | ||
| has no usable content. | ||
|
|
||
| Raises: | ||
| AndroidAgentOsError: When the dump command does not report success (often | ||
| while animations are visible on screen). | ||
|
|
||
| Notes: | ||
| `uiautomator dump` is unreliable while the screen shows animation | ||
| (transitions, loaders, pulsing highlights, etc.). Retry after motion has | ||
| stopped and the UI has settled. | ||
| """ | ||
| self._get_selected_device() | ||
| dump_cmd = f"uiautomator dump {self._UIAUTOMATOR_DUMP_PATH}" | ||
| dump_response = self.shell(dump_cmd) | ||
| if "dumped" not in dump_response.lower(): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What does "dumped" mean here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "dumped" is included in the response of a successful `uiautomator dump` command, so its absence is treated as a failed dump. |
||
| msg = f"Failed to dump UI hierarchy: {dump_response}" | ||
| raise AndroidAgentOsError(msg) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have to terminate the agent loop, or is this error recoverable by the agent?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think we have to terminate the loop, since I assume the agent can auto-recover and use different methods, such as taking a screenshot or using the shell. |
||
|
|
||
| raw = self.shell(f"cat {self._UIAUTOMATOR_DUMP_PATH}") | ||
| if not raw or not raw.strip(): | ||
| return UIElementCollection([]) | ||
| return UIElementCollection.build_from_xml_dump(raw) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does `from_agent` mean?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
“from_agent” indicates whether the coordinates were provided by the agent.