OpenAdaptAI
diff --git a/‎.flake8‎
Lines changed: 1 addition & 3 deletions b/‎.flake8‎
Lines changed: 1 addition & 3 deletions
diff --git a/‎.github/workflows/main.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/main.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 0 deletions b/‎README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎experiments/fastsamsom.py‎
Lines changed: 104 additions & 0 deletions b/‎experiments/fastsamsom.py‎
Lines changed: 104 additions & 0 deletions
diff --git a/‎experiments/gpt4o_seg.py‎
Lines changed: 87 additions & 0 deletions b/‎experiments/gpt4o_seg.py‎
Lines changed: 87 additions & 0 deletions
diff --git a/‎experiments/handle_similar_segments.py‎
Lines changed: 129 additions & 0 deletions b/‎experiments/handle_similar_segments.py‎
Lines changed: 129 additions & 0 deletions
diff --git a/‎install/install_openadapt.ps1‎
Lines changed: 1 addition & 1 deletion b/‎install/install_openadapt.ps1‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎install/install_openadapt.sh‎
Lines changed: 1 addition & 1 deletion b/‎install/install_openadapt.sh‎
Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,5 @@
 [flake8]
-exclude =
-    alembic/versions,
-    .venv
+exclude = alembic,.venv,venv,contrib,.cache,.git
 docstring-convention = google
 max-line-length = 88
 extend-ignore = ANN101, E203
@@ -60,7 +60,7 @@ jobs:
         if: steps.cache-deps.outputs.cache-hit == 'true'
 
       - name: Check formatting with Black
-        run: poetry run black --preview --check . --exclude '/(alembic|\.venv)/'
+        run: poetry run black --preview --check . --exclude '/(alembic|\.cache|\.venv|venv|contrib|__pycache__)/'
 
       - name: Run Flake8
-        run: poetry run flake8 --exclude=alembic,.venv,*/.cache
+        run: poetry run flake8 --exclude=alembic,.venv,venv,contrib,.cache,.git
@@ -99,6 +99,7 @@ pip3 install poetry
 poetry install
 poetry shell
 poetry run install-dashboard
+poetry run postinstall
 cd openadapt && alembic upgrade head && cd ..
 pytest
 ```
 
@@ -0,0 +1,104 @@
+"""SoM with Ultralytics FastSAM."""
+
+from pprint import pformat
+
+from loguru import logger
+from PIL import Image
+import numpy as np
+
+from openadapt import adapters, config, contrib, utils, vision
+
+
+# Factor passed to utils.increase_contrast() in main().
+CONTRAST_FACTOR = 10000
+# When True, main() displays the intermediate images with Image.show().
+DEBUG = False
+
+
+def main() -> None:
+    """Run the SoM experiment on the Excel test screenshot.
+
+    Segments the image with the default segmentation adapter, draws every mask
+    as a numbered overlay, shows the result, then sends three prompts to the
+    default prompt adapter and logs the collected (prompt, response) pairs.
+
+    Raises:
+        RuntimeError: If segmentation produced no masks, so there is nothing to
+            draw and no marked image to prompt with.
+    """
+    image_file_path = config.ROOT_DIR_PATH / "../tests/assets/excel.png"
+    image = Image.open(image_file_path)
+    if DEBUG:
+        image.show()
+
+    # NOTE(review): the contrasted image is only displayed; segmentation below
+    # still runs on the original image. Confirm whether that is intended.
+    image_contrasted = utils.increase_contrast(image, CONTRAST_FACTOR)
+    if DEBUG:
+        image_contrasted.show()
+
+    segmentation_adapter = adapters.get_default_segmentation_adapter()
+    segmented_image = segmentation_adapter.fetch_segmented_image(image)
+    if DEBUG:
+        segmented_image.show()
+
+    masks = vision.get_masks_from_segmented_image(segmented_image, sort_by_area=True)
+    # refined_masks = vision.refine_masks(masks)
+    if len(masks) == 0:
+        # Without this guard `demo` below would never be bound and the script
+        # would die with an opaque UnboundLocalError.
+        raise RuntimeError(f"No masks found in segmentation of {image_file_path}")
+
+    image_arr = np.asarray(image)
+
+    # https://github.com/microsoft/SoM/blob/main/task_adapter/sam/tasks/inference_sam_m2m_auto.py
+    # metadata = MetadataCatalog.get('coco_2017_train_panoptic')
+    metadata = None
+    visual = contrib.som.visualizer.Visualizer(image_arr, metadata=metadata)
+    # uint16 rather than uint8: labels run from 1 to len(masks), which can exceed
+    # 255 on a dense screenshot.
+    mask_map = np.zeros(image_arr.shape, dtype=np.uint16)
+    label_mode = "1"
+    alpha = 0.1
+    anno_mode = [
+        "Mask",
+        # 'Mark',
+    ]
+    for i, mask in enumerate(masks):
+        label = i + 1
+        # Each call returns the output handle used after the loop; only the
+        # last one is read.
+        demo = visual.draw_binary_mask_with_number(
+            mask,
+            text=str(label),
+            label_mode=label_mode,
+            alpha=alpha,
+            anno_mode=anno_mode,
+        )
+        mask_map[mask == 1] = label
+
+    im = demo.get_image()
+    image_som = Image.fromarray(im)
+    image_som.show()
+
+    results = []
+
+    prompt_adapter = adapters.get_default_prompt_adapter()
+    text = (
+        "What are the values of the dates in the leftmost column? What about the"
+        " horizontal column headings?"
+    )
+    output = prompt_adapter.prompt(
+        text,
+        images=[
+            # no marks seem to perform just as well as with marks on spreadsheets
+            # image_som,
+            image,
+        ],
+    )
+    logger.info(output)
+    results.append((text, output))
+
+    # Feed the previous answer back in as context for the follow-up question.
+    text = "\n".join(
+        [
+            (
+                "Consider the dates along the leftmost column and the horizontal"
+                " column headings:"
+            ),
+            output,
+            "What are the values in the corresponding cells?",
+        ]
+    )
+    output = prompt_adapter.prompt(text, images=[image_som])
+    logger.info(output)
+    results.append((text, output))
+
+    text = "What are the contents of cells A2, B2, and C2?"
+    output = prompt_adapter.prompt(text, images=[image_som])
+    logger.info(output)
+    results.append((text, output))
+
+    logger.info(f"results=\n{pformat(results)}")
+
+
+# Run the experiment only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,87 @@
+"""Generate segmentations directly with LLM."""
+
+from pprint import pformat
+import os
+import sys
+import time
+
+from loguru import logger
+from PIL import Image
+
+from openadapt import cache, config, models, plotting, utils
+from openadapt.adapters import openai
+
+
+@cache.cache(force_refresh=False)
+def get_window_image(window_search_str: str) -> tuple:
+    """Wait for a matching active window, then capture it.
+
+    Polls the active window every 0.1 seconds until its title contains
+    `window_search_str` (case-insensitive), then takes a screenshot and crops
+    it to that window.
+
+    Args:
+        window_search_str: Substring to look for in the active window's title.
+
+    Returns:
+        A `(window_event, image)` tuple: the matching window event and the
+        screenshot cropped to it.
+    """
+    logger.info(f"Waiting for window with title containing {window_search_str=}...")
+    window_event = models.WindowEvent.get_active_window_event()
+    window_title = window_event.title
+    while window_search_str.lower() not in window_title.lower():
+        time.sleep(0.1)
+        window_event = models.WindowEvent.get_active_window_event()
+        window_title = window_event.title
+    logger.info(f"found {window_title=}")
+
+    cropped = models.Screenshot.take_screenshot().crop_active_window(
+        window_event=window_event
+    )
+    return window_event, cropped
+
+
+def main(window_search_str: str | None) -> None:
+    """Ask the LLM to locate interactable GUI elements and plot the result.
+
+    Args:
+        window_search_str: Substring of the title of the window to capture. If
+            falsy, the calculator test image is loaded instead and no window
+            state is added to the prompt.
+    """
+    if window_search_str:
+        window_event, image = get_window_image(window_search_str)
+        window_dict = window_event.to_prompt_dict()
+        # Shift positions by the window's top-left corner.
+        window_dict = utils.normalize_positions(
+            window_dict, -window_event.left, -window_event.top
+        )
+    else:
+        image_file_path = os.path.join(
+            config.ROOT_DIR_PATH, "../tests/assets/calculator.png"
+        )
+        image = Image.open(image_file_path)
+        # No live window was captured, so there is no window state available.
+        window_event = None
+        window_dict = None
+
+    system_prompt = utils.render_template_from_file(
+        "prompts/system.j2",
+    )
+
+    if window_dict:
+        window_prompt = (
+            f"Consider the corresponding window state:\n```{pformat(window_dict)}```"
+        )
+    else:
+        window_prompt = ""
+
+    prompt = f"""You are a master GUI understander.
+Your task is to locate all interactable elements in the supplied screenshot.
+{window_prompt}
+Return JSON containing an array of segments with the following properties:
+- "name": a unique identifier
+- "description": enough context to be able to differentiate between similar segments
+- "top": top coordinate of bounding box
+- "left": left coordinate of bounding box
+- "width": width of bouding box
+- "height": height of bounding box
+Provide as much detail as possible. My career depends on this. Lives are at stake.
+Respond with JSON ONLY AND NOTHING ELSE.
+"""
+
+    result = openai.prompt(
+        prompt,
+        system_prompt,
+        [image],
+    )
+    segment_dict = utils.parse_code_snippet(result)
+    plotting.plot_segments(image, segment_dict)
+
+    # Rebuild the un-normalized window state before the breakpoint below. Only
+    # possible when a live window was captured; previously this line raised
+    # NameError on the image-file path because window_event was never bound.
+    if window_event is not None:
+        window_dict = window_event.to_prompt_dict()
+    import ipdb
+
+    ipdb.set_trace()
+
+
+if __name__ == "__main__":
+    # The window title argument is optional: main() accepts None and then falls
+    # back to the bundled test image, so do not index past the end of argv.
+    main(sys.argv[1] if len(sys.argv) > 1 else None)
@@ -0,0 +1,129 @@
+"""Handle similar segments."""
+
+import os
+
+from PIL import Image
+from loguru import logger
+
+from openadapt import adapters, cache, config, plotting, utils, vision
+
+
+# Negated and passed as `enabled` to cache.cache on get_similar_segment_groups;
+# also the default for that function's `show_images` parameter.
+DEBUG = True
+MIN_SEGMENT_SSIM = 0.95  # threshold for considering segments structurally similar
+MIN_SEGMENT_SIZE_SIM = 0.95  # threshold for considering segment sizes similar
+
+
+# TODO: consolidate with strategies.visual.get_window_segmentation
+@cache.cache(enabled=not DEBUG)
+def get_similar_segment_groups(
+    image_file_path: str,
+    min_segment_ssim: float = MIN_SEGMENT_SSIM,
+    min_segment_size_sim: float = MIN_SEGMENT_SIZE_SIM,
+    show_images: bool = DEBUG,
+    contrast_factor: int = 10000,
+) -> tuple:
+    """Segment an image and group the segments that look alike.
+
+    Args:
+        image_file_path: Path of the image to load.
+        min_segment_ssim: Structural similarity threshold forwarded to
+            vision.get_similar_image_idxs.
+        min_segment_size_sim: Size similarity threshold forwarded to
+            vision.get_similar_image_idxs.
+        show_images: Whether to display the intermediate images and masks.
+        contrast_factor: If truthy, the image is passed through
+            utils.increase_contrast with this factor before segmentation.
+
+    Returns:
+        A tuple `(image, masked_images, refined_masks, similar_idx_groups,
+        ungrouped_idxs, ssim_matrix)`. `image` is the contrast-adjusted image
+        when `contrast_factor` is truthy.
+    """
+    image = Image.open(image_file_path)
+    # Honour show_images here too; these two displays used to be unconditional
+    # while every other display in this function was gated.
+    if show_images:
+        image.show()
+
+    if contrast_factor:
+        image = utils.increase_contrast(image, contrast_factor)
+        if show_images:
+            image.show()
+
+    segmentation_adapter = adapters.get_default_segmentation_adapter()
+    segmented_image = segmentation_adapter.fetch_segmented_image(image)
+    if show_images:
+        segmented_image.show()
+
+    # Breakpoint kept from the original experiment: stops after segmentation.
+    import ipdb
+
+    ipdb.set_trace()
+
+    masks = vision.get_masks_from_segmented_image(segmented_image)
+    logger.info(f"{len(masks)=}")
+    if show_images:
+        plotting.display_binary_images_grid(masks)
+
+    refined_masks = vision.refine_masks(masks)
+    logger.info(f"{len(refined_masks)=}")
+    if show_images:
+        plotting.display_binary_images_grid(refined_masks)
+
+    masked_images = vision.extract_masked_images(image, refined_masks)
+    # No descriptions are generated in this experiment; use blank titles.
+    descriptions = ["" for _ in masked_images]
+    if show_images:
+        plotting.display_images_table_with_titles(masked_images, descriptions)
+
+    similar_idx_groups, ungrouped_idxs, ssim_matrix, _ = vision.get_similar_image_idxs(
+        masked_images,
+        min_segment_ssim,
+        min_segment_size_sim,
+    )
+    logger.info(f"{len(similar_idx_groups)=}")
+
+    return (
+        image,
+        masked_images,
+        refined_masks,
+        similar_idx_groups,
+        ungrouped_idxs,
+        ssim_matrix,
+    )
+
+
+def main() -> None:
+    """Plot and highlight the largest similar-segment groups at two thresholds.
+
+    Runs get_similar_segment_groups on the Excel test screenshot once with
+    MIN_SEGMENT_SSIM and once with a third of it, plots the largest groups, and
+    shows the source image with each group's masks highlighted.
+    """
+    image_file_path = os.path.join(config.ROOT_DIR_PATH, "../tests/assets/excel.png")
+
+    MAX_GROUPS = 2
+
+    # True division: `//` floors the float and would make the second threshold
+    # 0.0 instead of a third of the first.
+    for min_segment_ssim in (MIN_SEGMENT_SSIM, MIN_SEGMENT_SSIM / 3):
+        logger.info(f"{min_segment_ssim=}")
+        # Pass the threshold through; otherwise both iterations are identical.
+        image, masked_images, masks, similar_idx_groups, ungrouped_idxs, ssim_matrix = (
+            get_similar_segment_groups(
+                image_file_path,
+                min_segment_ssim=min_segment_ssim,
+            )
+        )
+        # Largest groups first, so truncating keeps the most populated ones.
+        similar_idx_groups = sorted(similar_idx_groups, key=len, reverse=True)
+        if MAX_GROUPS:
+            similar_idx_groups = similar_idx_groups[:MAX_GROUPS]
+        plotting.plot_similar_image_groups(
+            masked_images,
+            similar_idx_groups,
+            ssim_matrix,
+            [
+                # Report the threshold actually used for this iteration.
+                f"min_ssim={min_segment_ssim}",
+                f"min_size_sim={MIN_SEGMENT_SIZE_SIM}",
+            ],
+        )
+
+        # Prompt input variations to explore:
+        # - images:
+        #     - original
+        #     - one segment mask
+        #     - multiple segment masks
+        #     - original with one segment highlighted
+        #     - original with multiple segments highlighted
+        #     - original with one segment labelled
+        #     - original with multiple segments labelled
+        #     - original with one segment highlighted+labelled
+        #     - original with multiple segments highlighted+labelled
+        #     - individual segment
+        #     - individual segment labelled
+        # - one or multiple segments per prompt
+        for similar_idx_group in similar_idx_groups:
+            similar_masks = [masks[idx] for idx in similar_idx_group]
+            highlighted_image = plotting.highlight_masks(image, similar_masks)
+            highlighted_image.show()
+
+        # Breakpoint kept from the original experiment; the assignment after it
+        # gives the debugger a line to stop on inside the loop body.
+        import ipdb
+
+        ipdb.set_trace()
+        foo = 1  # noqa
+
+
+# Run the experiment only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -360,7 +360,7 @@ RunAndCheck "git clone -q https://github.com/MLDSAI/OpenAdapt.git" "clone git re
 Set-Location .\OpenAdapt
 RunAndCheck "pip install poetry" "Run ``pip install poetry``"
 RunAndCheck "poetry install" "Run ``poetry install``"
-RunAndCheck "poetry run install-dashboard" "Install dashboard dependencies" -SkipCleanup:$true
+RunAndCheck "poetry run postinstall" "Install other dependencies" -SkipCleanup:$true
 RunAndCheck "cd openadapt"
 RunAndCheck "poetry run alembic upgrade head" "Run ``alembic upgrade head``" -SkipCleanup:$true
 RunAndCheck "cd .."
 
@@ -157,7 +157,7 @@ RunAndCheck "git checkout $BRANCH" "Checkout branch $BRANCH"
 
 RunAndCheck "pip3.10 install poetry" "Install Poetry"
 RunAndCheck "poetry install" "Install Python dependencies"
-RunAndCheck "poetry run install-dashboard" "Install dashboard dependencies"
+RunAndCheck "poetry run postinstall" "Install other dependencies"
 RunAndCheck "cd openadapt"
 RunAndCheck "poetry run alembic upgrade head" "Update database"
 RunAndCheck "cd .."