feat: Add screenshot processing adapters

2026-03-07 12:30:32 +05:30
parent 8715ebafe8
commit e6c3eec5bb
7 changed files with 1340 additions and 2 deletions
--- a/hgot.png
+++ b/hgot.png
--- a/main.py
+++ b/main.py
@@ -8,7 +8,6 @@ def main():

    backend = get_backend()
    path = backend.active_window()
-    print(path)


 if __name__ == "__main__":
--- a/moondream_loader.py
+++ b/moondream_loader.py
--- a/processors.py
+++ b/processors.py
@@ -0,0 +1,96 @@
+import requests
+from utils import img2base64
+
+
+class ProcessorAdapter:
+    """Base adapter that sends a prepared payload to an HTTP AI endpoint."""
+
+    # Seconds to wait for the server before giving up; without a timeout
+    # requests can block indefinitely.
+    timeout = 30
+
+    def __init__(self, name, base_url, headers, payload):
+        """Store the endpoint description; all values are kept as given.
+
+        Args:
+            name: Label for this adapter.
+            base_url: URL the request is sent to.
+            headers: HTTP headers passed with every request.
+            payload: Request body; sent JSON-encoded by get_ai_response().
+        """
+        self.name = name
+        self.base_url = base_url
+        self.headers = headers
+        self.payload = payload
+
+    def get_ai_response(self, image_path: str):
+        """POST ``self.payload`` to ``self.base_url`` and return the parsed JSON.
+
+        Args:
+            image_path: Not read here; subclasses use it to build the
+                payload before delegating to this method.
+
+        Returns:
+            The decoded JSON body of the response.
+
+        Raises:
+            requests.RequestException: On connection errors, timeouts,
+                non-2xx statuses or a body that is not valid JSON.
+        """
+        # POST with ``json=`` sends a JSON body; the earlier GET with
+        # ``data=<dict>`` form-encoded it despite the JSON Content-Type.
+        resp = requests.post(
+            self.base_url,
+            headers=self.headers,
+            json=self.payload,
+            timeout=self.timeout,
+        )
+        resp.raise_for_status()
+        return resp.json()
+
+
+class MoondreamOnlineAdapter(ProcessorAdapter):
+    """Adapter for the hosted Moondream API, authenticated with an API key."""
+
+    def __init__(self, api_key):
+        """Configure the Moondream endpoint and auth headers.
+
+        Args:
+            api_key: Value sent in the ``X-Moondream-Auth`` header.
+        """
+        self.api_key = api_key
+        headers = {
+            "X-Moondream-Auth": api_key,
+            "Content-Type": "application/json",
+        }
+        # The payload depends on the image and is built per request, so it
+        # starts as None. Reading self.payload here before it was assigned
+        # raised AttributeError.
+        super().__init__(
+            name="Moondream Online",
+            base_url="https://api.moondream.ai/v1/caption",
+            headers=headers,
+            payload=None,
+        )
+
+    def get_ai_response(self, image_path: str):
+        """Send the image at ``image_path`` to Moondream and return its JSON reply.
+
+        Args:
+            image_path: Path of the image file to encode and upload.
+
+        Returns:
+            The decoded JSON body of the response.
+        """
+        encoded_image = img2base64(image_path)
+        # NOTE(review): ``{task}`` is sent literally -- nothing substitutes it.
+        # NOTE(review): "question" looks like a query-endpoint field while
+        # base_url targets /v1/caption -- confirm against the Moondream docs.
+        self.payload = {
+            "image_url": encoded_image,
+            "question": """Delegated Task: {task}\n\nreturn json data: {
+"user_productive": bool,
+"screenshot_description": str
+}""",
+        }
+        return super().get_ai_response(image_path)
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,4 +4,14 @@ version = "0.1.0"
 description = "watcha doin' there? Your own digital invigilator making sure you're not distracted"
 readme = "README.md"
 requires-python = ">=3.12"
-dependencies = []
+dependencies = [
+ "transformers<5.3.0",
+ "accelerate",
+ "pillow<=11.0.0",
+ "pip>=26.0.1",
+ "torch>=2.10.0",
+ "einops>=0.8.2",
+ "sentencepiece>=0.2.1",
+ "requests>=2.32.5",
+ "jsonschema>=4.26.0",
+]
--- a/utils.py
+++ b/utils.py
@@ -0,0 +1,44 @@
+import base64
+import json
+import mimetypes
+
+from jsonschema import validate, ValidationError
+
+
+def img2base64(image_path: str) -> str:
+    """Return the image at ``image_path`` as a base64 ``data:`` URI.
+
+    The MIME type is guessed from the file extension; unknown or non-image
+    guesses fall back to ``image/png``, the previously hard-coded type.
+
+    Raises:
+        OSError: If the file cannot be opened or read.
+    """
+    mime_type, _ = mimetypes.guess_type(image_path)
+    if mime_type is None or not mime_type.startswith("image/"):
+        mime_type = "image/png"
+
+    with open(image_path, "rb") as f:
+        base64_encoded = base64.b64encode(f.read()).decode("utf-8")
+
+    return f"data:{mime_type};base64,{base64_encoded}"
+
+
+def is_valid_json_schema(json_string: str, schema: dict) -> bool:
+    """Return True if ``json_string`` is JSON that satisfies ``schema``.
+
+    Returns False when the input cannot be parsed as JSON (including
+    non-string input such as ``None``) or fails validation. An invalid
+    ``schema`` is not swallowed: ``jsonschema.SchemaError`` propagates.
+    """
+    try:
+        data = json.loads(json_string)
+    except (TypeError, ValueError):
+        # ValueError covers json.JSONDecodeError; TypeError covers non-str input.
+        return False
+
+    try:
+        validate(instance=data, schema=schema)
+    except ValidationError:
+        return False
+    return True
--- a/uv.lock
+++ b/uv.lock