feat: Add screenshot processing adapters

This commit is contained in:
2026-03-07 12:30:32 +05:30
parent 8715ebafe8
commit e6c3eec5bb
7 changed files with 1340 additions and 2 deletions

BIN
hgot.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 KiB

View File

@@ -8,7 +8,6 @@ def main():
backend = get_backend()
path = backend.active_window()
print(path)
if __name__ == "__main__":

0
moondream_loader.py Normal file
View File

42
processors.py Normal file
View File

@@ -0,0 +1,42 @@
import requests
from utils import img2base64
class ProcessorAdapter:
    """Base adapter that sends a stored payload to an AI HTTP endpoint.

    Subclasses are expected to populate ``self.payload`` (typically from the
    image) and then delegate to :meth:`get_ai_response` for the HTTP call.
    """

    # Seconds to wait for the remote service before giving up; without a
    # timeout ``requests`` can block forever on a stalled connection.
    timeout = 30

    def __init__(self, name, base_url, headers, payload):
        """Store the connection details used for every request.

        Args:
            name: Human-readable name of the processor.
            base_url: Endpoint URL the payload is sent to.
            headers: HTTP headers sent with each request.
            payload: JSON-serialisable request body.
        """
        self.name = name
        self.base_url = base_url
        self.headers = headers
        self.payload = payload

    def get_ai_response(self, image_path: str):
        """Send ``self.payload`` to ``self.base_url`` and return the decoded JSON.

        Args:
            image_path: Path of the image being processed. The base
                implementation does not read it; subclasses use it to build
                the payload before delegating here.

        Returns:
            The response body decoded from JSON.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
            requests.RequestException: On connection problems or timeout.
        """
        # The body is sent as JSON via POST: a GET with a form-encoded body
        # contradicts a "Content-Type: application/json" header.
        resp = requests.post(
            self.base_url,
            headers=self.headers,
            json=self.payload,
            timeout=self.timeout,
        )
        # Fail loudly on HTTP errors instead of parsing an error page.
        resp.raise_for_status()
        return resp.json()
class MoondreamOnlineAdapter(ProcessorAdapter):
    """Adapter for the hosted Moondream HTTP API."""

    def __init__(self, api_key):
        """Configure the adapter for the Moondream endpoint.

        Args:
            api_key: Value sent in the ``X-Moondream-Auth`` header.
        """
        self.api_key = api_key
        # The payload depends on the image, so it is built per request in
        # get_ai_response; start with an empty one. (Previously this read
        # ``self.payload`` before it existed and raised AttributeError.)
        super().__init__(
            name="Moondream Online",
            # NOTE(review): the payload carries a "question" field but the URL
            # is the caption endpoint -- confirm this is the intended route.
            base_url="https://api.moondream.ai/v1/caption",
            headers={
                "X-Moondream-Auth": self.api_key,
                "Content-Type": "application/json",
            },
            payload={},
        )

    def get_ai_response(self, image_path: str, task: str = ""):
        """Describe the screenshot at ``image_path`` via the Moondream API.

        Args:
            image_path: Path of the image file to encode and send.
            task: Description of the delegated task, inserted into the
                prompt. Defaults to an empty string.

        Returns:
            Whatever the base class returns for the request (decoded JSON).
        """
        encoded_image = img2base64(image_path)
        # Built by concatenation rather than str.format because the prompt
        # itself contains literal braces.
        question = (
            f"Delegated Task: {task}\n\n"
            "return json data: {\n"
            '"user_productive": bool,\n'
            '"screenshot_description": str\n'
            "}"
        )
        self.payload = {
            "image_url": encoded_image,
            "question": question,
        }
        return super().get_ai_response(image_path)

View File

@@ -4,4 +4,14 @@ version = "0.1.0"
description = "watcha doin' there? Your own digital invigilator making sure you're not distracted"
readme = "README.md"
requires-python = ">=3.12"
dependencies = []
dependencies = [
"transformers<5.3.0",
"accelerate",
"pillow<=11.0.0",
"pip>=26.0.1",
"torch>=2.10.0",
"einops>=0.8.2",
"sentencepiece>=0.2.1",
"requests>=2.32.5",
"jsonschema>=4.26.0",
]

23
utils.py Normal file
View File

@@ -0,0 +1,23 @@
import base64
import json
from jsonschema import validate, ValidationError
def img2base64(image_path: str) -> str:
    """
    Converts an image file to a base64 encoded ``data:`` URL.

    The MIME type is guessed from the file extension; when the extension is
    unknown or does not map to an image type, ``image/png`` is used.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    import mimetypes

    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type is not None and guessed_type.startswith("image/"):
        mime_type = guessed_type
    else:
        # Fall back to PNG for unknown or non-image extensions.
        mime_type = "image/png"

    with open(image_path, "rb") as f:
        base64_encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{base64_encoded}"
def is_valid_json_schema(json_string: str, schema: dict) -> bool:
    """
    Reports whether ``json_string`` parses as JSON and satisfies ``schema``.

    Returns ``True`` when parsing and validation both succeed, ``False`` when
    the text is not valid JSON or the parsed value fails validation.
    """
    try:
        validate(instance=json.loads(json_string), schema=schema)
    except (json.JSONDecodeError, ValidationError):
        return False
    else:
        return True

1264
uv.lock generated

File diff suppressed because it is too large Load Diff