minilm testing

2026-04-08 17:27:11 +05:30
parent ec6fbe40e4
commit d227f37cc2
8 changed files with 24684 additions and 8 deletions
--- a/dataset_analysis.py
+++ b/dataset_analysis.py
@@ -55,7 +55,16 @@ if __name__ == "__main__":
    # number based menu to match file
    for file in annotated_dataset_list:
        print(f"{annotated_dataset_list.index(file)}: {file}")
-    _ = input("Enter the number of the annotated dataset to analyze: ")
-    path = annotated_dataset_list[int(_)]
-    counts = parse_annotated(path)
-    print(json.dumps(counts, indent=2))
+    print("a: all annotated datasets")
+    selection = input("Enter the number of the annotated dataset to analyze: ")
+    if selection.lower() == "a":
+        combined_counts = {label.name: 0 for label in TokenLabel}
+        for dataset_path in annotated_dataset_list:
+            counts = parse_annotated(dataset_path)
+            for label, value in counts.items():
+                combined_counts[label] += value
+        print(json.dumps(combined_counts, indent=2))
+    else:
+        path = annotated_dataset_list[int(selection)]
+        counts = parse_annotated(path)
+        print(json.dumps(counts, indent=2))