I can't use it at this point. I tried with both deepseek-r1:7b and llama3.2:3b and got bad results. This is most likely not the tool's fault, but an i7-8700 setup with no GPU being given a rather complex task: reading a folder and using an LLM to produce a summary of each .py file.
The great part is that it has a built-in debugging and error-correction loop. With either a powerful machine whose GPU has enough memory to hold a larger model, or an API key (and therefore money), I am sure it works.
It ran alongside other tasks, but had to make do with my small LLM models.
DeepSeek gave me the following after a few tries, and it works in a Python 3.13 environment:
#!/usr/bin/env python3
"""
Enhanced File Summarizer using Ollama (deepseek-r1:7b)
This script asks the user for a folder path, then processes all .py, .sh, and .json files
inside that folder. For each file, it generates a summary:
- Python files: parse the AST to extract imports, classes, functions, and docstrings,
then ask the LLM to summarise the purpose based on this structured information.
- Shell scripts: send the full script content (truncated to a safe length) with a prompt.
- JSON files: parse the JSON structure (keys, nesting) and send a condensed summary.
The summaries are saved in 'whoiswho.md' in the same folder, together with a timestamp.
"""
import os
import sys
import time
import ast
import json
import requests
from pathlib import Path
# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
OLLAMA_URL = "http://localhost:11434/api/generate"  # local Ollama REST endpoint (non-streaming /api/generate)
MODEL_NAME = "deepseek-r1:7b"  # Ollama model tag; must already be pulled locally
MAX_CONTENT_CHARS = 10000  # For non‑Python files, truncate to this many chars (safety)
NUM_PREDICT = 500  # maximum number of tokens the model may generate per summary
TEMPERATURE = 0.3  # low temperature for more deterministic, focused summaries
DEBUG = True  # Set to True to see debug output (full analysis preview)
# ----------------------------------------------------------------------
# Python file analysis (using AST visitor)
# ----------------------------------------------------------------------
class PythonAnalyzer(ast.NodeVisitor):
    """Visits an AST to collect imports, classes (with methods), and top-level functions.

    After ``visit(tree)`` the results are available on:
      - ``imports``:   source-like import strings ("import x", "from a import b as c")
      - ``classes``:   list of dicts {name, docstring, methods}
      - ``functions``: list of dicts {name, docstring} for top-level functions only
    """

    def __init__(self):
        self.imports = []
        self.classes = []  # list of dicts: {name, docstring, methods}
        self.functions = []  # list of dicts: {name, docstring}
        self.current_class = None  # name of the class we are currently inside

    def visit_Import(self, node):
        for alias in node.names:
            self.imports.append(f"import {alias.name}" + (f" as {alias.asname}" if alias.asname else ""))
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        # Relative imports ("from . import x") have node.module == None.
        module = node.module or ''
        names = [f"{alias.name}" + (f" as {alias.asname}" if alias.asname else "") for alias in node.names]
        self.imports.append(f"from {module} import {', '.join(names)}")
        self.generic_visit(node)

    def visit_ClassDef(self, node):
        # Store previous class context so nested classes restore correctly.
        prev_class = self.current_class
        self.current_class = node.name
        # Collect methods (functions inside the class).
        # BUG FIX: also match ast.AsyncFunctionDef — the original ignored
        # `async def` methods entirely.
        methods = []
        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                doc = ast.get_docstring(item) or ""
                methods.append(f" def {item.name}(...): {doc[:100] if doc else 'no docstring'}")
        doc = ast.get_docstring(node) or ""
        self.classes.append({
            "name": node.name,
            "docstring": doc[:200],
            "methods": methods
        })
        # Recurse into the class body (to handle nested classes, etc.)
        self.generic_visit(node)
        # Restore previous class context
        self.current_class = prev_class

    def visit_FunctionDef(self, node):
        # Only record functions that are not inside a class (top-level).
        if self.current_class is None:
            doc = ast.get_docstring(node) or ""
            self.functions.append({
                "name": node.name,
                "docstring": doc[:200]
            })
        # Recurse into the function body (nested functions are visited but
        # filtered out by the current_class check above).
        self.generic_visit(node)

    # BUG FIX: route `async def` through the same handler so top-level async
    # functions are recorded like their sync counterparts.
    visit_AsyncFunctionDef = visit_FunctionDef
def analyze_python_file(file_path):
    """Parse a Python file with ``ast`` and return a compact structural outline.

    The outline lists imports, classes (with their methods), and top-level
    functions. On read or parse failure a bracketed error marker string is
    returned instead, so callers can fall back to the raw file content.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            source = handle.read()
    except Exception as exc:
        return f"[ERROR reading file: {exc}]"
    try:
        tree = ast.parse(source)
    except SyntaxError as exc:
        return f"[SyntaxError in Python file: {exc}]"
    visitor = PythonAnalyzer()
    visitor.visit(tree)
    # Assemble a compact text representation for the LLM prompt.
    sections = []
    if visitor.imports:
        sections.append("Imports:")
        sections.extend(f"- {imp}" for imp in visitor.imports)
    if visitor.classes:
        sections.append("\nClasses:")
        for cls in visitor.classes:
            sections.append(f"- {cls['name']}: {cls['docstring']}")
            sections.extend(f" {method}" for method in cls['methods'])
    if visitor.functions:
        sections.append("\nTop‑level functions:")
        for fn in visitor.functions:
            sections.append(f"- {fn['name']}: {fn['docstring']}")
    outline = "\n".join(sections)
    return outline if outline.strip() else "No imports, classes, or functions found."
# ----------------------------------------------------------------------
# Helper functions for other file types
# ----------------------------------------------------------------------
def read_file_safely(file_path, max_chars=MAX_CONTENT_CHARS):
    """Return the file's text, truncated to ``max_chars`` characters if needed.

    Any read failure is converted into a bracketed error marker string rather
    than raising, so the result can always be embedded in a prompt.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        if max_chars and len(text) > max_chars:
            return text[:max_chars] + "\n... (truncated)"
        return text
    except Exception as exc:
        return f"[ERROR reading file: {exc}]"
def summarize_json(file_path):
    """Load a JSON file and describe its top-level structure as plain text.

    Dicts are listed key-by-key with value types (plus size hints for nested
    lists/dicts); arrays report their length and first-item type; scalars are
    shown directly. Read/parse failures yield a bracketed error marker.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)
    except Exception as exc:
        return f"[ERROR parsing JSON: {exc}]"
    if isinstance(data, dict):
        parts = ["Top-level keys:\n"]
        for key, value in data.items():
            typ = type(value).__name__
            parts.append(f"- {key} ({typ})\n")
            if typ == 'list' and len(value) > 0:
                parts.append(f" (list of {len(value)} items, first item type: {type(value[0]).__name__})\n")
            elif typ == 'dict' and len(value) > 0:
                parts.append(f" (dict with {len(value)} keys)\n")
        return "".join(parts)
    if isinstance(data, list):
        parts = [f"Top-level array of {len(data)} items.\n"]
        if data:
            parts.append(f"First item type: {type(data[0]).__name__}\n")
        return "".join(parts)
    return f"JSON data type: {type(data).__name__}\nValue: {str(data)[:200]}\n"
# ----------------------------------------------------------------------
# LLM summarization
# ----------------------------------------------------------------------
def summarize_with_ollama(file_name, content):
    """Ask the local Ollama server for a concise summary of ``content``.

    ``content`` may be raw text or a pre-digested structural outline.
    Returns the model's answer, a placeholder for empty input/output, or a
    bracketed error marker if the HTTP call fails.
    """
    if not content:
        return "[Empty content]"
    prompt = (
        f"Please provide a concise summary (a few sentences) of the file named '{file_name}'. "
        f"Focus on its purpose, main functionality, and key components.\n\n"
        f"Content to summarise:\n{content}\n\n"
        f"Summary:"
    )
    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,  # request one complete JSON response instead of a stream
        "options": {
            "num_predict": NUM_PREDICT,
            "temperature": TEMPERATURE
        }
    }
    try:
        if DEBUG:
            print(f" [DEBUG] Sending request to Ollama for {file_name}...")
        # timeout=None: local CPU-only models can take minutes, so wait indefinitely.
        reply = requests.post(OLLAMA_URL, json=payload, timeout=None)
        reply.raise_for_status()
        summary = reply.json().get("response", "").strip()
        # Models sometimes echo the "Summary:" label back; strip it off.
        if summary.lower().startswith("summary:"):
            remainder = summary[len("summary:"):].strip()
            if remainder:
                summary = remainder
        return summary or "[No summary generated]"
    except Exception as exc:
        return f"[ERROR calling Ollama: {exc}]"
# ----------------------------------------------------------------------
# Main processing
# ----------------------------------------------------------------------
def process_file(file_path):
    """Analyze one file (chosen by extension) and return its LLM-generated summary.

    .py files are condensed via AST analysis, .sh files are sent (truncated)
    as-is, .json files get a structural digest. If the type-specific analysis
    fails, the raw (truncated) file content is used as a fallback.
    """
    ext = file_path.suffix.lower()
    if ext == '.py':
        content = analyze_python_file(file_path)
        if DEBUG:
            # Show the analysis result (up to 1000 chars) for debugging.
            preview = content[:1000] + ("..." if len(content) > 1000 else "")
            print(f" [DEBUG] Python analysis result (first 1000 chars):\n{preview}\n")
    elif ext == '.sh':
        content = read_file_safely(file_path, MAX_CONTENT_CHARS)
    elif ext == '.json':
        content = summarize_json(file_path)
    else:
        content = "Unsupported file type."
    # BUG FIX: the original only fell back on "[ERROR..." markers, so Python
    # files with syntax errors (marker "[SyntaxError...") were never retried
    # as raw text despite the stated intent. Fall back on either marker.
    if not content or content.startswith(("[ERROR", "[SyntaxError")):
        content = read_file_safely(file_path, MAX_CONTENT_CHARS)
    summary = summarize_with_ollama(file_path.name, content)
    return summary
def ask_folder():
    """Prompt repeatedly until the user enters an existing directory; return it as a Path."""
    while True:
        raw = input("Enter the folder path to summarize: ").strip()
        if not raw:
            print("No path entered. Please try again.")
            continue
        candidate = Path(raw).expanduser().resolve()
        if candidate.is_dir():
            return candidate
        print(f"Error: '{candidate}' is not a valid directory.")
def is_target_file(filename):
    """Return True if ``filename`` (a Path) has one of the supported extensions."""
    supported = {'.py', '.sh', '.json'}
    return filename.suffix.lower() in supported
def generate_markdown(folder_path, summaries):
    """Render the collected ``(path, summary)`` pairs as one Markdown document."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    parts = [
        f"# File Summaries – {folder_path.name}",
        "",
        f"**Generated:** {timestamp}",
        "",
        "## Files",
        "",
    ]
    # One section per file, separated by horizontal rules.
    for path, summary in summaries:
        parts.extend([f"### `{path.name}`", "", summary, "", "---", ""])
    return "\n".join(parts)
def main():
    """Entry point: pick a folder, summarize each supported file, write whoiswho.md."""
    print("=== Enhanced File Summarizer with Ollama (deepseek-r1:7b) ===\n")
    folder = ask_folder()
    targets = [entry for entry in folder.iterdir() if entry.is_file() and is_target_file(entry)]
    if not targets:
        print("No .py, .sh, or .json files found in the folder.")
        return
    total = len(targets)
    print(f"Found {total} file(s) to summarize.\n")
    results = []
    for position, path in enumerate(targets, 1):
        print(f"[{position}/{total}] Processing {path.name} ...")
        file_summary = process_file(path)
        results.append((path, file_summary))
        print(f" [DONE] Summary length: {len(file_summary)} chars\n")
    destination = folder / "whoiswho.md"
    report = generate_markdown(folder, results)
    try:
        with open(destination, 'w', encoding='utf-8') as handle:
            handle.write(report)
        print(f"\nSummaries saved to: {destination}")
    except Exception as exc:
        print(f"Error writing output file: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()
"""
Enhanced File Summarizer using Ollama (deepseek-r1:7b)
This script asks the user for a folder path, then processes all .py, .sh, and .json files
inside that folder. For each file, it generates a summary:
- Python files: parse the AST to extract imports, classes, functions, and docstrings,
then ask the LLM to summarise the purpose based on this structured information.
- Shell scripts: send the full script content (truncated to a safe length) with a prompt.
- JSON files: parse the JSON structure (keys, nesting) and send a condensed summary.
The summaries are saved in 'whoiswho.md' in the same folder, together with a timestamp.
"""
import os
import sys
import time
import ast
import json
import requests
from pathlib import Path
# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
OLLAMA_URL = "http://localhost:11434/api/generate"  # local Ollama REST endpoint (non-streaming /api/generate)
MODEL_NAME = "deepseek-r1:7b"  # Ollama model tag; must already be pulled locally
MAX_CONTENT_CHARS = 10000  # For non‑Python files, truncate to this many chars (safety)
NUM_PREDICT = 500  # maximum number of tokens the model may generate per summary
TEMPERATURE = 0.3  # low temperature for more deterministic, focused summaries
DEBUG = True  # Set to True to see debug output (full analysis preview)
# ----------------------------------------------------------------------
# Python file analysis (using AST visitor)
# ----------------------------------------------------------------------
class PythonAnalyzer(ast.NodeVisitor):
    """Visits an AST to collect imports, classes (with methods), and top-level functions.

    After ``visit(tree)`` the results are available on:
      - ``imports``:   source-like import strings ("import x", "from a import b as c")
      - ``classes``:   list of dicts {name, docstring, methods}
      - ``functions``: list of dicts {name, docstring} for top-level functions only
    """

    def __init__(self):
        self.imports = []
        self.classes = []  # list of dicts: {name, docstring, methods}
        self.functions = []  # list of dicts: {name, docstring}
        self.current_class = None  # name of the class we are currently inside

    def visit_Import(self, node):
        for alias in node.names:
            self.imports.append(f"import {alias.name}" + (f" as {alias.asname}" if alias.asname else ""))
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        # Relative imports ("from . import x") have node.module == None.
        module = node.module or ''
        names = [f"{alias.name}" + (f" as {alias.asname}" if alias.asname else "") for alias in node.names]
        self.imports.append(f"from {module} import {', '.join(names)}")
        self.generic_visit(node)

    def visit_ClassDef(self, node):
        # Store previous class context so nested classes restore correctly.
        prev_class = self.current_class
        self.current_class = node.name
        # Collect methods (functions inside the class).
        # BUG FIX: also match ast.AsyncFunctionDef — the original ignored
        # `async def` methods entirely.
        methods = []
        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                doc = ast.get_docstring(item) or ""
                methods.append(f" def {item.name}(...): {doc[:100] if doc else 'no docstring'}")
        doc = ast.get_docstring(node) or ""
        self.classes.append({
            "name": node.name,
            "docstring": doc[:200],
            "methods": methods
        })
        # Recurse into the class body (to handle nested classes, etc.)
        self.generic_visit(node)
        # Restore previous class context
        self.current_class = prev_class

    def visit_FunctionDef(self, node):
        # Only record functions that are not inside a class (top-level).
        if self.current_class is None:
            doc = ast.get_docstring(node) or ""
            self.functions.append({
                "name": node.name,
                "docstring": doc[:200]
            })
        # Recurse into the function body (nested functions are visited but
        # filtered out by the current_class check above).
        self.generic_visit(node)

    # BUG FIX: route `async def` through the same handler so top-level async
    # functions are recorded like their sync counterparts.
    visit_AsyncFunctionDef = visit_FunctionDef
def analyze_python_file(file_path):
    """Parse a Python file with ``ast`` and return a compact structural outline.

    The outline lists imports, classes (with their methods), and top-level
    functions. On read or parse failure a bracketed error marker string is
    returned instead, so callers can fall back to the raw file content.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            source = handle.read()
    except Exception as exc:
        return f"[ERROR reading file: {exc}]"
    try:
        tree = ast.parse(source)
    except SyntaxError as exc:
        return f"[SyntaxError in Python file: {exc}]"
    visitor = PythonAnalyzer()
    visitor.visit(tree)
    # Assemble a compact text representation for the LLM prompt.
    sections = []
    if visitor.imports:
        sections.append("Imports:")
        sections.extend(f"- {imp}" for imp in visitor.imports)
    if visitor.classes:
        sections.append("\nClasses:")
        for cls in visitor.classes:
            sections.append(f"- {cls['name']}: {cls['docstring']}")
            sections.extend(f" {method}" for method in cls['methods'])
    if visitor.functions:
        sections.append("\nTop‑level functions:")
        for fn in visitor.functions:
            sections.append(f"- {fn['name']}: {fn['docstring']}")
    outline = "\n".join(sections)
    return outline if outline.strip() else "No imports, classes, or functions found."
# ----------------------------------------------------------------------
# Helper functions for other file types
# ----------------------------------------------------------------------
def read_file_safely(file_path, max_chars=MAX_CONTENT_CHARS):
    """Return the file's text, truncated to ``max_chars`` characters if needed.

    Any read failure is converted into a bracketed error marker string rather
    than raising, so the result can always be embedded in a prompt.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            text = handle.read()
        if max_chars and len(text) > max_chars:
            return text[:max_chars] + "\n... (truncated)"
        return text
    except Exception as exc:
        return f"[ERROR reading file: {exc}]"
def summarize_json(file_path):
    """Load a JSON file and describe its top-level structure as plain text.

    Dicts are listed key-by-key with value types (plus size hints for nested
    lists/dicts); arrays report their length and first-item type; scalars are
    shown directly. Read/parse failures yield a bracketed error marker.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            data = json.load(handle)
    except Exception as exc:
        return f"[ERROR parsing JSON: {exc}]"
    if isinstance(data, dict):
        parts = ["Top-level keys:\n"]
        for key, value in data.items():
            typ = type(value).__name__
            parts.append(f"- {key} ({typ})\n")
            if typ == 'list' and len(value) > 0:
                parts.append(f" (list of {len(value)} items, first item type: {type(value[0]).__name__})\n")
            elif typ == 'dict' and len(value) > 0:
                parts.append(f" (dict with {len(value)} keys)\n")
        return "".join(parts)
    if isinstance(data, list):
        parts = [f"Top-level array of {len(data)} items.\n"]
        if data:
            parts.append(f"First item type: {type(data[0]).__name__}\n")
        return "".join(parts)
    return f"JSON data type: {type(data).__name__}\nValue: {str(data)[:200]}\n"
# ----------------------------------------------------------------------
# LLM summarization
# ----------------------------------------------------------------------
def summarize_with_ollama(file_name, content):
    """Ask the local Ollama server for a concise summary of ``content``.

    ``content`` may be raw text or a pre-digested structural outline.
    Returns the model's answer, a placeholder for empty input/output, or a
    bracketed error marker if the HTTP call fails.
    """
    if not content:
        return "[Empty content]"
    prompt = (
        f"Please provide a concise summary (a few sentences) of the file named '{file_name}'. "
        f"Focus on its purpose, main functionality, and key components.\n\n"
        f"Content to summarise:\n{content}\n\n"
        f"Summary:"
    )
    payload = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,  # request one complete JSON response instead of a stream
        "options": {
            "num_predict": NUM_PREDICT,
            "temperature": TEMPERATURE
        }
    }
    try:
        if DEBUG:
            print(f" [DEBUG] Sending request to Ollama for {file_name}...")
        # timeout=None: local CPU-only models can take minutes, so wait indefinitely.
        reply = requests.post(OLLAMA_URL, json=payload, timeout=None)
        reply.raise_for_status()
        summary = reply.json().get("response", "").strip()
        # Models sometimes echo the "Summary:" label back; strip it off.
        if summary.lower().startswith("summary:"):
            remainder = summary[len("summary:"):].strip()
            if remainder:
                summary = remainder
        return summary or "[No summary generated]"
    except Exception as exc:
        return f"[ERROR calling Ollama: {exc}]"
# ----------------------------------------------------------------------
# Main processing
# ----------------------------------------------------------------------
def process_file(file_path):
    """Analyze one file (chosen by extension) and return its LLM-generated summary.

    .py files are condensed via AST analysis, .sh files are sent (truncated)
    as-is, .json files get a structural digest. If the type-specific analysis
    fails, the raw (truncated) file content is used as a fallback.
    """
    ext = file_path.suffix.lower()
    if ext == '.py':
        content = analyze_python_file(file_path)
        if DEBUG:
            # Show the analysis result (up to 1000 chars) for debugging.
            preview = content[:1000] + ("..." if len(content) > 1000 else "")
            print(f" [DEBUG] Python analysis result (first 1000 chars):\n{preview}\n")
    elif ext == '.sh':
        content = read_file_safely(file_path, MAX_CONTENT_CHARS)
    elif ext == '.json':
        content = summarize_json(file_path)
    else:
        content = "Unsupported file type."
    # BUG FIX: the original only fell back on "[ERROR..." markers, so Python
    # files with syntax errors (marker "[SyntaxError...") were never retried
    # as raw text despite the stated intent. Fall back on either marker.
    if not content or content.startswith(("[ERROR", "[SyntaxError")):
        content = read_file_safely(file_path, MAX_CONTENT_CHARS)
    summary = summarize_with_ollama(file_path.name, content)
    return summary
def ask_folder():
    """Prompt repeatedly until the user enters an existing directory; return it as a Path."""
    while True:
        raw = input("Enter the folder path to summarize: ").strip()
        if not raw:
            print("No path entered. Please try again.")
            continue
        candidate = Path(raw).expanduser().resolve()
        if candidate.is_dir():
            return candidate
        print(f"Error: '{candidate}' is not a valid directory.")
def is_target_file(filename):
    """Return True if ``filename`` (a Path) has one of the supported extensions."""
    supported = {'.py', '.sh', '.json'}
    return filename.suffix.lower() in supported
def generate_markdown(folder_path, summaries):
    """Render the collected ``(path, summary)`` pairs as one Markdown document."""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    parts = [
        f"# File Summaries – {folder_path.name}",
        "",
        f"**Generated:** {timestamp}",
        "",
        "## Files",
        "",
    ]
    # One section per file, separated by horizontal rules.
    for path, summary in summaries:
        parts.extend([f"### `{path.name}`", "", summary, "", "---", ""])
    return "\n".join(parts)
def main():
    """Entry point: pick a folder, summarize each supported file, write whoiswho.md."""
    print("=== Enhanced File Summarizer with Ollama (deepseek-r1:7b) ===\n")
    folder = ask_folder()
    targets = [entry for entry in folder.iterdir() if entry.is_file() and is_target_file(entry)]
    if not targets:
        print("No .py, .sh, or .json files found in the folder.")
        return
    total = len(targets)
    print(f"Found {total} file(s) to summarize.\n")
    results = []
    for position, path in enumerate(targets, 1):
        print(f"[{position}/{total}] Processing {path.name} ...")
        file_summary = process_file(path)
        results.append((path, file_summary))
        print(f" [DONE] Summary length: {len(file_summary)} chars\n")
    destination = folder / "whoiswho.md"
    report = generate_markdown(folder, results)
    try:
        with open(destination, 'w', encoding='utf-8') as handle:
            handle.write(report)
        print(f"\nSummaries saved to: {destination}")
    except Exception as exc:
        print(f"Error writing output file: {exc}")
        sys.exit(1)


if __name__ == "__main__":
    main()
#cyberpunkcoltoure