Friday, 27 March 2026

AI - Status Update

 open-interpreter? 

I can't use it at this point. I tried with both deepseek-r1:7b and llama3.2:3b, with bad results. This is most likely not the tool's fault, but rather an i7-8700 setup with no GPU being given the rather complex task of reading a folder and using an LLM to produce a summary of each .py file.
 
The great part is that it has a built-in debugging and error-correction loop; with either a larger machine able to hold a bigger model in GPU RAM, or an API key (and therefore money), I am sure it works.
It ran alongside other tasks, but had to make do with my small LLM models.

DeepSeek gave me the following; after some tries, it works in a Python 3.13 environment:
 
 #!/usr/bin/env python3
"""
Enhanced File Summarizer using Ollama (deepseek-r1:7b)

This script asks the user for a folder path, then processes all .py, .sh, and .json files
inside that folder. For each file, it generates a summary:

- Python files: parse the AST to extract imports, classes, functions, and docstrings,
  then ask the LLM to summarise the purpose based on this structured information.
- Shell scripts: send the full script content (truncated to a safe length) with a prompt.
- JSON files: parse the JSON structure (keys, nesting) and send a condensed summary.

The summaries are saved in 'whoiswho.md' in the same folder, together with a timestamp.
"""

import os
import sys
import time
import ast
import json
import requests
from pathlib import Path

# ----------------------------------------------------------------------
# Configuration
# ----------------------------------------------------------------------
OLLAMA_URL = "http://localhost:11434/api/generate"  # local Ollama "generate" endpoint
MODEL_NAME = "deepseek-r1:7b"     # model tag passed to Ollama with every request
MAX_CONTENT_CHARS = 10000         # For non‑Python files, truncate to this many chars (safety)
NUM_PREDICT = 500                 # upper bound on tokens the model may generate per summary
TEMPERATURE = 0.3                 # low temperature -> more deterministic, focused summaries
DEBUG = True                      # Set to True to see debug output (full analysis preview)

# ----------------------------------------------------------------------
# Python file analysis (using AST visitor)
# ----------------------------------------------------------------------
class PythonAnalyzer(ast.NodeVisitor):
    """Visits an AST to collect imports, classes (with methods), and top‑level functions.

    Results are accumulated in three attributes:
      imports   -- list of "import x" / "from x import y" strings
      classes   -- list of dicts: {name, docstring, methods}
      functions -- list of dicts: {name, docstring} (genuinely top-level only)
    """
    def __init__(self):
        self.imports = []
        self.classes = []          # list of dicts: {name, docstring, methods}
        self.functions = []        # list of dicts: {name, docstring}
        self.current_class = None  # name of the class we are currently inside
        self._func_depth = 0       # >0 while inside any function body (see _record_function)

    def visit_Import(self, node):
        for alias in node.names:
            self.imports.append(f"import {alias.name}" + (f" as {alias.asname}" if alias.asname else ""))
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        module = node.module or ''
        names = [f"{alias.name}" + (f" as {alias.asname}" if alias.asname else "") for alias in node.names]
        self.imports.append(f"from {module} import {', '.join(names)}")
        self.generic_visit(node)

    def visit_ClassDef(self, node):
        # Store previous class context (handles nested classes correctly)
        prev_class = self.current_class
        self.current_class = node.name

        # Collect methods: both sync and async defs directly in the class body.
        # (Fix: the original only looked at ast.FunctionDef, dropping async methods.)
        methods = []
        for item in node.body:
            if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                doc = ast.get_docstring(item) or ""
                methods.append(f"  def {item.name}(...): {doc[:100] if doc else 'no docstring'}")

        doc = ast.get_docstring(node) or ""
        self.classes.append({
            "name": node.name,
            "docstring": doc[:200],
            "methods": methods
        })

        # Recurse into the class body (to handle nested classes, etc.)
        self.generic_visit(node)

        # Restore previous class context
        self.current_class = prev_class

    def _record_function(self, node):
        """Record *node* as top-level when it is neither inside a class nor a function.

        Fix: the original recursed without tracking function nesting, so a
        function defined inside another top-level function was wrongly
        reported as top-level. The depth counter closes that hole.
        """
        if self.current_class is None and self._func_depth == 0:
            doc = ast.get_docstring(node) or ""
            self.functions.append({
                "name": node.name,
                "docstring": doc[:200]
            })
        self._func_depth += 1
        try:
            self.generic_visit(node)
        finally:
            self._func_depth -= 1

    def visit_FunctionDef(self, node):
        self._record_function(node)

    def visit_AsyncFunctionDef(self, node):
        # Fix: async top-level functions were previously invisible to the report.
        self._record_function(node)

def analyze_python_file(file_path):
    """
    Parse a Python file with the ast module and return a structured text
    summary (imports, classes with their methods, top-level functions).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as fh:
            source = fh.read()
    except Exception as exc:
        return f"[ERROR reading file: {exc}]"

    try:
        tree = ast.parse(source)
    except SyntaxError as exc:
        return f"[SyntaxError in Python file: {exc}]"

    visitor = PythonAnalyzer()
    visitor.visit(tree)

    # Assemble a compact, human-readable report for the LLM prompt.
    parts = []
    if visitor.imports:
        parts.append("Imports:")
        parts.extend(f"- {imp}" for imp in visitor.imports)
    if visitor.classes:
        parts.append("\nClasses:")
        for cls in visitor.classes:
            parts.append(f"- {cls['name']}: {cls['docstring']}")
            parts.extend(f"  {method}" for method in cls['methods'])
    if visitor.functions:
        parts.append("\nTop‑level functions:")
        parts.extend(f"- {fn['name']}: {fn['docstring']}" for fn in visitor.functions)

    report = "\n".join(parts)
    return report if report.strip() else "No imports, classes, or functions found."

# ----------------------------------------------------------------------
# Helper functions for other file types
# ----------------------------------------------------------------------
def read_file_safely(file_path, max_chars=MAX_CONTENT_CHARS):
    """Return the file's text, truncated to *max_chars* with a marker if too long."""
    try:
        with open(file_path, 'r', encoding='utf-8') as fh:
            text = fh.read()
    except Exception as exc:
        return f"[ERROR reading file: {exc}]"
    if max_chars and len(text) > max_chars:
        return text[:max_chars] + "\n... (truncated)"
    return text

def summarize_json(file_path):
    """Parse a JSON file and describe its top-level structure as text."""
    try:
        with open(file_path, 'r', encoding='utf-8') as fh:
            data = json.load(fh)
    except Exception as exc:
        return f"[ERROR parsing JSON: {exc}]"

    if isinstance(data, dict):
        # One line per top-level key, plus a size hint for containers.
        pieces = ["Top-level keys:\n"]
        for key, value in data.items():
            kind = type(value).__name__
            pieces.append(f"- {key} ({kind})\n")
            if kind == 'list' and len(value) > 0:
                pieces.append(f"  (list of {len(value)} items, first item type: {type(value[0]).__name__})\n")
            elif kind == 'dict' and len(value) > 0:
                pieces.append(f"  (dict with {len(value)} keys)\n")
        return "".join(pieces)

    if isinstance(data, list):
        text = f"Top-level array of {len(data)} items.\n"
        if data:
            text += f"First item type: {type(data[0]).__name__}\n"
        return text

    # Scalar document (string, number, bool, null).
    return f"JSON data type: {type(data).__name__}\nValue: {str(data)[:200]}\n"

# ----------------------------------------------------------------------
# LLM summarization
# ----------------------------------------------------------------------
def summarize_with_ollama(file_name, content):
    """
    Send *content* (structured summary or raw text) to the local Ollama API
    and return the model's concise summary, or an error/placeholder string.
    """
    if not content:
        return "[Empty content]"

    prompt = (
        f"Please provide a concise summary (a few sentences) of the file named '{file_name}'. "
        f"Focus on its purpose, main functionality, and key components.\n\n"
        f"Content to summarise:\n{content}\n\n"
        f"Summary:"
    )

    request_body = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,
        "options": {
            "num_predict": NUM_PREDICT,
            "temperature": TEMPERATURE
        }
    }

    try:
        if DEBUG:
            print(f"  [DEBUG] Sending request to Ollama for {file_name}...")
        # timeout=None: wait indefinitely for the slow, CPU-only model.
        reply = requests.post(OLLAMA_URL, json=request_body, timeout=None)
        reply.raise_for_status()
        answer = reply.json().get("response", "").strip()
    except Exception as exc:
        return f"[ERROR calling Ollama: {exc}]"

    # Strip a leading "Summary:" echo, unless doing so would leave nothing.
    if answer.lower().startswith("summary:"):
        remainder = answer[len("summary:"):].strip()
        if remainder:
            answer = remainder
    return answer or "[No summary generated]"

# ----------------------------------------------------------------------
# Main processing
# ----------------------------------------------------------------------
def process_file(file_path):
    """Analyze one file according to its extension and return the LLM summary."""
    suffix = file_path.suffix.lower()
    if suffix == '.py':
        analysis = analyze_python_file(file_path)
        if DEBUG:
            # Show up to 1000 chars of the structured analysis for debugging.
            preview = analysis[:1000] + ("..." if len(analysis) > 1000 else "")
            print(f"  [DEBUG] Python analysis result (first 1000 chars):\n{preview}\n")
    elif suffix == '.sh':
        analysis = read_file_safely(file_path, MAX_CONTENT_CHARS)
    elif suffix == '.json':
        analysis = summarize_json(file_path)
    else:
        analysis = "Unsupported file type."

    # Fall back to the raw file content when the structured analysis failed.
    if not analysis or analysis.startswith("[ERROR"):
        analysis = read_file_safely(file_path, MAX_CONTENT_CHARS)

    return summarize_with_ollama(file_path.name, analysis)

def ask_folder():
    """Prompt until the user supplies an existing directory; return it as a Path."""
    while True:
        raw = input("Enter the folder path to summarize: ").strip()
        if not raw:
            print("No path entered. Please try again.")
            continue
        candidate = Path(raw).expanduser().resolve()
        if candidate.is_dir():
            return candidate
        print(f"Error: '{candidate}' is not a valid directory.")

def is_target_file(filename):
    """Return True when *filename* (a Path) has one of the supported extensions."""
    return filename.suffix.lower() in ('.py', '.sh', '.json')

def generate_markdown(folder_path, summaries):
    """Render the collected (path, summary) pairs as one markdown document."""
    header = [
        f"# File Summaries – {folder_path.name}",
        "",
        f"**Generated:** {time.strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "## Files",
        ""
    ]
    body = []
    for path, text in summaries:
        body.extend([f"### `{path.name}`", "", text, "", "---", ""])
    return "\n".join(header + body)

def main():
    """Interactive entry point: pick a folder, summarize files, write whoiswho.md."""
    print("=== Enhanced File Summarizer with Ollama (deepseek-r1:7b) ===\n")
    folder = ask_folder()

    targets = [entry for entry in folder.iterdir()
               if entry.is_file() and is_target_file(entry)]
    if not targets:
        print("No .py, .sh, or .json files found in the folder.")
        return

    print(f"Found {len(targets)} file(s) to summarize.\n")

    results = []
    for idx, entry in enumerate(targets, 1):
        print(f"[{idx}/{len(targets)}] Processing {entry.name} ...")
        text = process_file(entry)
        results.append((entry, text))
        print(f"  [DONE] Summary length: {len(text)} chars\n")

    destination = folder / "whoiswho.md"
    markdown_content = generate_markdown(folder, results)
    try:
        with open(destination, 'w', encoding='utf-8') as fh:
            fh.write(markdown_content)
        print(f"\nSummaries saved to: {destination}")
    except Exception as exc:
        print(f"Error writing output file: {exc}")
        sys.exit(1)

# Run interactively only when executed as a script (not when imported).
if __name__ == "__main__":
    main()
 
#cyberpunkcoltoure