# Feed every ``*.log`` file directly under data/raw/logs to the extractor.
extractor = CursorExtractor(schema)
# Sorted so files are processed in a reproducible order (glob order is
# filesystem-dependent).
for log_file in sorted(Path("data/raw/logs").glob("*.log")):
    # Decode explicitly instead of relying on the platform default; a stray
    # undecodable byte in one log should not abort the whole batch.
    content = log_file.read_text(encoding="utf-8", errors="replace")
    extractor.extract_from_text(content, str(log_file))
def __init__(self, schema: Dict[str, str]):
    """Store the extraction schema and start with an empty result list.

    Args:
        schema: Mapping of field name to the regex pattern for that field.
    """
    self.schema = schema  # field -> regex pattern
    # Must be a real statement, not part of the comment above: save()
    # serialises self.results.
    self.results = []
import json
import re
from pathlib import Path
from typing import Any, Dict


class CursorExtractor:
    """Hybrid regex + placeholder for AI refinement."""
def save(self, output_path: str) -> None:
    """Write ``self.results`` to ``output_path`` as indented JSON.

    Args:
        output_path: Destination file. Missing parent directories are
            created; an existing file is overwritten.
    """
    destination = Path(output_path)
    # open(..., "w") does not create directories, so make sure they exist.
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("w", encoding="utf-8") as f:
        json.dump(self.results, f, indent=2)


# Field name -> regex pattern; each pattern has a single capture group
# around the value of interest.
schema = {
    # ISO-8601 UTC timestamp with fractional seconds, e.g.
    # 2024-05-01T12:34:56.789Z. Quantifier braces restored and the dot
    # escaped so it only matches a literal ".".
    "timestamp": r"(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)",
    "request_id": r"RequestId: ([a-f0-9-]+)",
    # Decimal duration; the dot is escaped so it cannot match arbitrary
    # characters between digits.
    "duration_ms": r"Duration: (\d+\.\d+) ms",
    "memory_mb": r"MemorySize: (\d+) MB",
}
# Dump the extractor's collected results as JSON to the output file.
extractor.save("extractor/output/structured_logs.json")
3D Metallica Through The Never And Justice For All Bay Area Thrash Metal Big Four of Thrash Metal Black Album Cinematography Cliff Burton Covers and Tributes Dave Mustaine David Ellefson Decibel Exodus Gary Holt Guitars and Amps Happy Birthday Hardwired... To Self-Destruct Heavy Press Release Iron Report James Hetfield Jason Newsted Kerry King Kill'Em All Killing Covers Kirk Hammett Lars Ulrich Lou Reed & Metallica (Loutallica) Master Of Puppets Merch / Metallica Store Metal Hammer Metallica Library Metbash.ru Money Proshot R.I.P. Ride the Lightning Robert Trujillo Scott Ian Side Projects Television Testament Знаменитости о Metallica Концерты Metallica в России Метальный дуэт Приколы Хобби и искусство