218 lines
7.1 KiB
Python
218 lines
7.1 KiB
Python
# pyright: reportUnknownMemberType=false
|
|
import os
|
|
from abc import ABC, abstractmethod
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from dataclasses import dataclass
|
|
from typing import Dict, List, Optional
|
|
|
|
from ci.util.paths import PROJECT_ROOT
|
|
|
|
|
|
# Root of the project's source tree, resolved relative to the repository root.
SRC_ROOT = PROJECT_ROOT / "src"

# Thread-pool width: NO_PARALLEL forces serial execution (useful when
# debugging); otherwise oversubscribe the CPUs by 4x, since the pools below
# do I/O-bound file reading rather than CPU work.
NUM_WORKERS = 1 if os.environ.get("NO_PARALLEL") else (os.cpu_count() or 1) * 4

# File basenames to exclude from checks.
# NOTE(review): not referenced anywhere in this module — presumably consumed
# by individual checker implementations; confirm before removing.
EXCLUDED_FILES = [
    "stub_main.cpp",
]
|
|
|
|
|
|
@dataclass
class FileContent:
    """A file's path together with its raw text and a per-line split.

    Callers may pass ``lines=[]``; the split is then derived from ``content``
    so every checker sees a consistent line view without re-splitting.
    """

    path: str
    content: str
    lines: List[str]

    def __post_init__(self):
        # Fall back to splitting the raw content when no line list was given;
        # a caller-supplied non-empty list is kept untouched.
        self.lines = self.lines or self.content.splitlines()
|
|
|
|
|
|
class FileContentChecker(ABC):
    """Interface for content checks that run over already-read files.

    Implementations are first asked to filter by path via
    ``should_process_file``; only files at least one checker accepts are read
    from disk and handed to ``check_file_content``.
    """

    @abstractmethod
    def should_process_file(self, file_path: str) -> bool:
        """Decide whether this checker wants to inspect a file.

        Args:
            file_path: Path to the candidate file

        Returns:
            True if the file should be processed, False otherwise
        """
        ...

    @abstractmethod
    def check_file_content(self, file_content: FileContent) -> List[str]:
        """Inspect an already-loaded file and report any problems.

        Args:
            file_content: FileContent object containing path, content, and lines

        Returns:
            List of error messages, empty if no issues found
        """
        ...
|
|
|
|
|
|
class MultiCheckerFileProcessor:
    """Runs several FileContentChecker instances over a set of files.

    Each file is read from disk at most once regardless of how many checkers
    are interested in it; results are grouped by checker class name.
    """

    def process_files_with_checkers(
        self, file_paths: List[str], checkers: List[FileContentChecker]
    ) -> Dict[str, List[str]]:
        """Process files with multiple checkers.

        Args:
            file_paths: List of file paths to process
            checkers: List of checker instances to run on the files

        Returns:
            Dictionary mapping checker class name to list of issues found
            (read errors and checker crashes are reported as issues too)
        """
        # One bucket per checker, keyed by class name, even if it never runs.
        results: Dict[str, List[str]] = {
            checker.__class__.__name__: [] for checker in checkers
        }

        for file_path in file_paths:
            # Only read the file if at least one checker wants it.
            interested_checkers = [
                checker
                for checker in checkers
                if checker.should_process_file(file_path)
            ]
            if not interested_checkers:
                continue

            # The try covers only the read, so a failure here is genuinely a
            # file-read error and affects every interested checker equally.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except Exception as e:
                error_msg = f"Error reading file {file_path}: {str(e)}"
                for checker in interested_checkers:
                    results[checker.__class__.__name__].append(error_msg)
                continue

            # Read once, share the same FileContent with every checker.
            file_content = FileContent(
                path=file_path, content=content, lines=content.splitlines()
            )

            for checker in interested_checkers:
                checker_name = checker.__class__.__name__
                try:
                    issues = checker.check_file_content(file_content)
                except Exception as e:
                    # Bug fix: a crash inside one checker used to be caught by
                    # the read try-block and mislabeled as a read error against
                    # *all* interested checkers; attribute it correctly.
                    results[checker_name].append(
                        f"Error running {checker_name} on {file_path}: {str(e)}"
                    )
                else:
                    results[checker_name].extend(issues)

        return results
|
|
|
|
|
|
# Legacy compatibility classes
|
|
class FileProcessorCallback(FileContentChecker):
    """Backward-compatibility shim exposing the pre-FileContent call shape.

    Older call sites pass (path, content) separately; this adapter wraps the
    pair into a FileContent and forwards to the modern hook.
    """

    def check_file_content_legacy(self, file_path: str, content: str) -> List[str]:
        """Adapt the legacy (path, content) signature to check_file_content."""
        # lines=[] lets FileContent.__post_init__ derive the line split itself.
        wrapped = FileContent(path=file_path, content=content, lines=[])
        return self.check_file_content(wrapped)
|
|
|
|
|
|
class GenericFileSearcher:
    """Walks a directory tree and runs one callback checker over every file.

    Reads are fanned out across a thread pool since the work is I/O bound.
    """

    def __init__(self, max_workers: Optional[int] = None):
        # Fall back to the module-wide default pool size when unspecified.
        self.max_workers = max_workers or NUM_WORKERS

    def search_directory(
        self, start_dir: str, callback: FileProcessorCallback
    ) -> List[str]:
        """Search a directory and process files using the provided callback.

        Args:
            start_dir: Directory to start searching from
            callback: Callback class to handle file processing

        Returns:
            List of all issues found across all files
        """
        # Gather every path the callback declares interest in.
        candidates: List[str] = [
            path
            for root, _, names in os.walk(start_dir)
            for name in names
            if callback.should_process_file(path := os.path.join(root, name))
        ]

        # Fan the reads out across the pool; map yields in input order, so
        # issues are accumulated in the same order the files were found.
        all_issues: List[str] = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as pool:
            for found in pool.map(
                lambda path: self._process_single_file(path, callback),
                candidates,
            ):
                all_issues.extend(found)

        return all_issues

    def _process_single_file(
        self, file_path: str, callback: FileProcessorCallback
    ) -> List[str]:
        """Read one file and return the callback's issues for it.

        Args:
            file_path: Path to the file to process
            callback: Callback to use for processing

        Returns:
            List of issues found in this file; a single error entry when the
            file could not be read or checked
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
            # lines=[]: FileContent derives the split in __post_init__.
            return callback.check_file_content(
                FileContent(path=file_path, content=text, lines=[])
            )
        except Exception as e:
            return [f"Error processing file {file_path}: {str(e)}"]
|
|
|
|
|
|
def collect_files_to_check(
    test_directories: List[str], extensions: Optional[List[str]] = None
) -> List[str]:
    """Collect all files to check from the given directories.

    Args:
        test_directories: Directories to walk recursively; non-existent
            entries are silently skipped
        extensions: File suffixes to accept; defaults to C++ sources/headers

    Returns:
        Paths of every matching file under the given directories, plus
        matching files sitting directly in SRC_ROOT (non-recursive)
    """
    if extensions is None:
        extensions = [".cpp", ".h", ".hpp"]
    # str.endswith accepts a tuple of suffixes: one C-level call per file
    # instead of a fresh any() generator.
    suffixes = tuple(extensions)

    files_to_check: List[str] = []

    # Search each directory recursively.
    for directory in test_directories:
        if os.path.exists(directory):
            for root, _, files in os.walk(directory):
                files_to_check.extend(
                    os.path.join(root, name)
                    for name in files
                    if name.endswith(suffixes)
                )

    # Also check the main src directory files (not subdirectories).
    # Guarded like the directories above: previously a missing SRC_ROOT made
    # os.listdir raise while every other path was existence-checked.
    if os.path.isdir(SRC_ROOT):
        for name in os.listdir(SRC_ROOT):
            file_path = os.path.join(SRC_ROOT, name)
            if os.path.isfile(file_path) and file_path.endswith(suffixes):
                files_to_check.append(file_path)

    return files_to_check
|