90 lines
3.1 KiB
Python
90 lines
3.1 KiB
Python
import os
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Configuration
|
|
BASE_DIR = Path(r"d:\nap-dms.lcbp3\specs")
|
|
DIRECTORIES = [
|
|
"00-overview",
|
|
"01-requirements",
|
|
"02-architecture",
|
|
"03-implementation",
|
|
"04-operations",
|
|
"05-decisions"
|
|
]
|
|
|
|
# Regex for Markdown links: [label](path)
|
|
# Handles relative paths, absolute file paths, and anchors
|
|
LINK_PATTERN = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
|
|
|
|
def verify_links():
|
|
results = {
|
|
"total_links": 0,
|
|
"broken_links": []
|
|
}
|
|
|
|
for dir_name in DIRECTORIES:
|
|
directory = BASE_DIR / dir_name
|
|
if not directory.exists():
|
|
print(f"Directory not found: {directory}")
|
|
continue
|
|
|
|
for file_path in directory.glob("*.md"):
|
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
content = f.read()
|
|
links = LINK_PATTERN.findall(content)
|
|
|
|
for label, target in links:
|
|
# Ignore external links (http/https)
|
|
if target.startswith("http"):
|
|
continue
|
|
|
|
# Ignore anchors within the same file
|
|
if target.startswith("#"):
|
|
continue
|
|
|
|
results["total_links"] += 1
|
|
|
|
# Process target path
|
|
# 1. Handle file:/// Absolute paths if any
|
|
if target.startswith("file:///"):
|
|
clean_target = Path(target.replace("file:///", "").replace("/", os.sep))
|
|
else:
|
|
# 2. Handle relative paths
|
|
# Remove anchor if present
|
|
clean_target_str = target.split("#")[0]
|
|
if not clean_target_str: # It was just an anchor to another file but path is empty? Wait.
|
|
continue
|
|
|
|
# Resolve path relative to current file
|
|
clean_target = (file_path.parent / clean_target_str).resolve()
|
|
|
|
# Verify existence
|
|
if not clean_target.exists():
|
|
results["broken_links"].append({
|
|
"source": str(file_path),
|
|
"label": label,
|
|
"target": target,
|
|
"resolved": str(clean_target)
|
|
})
|
|
|
|
return results
|
|
|
|
if __name__ == "__main__":
|
|
print(f"Starting link verification in {BASE_DIR}...")
|
|
audit_results = verify_links()
|
|
|
|
print(f"\nAudit Summary:")
|
|
print(f"Total Internal Links Scanned: {audit_results['total_links']}")
|
|
print(f"Total Broken Links Found: {len(audit_results['broken_links'])}")
|
|
|
|
if audit_results['broken_links']:
|
|
print("\nBroken Links Detail:")
|
|
for idx, link in enumerate(audit_results['broken_links'], 1):
|
|
print(f"{idx}. Source: {link['source']}")
|
|
print(f" Link: [{link['label']}]({link['target']})")
|
|
print(f" Resolved Path: {link['resolved']}")
|
|
print("-" * 20)
|
|
else:
|
|
print("\nNo broken links found! 🎉")
|