Files
lcbp3/verify_links.py
admin 83704377f4
Some checks are pending
Spec Validation / validate-markdown (push) Waiting to run
Spec Validation / validate-diagrams (push) Waiting to run
Spec Validation / check-todos (push) Waiting to run
251218:1701 Ongoing update to 1.7.0: Document Number rebuild
2025-12-18 17:01:42 +07:00

90 lines
3.1 KiB
Python

import os
import re
from pathlib import Path
from urllib.parse import unquote
# --- Configuration ---------------------------------------------------------

# Root folder that contains the specification sub-folders listed below.
BASE_DIR = Path(r"d:\nap-dms.lcbp3\specs")

# Sub-folders of BASE_DIR whose Markdown files are scanned for links.
DIRECTORIES = [
    "00-overview",
    "01-requirements",
    "02-architecture",
    "03-implementation",
    "04-operations",
    "05-decisions",
]

# Markdown inline link: [label](target).
# Group 1 captures the label and group 2 the raw target, which may be a
# relative path, an absolute file path, or an anchor.
LINK_PATTERN = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')
# URL prefixes that point outside the local file tree; such links are skipped.
# Full prefixes are used (not just "http") so that a local file such as
# "http-guide.md" is still checked.
_EXTERNAL_PREFIXES = ("http://", "https://", "ftp://", "mailto:", "tel:")


def _strip_link_title(raw_target):
    """Return only the destination part of a raw Markdown link target.

    Handles the two Markdown forms that carry more than the bare path:
    ``<path with spaces>`` (angle brackets) and ``path "Title"`` (a quoted
    title after whitespace). Anything else is returned stripped of
    surrounding whitespace.
    """
    target = raw_target.strip()
    if target.startswith("<"):
        closing = target.find(">")
        if closing != -1:
            return target[1:closing]
    pieces = target.split(None, 1)
    # Only treat the remainder as a title when it is quoted, so that
    # unquoted paths containing spaces keep working as before.
    if len(pieces) == 2 and pieces[1][0] in "\"'":
        return pieces[0]
    return target


def _resolve_link_target(source_file, target):
    """Turn a cleaned link target into the filesystem path it refers to.

    The ``#anchor`` suffix is dropped and percent-escapes (e.g. ``%20``)
    are decoded. ``file:///`` URLs become absolute paths; everything else
    is resolved relative to the directory of ``source_file``.
    """
    # Split before decoding so an encoded "%23" stays part of the file name.
    path_part = unquote(target.split("#", 1)[0])
    if path_part.startswith("file:///"):
        local = path_part[len("file://"):]
        # "/d:/dir/x.md" -> "d:/dir/x.md"; POSIX paths keep their leading "/".
        if re.match(r"/[A-Za-z]:", local):
            local = local[1:]
        return Path(local)
    return (source_file.parent / path_part).resolve()


def verify_links(base_dir=None, directories=None):
    """Scan Markdown files for internal links and report the broken ones.

    Every ``*.md`` file directly inside each listed directory is read as
    UTF-8 and searched with ``LINK_PATTERN``. External links and
    same-file anchors are ignored; every other link is counted and its
    target is checked for existence on disk.

    Args:
        base_dir: Root folder containing the directories to scan.
            Defaults to the module-level ``BASE_DIR``.
        directories: Iterable of sub-folder names under ``base_dir``.
            Defaults to the module-level ``DIRECTORIES``.

    Returns:
        A dict with ``"total_links"`` (number of internal links checked)
        and ``"broken_links"`` (a list of dicts with the keys ``source``,
        ``label``, ``target`` and ``resolved``).
    """
    root = BASE_DIR if base_dir is None else Path(base_dir)
    dir_names = DIRECTORIES if directories is None else directories

    results = {
        "total_links": 0,
        "broken_links": [],
    }

    for dir_name in dir_names:
        directory = root / dir_name
        if not directory.exists():
            print(f"Directory not found: {directory}")
            continue

        # Sorted so the report order is the same on every run.
        for file_path in sorted(directory.glob("*.md")):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()

            for label, raw_target in LINK_PATTERN.findall(content):
                target = _strip_link_title(raw_target)

                # Nothing to check: empty target or anchor within the same file.
                if not target or target.startswith("#"):
                    continue
                # External resources are outside the scope of this audit.
                if target.lower().startswith(_EXTERNAL_PREFIXES):
                    continue

                results["total_links"] += 1

                resolved = _resolve_link_target(file_path, target)
                if not resolved.exists():
                    results["broken_links"].append({
                        "source": str(file_path),
                        "label": label,
                        # Report the target exactly as written in the file.
                        "target": raw_target,
                        "resolved": str(resolved),
                    })

    return results
if __name__ == "__main__":
    # Script entry point: run the audit and print a human-readable report.
    print(f"Starting link verification in {BASE_DIR}...")
    audit_results = verify_links()
    broken_links = audit_results["broken_links"]

    print("\nAudit Summary:")
    print(f"Total Internal Links Scanned: {audit_results['total_links']}")
    print(f"Total Broken Links Found: {len(broken_links)}")

    if not broken_links:
        print("\nNo broken links found! 🎉")
    else:
        print("\nBroken Links Detail:")
        separator = "-" * 20
        # One numbered entry per broken link, each followed by a separator.
        for number, entry in enumerate(broken_links, start=1):
            print(f"{number}. Source: {entry['source']}")
            print(f" Link: [{entry['label']}]({entry['target']})")
            print(f" Resolved Path: {entry['resolved']}")
            print(separator)