diff --git a/.agents/scripts/bash/create-new-feature.sh b/.agents/scripts/bash/create-new-feature.sh
index c40cfd7..9be2faa 100644
--- a/.agents/scripts/bash/create-new-feature.sh
+++ b/.agents/scripts/bash/create-new-feature.sh
@@ -5,13 +5,14 @@ set -e
 JSON_MODE=false
 SHORT_NAME=""
 BRANCH_NUMBER=""
+CATEGORY=""
 ARGS=()
 i=1
 while [ $i -le $# ]; do
     arg="${!i}"
     case "$arg" in
-        --json)
-            JSON_MODE=true
+        --json)
+            JSON_MODE=true
             ;;
         --short-name)
             if [ $((i + 1)) -gt $# ]; then
@@ -40,22 +41,42 @@ while [ $i -le $# ]; do
             fi
             BRANCH_NUMBER="$next_arg"
             ;;
-        --help|-h)
-            echo "Usage: $0 [--json] [--short-name <name>] [--number N] <feature_description>"
+        --category)
+            if [ $((i + 1)) -gt $# ]; then
+                echo 'Error: --category requires a value' >&2
+                exit 1
+            fi
+            i=$((i + 1))
+            next_arg="${!i}"
+            if [[ "$next_arg" == --* ]]; then
+                echo 'Error: --category requires a value' >&2
+                exit 1
+            fi
+            CATEGORY="$next_arg"
+            ;;
+        --help|-h)
+            echo "Usage: $0 [--json] [--short-name <name>] [--number N] [--category <100|200|300>] <feature_description>"
             echo ""
             echo "Options:"
             echo "  --json          Output in JSON format"
             echo "  --short-name    Provide a custom short name (2-4 words) for the branch"
             echo "  --number N      Specify branch number manually (overrides auto-detection)"
+            echo "  --category      Category folder (100, 200, or 300). Defaults to 200 (fullstacks)"
             echo "  --help, -h      Show this help message"
             echo ""
+            echo "Categories:"
+            echo "  100 - Infrastructure (Deployment, Monitoring, Docker Compose, Network)"
+            echo "  200 - Fullstack Development (Backend + Frontend features, Workflow Engine, API)"
+            echo "  300 - Others (Documentation, Research, Non-code tasks)"
+            echo ""
             echo "Examples:"
             echo "  $0 'Add user authentication system' --short-name 'user-auth'"
             echo "  $0 'Implement OAuth2 integration for API' --number 5"
+            echo "  $0 'Docker compose hardening' --category 100"
             exit 0
             ;;
-        *)
-            ARGS+=("$arg")
+        *)
+            ARGS+=("$arg")
             ;;
     esac
     i=$((i + 1))
@@ -83,35 +104,58 @@ find_repo_root() {
 
 # Function to get highest number from specs directory
 get_highest_from_specs() {
     local specs_dir="$1"
+    local category="$2"
     local highest=0
-    
+
     if [ -d "$specs_dir" ]; then
-        for dir in "$specs_dir"/*; do
-            [ -d "$dir" ] || continue
-            dirname=$(basename "$dir")
-            number=$(echo "$dirname" | grep -o '^[0-9]\+' || echo "0")
-            number=$((10#$number))
-            if [ "$number" -gt "$highest" ]; then
-                highest=$number
-            fi
-        done
+        # If a category is given, only check that category's folder (named like 200-fullstacks)
+        if [ -n "$category" ]; then
+            for category_dir in "$specs_dir/${category}-"*; do
+                [ -d "$category_dir" ] || continue
+                for dir in "$category_dir"/*; do
+                    [ -d "$dir" ] || continue
+                    dirname=$(basename "$dir")
+                    # Extract the last 2 digits of the leading nXX prefix (e.g. 203 -> 03)
+                    number=$(echo "$dirname" | grep -o '^[0-9]\{3\}' | grep -o '[0-9]\{2\}$' || echo "0")
+                    number=$((10#$number))
+                    if [ "$number" -gt "$highest" ]; then
+                        highest=$number
+                    fi
+                done
+            done
+        else
+            # Check all directories in specs/ (old behavior for backward compatibility)
+            for dir in "$specs_dir"/*; do
+                [ -d "$dir" ] || continue
+                dirname=$(basename "$dir")
+                number=$(echo "$dirname" | grep -o '^[0-9]\+' || echo "0")
+                number=$((10#$number))
+                if [ "$number" -gt "$highest" ]; then
+                    highest=$number
+                fi
+            done
+        fi
     fi
-    
+
     echo "$highest"
 }
 
 # Function to get highest number from git branches
 get_highest_from_branches() {
     local highest=0
-    
+
     # Get all branches (local and remote)
     branches=$(git branch -a 2>/dev/null || echo "")
-    
+
     if [ -n "$branches" ]; then
         while IFS= read -r branch; do
             # Clean branch name: remove leading markers and remote prefixes
             clean_branch=$(echo "$branch" | sed 's/^[* ]*//; s|^remotes/[^/]*/||')
-    
+
             # Extract feature number if branch matches pattern ###-*
            if echo "$clean_branch" | grep -q '^[0-9]\{3\}-'; then
                number=$(echo "$clean_branch" | grep -o '^[0-9]\{3\}' || echo "0")
@@ -122,7 +162,7 @@ get_highest_from_branches() {
             fi
         done <<< "$branches"
     fi
-    
+
     echo "$highest"
 }
@@ -180,19 +220,19 @@ mkdir -p "$SPECS_DIR"
 # Function to generate branch name with stop word filtering and length filtering
 generate_branch_name() {
     local description="$1"
-    
+
     # Common stop words to filter out
     local stop_words="^(i|a|an|the|to|for|of|in|on|at|by|with|from|is|are|was|were|be|been|being|have|has|had|do|does|did|will|would|should|could|can|may|might|must|shall|this|that|these|those|my|your|our|their|want|need|add|get|set)$"
-    
+
     # Convert to lowercase and split into words
     local clean_name=$(echo "$description" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/ /g')
-    
+
     # Filter words: remove stop words and words shorter than 3 chars (unless they're uppercase acronyms in original)
     local meaningful_words=()
     for word in $clean_name; do
         # Skip empty words
         [ -z "$word" ] && continue
-    
+
         # Keep words that are NOT stop words AND (length >= 3 OR are potential acronyms)
         if ! echo "$word" | grep -qiE "$stop_words"; then
             if [ ${#word} -ge 3 ]; then
@@ -203,12 +243,12 @@ generate_branch_name() {
             fi
         fi
     done
-    
+
     # If we have meaningful words, use first 3-4 of them
     if [ ${#meaningful_words[@]} -gt 0 ]; then
         local max_words=3
         if [ ${#meaningful_words[@]} -eq 4 ]; then max_words=4; fi
-    
+
         local result=""
         local count=0
         for word in "${meaningful_words[@]}"; do
@@ -238,10 +278,10 @@ fi
 if [ -z "$BRANCH_NUMBER" ]; then
     if [ "$HAS_GIT" = true ]; then
         # Check existing branches on remotes
-        BRANCH_NUMBER=$(check_existing_branches "$SPECS_DIR")
+        BRANCH_NUMBER=$(check_existing_branches "$SPECS_DIR" "$CATEGORY")
     else
         # Fall back to local directory check
-        HIGHEST=$(get_highest_from_specs "$SPECS_DIR")
+        HIGHEST=$(get_highest_from_specs "$SPECS_DIR" "$CATEGORY")
         BRANCH_NUMBER=$((HIGHEST + 1))
     fi
 fi
@@ -257,15 +297,15 @@ if [ ${#BRANCH_NAME} -gt $MAX_BRANCH_LENGTH ]; then
     # Calculate how much we need to trim from suffix
     # Account for: feature number (3) + hyphen (1) = 4 chars
    MAX_SUFFIX_LENGTH=$((MAX_BRANCH_LENGTH - 4))
-    
+
     # Truncate suffix at word boundary if possible
     TRUNCATED_SUFFIX=$(echo "$BRANCH_SUFFIX" | cut -c1-$MAX_SUFFIX_LENGTH)
     # Remove trailing hyphen if truncation created one
     TRUNCATED_SUFFIX=$(echo "$TRUNCATED_SUFFIX" | sed 's/-$//')
-    
+
     ORIGINAL_BRANCH_NAME="$BRANCH_NAME"
     BRANCH_NAME="${FEATURE_NUM}-${TRUNCATED_SUFFIX}"
-    
+
     >&2 echo "[specify] Warning: Branch name exceeded GitHub's 244-byte limit"
     >&2 echo "[specify] Original: $ORIGINAL_BRANCH_NAME (${#ORIGINAL_BRANCH_NAME} bytes)"
     >&2 echo "[specify] Truncated to: $BRANCH_NAME (${#BRANCH_NAME} bytes)"
@@ -277,7 +317,15 @@ else
     >&2 echo "[specify] Warning: Git repository not detected; skipped branch creation for $BRANCH_NAME"
 fi
 
-FEATURE_DIR="$SPECS_DIR/$BRANCH_NAME"
+# Resolve the category folder name (defaults to 200-fullstacks)
+if [ -z "$CATEGORY" ]; then CATEGORY="200"; fi
+case "$CATEGORY" in
+    100) CATEGORY_NAME="Infrastructures" ;;
+    200) CATEGORY_NAME="fullstacks" ;;
+    300) CATEGORY_NAME="others" ;;
+    *) echo 'Error: --category must be 100, 200, or 300' >&2; exit 1 ;;
+esac
+FEATURE_DIR="$SPECS_DIR/${CATEGORY}-${CATEGORY_NAME}/${BRANCH_NAME}"
 mkdir -p "$FEATURE_DIR"
 
 TEMPLATE="$REPO_ROOT/.specify/templates/spec-template.md"
diff --git a/.agents/scripts/powershell/create-new-feature.ps1 b/.agents/scripts/powershell/create-new-feature.ps1
index 7d791a1..17819fd 100644
--- a/.agents/scripts/powershell/create-new-feature.ps1
+++ b/.agents/scripts/powershell/create-new-feature.ps1
@@ -12,31 +12,48 @@ Optional manual branch number (overrides auto-detection).
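+.PARAMETER Category
+Category folder (100, 200, or 300). Defaults to 200 (fullstacks).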
 .EXAMPLE
 .\create-new-feature.ps1 -Description "Add user authentication" -ShortName "user-auth"
 #>
 param(
     [Parameter(Mandatory = $true, Position = 0)]
     [string]$Description,
 
     [string]$ShortName,
-    [int]$Number = 0
+    [int]$Number = 0,
+    [string]$Category = ""
 )
 
 $ErrorActionPreference = "Stop"
 
+# Validate category
+if ($Category -and $Category -notin @('100', '200', '300')) {
+    Write-Error "Category must be 100, 200, or 300"
+    exit 1
+}
+if (-not $Category) { $Category = '200' }
+
+# Get category name
+$categoryName = switch ($Category) {
+    '100' { 'Infrastructures' }
+    '200' { 'fullstacks' }
+    '300' { 'others' }
+}
+
 # Load common functions
 . "$PSScriptRoot\common.ps1"
 
 $repoRoot = Get-RepoRoot
 $hasGit = Test-HasGit
 $specsDir = Join-Path $repoRoot "specs"
 if (-not (Test-Path $specsDir)) { New-Item -ItemType Directory -Path $specsDir | Out-Null }
 
 # Stop words for smart branch name generation
 $stopWords = @('i','a','an','the','to','for','of','in','on','at','by','with','from',
     'is','are','was','were','be','been','being','have','has','had',
     'do','does','did','will','would','should','could','can','may','might',
     'must','shall','this','that','these','those','my','your','our','their',
     'want','need','add','get','set')
 
 function ConvertTo-BranchName {
     param([string]$Text)
@@ -44,23 +61,41 @@ function ConvertTo-BranchName {
 }
 
 function Get-SmartBranchName {
     param([string]$Desc)
     $words = ($Desc.ToLower() -replace '[^a-z0-9]', ' ').Split(' ', [StringSplitOptions]::RemoveEmptyEntries)
     $meaningful = $words | Where-Object { $_ -notin $stopWords -and $_.Length -ge 3 } | Select-Object -First 3
     if ($meaningful.Count -gt 0) { return ($meaningful -join '-') }
     return ConvertTo-BranchName $Desc
 }
 
 function Get-HighestNumber {
-    param([string]$Dir)
+    param(
+        [string]$Dir,
+        [string]$Category = ""
+    )
     $highest = 0
-    if (Test-Path $Dir) {
+    if ($Category) {
+        # Check the specific category directory; extract XX from the nXX prefix
+        $categoryDir = Join-Path $Dir "$Category-$categoryName"
+        if (Test-Path $categoryDir) {
+            Get-ChildItem -Path $categoryDir -Directory | ForEach-Object {
+                if ($_.Name -match '^\d(\d{2})-') {
+                    $num = [int]$Matches[1]
+                    if ($num -gt $highest) { $highest = $num }
+                }
+            }
+        }
+    }
+    elseif (Test-Path $Dir) {
+        # Old behavior for backward compatibility
         Get-ChildItem -Path $Dir -Directory | ForEach-Object {
             if ($_.Name -match '^(\d+)-') {
                 $num = [int]$Matches[1]
                 if ($num -gt $highest) { $highest = $num }
             }
         }
     }
     return $highest
 }
@@ -78,7 +113,8 @@ if ($Number -gt 0) {
 } else {
-    $highestSpec = Get-HighestNumber $specsDir
+    # Use nXX format where n = category hundreds digit, XX = feature number
+    $highestSpec = Get-HighestNumber $specsDir $Category
     $highestBranch = 0
     if ($hasGit) {
         try {
             git fetch --all --prune 2>$null | Out-Null
             $branches = git branch -a 2>$null
@@ -122,10 +158,15 @@
-$featureDir = Join-Path $specsDir $branchName
+$featureDir = Join-Path $specsDir "$Category-$categoryName" $branchName
 New-Item -ItemType Directory -Path $featureDir -Force | Out-Null
 
 $templateFile = Join-Path $repoRoot ".specify" "templates" "spec-template.md"
 $specFile = Join-Path $featureDir "spec.md"
 if (Test-Path $templateFile) {
     Copy-Item $templateFile $specFile
 } else {
     New-Item -ItemType File -Path $specFile -Force | Out-Null
 }
 
 $env:SPECIFY_FEATURE = $branchName
 
+Write-Host "BRANCH_NAME: $branchName"
+Write-Host "SPEC_FILE: $specFile"
+Write-Host "FEATURE_NUM: $featureNum"
+
 # Output
 [PSCustomObject]@{
     BranchName = $branchName
diff --git 
a/.agents/skills/speckit-specify/SKILL.md b/.agents/skills/speckit-specify/SKILL.md
index a2cab74..4fea06a 100644
--- a/.agents/skills/speckit-specify/SKILL.md
+++ b/.agents/skills/speckit-specify/SKILL.md
@@ -52,20 +52,29 @@ Given that feature description, do this:
    git fetch --all --prune
    ```
 
-   b. Find the highest feature number across all sources for the short-name:
+   b. **Determine the category** for this feature (MUST ask user):
+      - Ask: "Which category does this feature belong to?"
+      - Present options:
+        - **100 - Infrastructures**: Deployment, Monitoring, Docker Compose, Network, Security hardening
+        - **200 - Fullstacks**: Backend + Frontend features, Workflow Engine, API development, UI components
+        - **300 - Others**: Documentation, Research, Non-code tasks, Process improvement
+      - Wait for user to respond with category (100, 200, or 300)
+      - Default to 200 if user doesn't specify
+
+   c. Find the highest feature number across all sources for the short-name within the chosen category:
      - Remote branches: `git ls-remote --heads origin | grep -E 'refs/heads/[0-9]+-<short-name>$'`
      - Local branches: `git branch | grep -E '^[* ]*[0-9]+-<short-name>$'`
-     - Specs directories: Check for directories matching `specs/[0-9]+-<short-name>`
+     - Specs directories: Check for directories matching `specs/<category>-*/[0-9]+-<short-name>`
 
-   c. Determine the next available number:
+   d. Determine the next available number:
      - Extract all numbers from all three sources
      - Find the highest number N
      - Use N+1 for the new branch number
 
-   d. Run the script `../scripts/bash/create-new-feature.sh --json "{{args}}"` with the calculated number and short-name:
-      - Pass `--number N+1` and `--short-name "your-short-name"` along with the feature description
-      - Bash example: `.agents/scripts/bash/create-new-feature.sh --json "{{args}}" --number 5 --short-name "user-auth" "Add user authentication"`
-      - PowerShell example: `.agents/scripts/powershell/create-new-feature.ps1 -Json -Args '{{args}}' -Number 5 -ShortName "user-auth" "Add user authentication"`
+   e. Run the script `../scripts/bash/create-new-feature.sh --json "{{args}}"` with the calculated number, short-name, and category:
+      - Pass `--number N+1`, `--short-name "your-short-name"`, and `--category <100|200|300>` along with the feature description
+      - Bash example: `.agents/scripts/bash/create-new-feature.sh --json "{{args}}" --number 5 --short-name "user-auth" --category 200 "Add user authentication"`
+      - PowerShell example: `.agents/scripts/powershell/create-new-feature.ps1 -Json -Args '{{args}}' -Number 5 -ShortName "user-auth" -Category 200 "Add user authentication"`
 
 **IMPORTANT**:
 
 - Check all three sources (remote branches, local branches, specs directories) to find the highest number
diff --git a/.windsurf/plans/specs-reorganization-05bb1b.md b/.windsurf/plans/specs-reorganization-05bb1b.md
new file mode 100644
index 0000000..a96160c
--- /dev/null
+++ b/.windsurf/plans/specs-reorganization-05bb1b.md
@@ -0,0 +1,93 @@
+# Specs Folder Reorganization Plan
+
+This plan reorganizes the specs/ directory into categorized folders (100-Infrastructures, 200-fullstacks, 300-others) with consistent numeric naming conventions, and updates AGENTS.md to document the new structure.
+
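+As a sketch of the `nXX` naming rule detailed below (TypeScript, purely illustrative; the function name is an assumption, not project code):
+
+```typescript
+// nXX rule: n is the hundreds digit of the category, XX the zero-padded feature number.
+// specPrefix(200, 1) -> "201"; specPrefix(100, 2) -> "102"
+function specPrefix(category: 100 | 200 | 300, featureNumber: number): string {
+  const hundreds = category / 100; // 1, 2, or 3
+  return `${hundreds}${String(featureNumber).padStart(2, "0")}`;
+}
+```
+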
+ +## Current State +- `specs/001-transmittals-circulation/` - Fullstack feature (plan.md, spec.md, tasks.md, test-report.md) +- `specs/002-infra-ops/` - Infrastructure work (plan.md, spec.md, quickstart.md, research.md, data-model.md, checklists/, contracts/) +- `specs/003-unified-workflow-engine/` - Fullstack core system (plan.md, spec.md, tasks.md, quickstart.md, research.md, data-model.md, checklists/, contracts/) +- Core specs folders (00-overview, 01-requirements, etc.) - Remain unchanged + +## Target Structure +``` +specs/ +├── 00-overview/ (unchanged) +├── 01-requirements/ (unchanged) +├── 02-architecture/ (unchanged) +├── 03-Data-and-Storage/ (unchanged) +├── 04-Infrastructure-OPS/ (unchanged) +├── 05-Engineering-Guidelines/ (unchanged) +├── 06-Decision-Records/ (unchanged) +├── 08-Tasks/ (unchanged) +├── 88-logs/ (unchanged) +├── 99-archives/ (unchanged) +├── 100-Infrastructures/ # NEW: Infrastructure-related work +│ ├── 102-infra-ops/ # Moved from 002-infra-ops +│ └── README.md # NEW: Category guide +├── 200-fullstacks/ # NEW: Backend + frontend features +│ ├── 201-transmittals-circulation/ # Moved from 001-transmittals-circulation +│ ├── 203-unified-workflow-engine/ # Moved from 003-unified-workflow-engine +│ └── README.md # NEW: Category guide +└── 300-others/ # NEW: Documentation, research, non-code tasks + └── README.md # NEW: Category guide +``` + +## Naming Convention +- Prefix: `nXX` where `n` = hundreds digit of category folder +- Example: `100-Infrastructures/102-infra-ops` (n=1, so 1xx) +- Example: `200-fullstacks/201-transmittals-circulation` (n=2, so 2xx) + +## Steps + +### 1. Create new category folders +- Create `specs/100-Infrastructures/` +- Create `specs/200-fullstacks/` +- Create `specs/300-others/` + +### 2. Move existing folders with new names +- Move `specs/001-transmittals-circulation/` → `specs/200-fullstacks/201-transmittals-circulation/` +- Move `specs/002-infra-ops/` → `specs/100-Infrastructures/102-infra-ops/` +- Move `specs/003-unified-workflow-engine/` → `specs/200-fullstacks/203-unified-workflow-engine/` + +### 3. Create README.md files for each category +- `specs/100-Infrastructures/README.md` - Explain infrastructure work scope +- `specs/200-fullstacks/README.md` - Explain fullstack feature scope +- `specs/300-others/README.md` - Explain documentation/research scope + +### 4. Update AGENTS.md +- Add new section: "📁 Specs Folder Organization" +- Document the new category structure +- Explain naming convention (nXX prefix) +- Provide examples of what goes in each category +- Add rule: "When creating new feature specs, place in appropriate category folder" + +### 5. Update specs/README.md +- Add reference to new category folders +- Update directory structure diagram +- Note that core specs (00-06, 08, 88, 99) remain unchanged + +### 6. 
Create workflow (optional - pending user confirmation) +- Create `.windsurf/workflows/create-feature-spec.md` +- Workflow prompts user for feature type (infra/fullstack/other) +- Automatically places spec in correct category with proper naming + +## Verification +- Verify all files moved correctly (no data loss) +- Verify internal file references still work (check for relative paths) +- Verify AGENTS.md documentation is clear +- Test that new structure is intuitive for team + +## Files Modified +- `specs/100-Infrastructures/` (NEW) +- `specs/200-fullstacks/` (NEW) +- `specs/300-others/` (NEW) +- `specs/100-Infrastructures/README.md` (NEW) +- `specs/200-fullstacks/README.md` (NEW) +- `specs/300-others/README.md` (NEW) +- `AGENTS.md` (UPDATED - add Specs Folder Organization section) +- `specs/README.md` (UPDATED - add new categories to directory structure) + +## Files Moved +- `specs/001-transmittals-circulation/` → `specs/200-fullstacks/201-transmittals-circulation/` +- `specs/002-infra-ops/` → `specs/100-Infrastructures/102-infra-ops/` +- `specs/003-unified-workflow-engine/` → `specs/200-fullstacks/203-unified-workflow-engine/` diff --git a/.windsurf/skills/improve-codebase-architecture/DEEPENING.md b/.windsurf/skills/improve-codebase-architecture/DEEPENING.md new file mode 100644 index 0000000..ecaf5d7 --- /dev/null +++ b/.windsurf/skills/improve-codebase-architecture/DEEPENING.md @@ -0,0 +1,37 @@ +# Deepening + +How to deepen a cluster of shallow modules safely, given its dependencies. Assumes the vocabulary in [LANGUAGE.md](LANGUAGE.md) — **module**, **interface**, **seam**, **adapter**. + +## Dependency categories + +When assessing a candidate for deepening, classify its dependencies. The category determines how the deepened module is tested across its seam. + +### 1. In-process + +Pure computation, in-memory state, no I/O. Always deepenable — merge the modules and test through the new interface directly. No adapter needed. + +### 2. Local-substitutable + +Dependencies that have local test stand-ins (PGLite for Postgres, in-memory filesystem). Deepenable if the stand-in exists. The deepened module is tested with the stand-in running in the test suite. The seam is internal; no port at the module's external interface. + +### 3. Remote but owned (Ports & Adapters) + +Your own services across a network boundary (microservices, internal APIs). Define a **port** (interface) at the seam. The deep module owns the logic; the transport is injected as an **adapter**. Tests use an in-memory adapter. Production uses an HTTP/gRPC/queue adapter. + +Recommendation shape: *"Define a port at the seam, implement an HTTP adapter for production and an in-memory adapter for testing, so the logic sits in one deep module even though it's deployed across a network."* + +### 4. True external (Mock) + +Third-party services (Stripe, Twilio, etc.) you don't control. The deepened module takes the external dependency as an injected port; tests provide a mock adapter. + +## Seam discipline + +- **One adapter means a hypothetical seam. Two adapters means a real one.** Don't introduce a port unless at least two adapters are justified (typically production + test). A single-adapter seam is just indirection. +- **Internal seams vs external seams.** A deep module can have internal seams (private to its implementation, used by its own tests) as well as the external seam at its interface. Don't expose internal seams through the interface just because tests use them. 
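+
+To make the two-adapter rule concrete, a minimal TypeScript sketch, assuming a hypothetical `PaymentPort` seam; every name is illustrative rather than taken from a real codebase:
+
+```typescript
+// The port: the interface at the seam. Callers and tests know only this.
+interface PaymentPort {
+  charge(accountId: string, amountCents: number): Promise<{ ok: boolean }>;
+}
+
+// The deep module owns the logic; the transport is injected as an adapter.
+class InvoiceSettlement {
+  constructor(private readonly payments: PaymentPort) {}
+
+  async settle(accountId: string, amountCents: number): Promise<boolean> {
+    if (amountCents <= 0) return true; // nothing to collect
+    const result = await this.payments.charge(accountId, amountCents);
+    return result.ok;
+  }
+}
+
+// Production adapter: crosses the network boundary.
+class HttpPaymentAdapter implements PaymentPort {
+  async charge(accountId: string, amountCents: number) {
+    const res = await fetch("https://payments.example.internal/charge", {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: JSON.stringify({ accountId, amountCents }),
+    });
+    return { ok: res.ok };
+  }
+}
+
+// Test adapter: in-memory, no I/O. Two adapters make the seam real.
+class InMemoryPaymentAdapter implements PaymentPort {
+  readonly calls: Array<{ accountId: string; amountCents: number }> = [];
+  async charge(accountId: string, amountCents: number) {
+    this.calls.push({ accountId, amountCents });
+    return { ok: true };
+  }
+}
+```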
+ +## Testing strategy: replace, don't layer + +- Old unit tests on shallow modules become waste once tests at the deepened module's interface exist — delete them. +- Write new tests at the deepened module's interface. The **interface is the test surface**. +- Tests assert on observable outcomes through the interface, not internal state. +- Tests should survive internal refactors — they describe behaviour, not implementation. If a test has to change when the implementation changes, it's testing past the interface. diff --git a/.windsurf/skills/improve-codebase-architecture/INTERFACE-DESIGN.md b/.windsurf/skills/improve-codebase-architecture/INTERFACE-DESIGN.md new file mode 100644 index 0000000..3197723 --- /dev/null +++ b/.windsurf/skills/improve-codebase-architecture/INTERFACE-DESIGN.md @@ -0,0 +1,44 @@ +# Interface Design + +When the user wants to explore alternative interfaces for a chosen deepening candidate, use this parallel sub-agent pattern. Based on "Design It Twice" (Ousterhout) — your first idea is unlikely to be the best. + +Uses the vocabulary in [LANGUAGE.md](LANGUAGE.md) — **module**, **interface**, **seam**, **adapter**, **leverage**. + +## Process + +### 1. Frame the problem space + +Before spawning sub-agents, write a user-facing explanation of the problem space for the chosen candidate: + +- The constraints any new interface would need to satisfy +- The dependencies it would rely on, and which category they fall into (see [DEEPENING.md](DEEPENING.md)) +- A rough illustrative code sketch to ground the constraints — not a proposal, just a way to make the constraints concrete + +Show this to the user, then immediately proceed to Step 2. The user reads and thinks while the sub-agents work in parallel. + +### 2. Spawn sub-agents + +Spawn 3+ sub-agents in parallel using the Agent tool. Each must produce a **radically different** interface for the deepened module. + +Prompt each sub-agent with a separate technical brief (file paths, coupling details, dependency category from [DEEPENING.md](DEEPENING.md), what sits behind the seam). The brief is independent of the user-facing problem-space explanation in Step 1. Give each agent a different design constraint: + +- Agent 1: "Minimize the interface — aim for 1–3 entry points max. Maximise leverage per entry point." +- Agent 2: "Maximise flexibility — support many use cases and extension." +- Agent 3: "Optimise for the most common caller — make the default case trivial." +- Agent 4 (if applicable): "Design around ports & adapters for cross-seam dependencies." + +Include both [LANGUAGE.md](LANGUAGE.md) vocabulary and CONTEXT.md vocabulary in the brief so each sub-agent names things consistently with the architecture language and the project's domain language. + +Each sub-agent outputs: + +1. Interface (types, methods, params — plus invariants, ordering, error modes) +2. Usage example showing how callers use it +3. What the implementation hides behind the seam +4. Dependency strategy and adapters (see [DEEPENING.md](DEEPENING.md)) +5. Trade-offs — where leverage is high, where it's thin + +### 3. Present and compare + +Present designs sequentially so the user can absorb each one, then compare them in prose. Contrast by **depth** (leverage at the interface), **locality** (where change concentrates), and **seam placement**. + +After comparing, give your own recommendation: which design you think is strongest and why. If elements from different designs would combine well, propose a hybrid. 
Be opinionated — the user wants a strong read, not a menu. diff --git a/.windsurf/skills/improve-codebase-architecture/LANGUAGE.md b/.windsurf/skills/improve-codebase-architecture/LANGUAGE.md new file mode 100644 index 0000000..530c276 --- /dev/null +++ b/.windsurf/skills/improve-codebase-architecture/LANGUAGE.md @@ -0,0 +1,53 @@ +# Language + +Shared vocabulary for every suggestion this skill makes. Use these terms exactly — don't substitute "component," "service," "API," or "boundary." Consistent language is the whole point. + +## Terms + +**Module** +Anything with an interface and an implementation. Deliberately scale-agnostic — applies equally to a function, class, package, or tier-spanning slice. +_Avoid_: unit, component, service. + +**Interface** +Everything a caller must know to use the module correctly. Includes the type signature, but also invariants, ordering constraints, error modes, required configuration, and performance characteristics. +_Avoid_: API, signature (too narrow — those refer only to the type-level surface). + +**Implementation** +What's inside a module — its body of code. Distinct from **Adapter**: a thing can be a small adapter with a large implementation (a Postgres repo) or a large adapter with a small implementation (an in-memory fake). Reach for "adapter" when the seam is the topic; "implementation" otherwise. + +**Depth** +Leverage at the interface — the amount of behaviour a caller (or test) can exercise per unit of interface they have to learn. A module is **deep** when a large amount of behaviour sits behind a small interface. A module is **shallow** when the interface is nearly as complex as the implementation. + +**Seam** _(from Michael Feathers)_ +A place where you can alter behaviour without editing in that place. The *location* at which a module's interface lives. Choosing where to put the seam is its own design decision, distinct from what goes behind it. +_Avoid_: boundary (overloaded with DDD's bounded context). + +**Adapter** +A concrete thing that satisfies an interface at a seam. Describes *role* (what slot it fills), not substance (what's inside). + +**Leverage** +What callers get from depth. More capability per unit of interface they have to learn. One implementation pays back across N call sites and M tests. + +**Locality** +What maintainers get from depth. Change, bugs, knowledge, and verification concentrate at one place rather than spreading across callers. Fix once, fixed everywhere. + +## Principles + +- **Depth is a property of the interface, not the implementation.** A deep module can be internally composed of small, mockable, swappable parts — they just aren't part of the interface. A module can have **internal seams** (private to its implementation, used by its own tests) as well as the **external seam** at its interface. +- **The deletion test.** Imagine deleting the module. If complexity vanishes, the module wasn't hiding anything (it was a pass-through). If complexity reappears across N callers, the module was earning its keep. +- **The interface is the test surface.** Callers and tests cross the same seam. If you want to test *past* the interface, the module is probably the wrong shape. +- **One adapter means a hypothetical seam. Two adapters means a real one.** Don't introduce a seam unless something actually varies across it. + +## Relationships + +- A **Module** has exactly one **Interface** (the surface it presents to callers and tests). +- **Depth** is a property of a **Module**, measured against its **Interface**. 
+- A **Seam** is where a **Module**'s **Interface** lives. +- An **Adapter** sits at a **Seam** and satisfies the **Interface**. +- **Depth** produces **Leverage** for callers and **Locality** for maintainers. + +## Rejected framings + +- **Depth as ratio of implementation-lines to interface-lines** (Ousterhout): rewards padding the implementation. We use depth-as-leverage instead. +- **"Interface" as the TypeScript `interface` keyword or a class's public methods**: too narrow — interface here includes every fact a caller must know. +- **"Boundary"**: overloaded with DDD's bounded context. Say **seam** or **interface**. diff --git a/.windsurf/skills/improve-codebase-architecture/SKILL.md b/.windsurf/skills/improve-codebase-architecture/SKILL.md new file mode 100644 index 0000000..05984a6 --- /dev/null +++ b/.windsurf/skills/improve-codebase-architecture/SKILL.md @@ -0,0 +1,71 @@ +--- +name: improve-codebase-architecture +description: Find deepening opportunities in a codebase, informed by the domain language in CONTEXT.md and the decisions in docs/adr/. Use when the user wants to improve architecture, find refactoring opportunities, consolidate tightly-coupled modules, or make a codebase more testable and AI-navigable. +--- + +# Improve Codebase Architecture + +Surface architectural friction and propose **deepening opportunities** — refactors that turn shallow modules into deep ones. The aim is testability and AI-navigability. + +## Glossary + +Use these terms exactly in every suggestion. Consistent language is the point — don't drift into "component," "service," "API," or "boundary." Full definitions in [LANGUAGE.md](LANGUAGE.md). + +- **Module** — anything with an interface and an implementation (function, class, package, slice). +- **Interface** — everything a caller must know to use the module: types, invariants, error modes, ordering, config. Not just the type signature. +- **Implementation** — the code inside. +- **Depth** — leverage at the interface: a lot of behaviour behind a small interface. **Deep** = high leverage. **Shallow** = interface nearly as complex as the implementation. +- **Seam** — where an interface lives; a place behaviour can be altered without editing in place. (Use this, not "boundary.") +- **Adapter** — a concrete thing satisfying an interface at a seam. +- **Leverage** — what callers get from depth. +- **Locality** — what maintainers get from depth: change, bugs, knowledge concentrated in one place. + +Key principles (see [LANGUAGE.md](LANGUAGE.md) for the full list): + +- **Deletion test**: imagine deleting the module. If complexity vanishes, it was a pass-through. If complexity reappears across N callers, it was earning its keep. +- **The interface is the test surface.** +- **One adapter = hypothetical seam. Two adapters = real seam.** + +This skill is _informed_ by the project's domain model. The domain language gives names to good seams; ADRs record decisions the skill should not re-litigate. + +## Process + +### 1. Explore + +Read the project's domain glossary and any ADRs in the area you're touching first. + +Then use the Agent tool with `subagent_type=Explore` to walk the codebase. Don't follow rigid heuristics — explore organically and note where you experience friction: + +- Where does understanding one concept require bouncing between many small modules? +- Where are modules **shallow** — interface nearly as complex as the implementation? 
+- Where have pure functions been extracted just for testability, but the real bugs hide in how they're called (no **locality**)? +- Where do tightly-coupled modules leak across their seams? +- Which parts of the codebase are untested, or hard to test through their current interface? + +Apply the **deletion test** to anything you suspect is shallow: would deleting it concentrate complexity, or just move it? A "yes, concentrates" is the signal you want. + +### 2. Present candidates + +Present a numbered list of deepening opportunities. For each candidate: + +- **Files** — which files/modules are involved +- **Problem** — why the current architecture is causing friction +- **Solution** — plain English description of what would change +- **Benefits** — explained in terms of locality and leverage, and also in how tests would improve + +**Use CONTEXT.md vocabulary for the domain, and [LANGUAGE.md](LANGUAGE.md) vocabulary for the architecture.** If `CONTEXT.md` defines "Order," talk about "the Order intake module" — not "the FooBarHandler," and not "the Order service." + +**ADR conflicts**: if a candidate contradicts an existing ADR, only surface it when the friction is real enough to warrant revisiting the ADR. Mark it clearly (e.g. _"contradicts ADR-0007 — but worth reopening because…"_). Don't list every theoretical refactor an ADR forbids. + +Do NOT propose interfaces yet. Ask the user: "Which of these would you like to explore?" + +### 3. Grilling loop + +Once the user picks a candidate, drop into a grilling conversation. Walk the design tree with them — constraints, dependencies, the shape of the deepened module, what sits behind the seam, what tests survive. + +Side effects happen inline as decisions crystallize: + +- **Naming a deepened module after a concept not in `CONTEXT.md`?** Add the term to `CONTEXT.md` — same discipline as `/grill-with-docs` (see [CONTEXT-FORMAT.md](../grill-with-docs/CONTEXT-FORMAT.md)). Create the file lazily if it doesn't exist. +- **Sharpening a fuzzy term during the conversation?** Update `CONTEXT.md` right there. +- **User rejects the candidate with a load-bearing reason?** Offer an ADR, framed as: _"Want me to record this as an ADR so future architecture reviews don't re-suggest it?"_ Only offer when the reason would actually be needed by a future explorer to avoid re-suggesting the same thing — skip ephemeral reasons ("not worth it right now") and self-evident ones. See [ADR-FORMAT.md](../grill-with-docs/ADR-FORMAT.md). +- **Want to explore alternative interfaces for the deepened module?** See [INTERFACE-DESIGN.md](INTERFACE-DESIGN.md). diff --git a/.windsurf/skills/speckit-analyze/SKILL.md b/.windsurf/skills/speckit-analyze/SKILL.md new file mode 100644 index 0000000..adc875c --- /dev/null +++ b/.windsurf/skills/speckit-analyze/SKILL.md @@ -0,0 +1,206 @@ +--- +name: speckit-analyze +description: Perform a non-destructive cross-artifact consistency and quality analysis across spec.md, plan.md, and tasks.md after task generation. +version: 1.8.9 +depends-on: + - speckit-tasks +--- + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Role + +You are the **Antigravity Consistency Analyst**. Your role is to identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. You act with strict adherence to the project constitution. 
## Task

### Goal

Identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. This command MUST run only after `/speckit-tasks` has successfully produced a complete `tasks.md`.

## Operating Constraints

**STRICTLY READ-ONLY**: Do **not** modify any files. Output a structured analysis report. Offer an optional remediation plan (user must explicitly approve before any follow-up editing commands would be invoked manually).

**Constitution Authority**: The project constitution (`AGENTS.md`) is **non-negotiable** within this analysis scope. Constitution conflicts are automatically CRITICAL and require adjustment of the spec, plan, or tasks—not dilution, reinterpretation, or silent ignoring of the principle. If a principle itself needs to change, that must occur in a separate, explicit constitution update outside `/speckit-analyze`.

### Steps

### 1. Initialize Analysis Context

Run `../scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks` once from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS. Derive absolute paths:

- SPEC = FEATURE_DIR/spec.md
- PLAN = FEATURE_DIR/plan.md
- TASKS = FEATURE_DIR/tasks.md

Abort with an error message if any required file is missing (instruct the user to run the missing prerequisite command).
For single quotes in args like "I'm Groot", use escape syntax: e.g. 'I'\\''m Groot' (or double-quote if possible: "I'm Groot").

### 2. Load Artifacts (Progressive Disclosure)

Load only the minimal necessary context from each artifact:

**From spec.md:**

- Overview/Context
- Functional Requirements
- Non-Functional Requirements
- User Stories
- Edge Cases (if present)

**From plan.md:**

- Architecture/stack choices
- Data Model references
- Phases
- Technical constraints

**From tasks.md:**

- Task IDs
- Descriptions
- Phase grouping
- Parallel markers [P]
- Referenced file paths

**From constitution:**

- Load `AGENTS.md` for principle validation

### 3. Build Semantic Models

Create internal representations (do not include raw artifacts in output):

- **Requirements inventory**: Each functional + non-functional requirement with a stable key (derive slug based on imperative phrase; e.g., "User can upload file" → `user-can-upload-file`)
- **User story/action inventory**: Discrete user actions with acceptance criteria
- **Task coverage mapping**: Map each task to one or more requirements or stories (inference by keyword / explicit reference patterns like IDs or key phrases); a minimal sketch of this derivation and mapping appears after these steps
- **Constitution rule set**: Extract principle names and MUST/SHOULD normative statements

### 4. Detection Passes (Token-Efficient Analysis)

Focus on high-signal findings. Limit to 50 findings total; aggregate remainder in overflow summary.

#### A. Duplication Detection

- Identify near-duplicate requirements
- Mark lower-quality phrasing for consolidation

#### B. Ambiguity Detection

- Flag vague adjectives (fast, scalable, secure, intuitive, robust) lacking measurable criteria
- Flag unresolved placeholders (TODO, TKTK, ???, `<placeholder>`, etc.)

#### C. Underspecification

- Requirements with verbs but missing object or measurable outcome
- User stories missing acceptance criteria alignment
- Tasks referencing files or components not defined in spec/plan

#### D.
Constitution Alignment

- Any requirement or plan element conflicting with a MUST principle
- Missing mandated sections or quality gates from constitution

#### E. Coverage Gaps

- Requirements with zero associated tasks
- Tasks with no mapped requirement/story
- Non-functional requirements not reflected in tasks (e.g., performance, security)

#### F. Inconsistency

- Terminology drift (same concept named differently across files)
- Data entities referenced in plan but absent in spec (or vice versa)
- Task ordering contradictions (e.g., integration tasks before foundational setup tasks without a dependency note)
- Conflicting requirements (e.g., one requires Next.js while the other specifies Vue)

### 5. Severity Assignment

Use this heuristic to prioritize findings:

- **CRITICAL**: Violates constitution MUST, missing core spec artifact, or requirement with zero coverage that blocks baseline functionality
- **HIGH**: Duplicate or conflicting requirement, ambiguous security/performance attribute, untestable acceptance criterion
- **MEDIUM**: Terminology drift, missing non-functional task coverage, underspecified edge case
- **LOW**: Style/wording improvements, minor redundancy not affecting execution order

### 6. Produce Compact Analysis Report

Output a Markdown report (no file writes) with the following structure:

## Specification Analysis Report

| ID  | Category    | Severity | Location(s)      | Summary                      | Recommendation                        |
| --- | ----------- | -------- | ---------------- | ---------------------------- | ------------------------------------- |
| A1  | Duplication | HIGH     | spec.md:L120-134 | Two similar requirements ... | Merge phrasing; keep clearer version  |

(Add one row per finding; generate stable IDs prefixed by category initial.)

**Coverage Summary Table:**

| Requirement Key | Has Task? | Task IDs | Notes |
| --------------- | --------- | -------- | ----- |

**Constitution Alignment Issues:** (if any)

**Unmapped Tasks:** (if any)

**Metrics:**

- Total Requirements
- Total Tasks
- Coverage % (requirements with >=1 task)
- Ambiguity Count
- Duplication Count
- Critical Issues Count

### 7. Provide Next Actions

At end of report, output a concise Next Actions block:

- If CRITICAL issues exist: Recommend resolving before `/speckit-implement`
- If only LOW/MEDIUM: User may proceed, but provide improvement suggestions
- Provide explicit command suggestions: e.g., "Run /speckit-specify with refinement", "Run /speckit-plan to adjust architecture", "Manually edit tasks.md to add coverage for 'performance-metrics'"

### 8. Offer Remediation

Ask the user: "Would you like me to suggest concrete remediation edits for the top N issues?" (Do NOT apply them automatically.)
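
As a minimal illustration of the Step 3 requirement keys and the coverage mapping checked in pass E, a TypeScript sketch (the function names and shapes are assumptions for illustration, not part of any project API):

```typescript
// Derive a stable requirement key from an imperative phrase.
// "User can upload file" -> "user-can-upload-file"
function deriveRequirementKey(requirement: string): string {
  return requirement
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-") // collapse non-alphanumerics into hyphens
    .replace(/^-+|-+$/g, ""); // trim leading/trailing hyphens
}

// Map each requirement key to the task IDs whose text mentions it.
// An empty array marks a coverage gap (detection pass E).
function mapCoverage(
  requirements: string[],
  tasks: Array<{ id: string; text: string }>,
): Map<string, string[]> {
  const coverage = new Map<string, string[]>();
  for (const requirement of requirements) {
    const key = deriveRequirementKey(requirement);
    const phrase = key.replace(/-/g, " ");
    const taskIds = tasks
      .filter((task) => task.text.toLowerCase().includes(phrase))
      .map((task) => task.id);
    coverage.set(key, taskIds);
  }
  return coverage;
}
```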
## Operating Principles

### Context Efficiency

- **Minimal high-signal tokens**: Focus on actionable findings, not exhaustive documentation
- **Progressive disclosure**: Load artifacts incrementally; don't dump all content into analysis
- **Token-efficient output**: Limit findings table to 50 rows; summarize overflow
- **Deterministic results**: Rerunning without changes should produce consistent IDs and counts

### Analysis Guidelines

- **NEVER modify files** (this is read-only analysis)
- **NEVER hallucinate missing sections** (if absent, report them accurately)
- **Prioritize constitution violations** (these are always CRITICAL)
- **Use examples over exhaustive rules** (cite specific instances, not generic patterns)
- **Report zero issues gracefully** (emit success report with coverage statistics)

## Context

{{args}}

---

## LCBP3-DMS Context (MUST LOAD)

Before executing, load **[../_LCBP3-CONTEXT.md](../_LCBP3-CONTEXT.md)** to get:

- Canonical rule sources (AGENTS.md, specs/06-Decision-Records/, specs/05-Engineering-Guidelines/)
- Tier 1 non-negotiables (ADR-019 UUID, ADR-009 schema, ADR-016 security, ADR-002 numbering, ADR-008 BullMQ, ADR-018/020 AI boundary, ADR-007 errors)
- Domain glossary (Correspondence / RFA / Transmittal / Circulation)
- Helper script real paths
- Commit checklist
\ No newline at end of file
diff --git a/.windsurf/skills/speckit-checker/SKILL.md b/.windsurf/skills/speckit-checker/SKILL.md
new file mode 100644
index 0000000..919c252
--- /dev/null
+++ b/.windsurf/skills/speckit-checker/SKILL.md
@@ -0,0 +1,171 @@
+---
+name: speckit-checker
+description: Run static analysis tools and aggregate results.
+version: 1.8.9
+depends-on: []
+---

## User Input

```text
$ARGUMENTS
```

You **MUST** consider the user input before proceeding (if not empty).

## Role

You are the **Antigravity Static Analyzer**. Your role is to run all applicable static analysis tools and provide a unified report of issues.

## Task

### Outline

Auto-detect available tools, run them, and aggregate results into a prioritized report.

### Execution Steps

1. **Detect Project Type and Tools**:

   ```bash
   # Check for config files
   ls -la | grep -E "(package.json|pyproject.toml|go.mod|Cargo.toml|pom.xml)"

   # Check for linter configs
   ls -la | grep -E "(eslint|prettier|pylint|golangci|rustfmt)"
   ```

   | Config           | Tools to Run                  |
   | ---------------- | ----------------------------- |
   | `package.json`   | ESLint, TypeScript, npm audit |
   | `pyproject.toml` | Pylint/Ruff, mypy, bandit     |
   | `go.mod`         | golangci-lint, go vet         |
   | `Cargo.toml`     | clippy, cargo audit           |
   | `pom.xml`        | SpotBugs, PMD                 |

2. **Run Linting**:

   | Stack   | Command                                                                                 |
   | ------- | --------------------------------------------------------------------------------------- |
   | Node/TS | `npx eslint . --format json 2>/dev/null`                                                 |
   | Python  | `ruff check . --output-format json 2>/dev/null \|\| pylint --output-format=json **/*.py` |
   | Go      | `golangci-lint run --out-format json`                                                    |
   | Rust    | `cargo clippy --message-format=json`                                                     |

3. **Run Type Checking**:

   | Stack      | Command                                    |
   | ---------- | ------------------------------------------ |
   | TypeScript | `npx tsc --noEmit 2>&1`                    |
   | Python     | `mypy . --no-error-summary 2>&1`           |
   | Go         | `go build ./... 2>&1` (types are built-in) |

4. 
**Run Security Scanning**:

   | Stack  | Command                                                    |
   | ------ | ---------------------------------------------------------- |
   | Node   | `npm audit --json`                                          |
   | Python | `bandit -r . -f json 2>/dev/null \|\| safety check --json`  |
   | Go     | `govulncheck ./... 2>&1`                                    |
   | Rust   | `cargo audit --json`                                        |

5. **Aggregate and Prioritize**:

   | Category                 | Priority |
   | ------------------------ | -------- |
   | Security (Critical/High) | 🔴 P1    |
   | Type Errors              | 🟠 P2    |
   | Security (Medium/Low)    | 🟡 P3    |
   | Lint Errors              | 🟡 P3    |
   | Lint Warnings            | 🟢 P4    |
   | Style Issues             | ⚪ P5    |

6. **Generate Report**:

   ````markdown
   # Static Analysis Report

   **Date**: [timestamp]
   **Project**: [name from package.json/pyproject.toml]
   **Status**: CLEAN | ISSUES FOUND

   ## Tools Run

   | Tool       | Status | Issues            |
   | ---------- | ------ | ----------------- |
   | ESLint     | ✅     | 12                |
   | TypeScript | ✅     | 3                 |
   | npm audit  | ⚠️     | 2 vulnerabilities |

   ## Summary by Priority

   | Priority       | Count |
   | -------------- | ----- |
   | 🔴 P1 Critical | X     |
   | 🟠 P2 High     | X     |
   | 🟡 P3 Medium   | X     |
   | 🟢 P4 Low      | X     |

   ## Issues

   ### 🔴 P1: Security Vulnerabilities

   | Package | Severity | Issue               | Fix                |
   | ------- | -------- | ------------------- | ------------------ |
   | lodash  | HIGH     | Prototype Pollution | Upgrade to 4.17.21 |

   ### 🟠 P2: Type Errors

   | File       | Line | Error                                            |
   | ---------- | ---- | ------------------------------------------------ |
   | src/api.ts | 45   | Type 'string' is not assignable to type 'number' |

   ### 🟡 P3: Lint Issues

   | File         | Line | Rule           | Message                         |
   | ------------ | ---- | -------------- | ------------------------------- |
   | src/utils.ts | 12   | no-unused-vars | 'foo' is defined but never used |

   ## Quick Fixes

   ```bash
   # Fix security issues
   npm audit fix

   # Auto-fix lint issues
   npx eslint . --fix
   ```

   ## Recommendations

   1. **Immediate**: Fix P1 security issues
   2. **Before merge**: Fix P2 type errors
   3. **Tech debt**: Address P3/P4 lint issues
   ````

7. **Output**:
   - Display report
   - Exit with non-zero if P1 or P2 issues exist

## Operating Principles

- **Run Everything**: Don't skip tools, aggregate all results
- **Be Fast**: Run tools in parallel when possible
- **Be Actionable**: Every issue should have a clear fix path
- **Don't Duplicate**: Dedupe issues found by multiple tools
- **Respect Configs**: Honor project's existing linter configs

---

## LCBP3-DMS Context (MUST LOAD)

Before executing, load **[../_LCBP3-CONTEXT.md](../_LCBP3-CONTEXT.md)** to get:

- Canonical rule sources (AGENTS.md, specs/06-Decision-Records/, specs/05-Engineering-Guidelines/)
- Tier 1 non-negotiables (ADR-019 UUID, ADR-009 schema, ADR-016 security, ADR-002 numbering, ADR-008 BullMQ, ADR-018/020 AI boundary, ADR-007 errors)
- Domain glossary (Correspondence / RFA / Transmittal / Circulation)
- Helper script real paths
- Commit checklist
\ No newline at end of file
diff --git a/.windsurf/skills/speckit-checklist/SKILL.md b/.windsurf/skills/speckit-checklist/SKILL.md
new file mode 100644
index 0000000..ddbfe88
--- /dev/null
+++ b/.windsurf/skills/speckit-checklist/SKILL.md
@@ -0,0 +1,314 @@
+---
+name: speckit-checklist
+description: Generate a custom checklist for the current feature based on user requirements.
+version: 1.8.9
+---

## Checklist Purpose: "Unit Tests for English"

**CRITICAL CONCEPT**: Checklists are **UNIT TESTS FOR REQUIREMENTS WRITING** - they validate the quality, clarity, and completeness of requirements in a given domain.

**NOT for verification/testing**:

- ❌ NOT "Verify the button clicks correctly"
- ❌ NOT "Test error handling works"
- ❌ NOT "Confirm the API returns 200"
- ❌ NOT checking if code/implementation matches the spec

**FOR requirements quality validation**:

- ✅ "Are visual hierarchy requirements defined for all card types?" (completeness)
- ✅ "Is 'prominent display' quantified with specific sizing/positioning?" (clarity)
- ✅ "Are hover state requirements consistent across all interactive elements?" (consistency)
- ✅ "Are accessibility requirements defined for keyboard navigation?" (coverage)
- ✅ "Does the spec define what happens when logo image fails to load?" (edge cases)

**Metaphor**: If your spec is code written in English, the checklist is its unit test suite. You're testing whether the requirements are well-written, complete, unambiguous, and ready for implementation - NOT whether the implementation works.

## User Input

```text
$ARGUMENTS
```

You **MUST** consider the user input before proceeding (if not empty).

## Role

You are the **Antigravity Quality Gatekeeper**. Your role is to validate the quality of requirements by generating "Unit Tests for English"—checklists that ensure specifications are complete, clear, consistent, and measurable. You don't test the code; you test the documentation that defines it.

## Task

### Execution Steps

1. **Setup**: Run `../scripts/bash/check-prerequisites.sh --json` from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS list.
   - All file paths must be absolute.
   - For single quotes in args like "I'm Groot", use escape syntax: e.g. 'I'\\''m Groot' (or double-quote if possible: "I'm Groot").

2. **Clarify intent (dynamic)**: Derive up to THREE initial contextual clarifying questions (no pre-baked catalog). They MUST:
   - Be generated from the user's phrasing + extracted signals from spec/plan/tasks
   - Only ask about information that materially changes checklist content
   - Be skipped individually if already unambiguous in `$ARGUMENTS`
   - Prefer precision over breadth

   Generation algorithm:

   1. Extract signals: feature domain keywords (e.g., auth, latency, UX, API), risk indicators ("critical", "must", "compliance"), stakeholder hints ("QA", "review", "security team"), and explicit deliverables ("a11y", "rollback", "contracts").
   2. Cluster signals into candidate focus areas (max 4) ranked by relevance.
   3. Identify probable audience & timing (author, reviewer, QA, release) if not explicit.
   4. Detect missing dimensions: scope breadth, depth/rigor, risk emphasis, exclusion boundaries, measurable acceptance criteria.
   5. 
Formulate questions chosen from these archetypes: + - Scope refinement (e.g., "Should this include integration touchpoints with X and Y or stay limited to local module correctness?") + - Risk prioritization (e.g., "Which of these potential risk areas should receive mandatory gating checks?") + - Depth calibration (e.g., "Is this a lightweight pre-commit sanity list or a formal release gate?") + - Audience framing (e.g., "Will this be used by the author only or peers during PR review?") + - Boundary exclusion (e.g., "Should we explicitly exclude performance tuning items this round?") + - Scenario class gap (e.g., "No recovery flows detected—are rollback / partial failure paths in scope?") + + Question formatting rules: + - If presenting options, generate a compact table with columns: Option | Candidate | Why It Matters + - Limit to A–E options maximum; omit table if a free-form answer is clearer + - Never ask the user to restate what they already said + - Avoid speculative categories (no hallucination). If uncertain, ask explicitly: "Confirm whether X belongs in scope." + + Defaults when interaction impossible: + - Depth: Standard + - Audience: Reviewer (PR) if code-related; Author otherwise + - Focus: Top 2 relevance clusters + + Output the questions (label Q1/Q2/Q3). After answers: if ≥2 scenario classes (Alternate / Exception / Recovery / Non-Functional domain) remain unclear, you MAY ask up to TWO more targeted follow‑ups (Q4/Q5) with a one-line justification each (e.g., "Unresolved recovery path risk"). Do not exceed five total questions. Skip escalation if user explicitly declines more. + +3. **Understand user request**: Combine `$ARGUMENTS` + clarifying answers: + - Derive checklist theme (e.g., security, review, deploy, ux) + - Consolidate explicit must-have items mentioned by user + - Map focus selections to category scaffolding + - Infer any missing context from spec/plan/tasks (do NOT hallucinate) + +4. **Load feature context**: Read from FEATURE_DIR: + - spec.md: Feature requirements and scope + - plan.md (if exists): Technical details, dependencies + - tasks.md (if exists): Implementation tasks + + **Context Loading Strategy**: + - Load only necessary portions relevant to active focus areas (avoid full-file dumping) + - Prefer summarizing long sections into concise scenario/requirement bullets + - Use progressive disclosure: add follow-on retrieval only if gaps detected + - If source docs are large, generate interim summary items instead of embedding raw text + +5. **Generate checklist** - Create "Unit Tests for Requirements": + - Create `FEATURE_DIR/checklists/` directory if it doesn't exist + - Generate unique checklist filename: + - Use short, descriptive name based on domain (e.g., `ux.md`, `api.md`, `security.md`) + - Format: `[domain].md` + - If file exists, append to existing file + - Number items sequentially starting from CHK001 + - Each `/speckit-checklist` run creates a NEW file (never overwrites existing checklists) + + **CORE PRINCIPLE - Test the Requirements, Not the Implementation**: + Every checklist item MUST evaluate the REQUIREMENTS THEMSELVES for: + - **Completeness**: Are all necessary requirements present? + - **Clarity**: Are requirements unambiguous and specific? + - **Consistency**: Do requirements align with each other? + - **Measurability**: Can requirements be objectively verified? + - **Coverage**: Are all scenarios/edge cases addressed? 
+ + **Category Structure** - Group items by requirement quality dimensions: + - **Requirement Completeness** (Are all necessary requirements documented?) + - **Requirement Clarity** (Are requirements specific and unambiguous?) + - **Requirement Consistency** (Do requirements align without conflicts?) + - **Acceptance Criteria Quality** (Are success criteria measurable?) + - **Scenario Coverage** (Are all flows/cases addressed?) + - **Edge Case Coverage** (Are boundary conditions defined?) + - **Non-Functional Requirements** (Performance, Security, Accessibility, etc. - are they specified?) + - **Dependencies & Assumptions** (Are they documented and validated?) + - **Ambiguities & Conflicts** (What needs clarification?) + + **HOW TO WRITE CHECKLIST ITEMS - "Unit Tests for English"**: + + ❌ **WRONG** (Testing implementation): + - "Verify landing page displays 3 episode cards" + - "Test hover states work on desktop" + - "Confirm logo click navigates home" + + ✅ **CORRECT** (Testing requirements quality): + - "Are the exact number and layout of featured episodes specified?" [Completeness] + - "Is 'prominent display' quantified with specific sizing/positioning?" [Clarity] + - "Are hover state requirements consistent across all interactive elements?" [Consistency] + - "Are keyboard navigation requirements defined for all interactive UI?" [Coverage] + - "Is the fallback behavior specified when logo image fails to load?" [Edge Cases] + - "Are loading states defined for asynchronous episode data?" [Completeness] + - "Does the spec define visual hierarchy for competing UI elements?" [Clarity] + + **ITEM STRUCTURE**: + Each item should follow this pattern: + - Question format asking about requirement quality + - Focus on what's WRITTEN (or not written) in the spec/plan + - Include quality dimension in brackets [Completeness/Clarity/Consistency/etc.] + - Reference spec section `[Spec §X.Y]` when checking existing requirements + - Use `[Gap]` marker when checking for missing requirements + + **EXAMPLES BY QUALITY DIMENSION**: + + Completeness: + - "Are error handling requirements defined for all API failure modes? [Gap]" + - "Are accessibility requirements specified for all interactive elements? [Completeness]" + - "Are mobile breakpoint requirements defined for responsive layouts? [Gap]" + + Clarity: + - "Is 'fast loading' quantified with specific timing thresholds? [Clarity, Spec §NFR-2]" + - "Are 'related episodes' selection criteria explicitly defined? [Clarity, Spec §FR-5]" + - "Is 'prominent' defined with measurable visual properties? [Ambiguity, Spec §FR-4]" + + Consistency: + - "Do navigation requirements align across all pages? [Consistency, Spec §FR-10]" + - "Are card component requirements consistent between landing and detail pages? [Consistency]" + + Coverage: + - "Are requirements defined for zero-state scenarios (no episodes)? [Coverage, Edge Case]" + - "Are concurrent user interaction scenarios addressed? [Coverage, Gap]" + - "Are requirements specified for partial data loading failures? [Coverage, Exception Flow]" + + Measurability: + - "Are visual hierarchy requirements measurable/testable? [Acceptance Criteria, Spec §FR-1]" + - "Can 'balanced visual weight' be objectively verified? 
[Measurability, Spec §FR-2]" + + **Scenario Classification & Coverage** (Requirements Quality Focus): + - Check if requirements exist for: Primary, Alternate, Exception/Error, Recovery, Non-Functional scenarios + - For each scenario class, ask: "Are [scenario type] requirements complete, clear, and consistent?" + - If scenario class missing: "Are [scenario type] requirements intentionally excluded or missing? [Gap]" + - Include resilience/rollback when state mutation occurs: "Are rollback requirements defined for migration failures? [Gap]" + + **Traceability Requirements**: + - MINIMUM: ≥80% of items MUST include at least one traceability reference + - Each item should reference: spec section `[Spec §X.Y]`, or use markers: `[Gap]`, `[Ambiguity]`, `[Conflict]`, `[Assumption]` + - If no ID system exists: "Is a requirement & acceptance criteria ID scheme established? [Traceability]" + + **Surface & Resolve Issues** (Requirements Quality Problems): + - Ask questions about the requirements themselves: + - Ambiguities: "Is the term 'fast' quantified with specific metrics? [Ambiguity, Spec §NFR-1]" + - Conflicts: "Do navigation requirements conflict between §FR-10 and §FR-10a? [Conflict]" + - Assumptions: "Is the assumption of 'always available podcast API' validated? [Assumption]" + - Dependencies: "Are external podcast API requirements documented? [Dependency, Gap]" + - Missing definitions: "Is 'visual hierarchy' defined with measurable criteria? [Gap]" + + **Content Consolidation**: + - Soft cap: If raw candidate items > 40, prioritize by risk/impact + - Merge near-duplicates checking the same requirement aspect + - If >5 low-impact edge cases, create one item: "Are edge cases X, Y, Z addressed in requirements? [Coverage]" + + **🚫 ABSOLUTELY PROHIBITED** - These make it an implementation test, not a requirements test: + - ❌ Any item starting with "Verify", "Test", "Confirm", "Check" + implementation behavior + - ❌ References to code execution, user actions, system behavior + - ❌ "Displays correctly", "works properly", "functions as expected" + - ❌ "Click", "navigate", "render", "load", "execute" + - ❌ Test cases, test plans, QA procedures + - ❌ Implementation details (frameworks, APIs, algorithms) + + **✅ REQUIRED PATTERNS** - These test requirements quality: + - ✅ "Are [requirement type] defined/specified/documented for [scenario]?" + - ✅ "Is [vague term] quantified/clarified with specific criteria?" + - ✅ "Are requirements consistent between [section A] and [section B]?" + - ✅ "Can [requirement] be objectively measured/verified?" + - ✅ "Are [edge cases/scenarios] addressed in requirements?" + - ✅ "Does the spec define [missing aspect]?" + + b. **Structure Reference**: Generate the checklist following the canonical template in `templates/checklist-template.md` for title, meta section, category headings, and ID formatting. If template is unavailable, use: H1 title, purpose/created meta lines, `##` category sections containing `- [ ] CHK### ` lines with globally incrementing IDs starting at CHK001. + +6. **Report**: Output full path to created checklist, item count, and remind user that each run creates a new file. Summarize: + - Focus areas selected + - Depth level + - Actor/timing + - Any explicit user-specified must-have items incorporated + +**Important**: Each `/speckit-checklist` command invocation creates a checklist file using short, descriptive names unless file already exists. 
+## Example Checklist Types & Sample Items
+
+**UX Requirements Quality:** `ux.md`
+
+Sample items (testing the requirements, NOT the implementation):
+
+- "Are visual hierarchy requirements defined with measurable criteria? [Clarity, Spec §FR-1]"
+- "Is the number and positioning of UI elements explicitly specified? [Completeness, Spec §FR-1]"
+- "Are interaction state requirements (hover, focus, active) consistently defined? [Consistency]"
+- "Are accessibility requirements specified for all interactive elements? [Coverage, Gap]"
+- "Is fallback behavior defined when images fail to load? [Edge Case, Gap]"
+- "Can 'prominent display' be objectively measured? [Measurability, Spec §FR-4]"
+
+**API Requirements Quality:** `api.md`
+
+Sample items:
+
+- "Are error response formats specified for all failure scenarios? [Completeness]"
+- "Are rate limiting requirements quantified with specific thresholds? [Clarity]"
+- "Are authentication requirements consistent across all endpoints? [Consistency]"
+- "Are retry/timeout requirements defined for external dependencies? [Coverage, Gap]"
+- "Is a versioning strategy documented in requirements? [Gap]"
+
+**Performance Requirements Quality:** `performance.md`
+
+Sample items:
+
+- "Are performance requirements quantified with specific metrics? [Clarity]"
+- "Are performance targets defined for all critical user journeys? [Coverage]"
+- "Are performance requirements under different load conditions specified? [Completeness]"
+- "Can performance requirements be objectively measured? [Measurability]"
+- "Are degradation requirements defined for high-load scenarios? [Edge Case, Gap]"
+
+**Security Requirements Quality:** `security.md`
+
+Sample items:
+
+- "Are authentication requirements specified for all protected resources? [Coverage]"
+- "Are data protection requirements defined for sensitive information? [Completeness]"
+- "Is the threat model documented, and are requirements aligned to it? [Traceability]"
+- "Are security requirements consistent with compliance obligations? [Consistency]"
+- "Are security failure/breach response requirements defined? [Gap, Exception Flow]"
+
+## Anti-Examples: What NOT To Do
+
+**❌ WRONG - These test implementation, not requirements:**
+
+```markdown
+- [ ] CHK001 - Verify landing page displays 3 episode cards [Spec §FR-001]
+- [ ] CHK002 - Test hover states work correctly on desktop [Spec §FR-003]
+- [ ] CHK003 - Confirm logo click navigates to home page [Spec §FR-010]
+- [ ] CHK004 - Check that related episodes section shows 3-5 items [Spec §FR-005]
+```
+
+**✅ CORRECT - These test requirements quality:**
+
+```markdown
+- [ ] CHK001 - Are the number and layout of featured episodes explicitly specified? [Completeness, Spec §FR-001]
+- [ ] CHK002 - Are hover state requirements consistently defined for all interactive elements? [Consistency, Spec §FR-003]
+- [ ] CHK003 - Are navigation requirements clear for all clickable brand elements? [Clarity, Spec §FR-010]
+- [ ] CHK004 - Are the selection criteria for related episodes documented? [Gap, Spec §FR-005]
+- [ ] CHK005 - Are loading state requirements defined for asynchronous episode data? [Gap]
+- [ ] CHK006 - Can "visual hierarchy" requirements be objectively measured? [Measurability, Spec §FR-001]
+```
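+
+A grep along these lines (the file name is hypothetical) catches the prohibited phrasing mechanically:
+
+```bash
+# Flag items that start with implementation-test verbs; anything listed
+# by this scan should be rewritten as a requirements-quality question.
+grep -nEi '^- \[ \] CHK[0-9]{3} - (Verify|Test|Confirm|Check)\b' \
+  checklists/ux.md \
+  && echo "Rewrite the items above as requirements-quality questions." >&2
+```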
+
+**Key Differences:**
+
+- Wrong: Tests whether the system works correctly
+- Correct: Tests whether the requirements are written correctly
+- Wrong: Verification of behavior
+- Correct: Validation of requirement quality
+- Wrong: "Does it do X?"
+- Correct: "Is X clearly specified?"
+
+---
+
+## LCBP3-DMS Context (MUST LOAD)
+
+Before executing, load **[../_LCBP3-CONTEXT.md](../_LCBP3-CONTEXT.md)** to get:
+
+- Canonical rule sources (AGENTS.md, specs/06-Decision-Records/, specs/05-Engineering-Guidelines/)
+- Tier 1 non-negotiables (ADR-019 UUID, ADR-009 schema, ADR-016 security, ADR-002 numbering, ADR-008 BullMQ, ADR-018/020 AI boundary, ADR-007 errors)
+- Domain glossary (Correspondence / RFA / Transmittal / Circulation)
+- Real paths of the helper scripts
+- Commit checklist
\ No newline at end of file
diff --git a/.windsurf/skills/speckit-checklist/templates/checklist-template.md b/.windsurf/skills/speckit-checklist/templates/checklist-template.md
new file mode 100644
index 0000000..e64065d
--- /dev/null
+++ b/.windsurf/skills/speckit-checklist/templates/checklist-template.md
@@ -0,0 +1,40 @@
+# [CHECKLIST TYPE] Checklist: [FEATURE NAME]
+
+**Purpose**: [Brief description of what this checklist covers]
+**Created**: [DATE]
+**Feature**: [Link to spec.md or relevant documentation]
+
+**Note**: This checklist is generated by the `/speckit-checklist` command based on feature context and requirements.
+
+
+
+## [Category 1]
+
+- [ ] CHK001 First checklist item with clear action
+- [ ] CHK002 Second checklist item
+- [ ] CHK003 Third checklist item
+
+## [Category 2]
+
+- [ ] CHK004 Another category item
+- [ ] CHK005 Item with specific criteria
+- [ ] CHK006 Final item in this category
+
+## Notes
+
+- Check items off as completed: `[x]`
+- Add comments or findings inline
+- Link to relevant resources or documentation
+- Items are numbered sequentially for easy reference
diff --git a/.windsurf/skills/speckit-clarify/SKILL.md b/.windsurf/skills/speckit-clarify/SKILL.md
new file mode 100644
index 0000000..779ed28
--- /dev/null
+++ b/.windsurf/skills/speckit-clarify/SKILL.md
@@ -0,0 +1,203 @@
+---
+name: speckit-clarify
+description: Identify underspecified areas in the current feature spec by asking up to 5 highly targeted clarification questions and encoding the answers back into the spec.
+version: 1.8.9
+depends-on:
+  - speckit-specify
+handoffs:
+  - label: Build Technical Plan
+    agent: speckit-plan
+    prompt: Create a plan for the spec. I am building with...
+---
+
+## User Input
+
+```text
+$ARGUMENTS
+```
+
+You **MUST** consider the user input before proceeding (if not empty).
+
+## Role
+
+You are the **Antigravity Ambiguity Buster**. Your role is to interrogate specifications for logical gaps, missing constraints, or vague requirements. You resolve these via structured questioning to minimize rework risk.
+
+## Task
+
+### Outline
+
+Goal: Detect and reduce ambiguity or missing decision points in the active feature specification and record the clarifications directly in the spec file.
+
+Note: This clarification workflow is expected to run (and be completed) BEFORE invoking `/speckit-plan`. If the user explicitly states they are skipping clarification (e.g., an exploratory spike), you may proceed, but must warn that downstream rework risk increases.
+
+Execution steps:
+
+1. Run `../scripts/bash/check-prerequisites.sh --json --paths-only` from the repo root **once** (combined `--json --paths-only` mode / `-Json -PathsOnly`). Parse the minimal JSON payload fields (see the sketch below):
+   - `FEATURE_DIR`
+   - `FEATURE_SPEC`
+   - (Optionally capture `IMPL_PLAN`, `TASKS` for future chained flows.)
+   - If JSON parsing fails, abort and instruct the user to re-run `/speckit-specify` or verify the feature branch environment.
+   - For single quotes in args like "I'm Groot", use escape syntax: e.g. 'I'\''m Groot' (or double-quote if possible: "I'm Groot").
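+
+   A minimal consumption sketch (field names follow this step; the `jq` usage and error handling are illustrative, not prescribed):
+
+   ```bash
+   # Run the prerequisites script once, then parse its minimal JSON payload.
+   payload="$(../scripts/bash/check-prerequisites.sh --json --paths-only)" || exit 1
+
+   FEATURE_DIR="$(jq -er '.FEATURE_DIR' <<<"$payload")" \
+     || { echo "Cannot parse payload - re-run /speckit-specify" >&2; exit 1; }
+   FEATURE_SPEC="$(jq -er '.FEATURE_SPEC' <<<"$payload")"
+   ```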
+
+2. Load the current spec file. Perform a structured ambiguity & coverage scan using the taxonomy below. For each category, mark its status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output the raw map unless no questions will be asked).
+
+   Functional Scope & Behavior:
+   - Core user goals & success criteria
+   - Explicit out-of-scope declarations
+   - User roles / personas differentiation
+
+   Domain & Data Model:
+   - Entities, attributes, relationships
+   - Identity & uniqueness rules
+   - Lifecycle/state transitions
+   - Data volume / scale assumptions
+
+   Interaction & UX Flow:
+   - Critical user journeys / sequences
+   - Error/empty/loading states
+   - Accessibility or localization notes
+
+   Non-Functional Quality Attributes:
+   - Performance (latency, throughput targets)
+   - Scalability (horizontal/vertical, limits)
+   - Reliability & availability (uptime, recovery expectations)
+   - Observability (logging, metrics, tracing signals)
+   - Security & privacy (authN/Z, data protection, threat assumptions)
+   - Compliance / regulatory constraints (if any)
+
+   Integration & External Dependencies:
+   - External services/APIs and failure modes
+   - Data import/export formats
+   - Protocol/versioning assumptions
+
+   Edge Cases & Failure Handling:
+   - Negative scenarios
+   - Rate limiting / throttling
+   - Conflict resolution (e.g., concurrent edits)
+
+   Constraints & Tradeoffs:
+   - Technical constraints (language, storage, hosting)
+   - Explicit tradeoffs or rejected alternatives
+
+   Terminology & Consistency:
+   - Canonical glossary terms
+   - Avoided synonyms / deprecated terms
+
+   Completion Signals:
+   - Acceptance criteria testability
+   - Measurable Definition-of-Done style indicators
+
+   Misc / Placeholders:
+   - TODO markers / unresolved decisions
+   - Ambiguous adjectives ("robust", "intuitive") lacking quantification
+
+   For each category with Partial or Missing status, add a candidate question opportunity unless:
+   - Clarification would not materially change implementation or validation strategy
+   - The information is better deferred to the planning phase (note this internally)
+
+3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints:
+   - Maximum of 5 total questions across the whole session.
+   - Each question must be answerable with EITHER:
+     - A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR
+     - A one-word / short‑phrase answer (explicitly constrain: "Answer in <=5 words").
+   - Only include questions whose answers materially impact architecture, data modeling, task decomposition, test design, UX behavior, operational readiness, or compliance validation.
+   - Ensure category coverage balance: attempt to cover the highest-impact unresolved categories first; avoid asking two low-impact questions when a single high-impact area (e.g., security posture) is unresolved.
+   - Exclude questions already answered, trivial stylistic preferences, or plan-level execution details (unless they block correctness).
+   - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests.
+   - If more than 5 categories remain unresolved, select the top 5 by the (Impact \* Uncertainty) heuristic sketched below.
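+
+   The (Impact \* Uncertainty) ranking can be pictured as a tiny sort (the scores are hypothetical 1-5 values, not a prescribed scale):
+
+   ```bash
+   # Rank unresolved categories by impact * uncertainty and keep the top 5.
+   printf '%s\n' \
+     'security      5 4' \
+     'data-model    4 4' \
+     'ux-flow       3 2' \
+     'terminology   2 1' |
+   while read -r category impact uncertainty; do
+     echo "$((impact * uncertainty)) $category"
+   done | sort -rn | head -n 5
+   ```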
+
+4. Sequential questioning loop (interactive):
+   - Present EXACTLY ONE question at a time.
+   - For multiple‑choice questions:
+     - **Analyze all options** and determine the **most suitable option** based on:
+       - Best practices for the project type
+       - Common patterns in similar implementations
+       - Risk reduction (security, performance, maintainability)
+       - Alignment with any explicit project goals or constraints visible in the spec
+     - Present your **recommended option prominently** at the top, with clear reasoning (1-2 sentences explaining why this is the best choice).
+     - Format as: `**Recommended:** Option [X] - <brief reasoning>`
+     - Then render all options as a Markdown table:
+
+       | Option | Description |
+       | ------ | --------------------------------------------------------------------------------------------------- |
+       | A |