claude-code/claude-zh/skills/skill-stocktake/scripts/scan.sh

#!/usr/bin/env bash
# scan.sh — enumerate skill files, extract frontmatter and UTC mtime
# Usage: scan.sh [CWD_SKILLS_DIR]
# Output: JSON to stdout
#
# When CWD_SKILLS_DIR is omitted, defaults to $PWD/claude/skills so the
# script always picks up project-level skills without relying on the caller.
#
# Environment:
#   SKILL_STOCKTAKE_GLOBAL_DIR   Override ~/claude/skills (for testing only;
#                                do not set in production — intended for bats tests)
#   SKILL_STOCKTAKE_PROJECT_DIR  Override project dir detection (for testing only)

set -euo pipefail

# Directory holding user-level skills (overridable for tests).
GLOBAL_DIR="${SKILL_STOCKTAKE_GLOBAL_DIR:-$HOME/claude/skills}"
# Directory holding project-level skills: env override, then $1, then $PWD/claude/skills.
CWD_SKILLS_DIR="${SKILL_STOCKTAKE_PROJECT_DIR:-${1:-$PWD/claude/skills}}"
# Path to JSONL file containing tool-use observations (optional; used for usage frequency counts).
# Override via SKILL_STOCKTAKE_OBSERVATIONS env var if your setup uses a different path.
OBSERVATIONS="${SKILL_STOCKTAKE_OBSERVATIONS:-$HOME/claude/observations.jsonl}"

# Succeed when $1 contains a "claude/skills" or ".claude/skills" path component,
# either at the start of the path (relative form) or after a slash.
_is_skills_path() {
  local re='(^|/)\.?claude/skills'
  [[ "$1" =~ $re ]]
}

# Validate CWD_SKILLS_DIR looks like a skills path (defense-in-depth).
# Only warn when the path exists — a nonexistent path poses no traversal risk.
# This is advisory: the scan still proceeds after the warning.
if [[ -n "$CWD_SKILLS_DIR" && -d "$CWD_SKILLS_DIR" ]] && ! _is_skills_path "$CWD_SKILLS_DIR"; then
  echo "Warning: CWD_SKILLS_DIR does not look like a claude/skills or .claude/skills path: $CWD_SKILLS_DIR" >&2
fi

# Extract a single-line frontmatter field from a file.
#
# Arguments: $1 - path to the file
#            $2 - field name (matched literally, followed by ": ")
# Outputs:   the field value on stdout; nothing if the field is absent.
#
# Only the first block delimited by "---" lines is searched. Surrounding
# whitespace is trimmed, and one pair of matching quotes (single or double)
# around the whole value is removed; quotes inside the value are kept.
# CRLF line endings are tolerated.
# Does NOT support multi-line YAML blocks (| or >) or nested YAML keys.
extract_field() {
  local file="$1" field="$2"
  # sq carries a literal single quote into awk, which cannot be written
  # inside the single-quoted program text.
  awk -v f="$field" -v sq="'" '
    BEGIN { fm = 0; key = f ": "; klen = length(key) }
    { sub(/\r$/, "") }
    /^---[ \t]*$/ { if (++fm >= 2) exit; next }
    fm == 1 && substr($0, 1, klen) == key {
      val = substr($0, klen + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", val)
      first = substr(val, 1, 1)
      last = substr(val, length(val), 1)
      # Strip quotes only when they wrap the entire value as a matched pair.
      if (length(val) >= 2 && first == last && (first == "\"" || first == sq)) {
        val = substr(val, 2, length(val) - 2)
      }
      print val
      exit
    }
  ' "$file"
}

# Print the UTC timestamp for "N days before now" as YYYY-MM-DDTHH:MM:SSZ.
# Arguments: $1 - number of days to go back
# Tries the BSD/macOS `date -v` form first and falls back to GNU `date -d`.
date_ago() {
  local days="$1"
  local fmt='+%Y-%m-%dT%H:%M:%SZ'
  if ! date -u -v-"${days}d" "$fmt" 2>/dev/null; then
    date -u -d "${days} days ago" "$fmt"
  fi
}

# Print how many "Read" observations exist for one path at or after a cutoff.
# Globals:   OBSERVATIONS (read) - JSONL file of tool-use observations
# Arguments: $1 - path to match against each record's .path
#            $2 - cutoff timestamp; records with .timestamp >= cutoff count
# Outputs:   the count on stdout (0 when the observations file is missing)
count_obs() {
  local target="$1" since="$2"
  if [[ -f "$OBSERVATIONS" ]]; then
    # jq emits one line per matching record; wc counts them and tr strips
    # the padding some wc implementations add.
    jq -r --arg p "$target" --arg c "$since" \
      'select(.tool=="Read" and .path==$p and .timestamp>=$c) | 1' \
      "$OBSERVATIONS" 2>/dev/null | wc -l | tr -d ' '
  else
    echo 0
  fi
}

# Print "count path" lines (uniq -c format) for every "Read" observation whose
# timestamp is at or after the cutoff.
# Globals:   OBSERVATIONS (read)
# Arguments: $1 - cutoff timestamp
_scan_read_counts() {
  jq -r --arg c "$1" \
    'select(.tool=="Read" and .timestamp>=$c) | .path' \
    "$OBSERVATIONS" 2>/dev/null | sort | uniq -c
}

# Print the count recorded for one path in `uniq -c` output, or 0 if absent.
# Arguments: $1 - uniq -c output (may be empty)
#            $2 - path to look up
# The whole remainder of each line is compared, so paths containing spaces
# match. The path travels via the environment rather than `awk -v` so that
# backslashes in it are not interpreted as escape sequences.
_scan_lookup_count() {
  SCAN_LOOKUP_PATH="$2" awk '
    {
      entry = $0
      sub(/^[ \t]*[0-9]+ /, "", entry)
      if (entry == ENVIRON["SCAN_LOOKUP_PATH"]) { print $1; found = 1; exit }
    }
    END { if (!found) print 0 }
  ' <<<"$1"
}

# Scan a directory and produce a JSON array of skill objects.
# Globals:   OBSERVATIONS (read), HOME (read)
# Arguments: $1 - directory to search recursively for *.md files
# Outputs:   JSON array on stdout, ordered by sorted file path; each element is
#            {path, name, description, use_7d, use_30d, mtime}. Prints [] when
#            no files are found.
# Returns:   non-zero if a skill object cannot be built or the final jq fails.
scan_dir_to_json() {
  local dir="$1"
  local c7 c30
  c7=$(date_ago 7)
  c30=$(date_ago 30)

  # Pre-aggregate observation counts in two jq passes (one per window) instead
  # of running jq over the observations file once per skill file.
  # Usage counts are best-effort: a malformed observations file must not abort
  # the scan, so a failed pass only produces a warning and keeps whatever
  # counts were emitted before the failure.
  local obs_7d_counts="" obs_30d_counts=""
  if [[ -f "$OBSERVATIONS" ]]; then
    obs_7d_counts=$(_scan_read_counts "$c7") ||
      echo "Warning: could not fully parse $OBSERVATIONS; 7-day usage counts may be incomplete" >&2
    obs_30d_counts=$(_scan_read_counts "$c30") ||
      echo "Warning: could not fully parse $OBSERVATIONS; 30-day usage counts may be incomplete" >&2
  fi

  # Objects are accumulated as one compact JSON document per line and slurped
  # at the end. This keeps them in the sorted order produced by find | sort
  # and needs no temporary files.
  local file name desc mtime u7 u30 dp entry entries=""
  while IFS= read -r file; do
    name=$(extract_field "$file" "name")
    desc=$(extract_field "$file" "description")
    mtime=$(date -u -r "$file" +%Y-%m-%dT%H:%M:%SZ)
    u7=$(_scan_lookup_count "$obs_7d_counts" "$file")
    u30=$(_scan_lookup_count "$obs_30d_counts" "$file")

    # Abbreviate the home directory to "~" for display, but only when $HOME is
    # a whole leading path component (so /home/bobby is untouched for
    # HOME=/home/bob). $HOME is compared literally, not as a glob.
    dp="$file"
    if [[ -n "${HOME:-}" && "$file" == "$HOME"/* ]]; then
      dp="~${file#"$HOME"}"
    fi

    entry=$(jq -n -c \
      --arg path "$dp" \
      --arg name "$name" \
      --arg description "$desc" \
      --arg mtime "$mtime" \
      --argjson use_7d "$u7" \
      --argjson use_30d "$u30" \
      '{path:$path,name:$name,description:$description,use_7d:$use_7d,use_30d:$use_30d,mtime:$mtime}') ||
      return 1
    entries+="$entry"$'\n'
  done < <(find "$dir" -name "*.md" -type f 2>/dev/null | sort)

  # jq -s yields [] for empty input, which covers the "no files" case.
  printf '%s' "$entries" | jq -s '.'
}

# --- Main ---
# Scan the global and project skill directories (each only if it exists) and
# print one JSON report: a scan_summary with per-scope found/count plus the
# concatenated skills array (global entries first, then project entries).

global_found="false"
global_skills="[]"

if [[ -d "$GLOBAL_DIR" ]]; then
  global_found="true"
  global_skills=$(scan_dir_to_json "$GLOBAL_DIR")
fi

project_found="false"
project_path=""
project_skills="[]"

if [[ -n "$CWD_SKILLS_DIR" && -d "$CWD_SKILLS_DIR" ]]; then
  project_found="true"
  project_path="$CWD_SKILLS_DIR"
  project_skills=$(scan_dir_to_json "$CWD_SKILLS_DIR")
fi

# Both arrays are streamed to a single jq process on stdin and slurped into
# [global, project]. Counts and the merged list are derived there, so the
# (potentially large) skills JSON never has to fit in a command-line argument.
# The length check makes an empty or missing array fail loudly instead of
# producing a misleading report.
printf '%s\n' "$global_skills" "$project_skills" |
  jq -s \
    --arg global_found "$global_found" \
    --arg project_found "$project_found" \
    --arg project_path "$project_path" \
    'if length != 2 then
       error("expected exactly two skill arrays on stdin")
     else
       {
         scan_summary: {
           global: { found: ($global_found == "true"), count: (.[0] | length) },
           project: { found: ($project_found == "true"), path: $project_path, count: (.[1] | length) }
         },
         skills: (.[0] + .[1])
       }
     end'
rename 2026-02-27 13:45:37 +00:00			`#!/usr/bin/env bash`
			`# scan.sh — enumerate skill files, extract frontmatter and UTC mtime`
			`# Usage: scan.sh [CWD_SKILLS_DIR]`
			`# Output: JSON to stdout`
			`#`
			`# When CWD_SKILLS_DIR is omitted, defaults to $PWD/claude/skills so the`
			`# script always picks up project-level skills without relying on the caller.`
			`#`
			`# Environment:`
			`# SKILL_STOCKTAKE_GLOBAL_DIR Override ~/claude/skills (for testing only;`
			`# do not set in production — intended for bats tests)`
			`# SKILL_STOCKTAKE_PROJECT_DIR Override project dir detection (for testing only)`

			`set -euo pipefail`

			`GLOBAL_DIR="${SKILL_STOCKTAKE_GLOBAL_DIR:-$HOME/claude/skills}"`
			`CWD_SKILLS_DIR="${SKILL_STOCKTAKE_PROJECT_DIR:-${1:-$PWD/claude/skills}}"`
			`# Path to JSONL file containing tool-use observations (optional; used for usage frequency counts).`
			`# Override via SKILL_STOCKTAKE_OBSERVATIONS env var if your setup uses a different path.`
			`OBSERVATIONS="${SKILL_STOCKTAKE_OBSERVATIONS:-$HOME/claude/observations.jsonl}"`

			`# Validate CWD_SKILLS_DIR looks like a claude/skills path (defense-in-depth).`
			`# Only warn when the path exists — a nonexistent path poses no traversal risk.`
			`if [[ -n "$CWD_SKILLS_DIR" && -d "$CWD_SKILLS_DIR" && "$CWD_SKILLS_DIR" != */claude/skills* ]]; then`
			`echo "Warning: CWD_SKILLS_DIR does not look like a .claude/skills path: $CWD_SKILLS_DIR" >&2`
			`fi`

			`# Extract a frontmatter field (handles both quoted and unquoted single-line values).`
			`# Does NOT support multi-line YAML blocks (\| or >) or nested YAML keys.`
			`extract_field() {`
			`local file="$1" field="$2"`
			`awk -v f="$field" '`
			`BEGIN { fm=0 }`
			`/^---$/ { fm++; next }`
			`fm==1 {`
			`n = length(f) + 2`
			`if (substr($0, 1, n) == f ": ") {`
			`val = substr($0, n+1)`
			`gsub(/^"/, "", val)`
			`gsub(/"$/, "", val)`
			`print val`
			`exit`
			`}`
			`}`
			`fm>=2 { exit }`
			`' "$file"`
			`}`

			`# Get UTC timestamp N days ago (supports both macOS and GNU date)`
			`date_ago() {`
			`local n="$1"`
			`date -u -v-"${n}d" +%Y-%m-%dT%H:%M:%SZ 2>/dev/null \|\|`
			`date -u -d "${n} days ago" +%Y-%m-%dT%H:%M:%SZ`
			`}`

			`# Count observations matching a file path since a cutoff timestamp`
			`count_obs() {`
			`local file="$1" cutoff="$2"`
			`if [[ ! -f "$OBSERVATIONS" ]]; then`
			`echo 0`
			`return`
			`fi`
			`jq -r --arg p "$file" --arg c "$cutoff" \`
			`'select(.tool=="Read" and .path==$p and .timestamp>=$c) \| 1' \`
			`"$OBSERVATIONS" 2>/dev/null \| wc -l \| tr -d ' '`
			`}`

			`# Scan a directory and produce a JSON array of skill objects`
			`scan_dir_to_json() {`
			`local dir="$1"`
			`local c7 c30`
			`c7=$(date_ago 7)`
			`c30=$(date_ago 30)`

			`local tmpdir`
			`tmpdir=$(mktemp -d)`
			`# Use a function to avoid embedding $tmpdir in a quoted string (prevents injection`
			`# if TMPDIR were crafted to contain shell metacharacters).`
			`local _scan_tmpdir="$tmpdir"`
			`_scan_cleanup() { rm -rf "$_scan_tmpdir"; }`
			`trap _scan_cleanup RETURN`

			`# Pre-aggregate observation counts in two passes (one per window) instead of`
			`# calling jq per-file — reduces from O(n*m) to O(n+m) jq invocations.`
			`local obs_7d_counts obs_30d_counts`
			`obs_7d_counts=""`
			`obs_30d_counts=""`
			`if [[ -f "$OBSERVATIONS" ]]; then`
			`obs_7d_counts=$(jq -r --arg c "$c7" \`
			`'select(.tool=="Read" and .timestamp>=$c) \| .path' \`
			`"$OBSERVATIONS" 2>/dev/null \| sort \| uniq -c)`
			`obs_30d_counts=$(jq -r --arg c "$c30" \`
			`'select(.tool=="Read" and .timestamp>=$c) \| .path' \`
			`"$OBSERVATIONS" 2>/dev/null \| sort \| uniq -c)`
			`fi`

			`local i=0`
			`while IFS= read -r file; do`
			`local name desc mtime u7 u30 dp`
			`name=$(extract_field "$file" "name")`
			`desc=$(extract_field "$file" "description")`
			`mtime=$(date -u -r "$file" +%Y-%m-%dT%H:%M:%SZ)`
			`# Use awk exact field match to avoid substring false-positives from grep -F.`
			`# uniq -c output format: " N /path/to/file" — path is always field 2.`
			`u7=$(echo "$obs_7d_counts" \| awk -v f="$file" '$2 == f {print $1}' \| head -1)`
			`u7="${u7:-0}"`
			`u30=$(echo "$obs_30d_counts" \| awk -v f="$file" '$2 == f {print $1}' \| head -1)`
			`u30="${u30:-0}"`
			`dp="${file/#$HOME/~}"`

			`jq -n \`
			`--arg path "$dp" \`
			`--arg name "$name" \`
			`--arg description "$desc" \`
			`--arg mtime "$mtime" \`
			`--argjson use_7d "$u7" \`
			`--argjson use_30d "$u30" \`
			`'{path:$path,name:$name,description:$description,use_7d:$use_7d,use_30d:$use_30d,mtime:$mtime}' \`
			`> "$tmpdir/$i.json"`
			`i=$((i+1))`
			`done < <(find "$dir" -name "*.md" -type f 2>/dev/null \| sort)`

			`if [[ $i -eq 0 ]]; then`
			`echo "[]"`
			`else`
			`jq -s '.' "$tmpdir"/*.json`
			`fi`
			`}`

			`# --- Main ---`

			`global_found="false"`
			`global_count=0`
			`global_skills="[]"`

			`if [[ -d "$GLOBAL_DIR" ]]; then`
			`global_found="true"`
			`global_skills=$(scan_dir_to_json "$GLOBAL_DIR")`
			`global_count=$(echo "$global_skills" \| jq 'length')`
			`fi`

			`project_found="false"`
			`project_path=""`
			`project_count=0`
			`project_skills="[]"`

			`if [[ -n "$CWD_SKILLS_DIR" && -d "$CWD_SKILLS_DIR" ]]; then`
			`project_found="true"`
			`project_path="$CWD_SKILLS_DIR"`
			`project_skills=$(scan_dir_to_json "$CWD_SKILLS_DIR")`
			`project_count=$(echo "$project_skills" \| jq 'length')`
			`fi`

			`# Merge global + project skills into one array`
			`all_skills=$(jq -s 'add' <(echo "$global_skills") <(echo "$project_skills"))`

			`jq -n \`
			`--arg global_found "$global_found" \`
			`--argjson global_count "$global_count" \`
			`--arg project_found "$project_found" \`
			`--arg project_path "$project_path" \`
			`--argjson project_count "$project_count" \`
			`--argjson skills "$all_skills" \`
			`'{`
			`scan_summary: {`
			`global: { found: ($global_found == "true"), count: $global_count },`
			`project: { found: ($project_found == "true"), path: $project_path, count: $project_count }`
			`},`
			`skills: $skills`
			`}'`