複数の設定パラメータでシークレットフラグが誤って欠落しています

I noticed that multiple settings in admin/site_settings.json are missing the `secret: true` flag even though they hold secret values. After checking more thoroughly, I found the following:

Potentially Sensitive site_settings

Total site_settings scanned: 1337
Potentially sensitive entries: 17

  • ai_hugging_face_tei_reranker_api_key
    Stores Hugging Face TEI API key
  • ai_sentiment_model_configs
    JSON schema expects per-model API credentials
  • chat_integration_matrix_access_token
    Matrix integration access token grants bot permissions
  • chat_integration_mattermost_incoming_webhook_token
    Incoming Mattermost webhook shared secret token
  • chat_integration_mattermost_webhook_url
    Mattermost webhook URL embeds secret token
  • chat_integration_rocketchat_webhook_url
    Rocket.Chat webhook URL embeds secret token
  • chat_integration_slack_access_token
    Slack bot access token
  • chat_integration_slack_incoming_webhook_token
    Slack incoming webhook token
  • chat_integration_slack_outbound_webhook_url
    Slack outbound webhook URL includes secret token
  • chat_integration_telegram_access_token
    Telegram bot access token
  • chat_integration_zulip_bot_api_key
    Zulip bot API key
  • discourse_subscriptions_secret_key
    Secret key used by discourse-subscriptions integration
  • discourse_subscriptions_webhook_secret
    Webhook signing secret for discourse-subscriptions
  • google_oauth2_hd_groups_service_account_json
    Service account JSON contains private key material
  • hcaptcha_secret_key
    hCaptcha secret key
  • microsoft_auth_client_secret
    Microsoft OAuth client secret
  • zendesk_incoming_webhook_token
    Zendesk webhook token

AI usage disclosure

I had GitHub Copilot generate the filtering script below, which matches settings against a list of keywords. That surfaced roughly 90 keys. I then had Copilot review those and remove the false positives.

find_sensitive_settings.py
#!/usr/bin/env python3
from __future__ import annotations

import json
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple

# ROOT is the parent of the directory that contains this script
# (parents[0] would be the script's own directory).
ROOT = Path(__file__).resolve().parents[1]
# Input file read and parsed by load_settings().
SETTINGS_PATH = ROOT / "settings.json"
# Markdown report written by main().
OUTPUT_MD_PATH = ROOT / "sensitive_site_settings_report.md"

# Substrings looked for in identifiers: setting keys (see analyze_setting)
# and json_schema property names (see scan_schema). Matching is a plain
# substring test against the lower-cased identifier, so every entry here
# is lower-case.
NAME_KEYWORDS: Tuple[str, ...] = (
    "api_key",
    "apikey",
    "api-key",
    "access_key",
    "secret",
    "token",
    "password",
    "client_secret",
    "client_id",
    "private_key",
    "encryption_key",
    "webhook",
    "credential",
    "bearer",
    "oauth",
    "saml",
    "jwt",
)

# Substrings looked for in free-text fields of a setting (description,
# humanized_name, placeholder; see analyze_setting). Same lower-cased
# substring matching as NAME_KEYWORDS, but written with spaces instead of
# underscores.
TEXT_KEYWORDS: Tuple[str, ...] = (
    "api key",
    "access key",
    "secret",
    "token",
    "password",
    "client secret",
    "client id",
    "private key",
    "encryption key",
    "webhook",
    "credential",
    "bearer",
    "oauth",
    "saml",
    "jwt",
)


def strip_json_comments(text: str) -> str:
    """Return *text* with ``//`` and ``/* */`` comments removed.

    Comment markers that appear inside double-quoted strings are left
    untouched; backslash escapes are honoured when looking for the closing
    quote. A ``//`` comment is dropped up to, but not including, its
    terminating newline. A ``/* */`` comment is dropped entirely, including
    any newlines it contains. An unterminated comment swallows the rest of
    the input, and an unterminated string is copied through to the end.
    """
    kept: List[str] = []
    pos = 0
    end = len(text)

    while pos < end:
        if text.startswith("//", pos):
            newline_at = text.find("\n", pos + 2)
            if newline_at == -1:
                # Comment runs to end of input; nothing more to keep.
                break
            kept.append("\n")
            pos = newline_at + 1
        elif text.startswith("/*", pos):
            # Search starts after the opener so that "/*/" is not treated
            # as a complete comment.
            close_at = text.find("*/", pos + 2)
            if close_at == -1:
                break
            pos = close_at + 2
        elif text[pos] == "\"":
            # Walk to the matching unescaped quote and copy the whole
            # literal verbatim.
            cursor = pos + 1
            while cursor < end:
                current = text[cursor]
                if current == "\\":
                    # Skip the backslash together with the escaped char.
                    cursor += 2
                    continue
                cursor += 1
                if current == "\"":
                    break
            cursor = min(cursor, end)
            kept.append(text[pos:cursor])
            pos = cursor
        else:
            kept.append(text[pos])
            pos += 1

    return "".join(kept)


def load_settings() -> Dict[str, Any]:
    """Read SETTINGS_PATH as UTF-8, strip comments, and parse it as JSON.

    Raises whatever ``Path.read_text`` or ``json.loads`` raise for a missing
    file or malformed content.
    """
    raw = SETTINGS_PATH.read_text(encoding="utf-8")
    return json.loads(strip_json_comments(raw))


def is_setting_node(node: Any) -> bool:
    """Return True if *node* looks like a single setting entry.

    A setting entry is a dict that has a ``"value"`` key plus at least one
    of the metadata keys listed below.
    """
    metadata_keys = {"category", "description", "default", "type", "secret", "plugin"}
    return (
        isinstance(node, dict)
        and "value" in node
        and not metadata_keys.isdisjoint(node)
    )


def find_settings(container: Dict[str, Any], prefix: List[str] | None = None) -> Iterable[Tuple[List[str], Dict[str, Any]]]:
    """Yield ``(path, node)`` for every setting entry nested in *container*.

    The walk is depth-first in dict iteration order. *path* is *prefix*
    (empty when omitted) followed by the keys leading to the node. A value
    accepted by ``is_setting_node`` is yielded and not descended into; any
    other dict is descended into; non-dict values are ignored.
    """
    root_path: List[str] = [] if prefix is None else prefix
    # Each frame holds the path of a dict and a live iterator over its items,
    # so a parent resumes exactly where it left off after a child is done.
    pending = [(root_path, iter(container.items()))]

    while pending:
        base_path, entries = pending[-1]
        for name, child in entries:
            child_path = base_path + [name]
            if is_setting_node(child):
                yield child_path, child
            elif isinstance(child, dict):
                pending.append((child_path, iter(child.items())))
                break
        else:
            # Iterator exhausted without descending: this dict is finished.
            pending.pop()


def text_contains_keyword(text: str, keywords: Tuple[str, ...]) -> Tuple[bool, str | None]:
    """Report the first entry of *keywords* that occurs in *text*.

    *text* is lower-cased before the substring test; the keywords are used
    as given. Returns ``(True, keyword)`` for the first match in *keywords*
    order, or ``(False, None)`` when none match.
    """
    haystack = text.lower()
    found = next((candidate for candidate in keywords if candidate in haystack), None)
    return found is not None, found


def scan_schema(schema: Any, reasons: List[str]) -> None:
    """Append a reason to *reasons* for each suspicious schema property name.

    Walks *schema* (any mix of dicts and lists) recursively. Wherever a dict
    has a ``"properties"`` key whose value is a dict, each property name is
    checked against NAME_KEYWORDS, and the property's own sub-schema is then
    walked too, so names inside nested objects or array ``items`` are also
    found. Values that are neither dict nor list are ignored.

    Args:
        schema: The JSON-schema fragment to inspect.
        reasons: List mutated in place; one message is appended per match.
    """
    if isinstance(schema, dict):
        for key, value in schema.items():
            if key == "properties" and isinstance(value, dict):
                for prop_name, prop_schema in value.items():
                    hit, keyword = text_contains_keyword(prop_name, NAME_KEYWORDS)
                    if hit and keyword:
                        reasons.append(f"json_schema property '{prop_name}' contains keyword '{keyword}'")
                    # Descend into the property's own schema; without this,
                    # credential-like names nested below a property would
                    # never be inspected.
                    scan_schema(prop_schema, reasons)
            else:
                scan_schema(value, reasons)
    elif isinstance(schema, list):
        for item in schema:
            scan_schema(item, reasons)


def analyze_setting(path: List[str], node: Dict[str, Any]) -> List[str]:
    """Collect the reasons why a setting might hold sensitive data.

    Checks, in this order: the setting key (last element of *path*) against
    NAME_KEYWORDS; a truthy ``secret`` flag; the ``description``,
    ``humanized_name`` and ``placeholder`` strings against TEXT_KEYWORDS;
    property names inside ``json_schema``; and ``type == "secret"``.

    Returns:
        Human-readable reason strings; empty when nothing matched.
    """
    findings: List[str] = []

    matched, word = text_contains_keyword(path[-1], NAME_KEYWORDS)
    if matched and word:
        findings.append(f"setting key contains keyword '{word}'")

    if node.get("secret"):
        findings.append("'secret' flag is true")

    for field_name in ("description", "humanized_name", "placeholder"):
        field_text = node.get(field_name)
        if not isinstance(field_text, str):
            # Missing or non-string fields cannot be keyword-scanned.
            continue
        matched, word = text_contains_keyword(field_text, TEXT_KEYWORDS)
        if matched and word:
            findings.append(f"{field_name} contains keyword '{word}'")

    if "json_schema" in node:
        scan_schema(node["json_schema"], findings)

    if node.get("type") == "secret":
        findings.append("type is 'secret'")

    return findings


def main() -> None:
    """Scan the settings file and write the Markdown report.

    Loads the settings, analyzes every setting found under the top-level
    ``site_settings`` key, and writes a report to OUTPUT_MD_PATH listing each
    setting that produced at least one reason, sorted by dotted path. If
    ``site_settings`` is absent or not a dict, the report shows zero scanned
    settings.
    """
    data = load_settings()
    site_settings = data.get("site_settings") if isinstance(data, dict) else None

    flagged: List[Dict[str, Any]] = []
    scanned = 0
    if isinstance(site_settings, dict):
        for setting_path, setting in find_settings(site_settings, ["site_settings"]):
            scanned += 1
            why = analyze_setting(setting_path, setting)
            if why:
                flagged.append({"path": ".".join(setting_path), "reasons": why})

    flagged = sorted(flagged, key=lambda item: item["path"])

    report: List[str] = [
        "# Potentially Sensitive `site_settings`",
        "",
        f"- Total site_settings scanned: {scanned}",
        f"- Potentially sensitive entries: {len(flagged)}",
        "",
    ]

    for entry in flagged:
        joined_reasons = "; ".join(entry["reasons"])
        # Reasons go in an HTML comment so they stay out of rendered Markdown.
        report.extend(
            (
                f"- `{entry['path']}`",
                f"  <!-- {joined_reasons} -->",
            )
        )

    if not flagged:
        report.append("No potentially sensitive settings were detected.")

    # Trailing empty element makes the joined text end with a newline.
    report.append("")
    OUTPUT_MD_PATH.write_text("\n".join(report), encoding="utf-8")


# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()