I noticed that multiple settings in admin/site_settings.json are missing the `secret: true` flag even though they are actually secret. I checked more thoroughly and found these:
Potentially Sensitive site_settings
Total site_settings scanned: 1337
Potentially sensitive entries: 17
- ai_hugging_face_tei_reranker_api_key: Stores Hugging Face TEI API key
- ai_sentiment_model_configs: JSON schema expects per-model API credentials
- chat_integration_matrix_access_token: Matrix integration access token grants bot permissions
- chat_integration_mattermost_incoming_webhook_token: Incoming Mattermost webhook shared secret token
- chat_integration_mattermost_webhook_url: Mattermost webhook URL embeds secret token
- chat_integration_rocketchat_webhook_url: Rocket.Chat webhook URL embeds secret token
- chat_integration_slack_access_token: Slack bot access token
- chat_integration_slack_incoming_webhook_token: Slack incoming webhook token
- chat_integration_slack_outbound_webhook_url: Slack outbound webhook URL includes secret token
- chat_integration_telegram_access_token: Telegram bot access token
- chat_integration_zulip_bot_api_key: Zulip bot API key
- discourse_subscriptions_secret_key: Secret key used by discourse-subscriptions integration
- discourse_subscriptions_webhook_secret: Webhook signing secret for discourse-subscriptions
- google_oauth2_hd_groups_service_account_json: Service account JSON contains private key material
- hcaptcha_secret_key: hCaptcha secret key
- microsoft_auth_client_secret: Microsoft OAuth client secret
- zendesk_incoming_webhook_token: Zendesk webhook token
AI usage disclosure
I had GitHub Copilot generate the filtering script below, which matches settings against a list of keywords. That revealed roughly 90 keys. I then had Copilot review those and remove the false positives.
find_sensitive_settings.py
#!/usr/bin/env python3
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, Iterable, List, Tuple
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# JSON file (comments allowed) that the script reads its settings from.
SETTINGS_PATH = ROOT.joinpath("settings.json")
# Markdown report written by main().
OUTPUT_MD_PATH = ROOT.joinpath("sensitive_site_settings_report.md")
# Substrings that mark a setting key or a JSON-schema property name as
# potentially sensitive. Matching is done against the lower-cased name, and
# the first entry that matches is the one reported, so order is significant.
NAME_KEYWORDS: Tuple[str, ...] = (
    # API / access keys
    "api_key",
    "apikey",
    "api-key",
    "access_key",
    # Generic secrets and tokens
    "secret",
    "token",
    "password",
    # OAuth-style client credentials
    "client_secret",
    "client_id",
    # Key material
    "private_key",
    "encryption_key",
    # Integrations and auth mechanisms
    "webhook",
    "credential",
    "bearer",
    "oauth",
    "saml",
    "jwt",
)
# Substrings looked for in free-text fields (description, humanized name,
# placeholder). These are the space-separated counterparts of the name
# keywords; the first entry that matches is the one reported.
TEXT_KEYWORDS: Tuple[str, ...] = (
    # API / access keys
    "api key",
    "access key",
    # Generic secrets and tokens
    "secret",
    "token",
    "password",
    # OAuth-style client credentials
    "client secret",
    "client id",
    # Key material
    "private key",
    "encryption key",
    # Integrations and auth mechanisms
    "webhook",
    "credential",
    "bearer",
    "oauth",
    "saml",
    "jwt",
)
def strip_json_comments(text: str) -> str:
    """Return *text* with ``//`` and ``/* ... */`` comments removed.

    Comment markers that appear inside double-quoted strings are left
    untouched, and backslash escapes are honoured so an escaped quote does
    not end the string early.

    Line breaks are preserved: the newline that ends a ``//`` comment and
    every newline inside a ``/* ... */`` comment are copied to the output.
    This keeps the line numbers of the stripped text aligned with the
    original, so parser error positions still point at the right line, and
    it prevents tokens separated only by a multi-line comment from being
    fused together.

    An unterminated block comment consumes the rest of the input.

    Args:
        text: JSON-like text that may contain JavaScript-style comments.

    Returns:
        The text with all comments outside of strings removed.
    """
    result_chars: List[str] = []
    in_string = False
    in_single_line_comment = False
    in_multi_line_comment = False
    escape = False
    i = 0
    length = len(text)
    while i < length:
        char = text[i]
        # One character of look-ahead is enough to spot "//", "/*" and "*/".
        next_char = text[i + 1] if i + 1 < length else ""

        if in_single_line_comment:
            if char == "\n":
                # The newline terminates the comment but stays in the output.
                in_single_line_comment = False
                result_chars.append(char)
            i += 1
            continue

        if in_multi_line_comment:
            if char == "*" and next_char == "/":
                in_multi_line_comment = False
                i += 2
            else:
                if char == "\n":
                    # Keep line structure intact across block comments.
                    result_chars.append(char)
                i += 1
            continue

        if in_string:
            result_chars.append(char)
            if escape:
                # The character after a backslash never closes the string.
                escape = False
            elif char == "\\":
                escape = True
            elif char == "\"":
                in_string = False
            i += 1
            continue

        if char == "/" and next_char == "/":
            in_single_line_comment = True
            i += 2
            continue

        if char == "/" and next_char == "*":
            in_multi_line_comment = True
            i += 2
            continue

        if char == "\"":
            in_string = True
            result_chars.append(char)
            i += 1
            continue

        result_chars.append(char)
        i += 1
    return "".join(result_chars)
def load_settings() -> Dict[str, Any]:
    """Read ``SETTINGS_PATH`` as UTF-8, strip JSON comments and parse it."""
    raw = SETTINGS_PATH.read_text(encoding="utf-8")
    return json.loads(strip_json_comments(raw))
def is_setting_node(node: Any) -> bool:
    """Tell whether *node* looks like a single setting entry.

    A setting entry is a dict that has a ``"value"`` key and at least one of
    the metadata keys ``category``, ``description``, ``default``, ``type``,
    ``secret`` or ``plugin``.
    """
    if not (isinstance(node, dict) and "value" in node):
        return False
    known_metadata = ("category", "description", "default", "type", "secret", "plugin")
    # True as soon as the node shares any key with the metadata names.
    return not node.keys().isdisjoint(known_metadata)
def find_settings(
    container: Dict[str, Any],
    prefix: List[str] | None = None,
) -> Iterable[Tuple[List[str], Dict[str, Any]]]:
    """Yield ``(path, node)`` for every setting node nested in *container*.

    *path* is the list of keys leading to the node, starting with the
    entries of *prefix* (empty when omitted). Nested dicts that are not
    setting nodes are searched recursively; a setting node itself is not
    descended into, and non-dict values are ignored.
    """
    base = [] if prefix is None else prefix
    for name, child in container.items():
        path = [*base, name]
        if is_setting_node(child):
            yield path, child
            continue
        if isinstance(child, dict):
            yield from find_settings(child, path)
def text_contains_keyword(text: str, keywords: Tuple[str, ...]) -> Tuple[bool, str | None]:
    """Find the first of *keywords* that occurs in *text*, ignoring text case.

    Only *text* is lower-cased; keywords are compared as given.

    Returns:
        ``(True, keyword)`` for the first keyword found, in the order of
        *keywords*, or ``(False, None)`` when none occurs.
    """
    haystack = text.lower()
    first_match = next(
        (candidate for candidate in keywords if candidate in haystack),
        None,
    )
    return first_match is not None, first_match
def scan_schema(schema: Any, reasons: List[str]) -> None:
    """Record every JSON-schema property whose name looks sensitive.

    Walks *schema* (nested dicts and lists) and, for each ``"properties"``
    mapping found, checks the property names against ``NAME_KEYWORDS``.
    Each property's own sub-schema is then scanned as well, so properties
    of nested objects are covered at any depth.

    Args:
        schema: A JSON-schema fragment; values that are neither dict nor
            list are ignored.
        reasons: List that is extended in place with one message per
            matching property name.
    """
    if isinstance(schema, dict):
        for key, value in schema.items():
            if key == "properties" and isinstance(value, dict):
                for prop_name, prop_schema in value.items():
                    hit, keyword = text_contains_keyword(prop_name, NAME_KEYWORDS)
                    if hit and keyword:
                        reasons.append(f"json_schema property '{prop_name}' contains keyword '{keyword}'")
                    # A property's value is itself a schema and may declare
                    # further nested "properties"; descend so they are not missed.
                    scan_schema(prop_schema, reasons)
            else:
                scan_schema(value, reasons)
    elif isinstance(schema, list):
        for item in schema:
            scan_schema(item, reasons)
def analyze_setting(path: List[str], node: Dict[str, Any]) -> List[str]:
    """Collect the reasons why a setting looks sensitive.

    Checks, in this order: the setting key (last element of *path*) against
    ``NAME_KEYWORDS``; a truthy ``secret`` flag; the ``description``,
    ``humanized_name`` and ``placeholder`` strings against
    ``TEXT_KEYWORDS``; property names in ``json_schema``; and a ``type`` of
    ``"secret"``.

    Returns:
        Human-readable reason strings; empty when nothing matched.
    """
    findings: List[str] = []

    matched, keyword = text_contains_keyword(path[-1], NAME_KEYWORDS)
    if matched and keyword:
        findings.append(f"setting key contains keyword '{keyword}'")

    if node.get("secret"):
        findings.append("'secret' flag is true")

    for field_name in ("description", "humanized_name", "placeholder"):
        field_text = node.get(field_name)
        if not isinstance(field_text, str):
            # Missing or non-text fields cannot be keyword-matched.
            continue
        matched, keyword = text_contains_keyword(field_text, TEXT_KEYWORDS)
        if matched and keyword:
            findings.append(f"{field_name} contains keyword '{keyword}'")

    if "json_schema" in node:
        scan_schema(node["json_schema"], findings)

    if node.get("type") == "secret":
        findings.append("type is 'secret'")

    return findings
def main() -> None:
    """Scan the settings file and write the Markdown report.

    Loads the settings, analyses every setting node found under the
    top-level ``site_settings`` dict, and writes a report to
    ``OUTPUT_MD_PATH`` listing each flagged setting path (sorted) with its
    reasons in an HTML comment. When ``site_settings`` is missing or not a
    dict, nothing is scanned and the report shows zero settings.
    """
    data = load_settings()
    site_settings = data.get("site_settings") if isinstance(data, dict) else None

    total_settings = 0
    flagged: List[Dict[str, Any]] = []
    if isinstance(site_settings, dict):
        for path, node in find_settings(site_settings, ["site_settings"]):
            total_settings += 1
            reasons = analyze_setting(path, node)
            if not reasons:
                continue
            flagged.append({"path": ".".join(path), "reasons": reasons})
    flagged.sort(key=lambda entry: entry["path"])

    report: List[str] = [
        "# Potentially Sensitive `site_settings`",
        "",
        f"- Total site_settings scanned: {total_settings}",
        f"- Potentially sensitive entries: {len(flagged)}",
        "",
    ]
    for entry in flagged:
        joined_reasons = "; ".join(entry["reasons"])
        report.append(f"- `{entry['path']}`")
        report.append(f" <!-- {joined_reasons} -->")
    if not flagged:
        report.append("No potentially sensitive settings were detected.")
    # Trailing empty entry makes the file end with a newline.
    report.append("")

    OUTPUT_MD_PATH.write_text("\n".join(report), encoding="utf-8")
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()