Suggested changes to ICS → Discourse sync script
Here are my proposed changes to harden the script for production use.
Each change shows the new code first, with the old code below it for comparison where applicable.
Change 1 — Use JSON `tags` (not `tags[]`) when creating topics
Why: When sending JSON to `/posts.json`, Discourse expects `tags` as an array. `tags[]` is the key for form-encoded payloads.
New
```python
def create_topic(s, title, raw, category_id, tags):
    payload = {
        "title": title,
        "raw": raw,
        "category": int(category_id) if category_id else None,
        "tags": tags or []  # JSON array key
    }
    r = s.post(f"{BASE}/posts.json", json=payload, timeout=30)
    r.raise_for_status()
    data = r.json()
    return data["topic_id"], data["id"]
```
Old
```python
def create_topic(s, title, raw, category_id, tags):
    payload = {
        "title": title,
        "raw": raw,
        "category": int(category_id) if category_id else None,
        "tags[]": tags or []
    }
    r = s.post(f"{BASE}/posts.json", json=payload)
    r.raise_for_status()
    data = r.json()
    return data["topic_id"], data["id"]
```
Change 2 — Robust HTTP with retries & timeouts (+ use for ICS fetch)
Why: Cron runs shouldn’t fail on transient 429/502/503 or slow endpoints.
New
```python
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

DEFAULT_TIMEOUT = 30

class _TimeoutSession(requests.Session):
    """Session that applies a default timeout to every request."""
    def request(self, *args, **kwargs):
        kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
        return super().request(*args, **kwargs)

def _session():
    s = _TimeoutSession()
    s.headers.update({
        "Api-Key": API_KEY,
        "Api-Username": API_USER,
        "Content-Type": "application/json"
    })
    retry = Retry(
        total=5, backoff_factor=0.5,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=frozenset(["GET", "POST", "PUT"])
    )
    s.mount("https://", HTTPAdapter(max_retries=retry))
    s.mount("http://", HTTPAdapter(max_retries=retry))
    return s
```
Replace the urllib ICS fetch with the session (note that `s = _session()` now runs before the fetch):
```python
s = _session()
if args.ics_url:
    log.info(f"Fetching ICS: {args.ics_url}")
    resp = s.get(args.ics_url)
    resp.raise_for_status()
    data = resp.content
else:
    with open(args.ics_file, "rb") as f:
        data = f.read()
cal = Calendar.from_ical(data)
```
Old
```python
if args.ics_url:
    import urllib.request
    log.info(f"Fetching ICS: {args.ics_url}")
    with urllib.request.urlopen(args.ics_url) as resp:
        data = resp.read()
else:
    with open(args.ics_file, "rb") as f:
        data = f.read()
cal = Calendar.from_ical(data)
s = _session()
```
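Note that `_TimeoutSession` only sets a default, so individual calls can still override it. A sketch (`/site.json` is a standard Discourse endpoint; the 120 s value is arbitrary):

```python
s = _session()
r = s.get(f"{BASE}/site.json")        # default 30 s timeout applies
r = s.get(args.ics_url, timeout=120)  # explicit override for a slow feed
```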
Change 3 — Namespaced UID tags with feed + UID hashes (collision-safe)
Why: Different ICS feeds may reuse the same UID values. Namespacing by feed plus UID hash prevents collisions.
Also enforces a tag length limit (default 30, override with the `DISCOURSE_TAG_MAX_LEN` environment variable).
New
```python
import hashlib
import os
from typing import Optional

# Tag length cap: defaults to 30, override with DISCOURSE_TAG_MAX_LEN env
# (floored at 15)
TAG_MAX_LEN = max(int(os.environ.get("DISCOURSE_TAG_MAX_LEN", "30")), 15)

def _short_hash(s: str, n: int = 8) -> str:
    return hashlib.sha1((s or "").encode("utf-8")).hexdigest()[:n]

def _feed_namespace(args) -> str:
    if getattr(args, "feed_id", None):
        return _short_hash(args.feed_id)
    if getattr(args, "ics_url", None):
        return _short_hash(args.ics_url)
    if getattr(args, "ics_file", None):
        return _short_hash(args.ics_file)
    return _short_hash("default-ics-namespace")

def _uid_tag(feed_ns: str, uid: str, max_len: Optional[int] = None) -> str:
    max_len = max_len or TAG_MAX_LEN
    base = _sanitize_tag(f"ics-{feed_ns}-uid")
    uid8 = _short_hash(uid)
    tag = f"{base}-{uid8}"
    if len(tag) <= max_len:
        return tag
    # Too long: trim the base (but never below 8 chars), keep the hash intact
    overflow = len(tag) - max_len
    base_trim = max(len(base) - overflow, 8)
    tag = f"{base[:base_trim].rstrip('-')}-{uid8}"
    return tag[:max_len].rstrip("-")

# inside process_vevent
feed_ns = _feed_namespace(args)
uid_tag = _uid_tag(feed_ns, uid, TAG_MAX_LEN)
```
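With the default cap the result always fits: `ics-` plus an 8-char feed hash plus `-uid-` plus an 8-char UID hash is 25 characters. The hash values below are illustrative, not real output:

```python
feed_ns = _feed_namespace(args)                  # e.g. "1f2e3d4c"
print(_uid_tag(feed_ns, "event-42@example.org"))
# e.g. "ics-1f2e3d4c-uid-9a8b7c6d"
```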
Old
```python
uid_tag = _sanitize_tag(f"uid-{uid}")
```
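Both versions rely on the script's existing `_sanitize_tag`, which this diff doesn't show. For context, a minimal sketch of what such a helper is assumed to do (not the script's actual implementation):

```python
import re

def _sanitize_tag(s: str) -> str:
    # Assumed behavior: lowercase, collapse disallowed characters
    # to hyphens, strip leading/trailing hyphens
    return re.sub(r"[^a-z0-9-]+", "-", (s or "").lower()).strip("-")
```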
Change 4 — Skip unsupported recurrence masters
Why: If the ICS carries an `RRULE` but no expanded instances, importing the bare master would post a single topic for an entire series, which is misleading.
New
```python
def _is_recurrence_master(vevent):
    return bool(vevent.get('rrule')) and not vevent.get('recurrence-id')

# inside process_vevent, after UID check
if _is_recurrence_master(vevent):
    log.info(f"Skipping RRULE master (no expansion implemented) UID={uid}")
    return
```
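A quick sanity check of the predicate with hand-built events, using the same `icalendar` package the script already uses for `Calendar.from_ical`:

```python
from datetime import datetime
from icalendar import Event

master = Event()
master.add('uid', 'series-1@example.org')
master.add('rrule', {'freq': 'weekly'})
assert _is_recurrence_master(master)        # has RRULE, no RECURRENCE-ID: skip

instance = Event()
instance.add('uid', 'series-1@example.org')
instance.add('recurrence-id', datetime(2024, 6, 3, 10, 0))
assert not _is_recurrence_master(instance)  # expanded instance: import it
```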
Change 5 — Deterministic tag order
Why: Avoid churn when Discourse reorders tags.
New
```python
extra_tags = [t for t in (args.tags or []) if t]
tags = sorted(dict.fromkeys(DEFAULT_TAGS + extra_tags + [uid_tag]))
```
Old
```python
extra_tags = [t for t in (args.tags or []) if t]
tags = list(dict.fromkeys(DEFAULT_TAGS + extra_tags + [uid_tag]))
```
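Illustration of the difference (tag values made up):

```python
DEFAULT_TAGS = ["events", "calendar"]
extra_tags = ["meetup"]
uid_tag = "ics-1f2e3d4c-uid-9a8b7c6d"

# dict.fromkeys dedupes but keeps input order; sorted() is stable across runs
print(sorted(dict.fromkeys(DEFAULT_TAGS + extra_tags + [uid_tag])))
# ['calendar', 'events', 'ics-1f2e3d4c-uid-9a8b7c6d', 'meetup']
```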
Change 6 — Enforce tag length for user/default tags
Why: Prevent API errors if any human-provided or default tag is too long. Overlong tags are skipped with a warning.
New (inside `process_vevent`, when building tags)
```python
extra_tags_raw = [t for t in (args.tags or []) if t]
extra_tags = []
for t in extra_tags_raw:
    st = _sanitize_tag(t)
    if len(st) > TAG_MAX_LEN:
        log.warning(f"Skipping overlong tag (> {TAG_MAX_LEN}): {st}")
        continue
    extra_tags.append(st)

default_tags_sane = []
for t in DEFAULT_TAGS:
    st = _sanitize_tag(t)
    if len(st) > TAG_MAX_LEN:
        log.warning(f"Skipping overlong default tag (> {TAG_MAX_LEN}): {st}")
        continue
    default_tags_sane.append(st)

tags = sorted(dict.fromkeys(default_tags_sane + extra_tags + [uid_tag]))
```
(No “old” here — this is a new safeguard.)
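For example, with the default 30-char cap (values illustrative):

```python
args.tags = ["meetup", "a-very-long-human-supplied-tag-over-the-cap"]
# logs: Skipping overlong tag (> 30): a-very-long-human-supplied-tag-over-the-cap
# result: extra_tags == ["meetup"]
```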
With these changes:
• Each feed+UID pair gets its own tag, always within the length limit.
• Transient 429/5xx errors and slow endpoints are retried with backoff instead of failing the cron run.
• Reused UIDs across different ICS sources no longer collide.
• Human-provided tags are kept, but overlong ones are skipped with a warning.
• Safe to run from cron without tag churn or surprises.