Well done and thanks @meglio, interesting results across a few Discourse instances.
I turned this into a parametrised query for Data Explorer (@riking awesome job on this plugin!)
most-used-words-per-user.dcquery (1).json (1.8 KB)
-- [params]
-- int :past_x_days = 90
-- int :top_x_words = 60
-- int :top_x_users = 100
WITH posters AS (
SELECT
sum (p.word_count ) AS total_words,
u.*
FROM
users u
LEFT JOIN posts p ON p.user_id = u.id
WHERE
p.created_at > now() ::date - ( :past_x_days + 1 )
GROUP BY
u.id
)
SELECT
u.id AS user_id,
(
SELECT
string_agg ( word || ' - ' || nentry, ' ' )
FROM (
SELECT
*
FROM
ts_stat ('
SELECT search_data FROM post_search_data psd
LEFT JOIN posts p
ON p.id = psd.post_id
WHERE p.created_at > now()::date - ' || (
:past_x_days + 1 )
|| ' AND p.user_id = ' || u.id
)
ORDER BY
nentry DESC,
ndoc DESC,
word
LIMIT :top_x_words
)
AS freq
)
AS top_words
FROM
posters AS u
ORDER BY
u.total_words DESC
LIMIT :top_x_users