I dug into ai_api_audit_logs, and I think I've found the problem.
When a translation is sent, a get_max_tokens function allocates the max token budget based on the length of the text.
The problem is that most of that budget gets consumed by reasoning. Look at this audit log: the limit was set to 1000, and reasoning burned through all 1000 tokens before any output was produced (finish_reason is "length", and reasoning_tokens accounts for every one of the 1000 completion_tokens).
The limit should be much higher for reasoning models.
data: {"id":"chatcmpl-CQ7XU4Ep16RClb7OZQAxOXN9JWgIG","object":"chat.completion.chunk","created":1760341544,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null}}],"usage":null,"obfuscation":"dPNNK7ojEf"}
data: {"id":"chatcmpl-CQ7XU4Ep16RClb7OZQAxOXN9JWgIG","object":"chat.completion.chunk","created":1760341544,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[{"index":0,"delta":{},"finish_reason":"length"}],"usage":null,"obfuscation":"dM2r"}
data: {"id":"chatcmpl-CQ7XU4Ep16RClb7OZQAxOXN9JWgIG","object":"chat.completion.chunk","created":1760341544,"model":"gpt-5-2025-08-07","service_tier":"default","system_fingerprint":null,"choices":[],"usage":{"prompt_tokens":1075,"completion_tokens":1000,"total_tokens":2075,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":1000,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"j4"}
data: [DONE]
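A minimal sketch of one possible fix, assuming get_max_tokens takes the source text and model name. The signature, the prefix-based model check, and the 2000-token reasoning reserve below are all assumptions for illustration, not the project's actual code:

```python
# Sketch: length-based token budget that reserves headroom for reasoning models.
# Assumptions: get_max_tokens(text, model) is the real signature, and reasoning
# models can be detected by name prefix -- both are hypothetical here.

REASONING_MODEL_PREFIXES = ("gpt-5", "o1", "o3")  # assumed list, adjust as needed


def is_reasoning_model(model: str) -> bool:
    """Heuristic: treat these model families as reasoning models."""
    return model.startswith(REASONING_MODEL_PREFIXES)


def get_max_tokens(text: str, model: str) -> int:
    # Rough length-based budget: assume ~4 characters per token for the source
    # text, doubled to leave room for the translated output.
    base = max(256, len(text) // 4 * 2)
    if is_reasoning_model(model):
        # Reasoning tokens count against the completion budget, so reserve a
        # fixed chunk on top of the translation budget. The audit log above
        # shows 1000 tokens of reasoning spend, so 2000 gives comfortable
        # headroom; this is a guess, not a tuned value.
        return base + 2000
    return base
```

If the API also exposes a reasoning-effort control (e.g. the reasoning_effort parameter on newer OpenAI reasoning models), lowering it would shrink the reasoning spend too, but raising the budget is the direct fix for the truncation shown in the log.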