import { openai } from "./openai";

/**
 * Moderation model id sent with every request.
 *
 * Overridable through the `OPENAI_MODERATION_MODEL` environment variable.
 * `||` (rather than `??`) is deliberate: a blank or whitespace-only value such
 * as `OPENAI_MODERATION_MODEL=` in an env file is treated as "unset" instead of
 * being forwarded as an empty model id.
 */
const MODERATION_MODEL =
  process.env.OPENAI_MODERATION_MODEL?.trim() || "omni-moderation-latest";

/** Outcome of a single moderation check. */
export interface ModerationResult {
  /** Overall verdict taken from the first moderation result (`flagged`). */
  flagged: boolean;
  /** OpenAI category keys that tripped, e.g. ["hate", "violence"]. */
  categories: string[];
}

/**
 * Screen text with OpenAI's moderation endpoint.
 *
 * Fails OPEN: if the moderation call errors (outage, quota), we log and return
 * `flagged: false` so a moderation hiccup never blocks the product. The result
 * is advisory — callers decide whether to reject input or flag generated output.
 *
 * @param input - Text to screen. It is trimmed first; empty or whitespace-only
 *   input returns an unflagged result without calling the API.
 * @returns The `flagged` verdict of the first result plus the keys of every
 *   category whose value is truthy. An unflagged result with no categories is
 *   returned when the response contains no results or the call throws.
 */
export async function moderateText(input: string): Promise<ModerationResult> {
  const text = input.trim();
  if (!text) return { flagged: false, categories: [] };

  try {
    const res = await openai().moderations.create({
      model: MODERATION_MODEL,
      input: text,
    });

    // A single input is sent, so only the first result is inspected.
    const result = res.results[0];
    if (!result) return { flagged: false, categories: [] };

    // Keep only the category keys whose value is truthy.
    const categories = Object.entries(result.categories)
      .filter(([, tripped]) => tripped)
      .map(([key]) => key);

    return { flagged: result.flagged, categories };
  } catch (err) {
    // Deliberate best-effort: log and fail open rather than propagate.
    console.error("[moderation] check failed — failing open", err);
    return { flagged: false, categories: [] };
  }
}

/**
 * Short human-readable reason for a content flag from a moderation result.
 *
 * Does not inspect `result.flagged`; callers are expected to invoke it only
 * for results they treat as flagged.
 *
 * @param result - Moderation result to describe.
 * @returns `Automated moderation flagged: <categories>` with the category keys
 *   joined by ", ", or "policy violation" in their place when the list is empty.
 */
export function moderationReason(result: ModerationResult): string {
  const cats = result.categories.length
    ? result.categories.join(", ")
    : "policy violation";
  return `Automated moderation flagged: ${cats}`;
}