// /utils/profanityCheck.js

import { profanityList_en, profanityList_vn } from '../constants/ProfanityLists';
import { Filter } from 'bad-words';

// Shared `bad-words` Filter instance, constructed once at module load with no
// options and reused by every containsProfanity() call.
const badWordsFilter = new Filter();

/**
 * Folds text for English comparison: accent-free and lower-case.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} The input with combining diacritics removed, in lower case.
 */
const normalizeText_en = (input) => {
    // NFD splits accented letters into base letter + combining mark(s),
    // so the marks can be dropped on their own.
    const decomposed = input.normalize('NFD');
    const withoutMarks = decomposed.replace(/[\u0300-\u036f]/g, '');
    return withoutMarks.toLowerCase();
};

/**
 * Normalizes text for Vietnamese comparison: case-insensitive, tones and
 * accents kept.
 *
 * The text is first brought to Unicode NFC so that a letter written as
 * base + combining marks (decomposed form) compares equal to the same letter
 * written as a single precomposed code point; without this, visually
 * identical words can fail to match.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} The NFC-composed, lower-cased text.
 */
const normalizeText_vn = (input) => input.normalize('NFC').toLowerCase();

/**
 * Removes HTML-like tags from a string.
 *
 * @param {string} input - Text that may contain markup.
 * @returns {string} The text with every tag-shaped span deleted.
 */
const stripHtmlTags = (input) => {
    // A tag is "<", an optional "/", one or more non-">" characters, then
    // either the closing ">" or the end of the string (unterminated tag).
    const tagPattern = /<\/?[^>]+(>|$)/g;
    return input.replace(tagPattern, '');
};

// Characters considered part of a word: letters, combining marks (tone marks
// in decomposed text), digits and underscore. JavaScript's `\b` only knows
// ASCII word characters, so an entry that starts or ends with a letter such
// as "đ" or "ồ" can never satisfy `\b…\b`; explicit Unicode classes are used
// for the whole-word check instead.
const WORD_CHARS = '\\p{L}\\p{M}\\p{N}_';

// One compiled matcher (or null for an empty list) per word-list array, so the
// pattern is not rebuilt on every call. The lists are treated as immutable
// once they have been used.
const listMatcherCache = new WeakMap();

// Escapes RegExp syntax characters so a list entry is matched literally.
const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

// Builds (and caches) a whole-word matcher for the given list, or null when
// the list has no usable entries.
const getListMatcher = (wordList, normalize) => {
    if (listMatcherCache.has(wordList)) {
        return listMatcherCache.get(wordList);
    }

    const alternatives = wordList
        .filter((word) => typeof word === 'string')
        .map((word) => normalize(word))
        // An empty alternative would match everywhere and flag all text.
        .filter((word) => word !== '')
        .map(escapeRegExp);

    // Leading side uses "start or a non-word character" rather than a
    // lookbehind to stay compatible with engines that lack lookbehind support.
    const matcher =
        alternatives.length === 0
            ? null
            : new RegExp(
                  `(?:^|[^${WORD_CHARS}])(?:${alternatives.join('|')})(?![${WORD_CHARS}])`,
                  'u'
              );

    listMatcherCache.set(wordList, matcher);
    return matcher;
};

/**
 * Reports whether the given text contains profanity.
 *
 * HTML tags are stripped first; the remaining text is then checked, in order,
 * against the Vietnamese list (case-insensitive, tones kept), the `bad-words`
 * library, and the English list (case-insensitive, accents removed). The
 * function returns as soon as any check matches.
 *
 * @param {string} text - Text to inspect (may contain HTML tags).
 * @returns {boolean} `true` if any check finds a match; `false` otherwise,
 *     including when `text` is not a string or is empty.
 */
export const containsProfanity = (text) => {
    if (typeof text !== 'string' || text === '') {
        return false;
    }

    // 1. Sanitize input text
    const sanitizedText = stripHtmlTags(text);

    // 2. Check Vietnamese list (case-insensitive, tones intact)
    const vietnameseMatcher = getListMatcher(profanityList_vn, normalizeText_vn);
    if (vietnameseMatcher !== null && vietnameseMatcher.test(normalizeText_vn(sanitizedText))) {
        return true;
    }

    // 3. Check bad-words library. isProfane() is its boolean check; comparing
    //    clean() output with the input is avoided because clean() rebuilds the
    //    string from split tokens and is not guaranteed to return clean text
    //    unchanged.
    if (badWordsFilter.isProfane(sanitizedText)) {
        return true;
    }

    // 4. Check English list (case-insensitive, accents removed)
    const englishMatcher = getListMatcher(profanityList_en, normalizeText_en);
    return englishMatcher !== null && englishMatcher.test(normalizeText_en(sanitizedText));
};