129 lines
3.9 KiB
JavaScript
129 lines
3.9 KiB
JavaScript
/**
|
|
* Import vocabulary from JSON files into Vocab table
|
|
* Each vocab word will become a Vocab record with:
|
|
* - text: the vocabulary word
|
|
* - base_word: same as text (can be updated later)
|
|
* - grade: the grade/unit/lesson code (e.g., 101, 102, etc.)
|
|
* - form_key: 'base' by default
|
|
*/
|
|
|
|
const { sequelize } = require('./config/database');
|
|
const { Vocab } = require('./models');
|
|
const fs = require('fs');
|
|
const path = require('path');
|
|
|
|
// Configuration

// Root directory containing the per-grade "Family & Friends" vocabulary JSON files.
const DATA_DIR = path.join(__dirname, 'data', 'familynfriend');

// Grade sub-folders to scan under DATA_DIR (grades 1 through 5).
const GRADES = ['g1', 'g2', 'g3', 'g4', 'g5'];
|
|
|
|
/**
 * Import vocabulary from JSON files into the Vocab table.
 *
 * Walks each grade folder under DATA_DIR, reads every *.json file, and creates
 * one Vocab record per word with form_key 'base' and base_word equal to the
 * word itself. Words already present for the same text AND grade are skipped.
 * Unreadable files and malformed entries are logged and skipped; the process
 * exits with code 0 on completion or 1 on a fatal error (e.g. DB unreachable).
 */
async function importVocabToVocabTable() {
  try {
    console.log('🔄 Starting vocab import to Vocab table...');

    await sequelize.authenticate();
    console.log('✅ Database connection OK');

    let totalImported = 0;
    let totalSkipped = 0;

    // Process each grade folder
    for (const gradeFolder of GRADES) {
      const gradeDir = path.join(DATA_DIR, gradeFolder);

      if (!fs.existsSync(gradeDir)) {
        console.log(`⚠️ Directory not found: ${gradeDir}`);
        continue;
      }

      // Get all JSON files in the grade directory
      const jsonFiles = fs.readdirSync(gradeDir).filter((f) => f.endsWith('.json'));

      console.log(`\n📂 Processing ${gradeFolder.toUpperCase()} (${jsonFiles.length} files)...`);

      for (const jsonFile of jsonFiles) {
        const filePath = path.join(gradeDir, jsonFile);
        console.log(` 📄 Reading ${jsonFile}...`);

        try {
          const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));

          if (!Array.isArray(data)) {
            console.log(` ⚠️ Skipping ${jsonFile}: not an array`);
            continue;
          }

          // Process each lesson/unit entry
          for (const entry of data) {
            const { grade, vocab } = entry;

            if (!grade || !vocab || !Array.isArray(vocab)) {
              continue;
            }

            // FIX: parse with an explicit radix, and skip entries whose grade
            // code is not numeric instead of silently inserting NaN.
            const gradeCode = Number.parseInt(grade, 10);
            if (Number.isNaN(gradeCode)) {
              console.log(` ⚠️ Skipping entry with non-numeric grade "${grade}" in ${jsonFile}`);
              continue;
            }

            console.log(` 📚 Grade ${grade}: ${vocab.length} vocab words`);

            // Import each vocabulary word
            for (const word of vocab) {
              // FIX: guard against non-string array elements (numbers, null,
              // objects) — word.trim() would throw on those.
              if (typeof word !== 'string' || word.trim() === '') {
                continue;
              }

              try {
                const trimmedWord = word.trim();

                // Check if this vocab already exists (same text and grade).
                // FIX: the original query matched on text only, so a word
                // imported for one grade could never be added for another —
                // contradicting the stated intent of the check.
                const existing = await Vocab.findOne({
                  where: {
                    text: trimmedWord,
                    grade: gradeCode
                  }
                });

                if (existing) {
                  totalSkipped++;
                  continue;
                }

                // Create new vocab entry
                await Vocab.create({
                  text: trimmedWord,
                  base_word: trimmedWord, // Default to same as text, can be updated later
                  grade: gradeCode,
                  form_key: 'base',
                  vi: '', // Can be filled later
                  category: null,
                  topic: null,
                  etc: 'familynfriend',
                  is_active: true
                });

                totalImported++;
              } catch (err) {
                // Per-word failures are logged and skipped so one bad row
                // does not abort the whole import.
                console.error(` ❌ Error importing "${word}": ${err.message}`);
              }
            }
          }
        } catch (err) {
          // Unreadable/invalid JSON file: log and move on to the next file.
          console.error(` ❌ Error reading ${jsonFile}: ${err.message}`);
        }
      }
    }

    console.log('\n✅ Import complete!');
    console.log(`📊 Total imported: ${totalImported}`);
    console.log(`⏭️ Total skipped (duplicates): ${totalSkipped}`);

    // Show final stats
    const totalCount = await Vocab.count();
    console.log(`📊 Total vocab in database: ${totalCount}`);

    process.exit(0);
  } catch (error) {
    console.error('❌ Error during import:', error.message);
    console.error(error.stack);
    process.exit(1);
  }
}

// Run the import
importVocabToVocabTable();
|