This commit is contained in:
128
import-vocab-to-vocab.js
Normal file
128
import-vocab-to-vocab.js
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
 * Import vocabulary from JSON files into the Vocab table.
 *
 * Each vocabulary word becomes one Vocab record with:
 *   - text:      the vocabulary word
 *   - base_word: same as text (can be updated later)
 *   - grade:     the grade/unit/lesson code (e.g., 101, 102, etc.)
 *   - form_key:  'base' by default
 *
 * A word whose text is already present in the Vocab table is skipped.
 */
|
||||
|
||||
const { sequelize } = require('./config/database');
|
||||
const { Vocab } = require('./models');
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
// Configuration
// Grade sub-folders to scan, in the order they are processed.
const GRADES = ['g1', 'g2', 'g3', 'g4', 'g5'];
// Root directory that holds one sub-folder of JSON files per grade.
const DATA_DIR = path.join(__dirname, 'data', 'familynfriend');
|
||||
|
||||
/**
 * Insert one vocabulary word into the Vocab table unless a row with the same
 * text already exists.
 *
 * Database errors are logged and swallowed so that a single bad word does not
 * stop the rest of the import.
 *
 * @param {*} word - Raw vocab item from the JSON file; only non-empty strings are imported.
 * @param {number} gradeCode - Already-parsed numeric grade/unit/lesson code.
 * @param {{imported: number, skipped: number}} stats - Running counters, mutated in place.
 * @returns {Promise<void>}
 */
async function importVocabWord(word, gradeCode, stats) {
  // Falsy items (null, '', 0, ...) are silently ignored.
  if (!word) {
    return;
  }

  // Calling .trim() on a non-string would throw, so reject those explicitly.
  if (typeof word !== 'string') {
    console.warn(` ⚠️ Skipping non-string vocab entry: ${JSON.stringify(word)}`);
    return;
  }

  const trimmedWord = word.trim();
  if (trimmedWord === '') {
    return;
  }

  try {
    // The duplicate check matches on text only: a word already stored under
    // any grade is treated as existing.
    // NOTE(review): confirm whether the same word should be allowed once per grade.
    const existing = await Vocab.findOne({ where: { text: trimmedWord } });
    if (existing) {
      stats.skipped++;
      return;
    }

    await Vocab.create({
      text: trimmedWord,
      base_word: trimmedWord, // Default to same as text, can be updated later
      grade: gradeCode,
      form_key: 'base',
      vi: '', // Can be filled later
      category: null,
      topic: null,
      etc: 'familynfriend',
      is_active: true,
    });
    stats.imported++;
  } catch (err) {
    console.error(` ❌ Error importing "${word}": ${err.message}`);
  }
}

/**
 * Read one JSON file and import the vocab of every lesson/unit entry in it.
 *
 * The file must contain an array of objects shaped like `{ grade, vocab: [...] }`.
 * Entries that are not objects, lack a grade, have a non-array `vocab`, or
 * carry a non-numeric grade are skipped. Read/parse errors are logged and the
 * file is abandoned without aborting the overall import.
 *
 * @param {string} filePath - Full path of the JSON file.
 * @param {string} jsonFile - File name, used in log messages only.
 * @param {{imported: number, skipped: number}} stats - Running counters, mutated in place.
 * @returns {Promise<void>}
 */
async function importVocabFile(filePath, jsonFile, stats) {
  console.log(` 📄 Reading ${jsonFile}...`);

  try {
    const data = JSON.parse(fs.readFileSync(filePath, 'utf8'));

    if (!Array.isArray(data)) {
      console.log(` ⚠️ Skipping ${jsonFile}: not an array`);
      return;
    }

    // Process each lesson/unit entry
    for (const entry of data) {
      // Destructuring null/undefined would throw and abort the whole file.
      if (entry === null || typeof entry !== 'object') {
        continue;
      }

      const { grade, vocab } = entry;
      if (!grade || !Array.isArray(vocab)) {
        continue;
      }

      // Parse once per entry with an explicit radix; reject NaN up front
      // instead of sending it to the database for every word.
      const gradeCode = Number.parseInt(grade, 10);
      if (Number.isNaN(gradeCode)) {
        console.warn(` ⚠️ Skipping entry in ${jsonFile}: non-numeric grade "${grade}"`);
        continue;
      }

      console.log(` 📚 Grade ${grade}: ${vocab.length} vocab words`);

      // Sequential on purpose: each insert must be visible to the next
      // duplicate check.
      for (const word of vocab) {
        await importVocabWord(word, gradeCode, stats);
      }
    }
  } catch (err) {
    console.error(` ❌ Error reading ${jsonFile}: ${err.message}`);
  }
}

/**
 * Import every vocabulary word found under DATA_DIR/<grade>/*.json into the
 * Vocab table, then print a summary and terminate the process.
 *
 * Exits with code 0 on success and 1 if the database connection or the final
 * count fails. The Sequelize connection is closed before exiting.
 *
 * @returns {Promise<void>} Does not normally resolve: the process exits first.
 */
async function importVocabToVocabTable() {
  const stats = { imported: 0, skipped: 0 };
  let exitCode = 0;

  try {
    console.log('🔄 Starting vocab import to Vocab table...');

    await sequelize.authenticate();
    console.log('✅ Database connection OK');

    // Process each grade folder
    for (const gradeFolder of GRADES) {
      const gradeDir = path.join(DATA_DIR, gradeFolder);

      if (!fs.existsSync(gradeDir)) {
        console.log(`⚠️ Directory not found: ${gradeDir}`);
        continue;
      }

      // Get all JSON files in the grade directory
      const jsonFiles = fs.readdirSync(gradeDir).filter((f) => f.endsWith('.json'));

      console.log(`\n📂 Processing ${gradeFolder.toUpperCase()} (${jsonFiles.length} files)...`);

      for (const jsonFile of jsonFiles) {
        await importVocabFile(path.join(gradeDir, jsonFile), jsonFile, stats);
      }
    }

    console.log('\n✅ Import complete!');
    console.log(`📊 Total imported: ${stats.imported}`);
    console.log(`⏭️ Total skipped (duplicates): ${stats.skipped}`);

    // Show final stats
    const totalCount = await Vocab.count();
    console.log(`📊 Total vocab in database: ${totalCount}`);
  } catch (error) {
    console.error('❌ Error during import:', error.message);
    console.error(error.stack);
    exitCode = 1;
  } finally {
    // Release the connection before exiting; a failure to close must not
    // mask the real outcome of the import.
    try {
      await sequelize.close();
    } catch (closeError) {
      console.error('❌ Error closing database connection:', closeError.message);
    }
  }

  process.exit(exitCode);
}
|
||||
|
||||
// Run the import. The returned promise is deliberately not awaited: the
// function logs its own errors and terminates the process itself.
void importVocabToVocabTable();
|
||||
Reference in New Issue
Block a user