This commit is contained in:
vuongps38770
2026-01-13 09:33:10 +07:00
parent 29544da4c6
commit 7c41ddaa82
9 changed files with 1362 additions and 599 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
game_registry.py - Tự động load games từ thư mục games/
Hệ thống sẽ:
1. Scan thư mục games/
2. Load mọi file .py (trừ _template.py và __init__.py)
3. Chỉ load games có active: True
4. Đăng ký tự động vào registry
@@ -10,6 +10,7 @@ Hệ thống sẽ:
THÊM GAME MỚI = TẠO FILE TRONG games/
BẬT/TẮT GAME = SỬA active: True/False trong file game
"""
import importlib.util
from pathlib import Path
from typing import Dict, List, Any, Optional
@@ -20,75 +21,78 @@ class GameRegistry:
"""
Registry tự động load games từ thư mục games/
Chỉ load games có active: True
Supports lookup by:
- game_type (string): "quiz", "sequence"
- type_id (int): 1, 2
"""
_instance: Optional["GameRegistry"] = None
_all_games: Dict[str, GameType] = {} # Keyed by game_type
_id_map: Dict[int, str] = {} # type_id -> game_type
_loaded: bool = False
def __new__(cls):
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._instance._all_games = {}
cls._instance._id_map = {}
return cls._instance
def __init__(self):
if not self._loaded:
self._load_all_games()
self._loaded = True
def _load_all_games(self):
"""Scan và load tất cả game definitions từ games/"""
games_dir = Path(__file__).parent / "games"
if not games_dir.exists():
print(f"⚠️ Games directory not found: {games_dir}")
return
for file_path in games_dir.glob("*.py"):
# Skip __init__.py và _template.py và base.py
if file_path.name.startswith("_") or file_path.name == "base.py":
continue
try:
game_def = self._load_game_from_file(file_path)
if game_def:
self._all_games[game_def.game_type] = game_def
if game_def.type_id > 0:
if game_def.type_id >= 0: # 0=quiz, 1=sequence are valid
self._id_map[game_def.type_id] = game_def.game_type
status = "" if game_def.active else "⏸️"
print(f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})")
print(
f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})"
)
except Exception as e:
print(f"❌ Error loading {file_path.name}: {e}")
def _load_game_from_file(self, file_path: Path) -> Optional[GameType]:
    """Import one game module from disk and build its GameType definition.

    Returns None when the module cannot be located/loaded or when it
    does not expose a module-level ``GAME_CONFIG`` dict. A module-level
    ``EXAMPLES`` list, if present, is merged into the config unless the
    config already carries its own "examples" entry.
    """
    spec = importlib.util.spec_from_file_location(
        f"games.{file_path.stem}", file_path
    )
    if spec is None or spec.loader is None:
        return None

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)  # executes the game file top-to-bottom

    config = getattr(module, "GAME_CONFIG", None)
    if config is None:
        return None

    examples = getattr(module, "EXAMPLES", [])
    if examples and "examples" not in config:
        config["examples"] = examples

    return create_game_type(config)
def reload(self):
"""Reload tất cả games"""
self._all_games.clear()
@@ -96,55 +100,57 @@ class GameRegistry:
self._loaded = False
self._load_all_games()
self._loaded = True
# ============== PUBLIC API ==============
def get_game(self, game_type: str) -> Optional[GameType]:
    """Return the registered game for *game_type*, but only if it is active."""
    found = self._all_games.get(game_type)
    if found and found.active:
        return found
    return None
def get_game_by_id(self, type_id: int) -> Optional[GameType]:
    """Resolve a numeric type_id to its active game definition, or None."""
    key = self._id_map.get(type_id)
    # Delegates the active-check to get_game so the rules live in one place.
    return self.get_game(key) if key else None
def get_game_type_by_id(self, type_id: int) -> Optional[str]:
    """Map a numeric type_id to its game_type string; None when unknown."""
    try:
        return self._id_map[type_id]
    except KeyError:
        return None
def get_id_by_game_type(self, game_type: str) -> int:
"""Convert game_type -> type_id"""
"""Convert game_type -> type_id. Returns -1 if not found."""
game = self._all_games.get(game_type)
return game.type_id if game else 0
return game.type_id if game else -1 # -1 = not found
def get_all_games(self) -> Dict[str, GameType]:
    """Return a new dict containing only the games marked active."""
    active: Dict[str, GameType] = {}
    for name, game in self._all_games.items():
        if game.active:
            active[name] = game
    return active
def get_all_games_including_inactive(self) -> Dict[str, GameType]:
    """Return a shallow copy of every registered game, active or not."""
    # dict(...) makes a shallow copy, so callers cannot mutate the registry.
    return dict(self._all_games)
def get_game_types(self) -> List[str]:
    """Return the game_type keys of all games currently marked active."""
    names: List[str] = []
    for name, game in self._all_games.items():
        if game.active:
            names.append(name)
    return names
def get_type_ids(self) -> List[int]:
"""Lấy danh sách type_ids ACTIVE"""
return [v.type_id for v in self._all_games.values() if v.active and v.type_id > 0]
return [
v.type_id for v in self._all_games.values() if v.active and v.type_id > 0
]
def get_analyzer_context(self) -> str:
"""Tạo context cho Analyzer (chỉ từ active games)"""
context_parts = []
for game_type, game in self._all_games.items():
if not game.active:
continue
hints = game.analyzer_rules # New field name
hints = game.analyzer_rules # New field name
if hints:
hints_text = "\n - ".join(hints)
context_parts.append(
@@ -152,9 +158,9 @@ class GameRegistry:
f" Description: {game.description}\n"
f" Suitable when:\n - {hints_text}"
)
return "\n\n".join(context_parts)
def is_active(self, game_type: str) -> bool:
"""Kiểm tra game có active không"""
game = self._all_games.get(game_type)

View File

@@ -4,88 +4,180 @@ games/_template.py - TEMPLATE CHO GAME MỚI
THÊM GAME MỚI CHỈ CẦN:
1. Copy file này
2. Rename thành <game_type>.py (ví dụ: matching.py)
3. Sửa nội dung bên trong theo hướng dẫn
4. DONE! Hệ thống tự động nhận diện.
Không cần sửa bất kỳ file nào khác!
"""
from typing import List, Optional
from typing import List, Literal, Optional
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
# ============== 1. SCHEMA ==============
# ============== 1. ITEM SCHEMA ==============
# Định nghĩa structure của 1 item trong game
# BẮT BUỘC phải có: original_quote và explanation
# BẮT BUỘC phải có: original_quote
class YourGameItem(BaseModel):
"""Schema cho 1 item của game"""
# Các trường BẮT BUỘC (để chống hallucination)
# === TRƯỜNG BẮT BUỘC ===
original_quote: str = Field(
description="Trích dẫn NGUYÊN VĂN từ văn bản gốc"
description="EXACT quote from source text - dùng để verify không hallucinate"
)
explanation: str = Field(description="Giải thích")
# Thêm các trường riêng của game ở đây
# === TRƯỜNG RIÊNG CỦA GAME ===
# Thêm các trường cần thiết cho game của bạn
# Ví dụ:
# question: str = Field(description="Câu hỏi")
# answer: str = Field(description="Đáp án")
question: str = Field(description="The question")
answer: str = Field(description="The correct answer")
# === TRƯỜNG HÌNH ẢNH (Khuyến nghị) ===
image_description: str = Field(default="", description="Visual description in English")
image_keywords: List[str] = Field(default=[], description="2-3 English keywords for image search")
image_is_complex: bool = Field(default=False, description="True if needs precise quantities/humans/complex scene")
# ============== 2. CONFIG ==============
# Cấu hình cho game
# ============== 2. METADATA SCHEMA ==============
# Metadata mô tả nội dung được generate
class YourGameMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary")
grade: int = Field(description="Grade level 1-5 (1=easy, 5=advanced)")
type: Literal["your_game"] = Field(default="your_game", description="Game type - MUST match game_type below")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
# ============== 3. OUTPUT SCHEMA ==============
# Wrapper chứa danh sách items và metadata
class YourGameOutput(BaseModel):
"""Output wrapper - BẮT BUỘC phải có"""
items: List[YourGameItem] = Field(description="List of game items")
metadata: YourGameMetadata = Field(description="Metadata about the content")
# Output parser - tự động từ output schema
output_parser = PydanticOutputParser(pydantic_object=YourGameOutput)
# ============== 4. CONFIG ==============
# Cấu hình cho game - ĐÂY LÀ PHẦN QUAN TRỌNG NHẤT
GAME_CONFIG = {
# Key duy nhất cho game (dùng trong API)
"game_type": "your_game",
# === REQUIRED FIELDS ===
# Key duy nhất cho game (dùng trong API) - PHẢI unique
"game_type": "your_game",
# ID số nguyên unique - PHẢI khác các game khác
# Quiz=1, Sequence=2, ... tiếp tục từ 3
"type_id": 99, # TODO: Đổi thành số unique
# Tên hiển thị
"display_name": "Tên Game",
"display_name": "Your Game Name",
# Mô tả ngắn
"description": "Mô tả game của bạn",
"description": "Description of your game",
# Số lượng items
"max_items": 5,
# Trỏ đến schema class
# Schema classes - BẮT BUỘC
"schema": YourGameItem,
"output_schema": YourGameOutput,
"output_parser": output_parser,
# Prompt cho LLM
"system_prompt": """Bạn là chuyên gia tạo [tên game].
NHIỆM VỤ: [Mô tả nhiệm vụ]
QUY TẮC:
1. original_quote PHẢI là trích dẫn NGUYÊN VĂN
2. [Quy tắc khác]
3. [Quy tắc khác]""",
# === OPTIONAL FIELDS (có default) ===
# Game có active không
"active": True,
# Số lượng items tối đa
"max_items": 10,
# Rules validate input trước khi generate (Direct Mode)
"input_format_rules": [
"Text should contain ... suitable for this game.",
"Text MUST have ...",
],
# Rules cho Analyzer nhận diện game phù hợp
"analyzer_rules": [
"Text MUST contain ...",
"NOT suitable if text is ...",
],
# Rules cho Generator tạo nội dung
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"original_quote = EXACT quote from source text",
"ALL content must come from source only - do NOT invent",
# Thêm rules riêng cho game của bạn
"Your specific rule 1",
"Your specific rule 2",
# Visual fields
"image_description: MUST be visual description in ENGLISH",
"image_keywords: MUST provide 2-3 English keywords",
"NEVER leave image fields empty!",
],
# Examples - giúp LLM học format
"examples": [] # Sẽ định nghĩa bên dưới
}
# ============== 3. EXAMPLES ==============
# Ví dụ input/output để:
# - Analyzer học khi nào nên suggest game này
# - Generator dùng làm few-shot
# ============== 5. EXAMPLES ==============
# Ví dụ input/output để LLM học pattern
EXAMPLES = [
{
# Input text mẫu
"input": "Văn bản mẫu ở đây...",
"input": "Sample text for your game...",
# Output mong đợi
# Output mong đợi - PHẢI match schema
"output": {
"items": [
{
"original_quote": "Trích dẫn từ văn bản",
"explanation": "Giải thích",
# Các trường khác của schema...
"original_quote": "Exact quote from input",
"question": "Sample question?",
"answer": "Sample answer",
"image_description": "Visual description",
"image_keywords": ["keyword1", "keyword2"],
"image_is_complex": False
}
]
],
"metadata": {
"title": "Sample Title",
"description": "Sample description",
"grade": 2,
"type": "your_game",
"difficulty": 2
}
},
# Analyzer học từ trường này
"why_suitable": "Giải thích tại sao văn bản này phù hợp với game này"
# Giải thích tại sao phù hợp - Analyzer học từ đây
"why_suitable": "Explain why this input is suitable for this game"
},
# Thêm 1-2 examples nữa...
# Thêm 1-2 examples nữa để LLM học tốt hơn...
]
# Gán examples vào config
GAME_CONFIG["examples"] = EXAMPLES
# ============== 6. POST PROCESS (Optional) ==============
# Function xử lý output sau khi LLM generate
def post_process_your_game(items: List[dict]) -> List[dict]:
    """Normalize generated items in place; returns the same list.

    Example cleanup step: trims surrounding whitespace from each
    non-empty "answer" value produced by the LLM.
    """
    for entry in items:
        answer = entry.get("answer")
        if answer:
            entry["answer"] = answer.strip()
    return items
# Đăng ký handler (optional)
# GAME_CONFIG["post_process_handler"] = post_process_your_game

View File

@@ -1,139 +1,172 @@
"""
games/quiz.py - Quiz Game - Multiple choice questions
games/quiz.py - Optimized for LLM Performance while keeping System Integrity
"""
from typing import List, Literal
import re
from pydantic import BaseModel, Field
from typing import List, Literal, Optional
from pydantic import BaseModel, Field, field_validator
from langchain_core.output_parsers import PydanticOutputParser
import re
# ============== SCHEMA ==============
# ==========================================
# 1. OPTIMIZED SCHEMA (Thông minh hơn)
# ==========================================
class QuizItem(BaseModel):
question: str = Field(description="The question based on source content")
answers: str = Field(description="The correct answer")
options: List[str] = Field(description="List of options including correct answer")
original_quote: str = Field(description="EXACT quote from source text")
image_description: str = Field(default="", description="Visual description for the question")
image_keywords: List[str] = Field(default=[], description="Keywords for image search")
image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects")
# LLM chỉ cần tập trung sinh ra raw data, việc clean để code lo
question: str = Field(description="Question text. Use ____ for blanks.")
# Request field có thể để default, logic xử lý sau
request: str = Field(
default="Choose the correct answer", description="Instruction type"
)
answer: str = Field(description="Correct answer text")
options: List[str] = Field(description="List of options")
original_quote: str = Field(description="Exact source sentence")
# Gom nhóm image fields để prompt gọn hơn
image_description: str = Field(
default="", description="Visual description (if needed)"
)
image_keywords: List[str] = Field(default=[])
image_is_complex: bool = Field(default=False)
@field_validator("answer", "options", mode="before")
@classmethod
def clean_prefixes(cls, v):
"""Tự động xóa A., B., (1)... ngay khi nhận dữ liệu từ LLM"""
def clean_str(text):
# Regex xóa (A), 1., Q: ở đầu và (1) ở cuối
text = re.sub(
r"^(\([A-Za-z0-9]\)|[A-Za-z0-9]\.|Q\d*:)\s*",
"",
str(text),
flags=re.IGNORECASE,
)
text = re.sub(r"\s*\([A-Za-z0-9]\)$", "", text)
return text.strip()
if isinstance(v, list):
return [clean_str(item) for item in v]
return clean_str(v)
class QuizMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(
description="Title for this content. Prefer title from source document if available and suitable, otherwise create a short descriptive title."
)
description: str = Field(
description="Short description summarizing the content/topic of the quiz."
)
grade: int = Field(
description="Estimated grade level 1-5 (1=easy/young, 5=advanced/older). Judge by vocabulary, concepts, required knowledge."
)
type: Literal["quiz"] = Field(default="quiz", description="Game type (always 'quiz')")
difficulty: int = Field(
description="Difficulty 1-5 for that grade (1=very easy, 5=very hard). Judge by question complexity, number of options, abstract concepts."
)
title: str = Field(description="Short content title")
description: str = Field(description="Summary")
grade: int = Field(description="Level 1-5")
type: Literal["quiz"] = "quiz"
difficulty: int = Field(description="Level 1-5")
class QuizOutput(BaseModel):
"""Output wrapper for quiz items"""
items: List[QuizItem] = Field(description="List of quiz items generated from source text")
metadata: QuizMetadata = Field(description="Metadata about the quiz content")
items: List[QuizItem]
metadata: QuizMetadata
# Output parser
output_parser = PydanticOutputParser(pydantic_object=QuizOutput)
# ==========================================
# 2. COMPACT CONFIG (Giữ đủ key, giảm nội dung)
# ==========================================
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
# --- SYSTEM FIELDS (Giữ nguyên không đổi) ---
"game_type": "quiz",
"display_name": "Quiz",
"description": "Multiple choice questions",
"type_id": 1,
"type_id": 0,
"active": True,
"max_items": 10,
"schema": QuizItem,
"output_schema": QuizOutput,
"output_parser": output_parser,
# --- USER UI HINTS (Rút gọn văn bản hiển thị) ---
"input_format_rules": [
"Text should contain facts or questions suitable for a quiz.",
"Prefer extracting existing multiple choice questions if available.",
"Text MUST contain questions with multiple choice options",
"Text must contain specific facts or Q&A content.",
"Suitable for multiple choice extraction.",
],
# 1. Recognition Rules (for Analyzer)
# --- PRE-CHECK LOGIC (Rút gọn) ---
"analyzer_rules": [
"Text MUST contain questions with multiple choice options",
"NOT suitable if text is just a list of words with no questions",
"Contains questions with options OR factual statements.",
"Not just a list of unconnected words.",
],
# 2. Rules tạo nội dung (cho Generator)
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"original_quote = EXACT quote from source text (full question block)",
"ALL content must come from source only - do NOT invent",
"REMOVE unnecessary numbering: 'Question 1:', '(1)', '(2)', 'A.', 'B.' from question/options/answers",
"STRICTLY CLEAN OUTPUT for 'answers': MUST contain ONLY the text content of the correct option.",
"FORBIDDEN in 'answers': Prefixes like '(1)', '(2)', 'A.', 'B.', '1.' - REMOVE THEM.",
"IMPORTANT: The 'answers' field MUST EXACTLY MATCH one of the 'options' values text-wise.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description relevant to the question in ENGLISH.",
"image_keywords: MUST provide 2-3 English keywords for search.",
"image_is_complex: FALSE for simple/static objects, TRUE for quantities/humans/complex scenes",
"NEVER leave image fields empty!",
# --- LLM INSTRUCTIONS ---
"generation_rules": [
"MODE: STRICT EXTRACTION & LOCALITY PRIORITIZED.",
"1. MANDATORY OPTIONS & LOCALITY: Only create a quiz item if 2-4 options are EXPLICITLY present and located immediately after/below the question. SKIP if options are shared in a 'Word Box' or 'Word Bank' tại đầu/cuối trang.",
"2. ANSWER PRIORITY: Use the provided key if available. If the marker is empty, solve it yourself using grammar rules. Do not redefine existing keys.",
"3. ZERO FABRICATION: Do NOT invent distractors. Only extract what is explicitly present.",
"4. LOGICAL AMBIGUITY: If a question is grammatically correct with multiple options but lacks context, SKIP IT.",
"5. SEMANTIC OPTION EXTRACTION: Extract ONLY the meaningful word/phrase. Strip away ALL labels like (1), (A), or OCR noise.",
"6. SMART FILL-IN-THE-BLANK: If the question is a 'Fill in the blank' type, you MUST analyze the sentence structure and place the '____' at the grammatically correct position (e.g., 'Blood ____ oozing'). DO NOT blindly put it at the end. If the sentence is already a complete question (not a blank type), do not add '____'.",
"7. METADATA: Fill metadata accurately based on content. Do not leave empty."
],
# --- EXAMPLES (Chỉ giữ 1 cái tốt nhất để làm mẫu format) ---
"examples": [
{
"input": "The giraffe has a long neck. Options: neck, leg, tail.",
"output": {
"items": [
{
"question": "The giraffe has a long ____.",
"request": "Fill in the blank",
"answer": "neck",
"options": ["neck", "leg", "tail"],
"original_quote": "The giraffe has a long neck.",
"image_description": "A giraffe",
"image_keywords": ["giraffe"],
"image_is_complex": False,
}
],
"metadata": {
"title": "Animals",
"description": "Giraffe anatomy",
"grade": 2,
"type": "quiz",
"difficulty": 1,
},
},
"why_suitable": "Valid extraction: Text has Fact + Options.",
}
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}
def clean_prefix(text: str) -> str:
    """Strip one leading enumeration marker such as '(1)', '(A)', '1.' or 'A.'.

    Empty/None input is returned unchanged; otherwise the marker (plus any
    whitespace after it) is removed and the result is stripped of
    surrounding whitespace.
    """
    if not text:
        return text
    # One marker: parenthesized digits/letter, or digits/letter followed by a dot.
    marker = r'^(\(\d+\)|\([A-Za-z]\)|\d+\.|[A-Za-z]\.)\s*'
    return re.sub(marker, '', text).strip()
# # ==========================================
# # 3. HANDLER (Logic hậu xử lý gọn nhẹ)
# # ==========================================
# def post_process_quiz(items: List[dict]) -> List[dict]:
# valid_items = []
# for item in items:
# options = item.get("options", [])
# answer = item.get("answer", "")
# if len(options) < 2:
# continue
# # Nếu có answer từ input, thì so khớp để làm sạch
# if answer:
# matched_option = next(
# (opt for opt in options if opt.lower() == answer.lower()), None
# )
# if matched_option:
# item["answer"] = matched_option
# # Nếu có answer mà không khớp option nào thì mới cân nhắc loại (hoặc để AI tự đoán lại)
# # Nếu answer rỗng (do ngoặc trống), ta vẫn giữ câu này lại
# # (với điều kiện LLM đã được dặn là phải tự điền vào trường answer)
# if not item.get("answer"):
# # Bạn có thể chọn loại bỏ hoặc tin tưởng vào đáp án LLM tự suy luận
# pass
# item["request"] = (
# "Fill in the blank"
# if "____" in item.get("question", "")
# else "Choose the correct answer"
# )
# valid_items.append(item)
# return valid_items
def post_process_quiz(items: List[dict]) -> List[dict]:
    """Strip enumeration prefixes from every item's answers and options.

    Mutates the given items in place (via clean_prefix) and returns the
    same list for chaining.
    """
    for quiz_item in items:
        answers = quiz_item.get("answers")
        if answers:
            quiz_item["answers"] = clean_prefix(answers)
        options = quiz_item.get("options")
        if options and isinstance(options, list):
            quiz_item["options"] = [clean_prefix(opt) for opt in options]
    return items
# Register handler
GAME_CONFIG["post_process_handler"] = post_process_quiz
# ============== EXAMPLES ==============
EXAMPLES = [
{
"input": "The Sun is a star at the center of the Solar System.",
"output": {
"items": [{
"question": "Where is the Sun located?",
"answers": "At the center of the Solar System",
"options": ["At the center of the Solar System", "At the edge of the Solar System", "Near the Moon", "Outside the universe"],
"original_quote": "The Sun is a star at the center of the Solar System.",
"image_description": "The sun in the middle of planets",
"image_keywords": ["sun", "planets"],
"image_is_complex": False
}]
},
"why_suitable": "Has clear facts"
}
]
# # Đăng ký handler
# GAME_CONFIG["post_process_handler"] = post_process_quiz

View File

@@ -1,6 +1,6 @@
"""
games/sequence.py - Arrange Sequence Game (Sentences OR Words)
type_id = 2
type_id = 1
LLM tự quyết định dựa vào ngữ nghĩa:
- "good morning", "apple", "happy" → WORD
- "Hi, I'm Lisa", "The sun rises" → SENTENCE
@@ -38,7 +38,7 @@ class SequenceMetadata(BaseModel):
description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences"
)
difficulty: int = Field(
description="Difficulty 1-5 for that grade."
description="Difficulty 1-3 for that grade."
)
@@ -52,59 +52,7 @@ class SequenceOutput(BaseModel):
output_parser = PydanticOutputParser(pydantic_object=SequenceOutput)
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
"game_type": "sequence",
"display_name": "Arrange Sequence",
"description": "Arrange sentences or words in order",
"type_id": 2,
"active": True,
"max_items": 10,
"schema": SequenceItem,
"output_schema": SequenceOutput,
"output_parser": output_parser,
"input_format_rules": [
"Text MUST be a list of items (words, phrases, sentences) to be ordered.",
"Do NOT generate sequence from multiple choice questions (A/B/C/D).",
"Do NOT generate sequence if the text is a quiz or test format.",
],
# 1. Recognition Rules (for Analyzer)
"analyzer_rules": [
"Text is a list of words, phrases, or sentences suitable for ordering",
"Items are separated by commas, semicolons, or newlines",
"Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
"NO questions required - just a list of items",
"Text is NOT a long essay or complex dialogue",
],
# 2. Rules tạo nội dung (cho Generator)
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"Analyze text semantically to extract meaningful items",
"For each item, decide type: WORD/PHRASE or SENTENCE",
"- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
"- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
"NEVER fill both fields for the same item",
"Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
"Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
# CONSISTENCY RULES
"CRITICAL: All extracted items MUST be of the SAME type.",
"Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
"If input has mixed types, pick the MAJORITY type and ignore the others.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
"image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}
# ============== EXAMPLES ==============
@@ -171,3 +119,59 @@ EXAMPLES = [
"why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field"
}
]
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
"game_type": "sequence",
"display_name": "Arrange Sequence",
"description": "Arrange sentences or words in order",
"type_id": 1,
"active": True,
"max_items": 10,
"schema": SequenceItem,
"output_schema": SequenceOutput,
"output_parser": output_parser,
"input_format_rules": [
"Text MUST be a list of items (words, phrases, sentences) to be ordered.",
"Do NOT generate sequence from multiple choice questions (A/B/C/D).",
"Do NOT generate sequence if the text is a quiz or test format.",
],
# 1. Recognition Rules (for Analyzer)
"analyzer_rules": [
"Text is a list of words, phrases, or sentences suitable for ordering",
"Items are separated by commas, semicolons, or newlines",
"Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
"NO questions required - just a list of items",
"Text is NOT a long essay or complex dialogue",
],
# 2. Rules tạo nội dung (cho Generator)
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"Analyze text semantically to extract meaningful items",
"For each item, decide type: WORD/PHRASE or SENTENCE",
"- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
"- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
"NEVER fill both fields for the same item",
"Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
"Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
# CONSISTENCY RULES
"CRITICAL: All extracted items MUST be of the SAME type.",
"Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
"If input has mixed types, pick the MAJORITY type and ignore the others.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
"image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}

View File

@@ -74,7 +74,7 @@ DEFAULT_CONFIGS = {
"openai": ModelConfig(
provider="openai",
model_name="gpt-4o-mini",
temperature=0.1
temperature=0.1,
),
"openai_light": ModelConfig(
provider="openai",
@@ -117,13 +117,19 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
from langchain_google_genai import ChatGoogleGenerativeAI
api_key = config.api_key or os.getenv("GOOGLE_API_KEY")
print("Using GOOGLE_API_KEY:", api_key)
if not api_key:
raise ValueError("GOOGLE_API_KEY required for Gemini. Set via env or config.api_key")
return ChatGoogleGenerativeAI(
model=config.model_name,
temperature=config.temperature,
google_api_key=api_key
google_api_key=api_key,
version="v1",
additional_headers={
"User-Agent": "PostmanRuntime/7.43.0",
"Accept": "*/*"
}
)
elif provider == "openai":
@@ -136,7 +142,8 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
return ChatOpenAI(
model=config.model_name,
temperature=config.temperature,
api_key=api_key
api_key=api_key,
base_url=config.base_url or None
)
else: