commit 31de8b0d844b1480d2176eba92bc2c281137933d Author: vuongps38770 <166083538+vuongps38770@users.noreply.github.com> Date: Thu Dec 25 18:06:29 2025 +0700 check point diff --git a/.env b/.env new file mode 100644 index 0000000..e69de29 diff --git a/API.md b/API.md new file mode 100644 index 0000000..893f6fa --- /dev/null +++ b/API.md @@ -0,0 +1,186 @@ +# Game Generator API + +## Game Types + +| type_id | game_type | Mô tả | +|---------|-----------|-------| +| 1 | quiz | Multiple choice questions | +| 2 | sequence_sentence | Sắp xếp câu | +| 3 | sequence_word | Sắp xếp từ | + +--- + +## 1. POST /generate + +Analyze + Generate nhiều games. + +**Request:** +```json +{ + "text": "Mặt Trời là ngôi sao...", + "enabled_game_ids": [1, 2], // optional, default: all + "max_items": 3, // optional, default: 3 + "min_score": 30, // optional, default: 50 + "run_validator": true // optional, default: true +} +``` + +**Response:** +```json +{ + "success": true, + "games": [1, 2], + "game_scores": [ + {"type_id": 1, "score": 85, "reason": "..."} + ], + "results": { + "1": {"items": [{"question": "...", "answers": "...", ...}], "metadata": {...}}, + "2": {"items": [{"sentence": "...", ...}], "metadata": {...}} + }, + "token_usage": {"prompt_tokens": 100, "completion_tokens": 50}, + "llm": "gemini/gemini-2.0-flash-lite" +} +``` + +--- + +## 2. POST /generate/single + +1 API call = Analyze + Generate 1 game tốt nhất. + +**Request:** +```json +{ + "text": "Python là ngôn ngữ...", + "enabled_game_ids": [1, 2, 3], // optional + "max_items": 3 // optional +} +``` + +**Response:** +```json +{ + "success": true, + "type_id": 1, + "reason": "Text has clear facts", + "items": [{"question": "...", ...}], + "token_usage": {...}, + "llm": "..." +} +``` + +--- + +## 3. POST /generate/{type_id} + +Generate trực tiếp 1 game (không analyze). + +### Quiz (type_id = 1) + +**Input format:** +``` +Question: Thủ đô Việt Nam? +A. Hà Nội +B. TP HCM +C. Đà Nẵng +D. Huế +Correct: A +``` + +**Request:** +```json +{ + "text": "Question: ...\\nA. ...\\nB. 
...\\nCorrect: A", + "max_items": 5 +} +``` + +### Sequence Sentence (type_id = 2) + +**Input format:** +``` +sentence1; sentence2; sentence3 +``` + +**Request:** +```json +{ + "text": "Mặt trời mọc; Chim hót; Người thức dậy", + "max_items": 10 +} +``` + +### Sequence Word (type_id = 3) + +**Input format:** +``` +word1; word2; word3 +``` + +**Request:** +```json +{ + "text": "Apple; Banana; Orange; Grape", + "max_items": 10 +} +``` + +**Response (all direct):** +```json +{ + "success": true, + "games": [1], + "results": {"1": {"items": [...], "metadata": {...}}}, + "token_usage": {...}, + "llm": "..." +} +``` + +--- + +## 4. GET /games + +**Response:** +```json +{ + "total": 3, + "active_count": 3, + "games": [ + {"type_id": 1, "game_type": "quiz", "display_name": "Quiz", "active": true}, + {"type_id": 2, "game_type": "sequence_sentence", ...}, + {"type_id": 3, "game_type": "sequence_word", ...} + ] +} +``` + +--- + +## 5. POST /llm + +**Request:** +```json +{ + "provider": "gemini", + "model_name": "gemini-2.0-flash-lite", + "temperature": 0.1 +} +``` + +Ollama: +```json +{ + "provider": "ollama", + "model_name": "qwen2.5:14b", + "base_url": "http://localhost:11434" +} +``` + +--- + +## 6. Other Endpoints + +- `GET /llm` - Xem LLM config hiện tại +- `POST /reload` - Reload game definitions +- `GET /health` - Health check +- `POST /games/{game_type}/activate` - Bật game +- `POST /games/{game_type}/deactivate` - Tắt game diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..94beb07 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy source code +COPY . . 
+ +# Expose port +EXPOSE 2009 + +# Start command +CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "2009"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..5ba5a19 --- /dev/null +++ b/README.md @@ -0,0 +1,100 @@ +# GAME GENERATOR + +## 🚀 TỐI ƯU API CALLS + +| Trước | Sau | +|-------|-----| +| Analyzer: 1 call | Analyzer: 1 call | +| Generate: N calls (1 per game) | Generate: **1 call** (tất cả games) | +| Validator: N calls | Validator: **0 call** (Python) | +| **Tổng: N+1 calls** | **Tổng: 1-2 calls** | + +--- + +## 📁 Cấu trúc + +``` +gen_using_graph/ +├── api.py # FastAPI server +├── requirements.txt +│ +└── src/ + ├── core.py # Core engine (tối ưu API calls) + ├── game_registry.py # Auto-load games + ├── validator.py # Hallucination check (không dùng API) + │ + └── games/ # Game definitions + ├── _template.py # Template + ├── quiz.py # Quiz game + └── fill_blank.py # Fill-blank game +``` + +--- + +## 🔌 API ENDPOINTS + +### Generate games +```bash +POST /generate +{ + "text": "Văn bản...", + "enabled_game_ids": [1, 2], + "run_analyzer": true, + "run_validator": true, + "max_items": 3 +} + +# Response includes: +# "api_calls": 2 <-- Số lần gọi LLM +``` + +### Xem games +```bash +GET /games +``` + +### Bật/Tắt game +```bash +POST /games/quiz/activate +POST /games/quiz/deactivate +``` + +### Reload +```bash +POST /reload +``` + +--- + +## 🎮 THÊM GAME MỚI + +1. Copy `src/games/_template.py` → `src/games/new_game.py` +2. Sửa nội dung +3. 
Gọi `POST /reload` + +--- + +## ✅ BẬT/TẮT GAME + +```python +# Trong file game +GAME_CONFIG = { + "active": True, # Bật + "active": False, # Tắt +} +``` + +Hoặc qua API: +```bash +curl -X POST http://localhost:8000/games/quiz/deactivate +``` + +--- + +## 🚀 Chạy + +```bash +pip install -r requirements.txt +export GOOGLE_API_KEY=your_key +uvicorn api:app --port 8000 +``` diff --git a/api.py b/api.py new file mode 100644 index 0000000..e4fddca --- /dev/null +++ b/api.py @@ -0,0 +1,513 @@ +import os +from typing import List, Dict, Any, Optional +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +from pathlib import Path +import re + +from src import ( + GameCore, get_registry, reload_games, + get_active_game_types, get_active_type_ids, + get_game_by_id, id_to_type, type_to_id, + ModelConfig +) + + +# ============== APP ============== +app = FastAPI( + title="Game Generator API", + description="API tạo game giáo dục từ văn bản", + version="2.0.0" +) + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_methods=["*"], + allow_headers=["*"], +) + + +# ============== REQUEST/RESPONSE MODELS ============== + +class LLMConfigRequest(BaseModel): + provider: str = Field(default="gemini", description="ollama, gemini, openai") + model_name: str = Field(default="gemini-2.0-flash-lite") + api_key: Optional[str] = Field(default=None, description="API key (None = lấy từ env)") + temperature: float = Field(default=0.1) + base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama") + + +class GenerateRequest(BaseModel): + text: str = Field(description="Input text", min_length=10) + enabled_game_ids: Optional[List[int]] = Field(default=None, description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)") + run_analyzer: bool = Field(default=True) + run_validator: bool = Field(default=True) + max_items: Optional[int] = Field(default=3) + min_score: int 
= Field(default=50, description="Minimum score (0-100) for analyzer to include a game") + debug: bool = Field(default=False, description="Print prompts to server log") + + # LLM config (optional - override global) + llm_config: Optional[LLMConfigRequest] = Field(default=None, description="Override LLM config") + + +class TokenUsageResponse(BaseModel): + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + + +class GameScoreInfo(BaseModel): + type_id: int + score: int + reason: str = "" + + +class GameResultData(BaseModel): + """Structure thống nhất cho mỗi game result""" + items: List[Dict[str, Any]] = [] + metadata: Optional[Dict[str, Any]] = None + + +class CommonMetadataResponse(BaseModel): + """Metadata chung cho toàn bộ kết quả generate""" + title: str = "" + description: str = "" + grade: int = 0 + difficulty: int = 0 + + +class GenerateResponse(BaseModel): + success: bool + games: List[int] # type_ids + game_scores: List[GameScoreInfo] = [] + metadata: Optional[CommonMetadataResponse] = None # Metadata chung từ analyzer + results: Dict[int, GameResultData] # keyed by type_id, value is {items, metadata} + llm: Optional[str] = None + api_calls: Optional[int] = None + token_usage: Optional[TokenUsageResponse] = None + errors: List[str] = [] + + +class GameInfo(BaseModel): + type_id: int + game_type: str # Keep for reference + display_name: str + description: str + active: bool + max_items: int + + +class GamesListResponse(BaseModel): + total: int + active_count: int + games: List[GameInfo] + + +class ActionResponse(BaseModel): + success: bool + message: str + game_type: Optional[str] = None + active: Optional[bool] = None + + +class LLMConfigResponse(BaseModel): + provider: str + model_name: str + temperature: float + base_url: Optional[str] = None + + +# ============== GLOBAL ============== +_core: Optional[GameCore] = None +_current_config: Optional[ModelConfig] = None + + +def get_core(config_override: Optional[LLMConfigRequest] = 
None) -> GameCore: + """Get or create GameCore with optional config override""" + global _core, _current_config + + if config_override: + # Create new core with override config + config = ModelConfig( + provider=config_override.provider, + model_name=config_override.model_name, + api_key=config_override.api_key, + temperature=config_override.temperature, + base_url=config_override.base_url + ) + return GameCore(llm_config=config) + + if _core is None: + # Default: tự detect từ env + _core = GameCore() + _current_config = _core.llm_config + + return _core + + +# ============== ENDPOINTS ============== + +@app.post("/generate", response_model=GenerateResponse) +async def generate_games(request: GenerateRequest): + """Generate games from text with scoring""" + try: + core = get_core(request.llm_config) + + # Convert type_ids to game_types + if request.enabled_game_ids: + games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] + else: + games = get_active_game_types() + + result = core.run_multi( + text=request.text, + enabled_games=games, + max_items=request.max_items or 3, + min_score=request.min_score, + validate=request.run_validator, + debug=request.debug + ) + + # Convert game_types to type_ids in response + game_ids = [type_to_id(g) for g in result.get("games", [])] + + # Convert game_scores + game_scores = [] + for s in result.get("game_scores", []): + game_scores.append(GameScoreInfo( + type_id=type_to_id(s.get("type", "")), + score=s.get("score", 0), + reason=s.get("reason", "") + )) + + # Convert results keys to type_ids + results_by_id = {} + for game_type, items in result.get("results", {}).items(): + tid = type_to_id(game_type) + if tid > 0: + results_by_id[tid] = items + + # Get common metadata from analyzer + core_meta = result.get("metadata", {}) + common_metadata = CommonMetadataResponse( + title=core_meta.get("title", ""), + description=core_meta.get("description", ""), + grade=core_meta.get("grade", 0), + 
difficulty=core_meta.get("difficulty", 0) + ) if core_meta else None + + return GenerateResponse( + success=result.get("success", False), + games=game_ids, + game_scores=game_scores, + metadata=common_metadata, + results=results_by_id, + llm=result.get("llm"), + token_usage=result.get("token_usage"), + errors=result.get("errors", []) + ) + + except Exception as e: + return GenerateResponse( + success=False, + games=[], + game_scores=[], + results={}, + errors=[str(e)] + ) + + +# ============== SINGLE BEST (1 PROMPT) ============== + +class SingleGenerateRequest(BaseModel): + text: str = Field(description="Input text", min_length=10) + enabled_game_ids: Optional[List[int]] = Field(default=None, description="Limit type_ids to choose from") + max_items: int = Field(default=3, description="Max items to generate") + run_validator: bool = Field(default=True) + debug: bool = Field(default=False) + llm_config: Optional[LLMConfigRequest] = Field(default=None) + + +class SingleGenerateResponse(BaseModel): + success: bool + type_id: Optional[int] = None + reason: Optional[str] = None + items: List[Dict[str, Any]] = [] + token_usage: Optional[TokenUsageResponse] = None + llm: Optional[str] = None + errors: List[str] = [] + + +@app.post("/generate/single", response_model=SingleGenerateResponse) +async def generate_single_game(request: SingleGenerateRequest): + """ + Generate 1 game phù hợp nhất trong 1 prompt duy nhất. 
+ + - Analyze text để chọn game type tốt nhất + - Generate items cho game đó + - Tất cả trong 1 API call + """ + try: + core = get_core(request.llm_config) + + # Convert type_ids to game_types + if request.enabled_game_ids: + games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] + else: + games = None + + result = core.run_single( + text=request.text, + enabled_games=games, + max_items=request.max_items, + debug=request.debug, + validate=request.run_validator + ) + + # Convert game_type to type_id + game_type = result.get("game_type") + tid = type_to_id(game_type) if game_type else None + + return SingleGenerateResponse( + success=result.get("success", False), + type_id=tid, + reason=result.get("reason"), + items=result.get("items", []), + token_usage=result.get("token_usage"), + llm=result.get("llm"), + errors=result.get("errors", []) + ) + + except Exception as e: + return SingleGenerateResponse( + success=False, + errors=[str(e)] + ) + + +# ============== DIRECT GENERATE (1 game cụ thể, không analyze) ============== + +class DirectGenerateRequest(BaseModel): + text: str = Field(description="Input text", min_length=10) + max_items: int = Field(default=3, description="Max items to generate") + run_validator: bool = Field(default=True) + debug: bool = Field(default=False) + llm_config: Optional[LLMConfigRequest] = Field(default=None) + + +class DirectGenerateResponse(BaseModel): + """Response thống nhất, giống GenerateResponse nhưng cho 1 game""" + success: bool + games: List[int] = [] # Single type_id in list + results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse + is_format_error: bool = False + format_error: Optional[str] = None + token_usage: Optional[TokenUsageResponse] = None + llm: Optional[str] = None + errors: List[str] = [] + + +@app.post("/generate/{type_id}", response_model=DirectGenerateResponse) +async def generate_direct(type_id: int, request: DirectGenerateRequest): + """ + Generate 1 game cụ thể, 
KHÔNG analyze. + Response format giống với /generate nhưng chỉ có 1 game. + """ + try: + # Get game by type_id + game_type = id_to_type(type_id) + if not game_type: + return DirectGenerateResponse( + success=False, + games=[type_id], + errors=[f"Game with type_id={type_id} not found"] + ) + + core = get_core(request.llm_config) + + result = core.generate( + game_type=game_type, + text=request.text, + max_items=request.max_items, + validate=request.run_validator, + debug=request.debug + ) + + format_error = result.get("format_error") + data = result.get("data") or {} + + # Build results với structure thống nhất + game_result = GameResultData( + items=data.get("items", []) if isinstance(data, dict) else [], + metadata=data.get("metadata") if isinstance(data, dict) else None + ) + + return DirectGenerateResponse( + success=result.get("success", False), + games=[type_id], + results={type_id: game_result}, + is_format_error=format_error is not None, + format_error=format_error, + token_usage=result.get("token_usage"), + llm=result.get("llm"), + errors=result.get("errors", []) + ) + + except Exception as e: + return DirectGenerateResponse( + success=False, + games=[type_id], + errors=[str(e)] + ) + + +@app.get("/games", response_model=GamesListResponse) +async def list_games(): + """Lấy danh sách games""" + registry = get_registry() + all_games = registry.get_all_games_including_inactive() + + games_list = [] + active_count = 0 + + for game_type, game in all_games.items(): + games_list.append(GameInfo( + type_id=game.type_id, + game_type=game.game_type, + display_name=game.display_name, + description=game.description, + active=game.active, + max_items=game.max_items, + )) + if game.active: + active_count += 1 + + # Sort by type_id + games_list.sort(key=lambda g: g.type_id) + + return GamesListResponse( + total=len(games_list), + active_count=active_count, + games=games_list + ) + + +@app.post("/games/{game_type}/activate", response_model=ActionResponse) +async def 
activate_game(game_type: str): + """Bật game""" + return _set_game_active(game_type, True) + + +@app.post("/games/{game_type}/deactivate", response_model=ActionResponse) +async def deactivate_game(game_type: str): + """Tắt game""" + return _set_game_active(game_type, False) + + +def _set_game_active(game_type: str, active: bool) -> ActionResponse: + games_dir = Path(__file__).parent / "src" / "games" + game_file = games_dir / f"{game_type}.py" + + if not game_file.exists(): + raise HTTPException(404, f"Game '{game_type}' not found") + + content = game_file.read_text(encoding="utf-8") + pattern = r'("active"\s*:\s*)(True|False)' + new_value = "True" if active else "False" + + if not re.search(pattern, content): + raise HTTPException(400, f"Cannot find 'active' field in {game_type}.py") + + new_content = re.sub(pattern, f'\\1{new_value}', content) + game_file.write_text(new_content, encoding="utf-8") + + reload_games() + + action = "activated" if active else "deactivated" + return ActionResponse( + success=True, + message=f"Game '{game_type}' has been {action}", + game_type=game_type, + active=active + ) + + +@app.get("/llm", response_model=LLMConfigResponse) +async def get_llm_config(): + """Xem LLM config hiện tại""" + global _current_config + + if _current_config is None: + core = get_core() + _current_config = core.llm_config + + return LLMConfigResponse( + provider=_current_config.provider, + model_name=_current_config.model_name, + temperature=_current_config.temperature, + base_url=_current_config.base_url + ) + + +@app.post("/llm", response_model=ActionResponse) +async def set_llm_config(config: LLMConfigRequest): + """Đổi LLM config global""" + global _core, _current_config + + new_config = ModelConfig( + provider=config.provider, + model_name=config.model_name, + api_key=config.api_key, + temperature=config.temperature, + base_url=config.base_url + ) + + try: + _core = GameCore(llm_config=new_config) + _current_config = new_config + + return ActionResponse( 
+ success=True, + message=f"LLM changed to {config.provider}/{config.model_name}" + ) + except Exception as e: + return ActionResponse( + success=False, + message=f"Failed to change LLM: {str(e)}" + ) + + +@app.post("/reload", response_model=ActionResponse) +async def reload_all_games(): + """Reload games""" + global _core + + reload_games() + _core = None + + return ActionResponse( + success=True, + message=f"Reloaded. Active games: {get_active_game_types()}" + ) + + +@app.get("/health") +async def health_check(): + return { + "status": "healthy", + "active_games": get_active_game_types() + } + + +# ============== STARTUP ============== +@app.on_event("startup") +async def startup(): + print("🚀 Game Generator API started") + print(f"📋 Active games: {get_active_game_types()}") + + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=2088) diff --git a/backup_source/match.py b/backup_source/match.py new file mode 100644 index 0000000..54241ea --- /dev/null +++ b/backup_source/match.py @@ -0,0 +1,81 @@ +""" +games/match.py - Match Game - Match sentences with images +""" +from typing import List +from pydantic import BaseModel, Field +from langchain_core.output_parsers import PydanticOutputParser + + +# ============== SCHEMA ============== +class MatchItem(BaseModel): + word: str = Field(description="The sentence to be matched (EXACT copy from source)") + match_with: str = Field(description="Short keyword for reference") + original_quote: str = Field(description="EXACT quote from source text") + image_description: str = Field(default="", description="Detailed visual description for image generation/search") + image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") + + +class MatchOutput(BaseModel): + """Output wrapper for match items""" + items: List[MatchItem] = Field(description="List of match items generated from source text") + + +# Output parser 
+output_parser = PydanticOutputParser(pydantic_object=MatchOutput) + + +# ============== CONFIG ============== +GAME_CONFIG = { + "game_type": "match", + "display_name": "Match with Image", + "description": "Match sentences with images", + + "active": True, + + "min_items": 2, + "max_items": 10, + "schema": MatchItem, + "output_schema": MatchOutput, + "output_parser": output_parser, + + "system_prompt": """Extract sentences and create image descriptions for matching game. +The game will show images and players must match them with the correct sentences. + +YOUR TASK: +1. Extract meaningful sentences from the source text +2. Create a DETAILED image_description that clearly represents the sentence +3. The image should be distinct enough to match with its sentence + +CRITICAL RULES: +1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate the source text +2. original_quote MUST be an EXACT copy from source text +3. image_description must be DETAILED and SPECIFIC to the sentence content +4. Each image should be visually distinguishable from others""", +} + + +# ============== EXAMPLES ============== +EXAMPLES = [ + { + "input": "The Sun is a star. 
The Moon orbits Earth.", + "output": { + "items": [ + { + "word": "The Sun is a star.", + "match_with": "sun", + "original_quote": "The Sun is a star.", + "image_description": "A bright glowing yellow sun with solar flares", + "image_is_complex": False + }, + { + "word": "The Moon orbits Earth.", + "match_with": "moon", + "original_quote": "The Moon orbits Earth.", + "image_description": "A grey moon circling around the blue Earth planet", + "image_is_complex": False + } + ] + }, + "why_suitable": "Has distinct concepts that can be visualized and matched" + } +] diff --git a/backup_source/memory_card.py b/backup_source/memory_card.py new file mode 100644 index 0000000..1d2d6f1 --- /dev/null +++ b/backup_source/memory_card.py @@ -0,0 +1,61 @@ +""" +games/memory_card.py - Memory Card Game - Flip cards to find pairs +""" +from typing import List +from pydantic import BaseModel, Field +from langchain_core.output_parsers import PydanticOutputParser + + +# ============== SCHEMA ============== +class MemoryCardItem(BaseModel): + name: str = Field(description="Card content/label") + pair_id: str = Field(description="ID to match pairs (same pair_id = matching cards)") + original_quote: str = Field(description="EXACT quote from source text") + image_description: str = Field(default="", description="Visual description for the card") + image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") + + +class MemoryCardOutput(BaseModel): + """Output wrapper for memory card items""" + items: List[MemoryCardItem] = Field(description="List of memory card items generated from source text") + + +# Output parser +output_parser = PydanticOutputParser(pydantic_object=MemoryCardOutput) + + +# ============== CONFIG ============== +GAME_CONFIG = { + "game_type": "memory_card", + "display_name": "Memory Card", + "description": "Flip cards to find pairs", + + "active": False, # Disabled + + "min_items": 4, + 
"max_items": 10, + "schema": MemoryCardItem, + "output_schema": MemoryCardOutput, + "output_parser": output_parser, + + "system_prompt": """Create memory card pairs. +CRITICAL RULES: +1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate the source text +2. original_quote MUST be an EXACT copy from source text +3. ALL content must come from the source text only""", +} + + +# ============== EXAMPLES ============== +EXAMPLES = [ + { + "input": "The Sun is a star.", + "output": { + "items": [ + {"name": "The Sun", "pair_id": "p1", "original_quote": "The Sun is a star.", "image_description": "A bright sun", "image_is_complex": False}, + {"name": "a star", "pair_id": "p1", "original_quote": "The Sun is a star.", "image_description": "A glowing star", "image_is_complex": False} + ] + }, + "why_suitable": "Has concept pairs" + } +] diff --git a/backup_source/sequence_sentence.py b/backup_source/sequence_sentence.py new file mode 100644 index 0000000..5fb4a02 --- /dev/null +++ b/backup_source/sequence_sentence.py @@ -0,0 +1,127 @@ +""" +games/sequence_sentence.py - Arrange Sentences Game +type_id = 2 +""" +from typing import List +from pydantic import BaseModel, Field +from langchain_core.output_parsers import PydanticOutputParser + + +# ============== SCHEMA ============== +class SentenceItem(BaseModel): + sentence: str = Field(description="Full sentence to arrange (EXACT from source)") + original_quote: str = Field(description="EXACT quote from source text") + image_description: str = Field(default="", description="Visual description of the content") + image_keywords: List[str] = Field(default=[], description="Keywords for image search") + image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") + + +class SentenceMetadata(BaseModel): + """Metadata đánh giá nội dung""" + title: str = Field( + description="Title for this content. 
Prefer title from source document if available and suitable, otherwise create a short descriptive title." + ) + description: str = Field( + description="Short description summarizing the content/topic." + ) + grade: int = Field( + description="Estimated grade level 1-5 (1=easy/young, 5=advanced/older). Judge by vocabulary, concepts." + ) + type: str = Field(default="sequence_sentence", description="Game type") + difficulty: int = Field( + description="Difficulty 1-5 for that grade (1=very easy, 5=very hard). Judge by sentence complexity, vocabulary." + ) + + +class SentenceOutput(BaseModel): + """Output wrapper for sentence items""" + items: List[SentenceItem] = Field(description="List of sentence items generated from source text") + metadata: SentenceMetadata = Field(description="Metadata about the content") + + +# Output parser +output_parser = PydanticOutputParser(pydantic_object=SentenceOutput) + + +# ============== CONFIG ============== +GAME_CONFIG = { + "game_type": "sequence_sentence", + "display_name": "Arrange Sentences", + "description": "Arrange sentences in order", + "type_id": 2, + + "active": True, + + "max_items": 10, + "schema": SentenceItem, + "output_schema": SentenceOutput, + "output_parser": output_parser, + + # Dùng cho analyze + generate (không có format rules) + "system_prompt": """Extract sentences from source text. + +RULES: +1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate +2. sentence = EXACT copy from source text +3. original_quote = same as sentence value +4. image_description = ALWAYS provide a short visual description (NEVER empty) +5. image_is_complex = FALSE for simple/static objects, TRUE for quantities/humans/complex scenes""", + + # Dùng cho generate trực tiếp (CÓ format rules) + "direct_prompt": """Extract sentences from source text. 
+ +EXPECTED INPUT: List of sentences (separated by semicolon, newline, or similar) + +STEP 1 - VALIDATE INPUT: +Analyze if input looks like a list of sentences suitable for "arrange sentences" game. +- Should contain multiple complete sentences +- Should NOT be a quiz, single word list, or Q&A format + +If input is clearly NOT suitable (e.g. it's a quiz, single words only, or wrong format), return: +{{"items": [], "format_error": "Input không phù hợp cho game sắp xếp câu"}} + +STEP 2 - EXTRACT (if valid): +RULES: +1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate +2. Extract ALL sentences from source +3. sentence = EXACT sentence from source (trim whitespace) +4. original_quote = same as sentence value +5. image_description = ALWAYS provide a short visual description (NEVER leave empty) +6. image_is_complex: + - FALSE: simple objects, static things, no specific quantities (e.g. "a sun", "a tree") + - TRUE: needs exact quantities, humans/people, or complex details (e.g. "3 birds", "a boy reading")""", +} + + +# ============== EXAMPLES ============== +EXAMPLES = [ + { + "input": "The Sun is a star; The Moon orbits Earth; Mars is red", + "output": { + "items": [ + { + "sentence": "The Sun is a star", + "original_quote": "The Sun is a star", + "image_description": "A bright glowing sun", + "image_keywords": ["sun", "star"], + "image_is_complex": False + }, + { + "sentence": "The Moon orbits Earth", + "original_quote": "The Moon orbits Earth", + "image_description": "Moon circling around Earth", + "image_keywords": ["moon", "earth", "orbit"], + "image_is_complex": False + }, + { + "sentence": "Mars is red", + "original_quote": "Mars is red", + "image_description": "Red planet Mars", + "image_keywords": ["mars", "red", "planet"], + "image_is_complex": False + } + ] + }, + "why_suitable": "Source has sentences separated by semicolons" + } +] diff --git a/backup_source/sequence_word.py b/backup_source/sequence_word.py new file mode 100644 index 0000000..f6e6a38 --- 
/dev/null +++ b/backup_source/sequence_word.py @@ -0,0 +1,134 @@ +""" +games/sequence_word.py - Arrange Words Game +type_id = 3 +""" +from typing import List +from pydantic import BaseModel, Field +from langchain_core.output_parsers import PydanticOutputParser + + +# ============== SCHEMA ============== +class WordItem(BaseModel): + word: str = Field(description="Word or phrase to arrange (EXACT from source)") + original_quote: str = Field(description="EXACT quote from source text") + image_description: str = Field(default="", description="Visual description of the content") + image_keywords: List[str] = Field(default=[], description="Keywords for image search") + image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") + + +class WordMetadata(BaseModel): + """Metadata đánh giá nội dung""" + title: str = Field( + description="Title for this content. Prefer title from source document if available and suitable, otherwise create a short descriptive title." + ) + description: str = Field( + description="Short description summarizing the content/topic." + ) + grade: int = Field( + description="Estimated grade level 1-5 (1=easy/young, 5=advanced/older). Judge by vocabulary complexity." + ) + type: str = Field(default="sequence_word", description="Game type") + difficulty: int = Field( + description="Difficulty 1-5 for that grade (1=very easy, 5=very hard). Judge by word complexity, number of items." 
+ ) + + +class WordOutput(BaseModel): + """Output wrapper for word items""" + items: List[WordItem] = Field(description="List of word items generated from source text") + metadata: WordMetadata = Field(description="Metadata about the content") + + +# Output parser +output_parser = PydanticOutputParser(pydantic_object=WordOutput) + + +# ============== CONFIG ============== +GAME_CONFIG = { + "game_type": "sequence_word", + "display_name": "Arrange Words", + "description": "Arrange words or phrases in order", + "type_id": 3, + + "active": True, + + "max_items": 10, + "schema": WordItem, + "output_schema": WordOutput, + "output_parser": output_parser, + + # Dùng cho analyze + generate (không có format rules) + "system_prompt": """Extract words or phrases from source text. + +RULES: +1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate +2. word = EXACT copy from source text +3. original_quote = same as word value +4. image_description = ALWAYS provide a short visual description (NEVER empty) +5. image_is_complex = FALSE for simple/static objects, TRUE for quantities/humans/complex scenes""", + + # Dùng cho generate trực tiếp (CÓ format rules) + "direct_prompt": """Extract words or phrases from source text. + +EXPECTED INPUT: List of words/phrases (separated by semicolon, comma, newline, or similar) + +STEP 1 - VALIDATE INPUT: +Analyze if input looks like a list of words/phrases suitable for "arrange words" game. +- Should contain multiple short words or phrases +- Should NOT be a paragraph, essay, or Q&A format + +If input is clearly NOT suitable (e.g. it's a quiz, a long paragraph, or wrong format), return: +{{"items": [], "format_error": "Input không phù hợp cho game sắp xếp từ"}} + +STEP 2 - EXTRACT (if valid): +RULES: +1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate +2. Extract ALL words/phrases from source +3. word = EXACT word/phrase from source (trim whitespace) +4. original_quote = same as word value +5. 
image_description = ALWAYS provide a short visual description (NEVER leave empty) +6. image_is_complex: + - FALSE: simple objects, static things, no specific quantities (e.g. "an apple", "a book") + - TRUE: needs exact quantities, humans/people, or complex details (e.g. "5 oranges", "a woman cooking")""", +} + + +# ============== EXAMPLES ============== +EXAMPLES = [ + { + "input": "Apple; Banana; Orange; Grape", + "output": { + "items": [ + { + "word": "Apple", + "original_quote": "Apple", + "image_description": "A red apple", + "image_keywords": ["apple"], + "image_is_complex": False + }, + { + "word": "Banana", + "original_quote": "Banana", + "image_description": "A yellow banana", + "image_keywords": ["banana"], + "image_is_complex": False + }, + { + "word": "Orange", + "original_quote": "Orange", + "image_description": "An orange fruit", + "image_keywords": ["orange"], + "image_is_complex": False + }, + { + "word": "Grape", + "original_quote": "Grape", + "image_description": "Purple grapes", + "image_keywords": ["grape"], + "image_is_complex": False + } + ] + }, + "why_suitable": "Source has words separated by semicolons" + } +] diff --git a/postman_collection.json b/postman_collection.json new file mode 100644 index 0000000..31a028b --- /dev/null +++ b/postman_collection.json @@ -0,0 +1,265 @@ +{ + "info": { + "name": "Game Generator API", + "description": "API tạo game giáo dục từ văn bản", + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" + }, + "item": [ + { + "name": "📊 Generate Multi", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"text\": \"Mặt Trời là ngôi sao ở trung tâm của Hệ Mặt Trời.\",\n \"enabled_game_ids\": [1, 2],\n \"max_items\": 3\n}" + }, + "url": { + "raw": "http://localhost:8000/generate", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "generate" + ] + }, + "description": 
"Analyze + Generate nhiều games\n\nREQUEST:\n• text (required)\n• enabled_game_ids: [1,2,3] (optional)\n• max_items: 3 (optional)\n• min_score: 30 (optional)\n• run_validator: true (optional)\n\nRESPONSE:\n• games: [1, 2]\n• results: {1: [...], 2: [...]}" + } + }, + { + "name": "🎯 Generate Single Best", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"text\": \"Python là ngôn ngữ lập trình phổ biến.\",\n \"max_items\": 3\n}" + }, + "url": { + "raw": "http://localhost:8000/generate/single", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "generate", + "single" + ] + }, + "description": "1 API call = Analyze + Generate 1 game tốt nhất\n\nRESPONSE:\n• type_id: 1\n• reason: \"...\"\n• items: [...]" + } + }, + { + "name": "🎮 Direct Quiz (type_id=1)", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"text\": \"Question: Thủ đô Việt Nam?\\nA. Hà Nội\\nB. TP HCM\\nC. Đà Nẵng\\nD. Huế\\nCorrect: A\",\n \"max_items\": 5\n}" + }, + "url": { + "raw": "http://localhost:8000/generate/1", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "generate", + "1" + ] + }, + "description": "Generate Quiz trực tiếp\n\nINPUT FORMAT:\nQuestion: ...\nA. ...\nB. ...\nC. ...\nD. 
...\nCorrect: A" + } + }, + { + "name": "🎮 Direct Sentence (type_id=2)", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"text\": \"Mặt trời mọc; Chim hót; Người thức dậy\",\n \"max_items\": 10\n}" + }, + "url": { + "raw": "http://localhost:8000/generate/2", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "generate", + "2" + ] + }, + "description": "Generate Arrange Sentences trực tiếp\n\nINPUT FORMAT:\nsentence1; sentence2; sentence3" + } + }, + { + "name": "🎮 Direct Word (type_id=3)", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"text\": \"Apple; Banana; Orange; Grape\",\n \"max_items\": 10\n}" + }, + "url": { + "raw": "http://localhost:8000/generate/3", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "generate", + "3" + ] + }, + "description": "Generate Arrange Words trực tiếp\n\nINPUT FORMAT:\nword1; word2; word3" + } + }, + { + "name": "📋 List Games", + "request": { + "method": "GET", + "url": { + "raw": "http://localhost:8000/games", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "games" + ] + }, + "description": "Danh sách games\n\nRESPONSE:\n[\n {type_id: 1, game_type: \"quiz\", ...},\n {type_id: 2, ...},\n {type_id: 3, ...}\n]" + } + }, + { + "name": "⚙️ Get LLM", + "request": { + "method": "GET", + "url": { + "raw": "http://localhost:8000/llm", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "llm" + ] + } + } + }, + { + "name": "⚙️ Set LLM - Gemini", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"provider\": \"gemini\",\n \"model_name\": \"gemini-2.0-flash-lite\"\n}" + }, + "url": { + "raw": "http://localhost:8000/llm", + "host": [ + "localhost" + ], + 
"port": "8000", + "path": [ + "llm" + ] + } + } + }, + { + "name": "⚙️ Set LLM - Ollama", + "request": { + "method": "POST", + "header": [ + { + "key": "Content-Type", + "value": "application/json" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"provider\": \"ollama\",\n \"model_name\": \"qwen2.5:14b\",\n \"base_url\": \"http://localhost:11434\"\n}" + }, + "url": { + "raw": "http://localhost:8000/llm", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "llm" + ] + } + } + }, + { + "name": "🔄 Reload Games", + "request": { + "method": "POST", + "url": { + "raw": "http://localhost:8000/reload", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "reload" + ] + } + } + }, + { + "name": "❤️ Health", + "request": { + "method": "GET", + "url": { + "raw": "http://localhost:8000/health", + "host": [ + "localhost" + ], + "port": "8000", + "path": [ + "health" + ] + } + } + } + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2d32f14 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,20 @@ +# Game Generator - Dependencies + +# LangChain Core +langchain>=0.1.0 +langchain-core>=0.1.0 + +# LLM Providers +langchain-google-genai>=1.0.0 +langchain-openai>=0.0.5 +langchain-ollama>=0.1.0 + +# Pydantic +pydantic>=2.0.0 + +# API Server +fastapi>=0.100.0 +uvicorn>=0.23.0 + +# Utilities +python-dotenv>=1.0.0 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..8f811b0 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,44 @@ +""" +src - Game Generator Core Package +""" +from src.core import GameCore +from src.game_registry import ( + GameRegistry, + get_registry, + reload_games, + get_active_game_types, + get_active_type_ids, + get_game_by_id, + get_game, + id_to_type, + type_to_id +) +from src.llm_config import ModelConfig, get_llm, get_default_config, create_config +from src.validator import QuoteValidator, quick_validate + + +__all__ = [ + # Core + "GameCore", + + # 
Registry + "GameRegistry", + "get_registry", + "reload_games", + "get_active_game_types", + "get_active_type_ids", + "get_game_by_id", + "get_game", + "id_to_type", + "type_to_id", + + # LLM Config + "ModelConfig", + "get_llm", + "get_default_config", + "create_config", + + # Validator + "QuoteValidator", + "quick_validate", +] diff --git a/src/__pycache__/__init__.cpython-310.pyc b/src/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..227e8be Binary files /dev/null and b/src/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/__pycache__/core.cpython-310.pyc b/src/__pycache__/core.cpython-310.pyc new file mode 100644 index 0000000..15eb764 Binary files /dev/null and b/src/__pycache__/core.cpython-310.pyc differ diff --git a/src/__pycache__/game_registry.cpython-310.pyc b/src/__pycache__/game_registry.cpython-310.pyc new file mode 100644 index 0000000..bedb8eb Binary files /dev/null and b/src/__pycache__/game_registry.cpython-310.pyc differ diff --git a/src/__pycache__/llm_config.cpython-310.pyc b/src/__pycache__/llm_config.cpython-310.pyc new file mode 100644 index 0000000..393f190 Binary files /dev/null and b/src/__pycache__/llm_config.cpython-310.pyc differ diff --git a/src/__pycache__/validator.cpython-310.pyc b/src/__pycache__/validator.cpython-310.pyc new file mode 100644 index 0000000..843197f Binary files /dev/null and b/src/__pycache__/validator.cpython-310.pyc differ diff --git a/src/core.py b/src/core.py new file mode 100644 index 0000000..3624d14 --- /dev/null +++ b/src/core.py @@ -0,0 +1,513 @@ +""" +core.py - Simple Game Generator Core + +3 USE CASES: +1. run_multi() - Analyze + generate nhiều games phù hợp +2. run_single() - Analyze + generate 1 game tốt nhất (1 API call) +3. 
generate() - Generate 1 game cụ thể (không analyze) +""" +import os +import json +import time +from typing import List, Dict, Any, Optional, Union +from dataclasses import dataclass + +from langchain_core.prompts import ChatPromptTemplate + +from .llm_config import ModelConfig, get_llm, get_default_config +from .game_registry import get_registry, get_game, get_analyzer_context +from .validator import QuoteValidator +from .logger import logger + + +@dataclass +class TokenUsage: + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + + def add(self, usage: Dict[str, int]): + self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get("input_tokens", 0) + self.completion_tokens += usage.get("completion_tokens", 0) or usage.get("output_tokens", 0) + self.total_tokens = self.prompt_tokens + self.completion_tokens + + def to_dict(self) -> Dict[str, int]: + return {"prompt_tokens": self.prompt_tokens, "completion_tokens": self.completion_tokens, "total_tokens": self.total_tokens} + + +class GameCore: + """ + Simple Game Generator. + + Usage: + core = GameCore() + + # 1. Generate nhiều games (analyze first) + result = core.run_multi(text) + + # 2. Generate 1 game tốt nhất (1 API call) + result = core.run_single(text) + + # 3. 
Generate 1 game cụ thể + result = core.generate("quiz", text) + """ + + def __init__(self, llm_config: Optional[Union[ModelConfig, Dict, str]] = None): + self.llm_config = self._parse_config(llm_config) + self.llm = get_llm(self.llm_config) + self.validator = QuoteValidator() + self.registry = get_registry() + print(f"🤖 LLM: {self.llm_config.provider}/{self.llm_config.model_name}") + + def _parse_config(self, config) -> ModelConfig: + if config is None: + if os.getenv("GOOGLE_API_KEY"): + return get_default_config("gemini") + elif os.getenv("OPENAI_API_KEY"): + return get_default_config("openai") + return get_default_config("ollama") + + if isinstance(config, ModelConfig): + return config + if isinstance(config, str): + return get_default_config(config) + if isinstance(config, dict): + return ModelConfig(**config) + raise ValueError(f"Invalid config: {type(config)}") + + # ============== 1. RUN MULTI (Analyze + Generate nhiều games) ============== + + def run_multi( + self, + text: str, + enabled_games: Optional[List[str]] = None, + max_items: int = 3, + min_score: int = 20, + validate: bool = True, + debug: bool = False + ) -> Dict[str, Any]: + """ + Analyze text + Generate nhiều games phù hợp. + + Returns: {success, games, results, errors, token_usage, llm} + """ + tracker = TokenUsage() + errors = [] + + # 1. Analyze (also returns metadata) + available = enabled_games or self.registry.get_game_types() + logger.info(f"Analyzing text for multi-gen. Available games: {available}") + games, scores, metadata, err = self._analyze(text, available, min_score, tracker, debug) + errors.extend(err) + + if not games: + logger.warning("Analyzer found no suitable games matches.") + return self._result(False, [], {}, errors, tracker, metadata=metadata) + + logger.info(f"Analyzer selected: {games}") + + # 2. Generate + results, err = self._generate_multi(games, text, max_items, tracker, debug) + errors.extend(err) + + # 3. 
Validate + if validate: + results = self._validate(results, text) + + # Check if any game has items + has_items = any(data.get("items", []) for data in results.values() if isinstance(data, dict)) + return self._result(has_items, games, results, errors, tracker, scores, metadata) + + # ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ============== + + def run_single( + self, + text: str, + enabled_games: Optional[List[str]] = None, + max_items: int = 3, + validate: bool = True, + debug: bool = False + ) -> Dict[str, Any]: + """ + 1 API call: Analyze + Generate game tốt nhất. + + Returns: {success, game_type, reason, items, errors, token_usage, llm} + """ + tracker = TokenUsage() + available = enabled_games or self.registry.get_game_types() + logger.info(f"Starting run_single for available games: {available}") + + # Build games info + games_info = [] + for gt in available: + game = get_game(gt) + if game: + example = json.dumps(game.examples[0].get('output', {}), ensure_ascii=False, indent=2) if game.examples else "{}" + games_info.append(f"### {gt}\n{game.description}\nExample output:\n{example}") + + prompt = ChatPromptTemplate.from_messages([ + ("system", """You are an educational game generator. +1. ANALYZE text and CHOOSE the BEST game type +2. GENERATE items for that game + +RULES: +- KEEP original language +- original_quote = EXACT copy from source +- ALL content from source only"""), + ("human", """GAMES: +{games_info} + +TEXT: +{text} + +Choose BEST game from: {types} +Generate max {max_items} items. 
+ +Return JSON: +{{"game_type": "chosen", "reason": "why", "items": [...]}}""") + ]) + + content = {"games_info": "\n\n".join(games_info), "text": text[:2000], "types": ", ".join(available), "max_items": max_items} + + if debug: + print(f"\n{'='*50}\n🎯 RUN SINGLE\n{'='*50}") + + try: + resp = (prompt | self.llm).invoke(content) + tracker.add(self._get_usage(resp)) + + data = self._parse_json(resp.content) + game_type = data.get("game_type") + items = self._post_process(data.get("items", []), game_type) + + if validate and items: + items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] + + return { + "success": len(items) > 0, + "game_type": game_type, + "reason": data.get("reason", ""), + "items": items, + "errors": [], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + } + except Exception as e: + return {"success": False, "game_type": None, "items": [], "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"} + + # ============== 3. GENERATE (1 game cụ thể, không analyze) ============== + + def generate( + self, + game_type: str, + text: str, + max_items: int = 3, + validate: bool = True, + debug: bool = False + ) -> Dict[str, Any]: + """Generate 1 game cụ thể""" + tracker = TokenUsage() + logger.info(f"Generating single game content: {game_type}") + + game = get_game(game_type) + + if not game: + return {"success": False, "game_type": game_type, "items": [], "errors": [f"Game not found: {game_type}"], "token_usage": {}, "llm": ""} + + # Build Format Rules Section + format_rules_section = "" + if game.input_format_rules: + rules_str = "\n".join(f"- {r}" for r in game.input_format_rules) + format_rules_section = f""" +CRITICAL: FIRST, VALIDATE THE INPUT TEXT. 
+Format Rules: +{rules_str} + +If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else: +{{{{ "format_error": "Input text incompatible with game requirements." }}}} +""" + + prompt = ChatPromptTemplate.from_messages([ + ("system", f"""{game.generated_system_prompt} +{format_rules_section}"""), + ("human", """TEXT TO PROCESS: +{text} + +Generate content in JSON format: +{format_instructions}""") + ]) + + if debug: + print(f"\n{'='*50}\n🎮 GENERATE: {game_type}\n{'='*50}") + + try: + resp = (prompt | self.llm).invoke({ + "text": text, + "format_instructions": game.format_instructions + }) + tracker.add(self._get_usage(resp)) + + # 1. Parse as raw JSON first to check for format_error + raw_data = None + try: + raw_data = self._parse_json(resp.content) + except: + pass + + # 2. Check if it's a format_error immediately + if raw_data and raw_data.get("format_error"): + return { + "success": False, + "game_type": game_type, + "data": None, + "format_error": raw_data["format_error"], + "errors": [raw_data["format_error"]], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + } + + parsed_data = raw_data + + # 3. 
Try output_parser for structured validation if present + if game.output_parser: + try: + parsed = game.output_parser.parse(resp.content) + parsed_data = parsed.model_dump() + except Exception as pe: + if debug: print(f"⚠️ output_parser failed: {pe}") + # Keep raw_data if parser fails but we have JSON + + + # Check format error + if parsed_data and parsed_data.get("format_error"): + return { + "success": False, + "game_type": game_type, + "data": None, + "format_error": parsed_data["format_error"], + "errors": [parsed_data["format_error"]], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + } + + # Post-process + items = parsed_data.get("items", []) if parsed_data else [] + items = self._post_process(items, game_type) + + if validate and items: + items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] + + if not items: + return { + "success": False, + "game_type": game_type, + "data": None, + "format_error": "No items extracted", + "errors": [], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + } + + if parsed_data: + parsed_data["items"] = items + + return { + "success": True, + "game_type": game_type, + "data": parsed_data, + "errors": [], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + } + except Exception as e: + return {"success": False, "game_type": game_type, "data": None, "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"} + + # ============== PRIVATE METHODS ============== + + def _analyze(self, text: str, available: List[str], min_score: int, tracker: TokenUsage, debug: bool) -> tuple: + """Analyze text để suggest games - với retry""" + # Lấy context từ game configs + context = get_analyzer_context() + + prompt = ChatPromptTemplate.from_messages([ + ("system", """You are a 
game type analyzer. Score each game 0-100 based on how well the text matches the game requirements. + +GAME REQUIREMENTS: +{context} + +SCORING: +- 70-100: Text matches game requirements well +- 40-69: Partial match +- 0-39: Does not match requirements + +IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field. + +Return valid JSON with scores AND metadata about the content: +{{ + "scores": [ + {{ + "type": "NAME_OF_GAME_TYPE", + "score": 80, + "reason": "..." + }} + ], + "metadata": {{ + "title": "Title from source or create short title", + "description": "One sentence summary", + "grade": 1-5, + "difficulty": 1-5 + }} +}}"""), + ("human", """TEXT TO ANALYZE: +{text} + +Analyze for games: {types} +Return JSON:""") + ]) + + max_retries = 2 + for attempt in range(max_retries): + try: + resp = (prompt | self.llm).invoke({ + "context": context, + "text": text[:800], + "types": ", ".join(available) + }) + tracker.add(self._get_usage(resp)) + + if debug: + print(f"📝 Analyzer raw: {resp.content[:300]}") + + # Parse JSON với fallback + content = resp.content.strip() + if not content: + if debug: + print(f"⚠️ Empty response, retry {attempt + 1}") + continue + + data = self._parse_json(content) + scores = [s for s in data.get("scores", []) if s.get("type") in available and s.get("score", 0) >= min_score] + scores.sort(key=lambda x: x.get("score", 0), reverse=True) + + # Extract metadata from response + metadata = data.get("metadata", {}) + + if debug: + print(f"🔍 Scores: {scores}") + print(f"📋 Metadata: {metadata}") + + return [s["type"] for s in scores], scores, metadata, [] + + except Exception as e: + if debug: + print(f"⚠️ Analyze attempt {attempt + 1} failed: {e}") + if attempt == max_retries - 1: + # Final fallback: return all games với low score + return available, [], {}, [f"Analyze error: {e}"] + + return available, [], {}, ["Analyze failed after retries"] + + def _generate_multi(self, games: List[str], text: str, max_items: 
int, tracker: TokenUsage, debug: bool) -> tuple: + """Generate nhiều games""" + if len(games) == 1: + result = self.generate(games[0], text, max_items, validate=False, debug=debug) + tracker.add(result.get("token_usage", {})) + # Fix: generate returns {data: {items: [...]}} not {items: [...]} + data = result.get("data") or {} + items = data.get("items", []) if isinstance(data, dict) else [] + return {games[0]: {"items": items, "metadata": data.get("metadata")}}, result.get("errors", []) + + # Multi-game: Build schema info for each game + games_schema = [] + for gt in games: + game = get_game(gt) + if game: + games_schema.append(f"""### {gt.upper()} +{game.generated_system_prompt} + +REQUIRED OUTPUT FORMAT: +{game.format_instructions}""") + + prompt = ChatPromptTemplate.from_messages([ + ("system", """You are a multi-game content generator. +Generate items for EACH game type following their EXACT schema. +IMPORTANT: Include ALL required fields for each item (image_description, image_keywords, etc.) 
+RULES: Keep original language, use exact quotes from text."""), + ("human", """GAMES AND THEIR SCHEMAS: +{schemas} + +SOURCE TEXT: +{text} + +Generate items for: {types} +Return valid JSON: {{{format}}}""") + ]) + + fmt = ", ".join([f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games]) + + try: + resp = (prompt | self.llm).invoke({ + "schemas": "\n\n".join(games_schema), + "text": text, + "types": ", ".join(games), + "format": fmt + }) + tracker.add(self._get_usage(resp)) + + data = self._parse_json(resp.content) + results = {} + errors = [] + for gt in games: + game_data = data.get(gt, {}) if isinstance(data.get(gt), dict) else {} + items = game_data.get("items", []) + items = self._post_process(items, gt) + # Thống nhất structure: {items: [...], metadata: {...}} + results[gt] = {"items": items, "metadata": game_data.get("metadata")} + if not items: + errors.append(f"No items for {gt}") + + return results, errors + except Exception as e: + return {gt: {"items": [], "metadata": None} for gt in games}, [f"Generate error: {e}"] + + def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]: + """Validate items trong results""" + validated = {} + for gt, data in results.items(): + items = data.get("items", []) if isinstance(data, dict) else [] + valid_items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] + validated[gt] = {"items": valid_items, "metadata": data.get("metadata") if isinstance(data, dict) else None} + return validated + + def _post_process(self, items: List, game_type: str) -> List[Dict]: + ms = int(time.time() * 1000) + result = [] + for i, item in enumerate(items): + d = item if isinstance(item, dict) else (item.model_dump() if hasattr(item, 'model_dump') else {}) + d["id"] = f"{game_type[:2].upper()}-{ms}-{i}" + d["game_type"] = game_type + result.append(d) + return result + + def _parse_json(self, content: str) -> Dict: + if "```" in content: + content = 
content.split("```")[1].replace("json", "").strip() + return json.loads(content) + + def _get_usage(self, resp) -> Dict: + if hasattr(resp, 'response_metadata'): + meta = resp.response_metadata + return meta.get('usage', meta.get('usage_metadata', meta.get('token_usage', {}))) + return getattr(resp, 'usage_metadata', {}) + + def _result(self, success: bool, games: List, results: Dict, errors: List, tracker: TokenUsage, scores: List = None, metadata: Dict = None) -> Dict: + return { + "success": success, + "games": games, + "game_scores": scores or [], + "metadata": metadata or {}, + "results": results, + "errors": errors, + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + } diff --git a/src/game_registry.py b/src/game_registry.py new file mode 100644 index 0000000..b175b7b --- /dev/null +++ b/src/game_registry.py @@ -0,0 +1,220 @@ +""" +game_registry.py - Tự động load games từ thư mục games/ + +Hệ thống sẽ: +1. Scan thư mục games/ +2. Load mọi file .py (trừ _template.py và __init__.py) +3. Chỉ load games có active: True +4. 
Đăng ký tự động vào registry + +THÊM GAME MỚI = TẠO FILE TRONG games/ +BẬT/TẮT GAME = SỬA active: True/False trong file game +""" +import importlib.util +from pathlib import Path +from typing import Dict, List, Any, Optional +from src.games.base import GameType, create_game_type + + +class GameRegistry: + """ + Registry tự động load games từ thư mục games/ + Chỉ load games có active: True + + Supports lookup by: + - game_type (string): "quiz", "sequence" + - type_id (int): 1, 2 + """ + _instance: Optional["GameRegistry"] = None + _all_games: Dict[str, GameType] = {} # Keyed by game_type + _id_map: Dict[int, str] = {} # type_id -> game_type + _loaded: bool = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._all_games = {} + cls._instance._id_map = {} + return cls._instance + + def __init__(self): + if not self._loaded: + self._load_all_games() + self._loaded = True + + def _load_all_games(self): + """Scan và load tất cả game definitions từ games/""" + games_dir = Path(__file__).parent / "games" + + if not games_dir.exists(): + print(f"⚠️ Games directory not found: {games_dir}") + return + + for file_path in games_dir.glob("*.py"): + # Skip __init__.py và _template.py và base.py + if file_path.name.startswith("_") or file_path.name == "base.py": + continue + + try: + game_def = self._load_game_from_file(file_path) + if game_def: + self._all_games[game_def.game_type] = game_def + if game_def.type_id > 0: + self._id_map[game_def.type_id] = game_def.game_type + status = "✅" if game_def.active else "⏸️" + print(f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})") + except Exception as e: + print(f"❌ Error loading {file_path.name}: {e}") + + def _load_game_from_file(self, file_path: Path) -> Optional[GameType]: + """Load 1 game definition từ file""" + module_name = f"games.{file_path.stem}" + + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec 
is None or spec.loader is None: + return None + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + config = getattr(module, "GAME_CONFIG", None) + examples = getattr(module, "EXAMPLES", []) + + if config is None: + return None + + # Inject examples if not in config + if examples and "examples" not in config: + config["examples"] = examples + + return create_game_type(config) + + def reload(self): + """Reload tất cả games""" + self._all_games.clear() + self._id_map.clear() + self._loaded = False + self._load_all_games() + self._loaded = True + + # ============== PUBLIC API ============== + + def get_game(self, game_type: str) -> Optional[GameType]: + """Lấy game by game_type (chỉ active)""" + game = self._all_games.get(game_type) + return game if game and game.active else None + + def get_game_by_id(self, type_id: int) -> Optional[GameType]: + """Lấy game by type_id (chỉ active)""" + game_type = self._id_map.get(type_id) + if game_type: + return self.get_game(game_type) + return None + + def get_game_type_by_id(self, type_id: int) -> Optional[str]: + """Convert type_id -> game_type""" + return self._id_map.get(type_id) + + def get_id_by_game_type(self, game_type: str) -> int: + """Convert game_type -> type_id""" + game = self._all_games.get(game_type) + return game.type_id if game else 0 + + def get_all_games(self) -> Dict[str, GameType]: + """Lấy tất cả games ACTIVE""" + return {k: v for k, v in self._all_games.items() if v.active} + + def get_all_games_including_inactive(self) -> Dict[str, GameType]: + """Lấy tất cả games (kể cả inactive)""" + return self._all_games.copy() + + def get_game_types(self) -> List[str]: + """Lấy danh sách game types ACTIVE""" + return [k for k, v in self._all_games.items() if v.active] + + def get_type_ids(self) -> List[int]: + """Lấy danh sách type_ids ACTIVE""" + return [v.type_id for v in self._all_games.values() if v.active and v.type_id > 0] + + def get_analyzer_context(self) -> str: + """Tạo 
context cho Analyzer (chỉ từ active games)""" + context_parts = [] + + for game_type, game in self._all_games.items(): + if not game.active: + continue + + hints = game.analyzer_rules # New field name + if hints: + hints_text = "\n - ".join(hints) + context_parts.append( + f"**{game.display_name}** (id={game.type_id}):\n" + f" Description: {game.description}\n" + f" Suitable when:\n - {hints_text}" + ) + + return "\n\n".join(context_parts) + + def is_active(self, game_type: str) -> bool: + """Kiểm tra game có active không""" + game = self._all_games.get(game_type) + return game.active if game else False + + +# ============== GLOBAL FUNCTIONS ============== +_registry: Optional[GameRegistry] = None + + +def get_registry() -> GameRegistry: + global _registry + if _registry is None: + _registry = GameRegistry() + return _registry + + +def reload_games(): + """Reload tất cả games (gọi sau khi thêm/sửa game)""" + get_registry().reload() + + +def get_game(game_type: str) -> Optional[GameType]: + return get_registry().get_game(game_type) + + +def get_active_game_types() -> List[str]: + return get_registry().get_game_types() + + +def get_analyzer_context() -> str: + return get_registry().get_analyzer_context() + + +def list_all_games() -> None: + """In danh sách tất cả games và trạng thái""" + registry = get_registry() + print("\n📋 DANH SÁCH GAMES:") + print("-" * 50) + for game_type, game in registry.get_all_games_including_inactive().items(): + status = "✅ ACTIVE" if game.active else "⏸️ INACTIVE" + print(f" [{game.type_id}] {game.display_name} ({game_type}): {status}") + print("-" * 50) + + +def get_game_by_id(type_id: int) -> Optional[GameType]: + """Lấy game by type_id""" + return get_registry().get_game_by_id(type_id) + + +def get_active_type_ids() -> List[int]: + """Lấy danh sách type_ids active""" + return get_registry().get_type_ids() + + +def id_to_type(type_id: int) -> Optional[str]: + """Convert type_id -> game_type""" + return 
get_registry().get_game_type_by_id(type_id) + + +def type_to_id(game_type: str) -> int: + """Convert game_type -> type_id""" + return get_registry().get_id_by_game_type(game_type) diff --git a/src/games/__init__.py b/src/games/__init__.py new file mode 100644 index 0000000..4366bd7 --- /dev/null +++ b/src/games/__init__.py @@ -0,0 +1,9 @@ +""" +games/ - Game type definitions + +Mỗi game là 1 file với GAME_CONFIG dict. +Thêm game mới = thêm file mới. +""" +from .base import GameType, create_game_type + +__all__ = ["GameType", "create_game_type"] diff --git a/src/games/__pycache__/__init__.cpython-310.pyc b/src/games/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000..33ee7b4 Binary files /dev/null and b/src/games/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/games/__pycache__/base.cpython-310.pyc b/src/games/__pycache__/base.cpython-310.pyc new file mode 100644 index 0000000..aa06f86 Binary files /dev/null and b/src/games/__pycache__/base.cpython-310.pyc differ diff --git a/src/games/__pycache__/match.cpython-310.pyc b/src/games/__pycache__/match.cpython-310.pyc new file mode 100644 index 0000000..305c2de Binary files /dev/null and b/src/games/__pycache__/match.cpython-310.pyc differ diff --git a/src/games/__pycache__/memory_card.cpython-310.pyc b/src/games/__pycache__/memory_card.cpython-310.pyc new file mode 100644 index 0000000..3630a7e Binary files /dev/null and b/src/games/__pycache__/memory_card.cpython-310.pyc differ diff --git a/src/games/__pycache__/quiz.cpython-310.pyc b/src/games/__pycache__/quiz.cpython-310.pyc new file mode 100644 index 0000000..f504339 Binary files /dev/null and b/src/games/__pycache__/quiz.cpython-310.pyc differ diff --git a/src/games/__pycache__/sentence.cpython-310.pyc b/src/games/__pycache__/sentence.cpython-310.pyc new file mode 100644 index 0000000..63f8f8e Binary files /dev/null and b/src/games/__pycache__/sentence.cpython-310.pyc differ diff --git 
a/src/games/__pycache__/sequence.cpython-310.pyc b/src/games/__pycache__/sequence.cpython-310.pyc new file mode 100644 index 0000000..4383cbc Binary files /dev/null and b/src/games/__pycache__/sequence.cpython-310.pyc differ diff --git a/src/games/_template.py b/src/games/_template.py new file mode 100644 index 0000000..444d81e --- /dev/null +++ b/src/games/_template.py @@ -0,0 +1,91 @@ +""" +games/_template.py - TEMPLATE CHO GAME MỚI + +THÊM GAME MỚI CHỈ CẦN: +1. Copy file này +2. Rename thành .py (ví dụ: matching.py) +3. Sửa nội dung bên trong +4. DONE! Hệ thống tự động nhận diện. + +Không cần sửa bất kỳ file nào khác! +""" +from typing import List, Optional +from pydantic import BaseModel, Field + + +# ============== 1. SCHEMA ============== +# Định nghĩa structure của 1 item trong game +# BẮT BUỘC phải có: original_quote và explanation + +class YourGameItem(BaseModel): + """Schema cho 1 item của game""" + + # Các trường BẮT BUỘC (để chống hallucination) + original_quote: str = Field( + description="Trích dẫn NGUYÊN VĂN từ văn bản gốc" + ) + explanation: str = Field(description="Giải thích") + + # Thêm các trường riêng của game ở đây + # Ví dụ: + # question: str = Field(description="Câu hỏi") + # answer: str = Field(description="Đáp án") + + +# ============== 2. CONFIG ============== +# Cấu hình cho game + +GAME_CONFIG = { + # Key duy nhất cho game (dùng trong API) + "game_type": "your_game", + + # Tên hiển thị + "display_name": "Tên Game", + + # Mô tả ngắn + "description": "Mô tả game của bạn", + + # Số lượng items + "max_items": 5, + + # Trỏ đến schema class + "schema": YourGameItem, + + # Prompt cho LLM + "system_prompt": """Bạn là chuyên gia tạo [tên game]. + +NHIỆM VỤ: [Mô tả nhiệm vụ] + +QUY TẮC: +1. original_quote PHẢI là trích dẫn NGUYÊN VĂN +2. [Quy tắc khác] +3. [Quy tắc khác]""", +} + + +# ============== 3. 
EXAMPLES ============== +# Ví dụ input/output để: +# - Analyzer học khi nào nên suggest game này +# - Generator dùng làm few-shot + +EXAMPLES = [ + { + # Input text mẫu + "input": "Văn bản mẫu ở đây...", + + # Output mong đợi + "output": { + "items": [ + { + "original_quote": "Trích dẫn từ văn bản", + "explanation": "Giải thích", + # Các trường khác của schema... + } + ] + }, + + # Analyzer học từ trường này + "why_suitable": "Giải thích tại sao văn bản này phù hợp với game này" + }, + # Thêm 1-2 examples nữa... +] diff --git a/src/games/base.py b/src/games/base.py new file mode 100644 index 0000000..868aa3e --- /dev/null +++ b/src/games/base.py @@ -0,0 +1,85 @@ +""" +games/base.py - Base Game Type Definition +""" +from dataclasses import dataclass, field +from typing import List, Dict, Any, Optional, Type +from pydantic import BaseModel +from langchain_core.output_parsers import PydanticOutputParser + + +@dataclass +class GameType: + """ + Định nghĩa cấu trúc chuẩn cho một Game. + Mọi game phải tuân thủ cấu trúc này để Core có thể xử lý tự động. + """ + # --- REQUIRED FIELDS (No default value) --- + type_id: int + game_type: str # e.g. "quiz" + display_name: str # e.g. "Multiple Choice Quiz" + description: str # e.g. "Create questions from text" + + schema: Type[BaseModel] # Schema cho 1 item (e.g. QuizItem) + output_schema: Type[BaseModel] # Schema cho output (e.g. 
def create_game_type(config: Dict[str, Any]) -> GameType:
    """Build a ``GameType`` from a game module's ``GAME_CONFIG`` dict.

    Required keys: ``game_type``, ``display_name``, ``description``,
    ``schema`` and ``output_schema``. Every other key is optional and falls
    back to the default shown below.

    Legacy keys are still honoured:
      * ``system_prompt`` becomes a single generation rule when
        ``generation_rules`` is absent or empty.
      * ``analyzer_hints`` is used when ``analyzer_rules`` is absent or empty.

    Args:
        config: The configuration dict declared by a game module.

    Returns:
        A ``GameType`` populated from ``config``.

    Raises:
        KeyError: If one or more required keys are missing. The message names
            the game (when known) and lists every missing key at once.
    """
    required_keys = (
        "game_type",
        "display_name",
        "description",
        "schema",
        "output_schema",
    )
    missing = [key for key in required_keys if key not in config]
    if missing:
        label = config.get("game_type", "<unknown>")
        raise KeyError(
            f"GAME_CONFIG for '{label}' is missing required key(s): "
            f"{', '.join(missing)}"
        )

    # Backward compatibility: older configs only carried one prompt string.
    gen_rules = config.get("generation_rules", [])
    if not gen_rules and "system_prompt" in config:
        gen_rules = [config["system_prompt"]]

    # Backward compatibility: analyzer_hints was renamed to analyzer_rules.
    ana_rules = config.get("analyzer_rules", []) or config.get("analyzer_hints", [])

    # Lists are copied so the GameType never aliases the module-level
    # GAME_CONFIG lists (mutating one must not silently change the other).
    return GameType(
        type_id=config.get("type_id", 0),
        game_type=config["game_type"],
        display_name=config["display_name"],
        description=config["description"],
        input_format_rules=list(config.get("input_format_rules") or []),
        active=config.get("active", True),
        max_items=config.get("max_items", 10),
        schema=config["schema"],
        output_schema=config["output_schema"],
        generation_rules=list(gen_rules or []),
        analyzer_rules=list(ana_rules or []),
        examples=list(config.get("examples") or []),
        output_parser=config.get("output_parser")
    )
Judge by question complexity, number of options, abstract concepts." + ) + + +class QuizOutput(BaseModel): + """Output wrapper for quiz items""" + items: List[QuizItem] = Field(description="List of quiz items generated from source text") + metadata: QuizMetadata = Field(description="Metadata about the quiz content") + + +# Output parser +output_parser = PydanticOutputParser(pydantic_object=QuizOutput) + + +# ============== CONFIG ============== +# ============== CONFIG ============== +GAME_CONFIG = { + "game_type": "quiz", + "display_name": "Quiz", + "description": "Multiple choice questions", + "type_id": 1, + + "active": True, + + "max_items": 10, + "schema": QuizItem, + "output_schema": QuizOutput, + "output_parser": output_parser, + + "input_format_rules": [ + "Text should contain facts or questions suitable for a quiz.", + "Prefer extracting existing multiple choice questions if available.", + "Text MUST contain questions with multiple choice options", + ], + + # 1. Recognition Rules (for Analyzer) + "analyzer_rules": [ + "Text MUST contain questions with multiple choice options", + "NOT suitable if text is just a list of words with no questions", + ], + + # 2. Rules tạo nội dung (cho Generator) + "generation_rules": [ + "KEEP ORIGINAL LANGUAGE - Do NOT translate", + "original_quote = EXACT quote from source text (full question block)", + "ALL content must come from source only - do NOT invent", + "REMOVE unnecessary numbering: 'Question 1:', '(1)', '(2)', 'A.', 'B.' from question/options/answers", + "STRICTLY CLEAN OUTPUT for 'answers': MUST contain ONLY the text content of the correct option.", + "FORBIDDEN in 'answers': Prefixes like '(1)', '(2)', 'A.', 'B.', '1.' 
# Leading enumeration marker: "(1)", "(A)", "1." or "A." plus trailing spaces.
# - ``\d+\.(?!\d)`` keeps decimals such as "3.14" intact.
# - ``[A-Za-z]\.(?![A-Za-z]\.)`` keeps dotted abbreviations such as "U.S."
#   or "e.g." intact.
_OPTION_PREFIX_RE = re.compile(
    r'^\s*(?:\(\d+\)|\([A-Za-z]\)|\d+\.(?!\d)|[A-Za-z]\.(?![A-Za-z]\.))\s*'
)


def clean_prefix(text: str) -> str:
    """Remove one leading enumeration prefix such as (1), (A), 1. or A.

    Args:
        text: An option or answer string as produced by the LLM.

    Returns:
        ``text`` without its leading marker and without surrounding
        whitespace. Falsy values and non-string values (for example a numeric
        option) are returned unchanged instead of raising.
    """
    if not text or not isinstance(text, str):
        return text
    return _OPTION_PREFIX_RE.sub('', text, count=1).strip()
0000000..f93526a --- /dev/null +++ b/src/games/sequence.py @@ -0,0 +1,173 @@ +""" +games/sequence.py - Arrange Sequence Game (Sentences OR Words) +type_id = 2 +LLM tự quyết định dựa vào ngữ nghĩa: +- "good morning", "apple", "happy" → WORD +- "Hi, I'm Lisa", "The sun rises" → SENTENCE +Output trả về đúng trường: word hoặc sentence +""" +from typing import List, Literal, Optional +from pydantic import BaseModel, Field +from langchain_core.output_parsers import PydanticOutputParser + + +# ============== SCHEMA ============== +class SequenceItem(BaseModel): + """Item - LLM điền word HOẶC sentence, không điền cả 2""" + word: Optional[str] = Field(default=None, description="Fill this if item is a WORD/PHRASE (not complete sentence)") + sentence: Optional[str] = Field(default=None, description="Fill this if item is a COMPLETE SENTENCE") + original_quote: str = Field(description="EXACT quote from source text") + image_description: str = Field(default="", description="Visual description of the content") + image_keywords: List[str] = Field(default=[], description="Keywords for image search") + image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or complex details") + + +class SequenceMetadata(BaseModel): + """Metadata đánh giá nội dung""" + title: str = Field( + description="Title for this content. Prefer title from source document if available." + ) + description: str = Field( + description="Short description summarizing the content/topic." + ) + grade: int = Field( + description="Estimated grade level 1-5 (1=easy/young, 5=advanced/older)." + ) + type: Literal["sequence"] = Field(default="sequence", description="Game type") + sub_type: Literal["sentence", "word"] = Field( + description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences" + ) + difficulty: int = Field( + description="Difficulty 1-5 for that grade." 
# ============== EXAMPLES ==============
# Few-shot examples. They are defined BEFORE GAME_CONFIG on purpose: the
# config used to read `EXAMPLES if 'EXAMPLES' in globals() else []` while
# EXAMPLES was assigned further down the module, so on first import the
# lookup always failed and "examples" was silently an empty list.
EXAMPLES = [
    {
        "input": "apple; banana;\norange; grape;\ncat; dog;",
        "output": {
            "items": [
                {"word": "apple", "sentence": None, "original_quote": "apple", "image_description": "A red apple", "image_keywords": ["apple"], "image_is_complex": False},
                {"word": "banana", "sentence": None, "original_quote": "banana", "image_description": "A yellow banana", "image_keywords": ["banana"], "image_is_complex": False},
                {"word": "orange", "sentence": None, "original_quote": "orange", "image_description": "An orange fruit", "image_keywords": ["orange"], "image_is_complex": False},
                {"word": "grape", "sentence": None, "original_quote": "grape", "image_description": "Purple grapes", "image_keywords": ["grape"], "image_is_complex": False},
                {"word": "cat", "sentence": None, "original_quote": "cat", "image_description": "A cat", "image_keywords": ["cat"], "image_is_complex": False},
                {"word": "dog", "sentence": None, "original_quote": "dog", "image_description": "A dog", "image_keywords": ["dog"], "image_is_complex": False}
            ],
            "metadata": {
                "title": "Animals and Fruits",
                "description": "Common animals and fruits",
                "grade": 1,
                "type": "sequence",
                "sub_type": "word",
                "difficulty": 1
            }
        },
        "why": "Items are single words → use 'word' field"
    },
    {
        "input": "Hi, I'm Lisa; Nice to meet you; How are you?",
        "output": {
            "items": [
                {"word": None, "sentence": "Hi, I'm Lisa", "original_quote": "Hi, I'm Lisa", "image_description": "A girl introducing herself", "image_keywords": ["girl", "greeting"], "image_is_complex": True},
                {"word": None, "sentence": "Nice to meet you", "original_quote": "Nice to meet you", "image_description": "Two people shaking hands", "image_keywords": ["handshake", "greeting"], "image_is_complex": True},
                {"word": None, "sentence": "How are you?", "original_quote": "How are you?", "image_description": "Person asking a question", "image_keywords": ["question", "greeting"], "image_is_complex": True}
            ],
            "metadata": {
                "title": "English Greetings",
                "description": "Common greeting sentences",
                "grade": 2,
                "type": "sequence",
                "sub_type": "sentence",
                "difficulty": 2
            }
        },
        "why": "Items are complete sentences → use 'sentence' field"
    },
    {
        "input": "good morning; good afternoon; good evening; good night",
        "output": {
            "items": [
                {"word": "good morning", "sentence": None, "original_quote": "good morning", "image_description": "Morning sunrise", "image_keywords": ["morning", "sun"], "image_is_complex": False},
                {"word": "good afternoon", "sentence": None, "original_quote": "good afternoon", "image_description": "Afternoon sun", "image_keywords": ["afternoon"], "image_is_complex": False},
                {"word": "good evening", "sentence": None, "original_quote": "good evening", "image_description": "Evening sunset", "image_keywords": ["evening", "sunset"], "image_is_complex": False},
                {"word": "good night", "sentence": None, "original_quote": "good night", "image_description": "Night sky with moon", "image_keywords": ["night", "moon"], "image_is_complex": False}
            ],
            "metadata": {
                "title": "Time Greetings",
                "description": "Greetings for different times of day",
                "grade": 1,
                "type": "sequence",
                "sub_type": "word",
                "difficulty": 1
            }
        },
        "why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field"
    }
]


# ============== CONFIG ==============
GAME_CONFIG = {
    "game_type": "sequence",
    "display_name": "Arrange Sequence",
    "description": "Arrange sentences or words in order",
    "type_id": 2,

    "active": True,

    "max_items": 10,
    "schema": SequenceItem,
    "output_schema": SequenceOutput,
    "output_parser": output_parser,

    "input_format_rules": [
        "Text MUST be a list of items (words, phrases, sentences) to be ordered.",
        "Do NOT generate sequence from multiple choice questions (A/B/C/D).",
        "Do NOT generate sequence if the text is a quiz or test format.",
    ],

    # 1. Recognition rules (for the Analyzer)
    "analyzer_rules": [
        "Text is a list of words, phrases, or sentences suitable for ordering",
        "Items are separated by commas, semicolons, or newlines",
        "Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
        "NO questions required - just a list of items",
        "Text is NOT a long essay or complex dialogue",
    ],

    # 2. Content generation rules (for the Generator)
    "generation_rules": [
        "KEEP ORIGINAL LANGUAGE - Do NOT translate",
        "Analyze text semantically to extract meaningful items",
        "For each item, decide type: WORD/PHRASE or SENTENCE",
        "- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
        "- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
        "NEVER fill both fields for the same item",
        "Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
        "Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",

        # CONSISTENCY RULES
        "CRITICAL: All extracted items MUST be of the SAME type.",
        "Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
        "If input has mixed types, pick the MAJORITY type and ignore the others.",

        # VISUAL FIELD COMPULSORY
        "image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
        "image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
    ],

    # EXAMPLES is assigned above, so a plain reference is always valid here.
    "examples": EXAMPLES,
}
def get_default_config(name: str = "gemini") -> ModelConfig:
    """Return the preset registered under ``name`` in ``DEFAULT_CONFIGS``.

    An unknown name falls back to the ``"gemini"`` preset.
    """
    fallback = DEFAULT_CONFIGS["gemini"]
    if name in DEFAULT_CONFIGS:
        return DEFAULT_CONFIGS[name]
    return fallback
def setup_logger(name: str = "sena_gen"):
    """Return the logger called ``name``, configuring it on first use.

    The logger is set to INFO and always gets a stdout handler. A rotating
    file handler (10 MiB per file, 5 backups, UTF-8) is added when the
    ``LOG_FILE`` environment variable is non-empty; it defaults to
    ``logs/gen_game.log``. Setting ``LOG_FILE`` to an empty string disables
    file logging.

    File logging is best-effort: if the directory or file cannot be created,
    a warning is emitted on the console handler and the logger stays
    console-only. This function is called at module import time, so a
    read-only filesystem must not crash the application.

    Args:
        name: Logger name passed to ``logging.getLogger``.

    Returns:
        The configured ``logging.Logger``. A logger that already has
        handlers is returned untouched.
    """
    logger = logging.getLogger(name)

    # Already configured by an earlier call: do not attach handlers twice.
    if logger.handlers:
        return logger

    logger.setLevel(logging.INFO)

    formatter = logging.Formatter(
        '[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] - %(message)s'
    )

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler (optional, controlled by the LOG_FILE env var)
    log_file = os.getenv("LOG_FILE", "logs/gen_game.log")
    if log_file:
        try:
            # A bare filename has an empty dirname, and os.makedirs("")
            # raises FileNotFoundError, so only create a real directory.
            log_dir = os.path.dirname(log_file)
            if log_dir:
                os.makedirs(log_dir, exist_ok=True)
            file_handler = RotatingFileHandler(
                log_file, maxBytes=10*1024*1024, backupCount=5, encoding='utf-8'
            )
        except OSError as exc:
            logger.warning("File logging disabled for %s: %s", log_file, exc)
        else:
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    return logger
def validate_quote(
    self,
    original_quote: str,
    source_text: str,
    item_index: int = 0
) -> ValidationResult:
    """Check that ``original_quote`` really appears in ``source_text``.

    The quote is rejected up front when it is missing, not a string, blank,
    or shorter than ``self.min_quote_length`` once surrounding whitespace is
    removed. Emptiness and length are measured on the stripped quote because
    a whitespace-only quote normalizes to ``""``, and the empty string is a
    substring of every source, which would otherwise count as an exact match.

    Otherwise two strategies run in order:
      1. Exact containment via ``self._exact_match`` (score 1.0).
      2. Fuzzy matching via ``self._fuzzy_match``, accepted when the score
         reaches ``self.fuzzy_threshold``.

    Args:
        original_quote: The quote claimed to come from the source.
        source_text: The text the quote is checked against.
        item_index: Position of the item, copied into the result.

    Returns:
        A ``ValidationResult``; ``error_message`` is ``None`` when valid.
    """
    def _result(valid: bool, quote: str, score: float, error: Optional[str]) -> ValidationResult:
        # match_found always mirrors is_valid in this validator.
        return ValidationResult(
            item_index=item_index,
            is_valid=valid,
            original_quote=quote,
            match_found=valid,
            match_score=score,
            error_message=error
        )

    # LLM output is untyped JSON: refuse non-strings instead of letting the
    # text normalization raise TypeError further down.
    if original_quote is not None and not isinstance(original_quote, str):
        return _result(False, str(original_quote), 0.0, "original_quote is not a string")

    meaningful = (original_quote or "").strip()
    if not meaningful:
        return _result(False, original_quote or "", 0.0, "original_quote is empty")

    if len(meaningful) < self.min_quote_length:
        return _result(
            False,
            original_quote,
            0.0,
            f"quote too short (min: {self.min_quote_length})"
        )

    # Strategy 1: Exact match
    if self._exact_match(original_quote, source_text):
        return _result(True, original_quote, 1.0, None)

    # Strategy 2: Fuzzy match
    fuzzy_score = self._fuzzy_match(original_quote, source_text)
    if fuzzy_score >= self.fuzzy_threshold:
        return _result(True, original_quote, fuzzy_score, None)

    return _result(
        False,
        original_quote,
        fuzzy_score,
        f"Quote not found. Score: {fuzzy_score:.2f}"
    )