diff --git a/.env b/.env index e69de29..df30497 100644 --- a/.env +++ b/.env @@ -0,0 +1 @@ +PORT=2088 \ No newline at end of file diff --git a/api.py b/api.py index e4fddca..23dd5a1 100644 --- a/api.py +++ b/api.py @@ -5,12 +5,19 @@ from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel, Field from pathlib import Path import re +from dotenv import load_dotenv +load_dotenv() from src import ( - GameCore, get_registry, reload_games, - get_active_game_types, get_active_type_ids, - get_game_by_id, id_to_type, type_to_id, - ModelConfig + GameCore, + get_registry, + reload_games, + get_active_game_types, + get_active_type_ids, + get_game_by_id, + id_to_type, + type_to_id, + ModelConfig, ) @@ -18,7 +25,7 @@ from src import ( app = FastAPI( title="Game Generator API", description="API tạo game giáo dục từ văn bản", - version="2.0.0" + version="2.0.0", ) app.add_middleware( @@ -31,31 +38,43 @@ app.add_middleware( # ============== REQUEST/RESPONSE MODELS ============== + class LLMConfigRequest(BaseModel): provider: str = Field(default="gemini", description="ollama, gemini, openai") model_name: str = Field(default="gemini-2.0-flash-lite") - api_key: Optional[str] = Field(default=None, description="API key (None = lấy từ env)") + api_key: Optional[str] = Field( + default=None, description="API key (None = lấy từ env)" + ) temperature: float = Field(default=0.1) base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama") class GenerateRequest(BaseModel): text: str = Field(description="Input text", min_length=10) - enabled_game_ids: Optional[List[int]] = Field(default=None, description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)") + enabled_game_ids: Optional[List[int]] = Field( + default=None, + description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)", + ) run_analyzer: bool = Field(default=True) run_validator: bool = Field(default=True) - max_items: Optional[int] = Field(default=3) - 
min_score: int = Field(default=50, description="Minimum score (0-100) for analyzer to include a game") + max_items: Optional[int] = Field(default=100) + min_score: int = Field( + default=50, description="Minimum score (0-100) for analyzer to include a game" + ) debug: bool = Field(default=False, description="Print prompts to server log") - + # LLM config (optional - override global) - llm_config: Optional[LLMConfigRequest] = Field(default=None, description="Override LLM config") + llm_config: Optional[LLMConfigRequest] = Field( + default=None, description="Override LLM config" + ) class TokenUsageResponse(BaseModel): prompt_tokens: int = 0 completion_tokens: int = 0 total_tokens: int = 0 + input_chars: int = 0 # Character count sent to LLM + output_chars: int = 0 # Character count received from LLM class GameScoreInfo(BaseModel): @@ -66,12 +85,14 @@ class GameScoreInfo(BaseModel): class GameResultData(BaseModel): """Structure thống nhất cho mỗi game result""" + items: List[Dict[str, Any]] = [] metadata: Optional[Dict[str, Any]] = None class CommonMetadataResponse(BaseModel): """Metadata chung cho toàn bộ kết quả generate""" + title: str = "" description: str = "" grade: int = 0 @@ -92,7 +113,7 @@ class GenerateResponse(BaseModel): class GameInfo(BaseModel): type_id: int - game_type: str # Keep for reference + game_type: str display_name: str description: str active: bool @@ -127,7 +148,7 @@ _current_config: Optional[ModelConfig] = None def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore: """Get or create GameCore with optional config override""" global _core, _current_config - + if config_override: # Create new core with override config config = ModelConfig( @@ -135,69 +156,77 @@ def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore: model_name=config_override.model_name, api_key=config_override.api_key, temperature=config_override.temperature, - base_url=config_override.base_url + base_url=config_override.base_url, ) 
return GameCore(llm_config=config) - + if _core is None: # Default: tự detect từ env _core = GameCore() _current_config = _core.llm_config - + return _core # ============== ENDPOINTS ============== + @app.post("/generate", response_model=GenerateResponse) async def generate_games(request: GenerateRequest): """Generate games from text with scoring""" try: core = get_core(request.llm_config) - + # Convert type_ids to game_types if request.enabled_game_ids: - games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] + games = [ + id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid) + ] else: games = get_active_game_types() - - result = core.run_multi( + + result = await core.run_multi_async( text=request.text, enabled_games=games, - max_items=request.max_items or 3, - min_score=request.min_score, + max_items=request.max_items or 100, validate=request.run_validator, - debug=request.debug + debug=request.debug, ) - + # Convert game_types to type_ids in response game_ids = [type_to_id(g) for g in result.get("games", [])] - + # Convert game_scores game_scores = [] for s in result.get("game_scores", []): - game_scores.append(GameScoreInfo( - type_id=type_to_id(s.get("type", "")), - score=s.get("score", 0), - reason=s.get("reason", "") - )) - + game_scores.append( + GameScoreInfo( + type_id=type_to_id(s.get("type", "")), + score=s.get("score", 0), + reason=s.get("reason", ""), + ) + ) + # Convert results keys to type_ids results_by_id = {} for game_type, items in result.get("results", {}).items(): tid = type_to_id(game_type) - if tid > 0: + if tid >= 0: # 0=quiz, 1=sequence are valid results_by_id[tid] = items - + # Get common metadata from analyzer core_meta = result.get("metadata", {}) - common_metadata = CommonMetadataResponse( - title=core_meta.get("title", ""), - description=core_meta.get("description", ""), - grade=core_meta.get("grade", 0), - difficulty=core_meta.get("difficulty", 0) - ) if core_meta else None - + 
common_metadata = ( + CommonMetadataResponse( + title=core_meta.get("title", ""), + description=core_meta.get("description", ""), + grade=core_meta.get("grade", 0), + difficulty=core_meta.get("difficulty", 0), + ) + if core_meta + else None + ) + return GenerateResponse( success=result.get("success", False), games=game_ids, @@ -206,25 +235,120 @@ async def generate_games(request: GenerateRequest): results=results_by_id, llm=result.get("llm"), token_usage=result.get("token_usage"), - errors=result.get("errors", []) + errors=result.get("errors", []), ) - + except Exception as e: return GenerateResponse( - success=False, - games=[], - game_scores=[], - results={}, - errors=[str(e)] + success=False, games=[], game_scores=[], results={}, errors=[str(e)] + ) + + +# ============== FAST GENERATE (1 API call - OPTIMIZED) ============== + + +class FastGenerateRequest(BaseModel): + text: str = Field(description="Input text", min_length=10) + enabled_game_ids: Optional[List[int]] = Field( + default=None, description="Limit type_ids" + ) + max_items: int = Field(default=100, description="Max items per game") + min_score: int = Field(default=50, description="Min score 0-100 to include game") + run_validator: bool = Field(default=True) + debug: bool = Field(default=False) + llm_config: Optional[LLMConfigRequest] = Field(default=None) + + +@app.post("/generate/fast", response_model=GenerateResponse) +async def generate_fast(request: FastGenerateRequest): + """ + 🚀 OPTIMIZED: 1 API call để analyze + generate TẤT CẢ games phù hợp. 
+ + So với /generate (2+ calls): + - Chỉ 1 API call + - Tiết kiệm quota/tokens + - Nhanh hơn + + So với /generate/single: + - Trả về NHIỀU games (không chỉ 1) + """ + try: + core = get_core(request.llm_config) + + # Convert type_ids to game_types + if request.enabled_game_ids: + games = [ + id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid) + ] + else: + games = get_active_game_types() + + result = await core.run_fast_async( + text=request.text, + enabled_games=games, + max_items=request.max_items, + min_score=request.min_score, + validate=request.run_validator, + debug=request.debug, + ) + + # Convert to response format (same as /generate) + game_ids = [type_to_id(g) for g in result.get("games", [])] + + game_scores = [ + GameScoreInfo( + type_id=type_to_id(s.get("type", "")), + score=s.get("score", 0), + reason=s.get("reason", ""), + ) + for s in result.get("game_scores", []) + ] + + results_by_id = {} + for game_type, data in result.get("results", {}).items(): + tid = type_to_id(game_type) + if tid >= 0: # 0=quiz, 1=sequence are valid + results_by_id[tid] = data + + core_meta = result.get("metadata", {}) + common_metadata = ( + CommonMetadataResponse( + title=core_meta.get("title", ""), + description=core_meta.get("description", ""), + grade=core_meta.get("grade", 0), + difficulty=core_meta.get("difficulty", 0), + ) + if core_meta + else None + ) + + return GenerateResponse( + success=result.get("success", False), + games=game_ids, + game_scores=game_scores, + metadata=common_metadata, + results=results_by_id, + api_calls=1, # Always 1 for fast + llm=result.get("llm"), + token_usage=result.get("token_usage"), + errors=result.get("errors", []), + ) + + except Exception as e: + return GenerateResponse( + success=False, games=[], game_scores=[], results={}, errors=[str(e)] ) # ============== SINGLE BEST (1 PROMPT) ============== + class SingleGenerateRequest(BaseModel): text: str = Field(description="Input text", min_length=10) - 
enabled_game_ids: Optional[List[int]] = Field(default=None, description="Limit type_ids to choose from") - max_items: int = Field(default=3, description="Max items to generate") + enabled_game_ids: Optional[List[int]] = Field( + default=None, description="Limit type_ids to choose from" + ) + max_items: int = Field(default=100, description="Max items to generate") run_validator: bool = Field(default=True) debug: bool = Field(default=False) llm_config: Optional[LLMConfigRequest] = Field(default=None) @@ -244,32 +368,34 @@ class SingleGenerateResponse(BaseModel): async def generate_single_game(request: SingleGenerateRequest): """ Generate 1 game phù hợp nhất trong 1 prompt duy nhất. - + - Analyze text để chọn game type tốt nhất - Generate items cho game đó - Tất cả trong 1 API call """ try: core = get_core(request.llm_config) - + # Convert type_ids to game_types if request.enabled_game_ids: - games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] + games = [ + id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid) + ] else: games = None - + result = core.run_single( text=request.text, enabled_games=games, max_items=request.max_items, debug=request.debug, - validate=request.run_validator + validate=request.run_validator, ) - + # Convert game_type to type_id game_type = result.get("game_type") tid = type_to_id(game_type) if game_type else None - + return SingleGenerateResponse( success=result.get("success", False), type_id=tid, @@ -277,21 +403,19 @@ async def generate_single_game(request: SingleGenerateRequest): items=result.get("items", []), token_usage=result.get("token_usage"), llm=result.get("llm"), - errors=result.get("errors", []) + errors=result.get("errors", []), ) - + except Exception as e: - return SingleGenerateResponse( - success=False, - errors=[str(e)] - ) + return SingleGenerateResponse(success=False, errors=[str(e)]) # ============== DIRECT GENERATE (1 game cụ thể, không analyze) ============== + class 
DirectGenerateRequest(BaseModel): text: str = Field(description="Input text", min_length=10) - max_items: int = Field(default=3, description="Max items to generate") + max_items: int = Field(default=100, description="Max items to generate") run_validator: bool = Field(default=True) debug: bool = Field(default=False) llm_config: Optional[LLMConfigRequest] = Field(default=None) @@ -299,6 +423,7 @@ class DirectGenerateRequest(BaseModel): class DirectGenerateResponse(BaseModel): """Response thống nhất, giống GenerateResponse nhưng cho 1 game""" + success: bool games: List[int] = [] # Single type_id in list results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse @@ -322,28 +447,28 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest): return DirectGenerateResponse( success=False, games=[type_id], - errors=[f"Game with type_id={type_id} not found"] + errors=[f"Game with type_id={type_id} not found"], ) - + core = get_core(request.llm_config) - + result = core.generate( game_type=game_type, text=request.text, max_items=request.max_items, validate=request.run_validator, - debug=request.debug + debug=request.debug, ) - + format_error = result.get("format_error") data = result.get("data") or {} - + # Build results với structure thống nhất game_result = GameResultData( items=data.get("items", []) if isinstance(data, dict) else [], - metadata=data.get("metadata") if isinstance(data, dict) else None + metadata=data.get("metadata") if isinstance(data, dict) else None, ) - + return DirectGenerateResponse( success=result.get("success", False), games=[type_id], @@ -352,15 +477,11 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest): format_error=format_error, token_usage=result.get("token_usage"), llm=result.get("llm"), - errors=result.get("errors", []) + errors=result.get("errors", []), ) - + except Exception as e: - return DirectGenerateResponse( - success=False, - games=[type_id], - errors=[str(e)] - ) + return 
DirectGenerateResponse(success=False, games=[type_id], errors=[str(e)]) @app.get("/games", response_model=GamesListResponse) @@ -368,29 +489,29 @@ async def list_games(): """Lấy danh sách games""" registry = get_registry() all_games = registry.get_all_games_including_inactive() - + games_list = [] active_count = 0 - + for game_type, game in all_games.items(): - games_list.append(GameInfo( - type_id=game.type_id, - game_type=game.game_type, - display_name=game.display_name, - description=game.description, - active=game.active, - max_items=game.max_items, - )) + games_list.append( + GameInfo( + type_id=game.type_id, + game_type=game.game_type, + display_name=game.display_name, + description=game.description, + active=game.active, + max_items=game.max_items, + ) + ) if game.active: active_count += 1 - + # Sort by type_id games_list.sort(key=lambda g: g.type_id) - + return GamesListResponse( - total=len(games_list), - active_count=active_count, - games=games_list + total=len(games_list), active_count=active_count, games=games_list ) @@ -409,28 +530,28 @@ async def deactivate_game(game_type: str): def _set_game_active(game_type: str, active: bool) -> ActionResponse: games_dir = Path(__file__).parent / "src" / "games" game_file = games_dir / f"{game_type}.py" - + if not game_file.exists(): raise HTTPException(404, f"Game '{game_type}' not found") - + content = game_file.read_text(encoding="utf-8") pattern = r'("active"\s*:\s*)(True|False)' new_value = "True" if active else "False" - + if not re.search(pattern, content): raise HTTPException(400, f"Cannot find 'active' field in {game_type}.py") - - new_content = re.sub(pattern, f'\\1{new_value}', content) + + new_content = re.sub(pattern, f"\\1{new_value}", content) game_file.write_text(new_content, encoding="utf-8") - + reload_games() - + action = "activated" if active else "deactivated" return ActionResponse( success=True, message=f"Game '{game_type}' has been {action}", game_type=game_type, - active=active + 
active=active, ) @@ -438,16 +559,16 @@ def _set_game_active(game_type: str, active: bool) -> ActionResponse: async def get_llm_config(): """Xem LLM config hiện tại""" global _current_config - + if _current_config is None: core = get_core() _current_config = core.llm_config - + return LLMConfigResponse( provider=_current_config.provider, model_name=_current_config.model_name, temperature=_current_config.temperature, - base_url=_current_config.base_url + base_url=_current_config.base_url, ) @@ -455,50 +576,43 @@ async def get_llm_config(): async def set_llm_config(config: LLMConfigRequest): """Đổi LLM config global""" global _core, _current_config - + new_config = ModelConfig( provider=config.provider, model_name=config.model_name, api_key=config.api_key, temperature=config.temperature, - base_url=config.base_url + base_url=config.base_url, ) - + try: _core = GameCore(llm_config=new_config) _current_config = new_config - + return ActionResponse( success=True, - message=f"LLM changed to {config.provider}/{config.model_name}" + message=f"LLM changed to {config.provider}/{config.model_name}", ) except Exception as e: - return ActionResponse( - success=False, - message=f"Failed to change LLM: {str(e)}" - ) + return ActionResponse(success=False, message=f"Failed to change LLM: {str(e)}") @app.post("/reload", response_model=ActionResponse) async def reload_all_games(): """Reload games""" global _core - + reload_games() _core = None - + return ActionResponse( - success=True, - message=f"Reloaded. Active games: {get_active_game_types()}" + success=True, message=f"Reloaded. 
Active games: {get_active_game_types()}" ) @app.get("/health") async def health_check(): - return { - "status": "healthy", - "active_games": get_active_game_types() - } + return {"status": "healthy", "active_games": get_active_game_types()} # ============== STARTUP ============== @@ -510,4 +624,8 @@ async def startup(): if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", port=2088) + + port = os.getenv("PORT") + if not port: + raise ValueError("Missing required environment variable: PORT") + uvicorn.run(app, host="0.0.0.0", port=int(port)) diff --git a/backup_source/match.py b/backup_source/match.py index 54241ea..7fc9d52 100644 --- a/backup_source/match.py +++ b/backup_source/match.py @@ -1,23 +1,57 @@ """ -games/match.py - Match Game - Match sentences with images +games/match.py - Match Game - Match words/phrases with images +type_id = 3 + +Input: Danh sách từ hoặc cụm từ +Output: Mỗi item gồm từ/cụm từ và mô tả hình ảnh tương ứng """ -from typing import List + +from typing import List, Literal from pydantic import BaseModel, Field from langchain_core.output_parsers import PydanticOutputParser # ============== SCHEMA ============== class MatchItem(BaseModel): - word: str = Field(description="The sentence to be matched (EXACT copy from source)") - match_with: str = Field(description="Short keyword for reference") - original_quote: str = Field(description="EXACT quote from source text") - image_description: str = Field(default="", description="Detailed visual description for image generation/search") - image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") + """Schema cho 1 item của Match game""" + + word: str = Field( + description="The word or phrase to be matched (EXACT copy from source, cleaned of numbering)" + ) + original_quote: str = Field( + description="EXACT quote from source text before any cleaning" + ) + image_description: str = Field( + 
description="Detailed visual description for image generation in ENGLISH. Must be specific and visual." + ) + image_keywords: List[str] = Field( + default=[], description="2-3 English keywords for image search" + ) + image_is_complex: bool = Field( + default=False, + description="True if image needs precise quantities, humans, or multiple detailed objects", + ) + + +class MatchMetadata(BaseModel): + """Metadata đánh giá nội dung""" + + title: str = Field(description="Title from source or short descriptive title") + description: str = Field(description="One sentence summary of the content") + grade: int = Field( + description="Estimated grade level 1-5 (1=easy/young, 5=advanced)" + ) + type: Literal["match"] = Field(default="match", description="Game type") + difficulty: int = Field(description="Difficulty 1-5 for that grade") class MatchOutput(BaseModel): """Output wrapper for match items""" - items: List[MatchItem] = Field(description="List of match items generated from source text") + + items: List[MatchItem] = Field( + description="List of match items generated from source text" + ) + metadata: MatchMetadata = Field(description="Metadata about the content") # Output parser @@ -26,56 +60,110 @@ output_parser = PydanticOutputParser(pydantic_object=MatchOutput) # ============== CONFIG ============== GAME_CONFIG = { + # === REQUIRED === "game_type": "match", + "type_id": 3, "display_name": "Match with Image", - "description": "Match sentences with images", - - "active": True, - - "min_items": 2, - "max_items": 10, + "description": "Match words or phrases with their corresponding images", "schema": MatchItem, "output_schema": MatchOutput, "output_parser": output_parser, - - "system_prompt": """Extract sentences and create image descriptions for matching game. -The game will show images and players must match them with the correct sentences. - -YOUR TASK: -1. Extract meaningful sentences from the source text -2. 
Create a DETAILED image_description that clearly represents the sentence -3. The image should be distinct enough to match with its sentence - -CRITICAL RULES: -1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate the source text -2. original_quote MUST be an EXACT copy from source text -3. image_description must be DETAILED and SPECIFIC to the sentence content -4. Each image should be visually distinguishable from others""", + # === OPTIONAL === + "active": True, + "max_items": 10, + # Input validation rules + "input_format_rules": [ + "Text MUST be a list of words or phrases separated by commas, semicolons, or newlines", + "NOT suitable for long sentences or paragraphs", + "Each item should be a concrete noun/concept that can be visualized", + ], + # Analyzer rules - khi nào nên chọn game này + "analyzer_rules": [ + "Text is a list of words or short phrases", + "Words represent concrete objects/concepts that can be visualized", + "Examples: 'apple, banana, orange' or 'cat; dog; bird'", + "NOT suitable for abstract concepts or long sentences", + ], + # Generation rules - cách tạo nội dung + "generation_rules": [ + "KEEP ORIGINAL LANGUAGE for 'word' field - Do NOT translate", + "original_quote = EXACT copy from source before cleaning", + "Clean numbering like '1.', 'a)', '•' from word field", + "Each word/phrase should represent a visualizable concept", + # Image rules + "image_description: MUST be DETAILED visual description in ENGLISH", + "image_description: Describe colors, shapes, actions, context", + "image_keywords: 2-3 English keywords for search", + "image_is_complex: TRUE for humans, precise counts, complex scenes", + "NEVER leave image_description empty!", + # Quality rules + "Each image should be visually DISTINCT from others", + "Avoid generic descriptions - be specific", + ], + "examples": [], # Defined below } # ============== EXAMPLES ============== EXAMPLES = [ { - "input": "The Sun is a star. 
The Moon orbits Earth.", + "input": "apple; banana;", "output": { "items": [ { - "word": "The Sun is a star.", - "match_with": "sun", - "original_quote": "The Sun is a star.", - "image_description": "A bright glowing yellow sun with solar flares", - "image_is_complex": False + "word": "apple", + "original_quote": "apple", + "image_description": "A shiny red apple with a green leaf on top", + "image_keywords": ["apple", "fruit", "red"], + "image_is_complex": False, }, { - "word": "The Moon orbits Earth.", - "match_with": "moon", - "original_quote": "The Moon orbits Earth.", - "image_description": "A grey moon circling around the blue Earth planet", - "image_is_complex": False - } - ] + "word": "banana", + "original_quote": "banana", + "image_description": "A curved yellow banana", + "image_keywords": ["banana", "fruit", "yellow"], + "image_is_complex": False, + }, + ], + "metadata": { + "title": "Fruits", + "description": "Common fruits vocabulary", + "grade": 1, + "type": "match", + "difficulty": 1, + }, }, - "why_suitable": "Has distinct concepts that can be visualized and matched" - } + "why_suitable": "Simple words representing concrete objects that can be visualized", + }, + { + "input": "1. elephant\n2. giraffe\n", + "output": { + "items": [ + { + "word": "elephant", + "original_quote": "1. elephant", + "image_description": "A large grey elephant with big ears and long trunk", + "image_keywords": ["elephant", "animal", "africa"], + "image_is_complex": False, + }, + { + "word": "giraffe", + "original_quote": "2. 
giraffe", + "image_description": "A tall giraffe with brown spots and long neck", + "image_keywords": ["giraffe", "tall", "spots"], + "image_is_complex": False, + }, + ], + "metadata": { + "title": "African Animals", + "description": "Safari animals vocabulary", + "grade": 2, + "type": "match", + "difficulty": 1, + }, + }, + "why_suitable": "Numbered list of animals - numbering will be cleaned", + }, ] + +GAME_CONFIG["examples"] = EXAMPLES diff --git a/src/core.py b/src/core.py index 3624d14..fde8e2d 100644 --- a/src/core.py +++ b/src/core.py @@ -6,6 +6,7 @@ core.py - Simple Game Generator Core 2. run_single() - Analyze + generate 1 game tốt nhất (1 API call) 3. generate() - Generate 1 game cụ thể (không analyze) """ + import os import json import time @@ -25,40 +26,57 @@ class TokenUsage: prompt_tokens: int = 0 completion_tokens: int = 0 total_tokens: int = 0 - + input_chars: int = 0 # Character count sent to LLM + output_chars: int = 0 # Character count received from LLM + def add(self, usage: Dict[str, int]): - self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get("input_tokens", 0) - self.completion_tokens += usage.get("completion_tokens", 0) or usage.get("output_tokens", 0) + self.prompt_tokens += usage.get("prompt_tokens", 0) or usage.get( + "input_tokens", 0 + ) + self.completion_tokens += usage.get("completion_tokens", 0) or usage.get( + "output_tokens", 0 + ) self.total_tokens = self.prompt_tokens + self.completion_tokens - + + def add_chars(self, input_text: str, output_text: str): + """Track character counts for LLM input/output""" + self.input_chars += len(input_text) if input_text else 0 + self.output_chars += len(output_text) if output_text else 0 + def to_dict(self) -> Dict[str, int]: - return {"prompt_tokens": self.prompt_tokens, "completion_tokens": self.completion_tokens, "total_tokens": self.total_tokens} + return { + "prompt_tokens": self.prompt_tokens, + "completion_tokens": self.completion_tokens, + "total_tokens": 
self.total_tokens, + "input_chars": self.input_chars, + "output_chars": self.output_chars, + } class GameCore: """ Simple Game Generator. - + Usage: core = GameCore() - + # 1. Generate nhiều games (analyze first) result = core.run_multi(text) - + # 2. Generate 1 game tốt nhất (1 API call) result = core.run_single(text) - + # 3. Generate 1 game cụ thể result = core.generate("quiz", text) """ - + def __init__(self, llm_config: Optional[Union[ModelConfig, Dict, str]] = None): self.llm_config = self._parse_config(llm_config) self.llm = get_llm(self.llm_config) self.validator = QuoteValidator() self.registry = get_registry() print(f"🤖 LLM: {self.llm_config.provider}/{self.llm_config.model_name}") - + def _parse_config(self, config) -> ModelConfig: if config is None: if os.getenv("GOOGLE_API_KEY"): @@ -66,7 +84,7 @@ class GameCore: elif os.getenv("OPENAI_API_KEY"): return get_default_config("openai") return get_default_config("ollama") - + if isinstance(config, ModelConfig): return config if isinstance(config, str): @@ -74,9 +92,9 @@ class GameCore: if isinstance(config, dict): return ModelConfig(**config) raise ValueError(f"Invalid config: {type(config)}") - + # ============== 1. RUN MULTI (Analyze + Generate nhiều games) ============== - + def run_multi( self, text: str, @@ -84,105 +102,316 @@ class GameCore: max_items: int = 3, min_score: int = 20, validate: bool = True, - debug: bool = False + debug: bool = False, ) -> Dict[str, Any]: """ Analyze text + Generate nhiều games phù hợp. - + Returns: {success, games, results, errors, token_usage, llm} """ tracker = TokenUsage() errors = [] - + # 1. Analyze (also returns metadata) available = enabled_games or self.registry.get_game_types() logger.info(f"Analyzing text for multi-gen. 
Available games: {available}") - games, scores, metadata, err = self._analyze(text, available, min_score, tracker, debug) + games, scores, metadata, err = self._analyze( + text, available, min_score, tracker, debug + ) errors.extend(err) - + if not games: logger.warning("Analyzer found no suitable games matches.") return self._result(False, [], {}, errors, tracker, metadata=metadata) - + logger.info(f"Analyzer selected: {games}") - + # 2. Generate results, err = self._generate_multi(games, text, max_items, tracker, debug) errors.extend(err) - + # 3. Validate if validate: results = self._validate(results, text) - + # Check if any game has items - has_items = any(data.get("items", []) for data in results.values() if isinstance(data, dict)) - return self._result(has_items, games, results, errors, tracker, scores, metadata) - + has_items = any( + data.get("items", []) for data in results.values() if isinstance(data, dict) + ) + return self._result( + has_items, games, results, errors, tracker, scores, metadata + ) + + # ============== 1.5. RUN FAST (1 API call: Analyze + Generate ALL suitable games) ============== + + def run_fast( + self, + text: str, + enabled_games: Optional[List[str]] = None, + max_items: int = 3, + min_score: int = 50, + validate: bool = True, + debug: bool = False, + ) -> Dict[str, Any]: + """ + OPTIMIZED: 1 API call để analyze + generate TẤT CẢ games phù hợp. 
+ + Output format GIỐNG HỆT run_multi(): + - 1 call duy nhất thay vì 2 (analyze + generate) + - Trả về nhiều games với items + + Returns: {success, games, game_scores, metadata, results, token_usage, llm} + """ + tracker = TokenUsage() + available = enabled_games or self.registry.get_game_types() + logger.info(f"[run_fast] Starting with games: {available}") + + # Build FULL game schemas (giống _generate_multi) + games_schema = [] + for gt in available: + game = get_game(gt) + if game: + games_schema.append(f"""### {gt.upper()} +{game.generated_system_prompt} + +REQUIRED OUTPUT FORMAT: +{game.format_instructions}""") + + # Format cho response + fmt = ", ".join( + [ + f'"{gt}": {{"score": 0-100, "reason": "...", "items": [...]}}' + for gt in available + ] + ) + + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are an educational game analyzer AND generator. In ONE response: + 1. SCORE each game type (0-100) based on how well the text matches game requirements + 2. GENERATE items for games with score >= {min_score} + + SCORING GUIDE: + - 70-100: Text matches game requirements well → GENERATE items + - 40-69: Partial match → GENERATE items if >= min_score + - 0-39: Does not match → DO NOT generate items + + GENERATION RULES: + - KEEP original language from text + - original_quote = EXACT copy from source text + - ALL content must come from source text only + - Include ALL required fields (image_description, image_keywords, etc.) 
+ - Generate max {max_items} items per game + - STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""", + ), + ( + "human", + """GAMES AND THEIR SCHEMAS: + {schemas} + + SOURCE TEXT: + {text} + + RESPOND with this EXACT JSON structure: + {{ + "metadata": {{ + "title": "short title from source", + "description": "one sentence summary", + "grade": 1-5, + "difficulty": 1-5 + }}, + {format} + }}""", + ), + ] + ) + + if debug: + print(f"\n{'=' * 50}\n✨ RUN FAST (1 call)\n{'=' * 50}") + print(f"Text length: {len(text)}") + + try: + # Build input for tracking + invoke_params = { + "schemas": "\n\n".join(games_schema), + "text": text, + "format": fmt, + "min_score": min_score, + "max_items": max_items, + } + + resp = (prompt | self.llm).invoke(invoke_params) + tracker.add(self._get_usage(resp)) + + # Track character counts: input = all params joined, output = response content + input_text = " ".join(str(v) for v in invoke_params.values()) + tracker.add_chars(input_text, resp.content) + + if debug: + print(f"📝 Response: {resp.content[:500]}...") + + data = self._parse_json(resp.content) + metadata = data.get("metadata", {}) + + # Process results - format giống _generate_multi + results = {} + scores = [] + selected_games = [] + errors = [] + + for gt in available: + game_data = data.get(gt, {}) + if not isinstance(game_data, dict): + continue + + score = game_data.get("score", 0) + reason = game_data.get("reason", "") + items = game_data.get("items", []) + + scores.append({"type": gt, "score": score, "reason": reason}) + + if score >= min_score and items: + # Post-process items (giống _generate_multi) + processed_items = self._post_process(items, gt) + + # Validate if needed + if validate: + processed_items = [ + i + for i in processed_items + if self.validator.validate_quote( + i.get("original_quote", ""), text + ).is_valid + ] + + # Thống nhất structure: {items: [...], metadata: {...}} - giống run_multi + results[gt] = { + "items": processed_items, 
+ "metadata": game_data.get("metadata"), + } + + if processed_items: + selected_games.append(gt) + else: + errors.append(f"No valid items for {gt}") + elif score >= min_score: + errors.append(f"No items generated for {gt}") + + # Sort scores + scores.sort(key=lambda x: x.get("score", 0), reverse=True) + + return self._result( + success=len(selected_games) > 0, + games=selected_games, + results=results, + errors=errors, + tracker=tracker, + scores=scores, + metadata=metadata, + ) + + except Exception as e: + logger.error(f"[run_fast] Error: {e}") + return self._result(False, [], {}, [str(e)], tracker) + # ============== 2. RUN SINGLE (1 API call: Analyze + Generate 1 game) ============== - + def run_single( self, text: str, enabled_games: Optional[List[str]] = None, max_items: int = 3, validate: bool = True, - debug: bool = False + debug: bool = False, ) -> Dict[str, Any]: """ 1 API call: Analyze + Generate game tốt nhất. - + Returns: {success, game_type, reason, items, errors, token_usage, llm} """ tracker = TokenUsage() available = enabled_games or self.registry.get_game_types() logger.info(f"Starting run_single for available games: {available}") - + # Build games info games_info = [] for gt in available: game = get_game(gt) if game: - example = json.dumps(game.examples[0].get('output', {}), ensure_ascii=False, indent=2) if game.examples else "{}" - games_info.append(f"### {gt}\n{game.description}\nExample output:\n{example}") - - prompt = ChatPromptTemplate.from_messages([ - ("system", """You are an educational game generator. -1. ANALYZE text and CHOOSE the BEST game type -2. 
GENERATE items for that game + example = ( + json.dumps( + game.examples[0].get("output", {}), ensure_ascii=False, indent=2 + ) + if game.examples + else "{}" + ) + games_info.append( + f"### {gt}\n{game.description}\nExample output:\n{example}" + ) -RULES: -- KEEP original language -- original_quote = EXACT copy from source -- ALL content from source only"""), - ("human", """GAMES: -{games_info} + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are an educational game generator. + 1. ANALYZE text and CHOOSE the BEST game type + 2. GENERATE items for that game -TEXT: -{text} + RULES: + - KEEP original language + - original_quote = EXACT copy from source + - ALL content from source only""", + ), + ( + "human", + """GAMES: + {games_info} -Choose BEST game from: {types} -Generate max {max_items} items. + TEXT: + {text} + + Choose BEST game from: {types} + Generate max {max_items} items. + + Return JSON: + {{"game_type": "chosen", "reason": "why", "items": [...]}}""", + ), + ] + ) + + content = { + "games_info": "\n\n".join(games_info), + "text": text[:2000], + "types": ", ".join(available), + "max_items": max_items, + } -Return JSON: -{{"game_type": "chosen", "reason": "why", "items": [...]}}""") - ]) - - content = {"games_info": "\n\n".join(games_info), "text": text[:2000], "types": ", ".join(available), "max_items": max_items} - if debug: - print(f"\n{'='*50}\n🎯 RUN SINGLE\n{'='*50}") - + print(f"\n{'=' * 50}\n🎯 RUN SINGLE\n{'=' * 50}") + try: resp = (prompt | self.llm).invoke(content) tracker.add(self._get_usage(resp)) - + + # Track character counts + input_text = " ".join(str(v) for v in content.values()) + tracker.add_chars(input_text, resp.content) + data = self._parse_json(resp.content) game_type = data.get("game_type") items = self._post_process(data.get("items", []), game_type) - + if validate and items: - items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] - + items = [ + i + for i 
in items + if self.validator.validate_quote( + i.get("original_quote", ""), text + ).is_valid + ] + return { "success": len(items) > 0, "game_type": game_type, @@ -190,63 +419,90 @@ Return JSON: "items": items, "errors": [], "token_usage": tracker.to_dict(), - "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", } except Exception as e: - return {"success": False, "game_type": None, "items": [], "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"} - + return { + "success": False, + "game_type": None, + "items": [], + "errors": [str(e)], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", + } + # ============== 3. GENERATE (1 game cụ thể, không analyze) ============== - + def generate( self, game_type: str, text: str, max_items: int = 3, validate: bool = True, - debug: bool = False + debug: bool = False, ) -> Dict[str, Any]: """Generate 1 game cụ thể""" tracker = TokenUsage() logger.info(f"Generating single game content: {game_type}") - + game = get_game(game_type) - + if not game: - return {"success": False, "game_type": game_type, "items": [], "errors": [f"Game not found: {game_type}"], "token_usage": {}, "llm": ""} - + return { + "success": False, + "game_type": game_type, + "items": [], + "errors": [f"Game not found: {game_type}"], + "token_usage": {}, + "llm": "", + } + # Build Format Rules Section format_rules_section = "" if game.input_format_rules: rules_str = "\n".join(f"- {r}" for r in game.input_format_rules) format_rules_section = f""" -CRITICAL: FIRST, VALIDATE THE INPUT TEXT. -Format Rules: -{rules_str} + CRITICAL: FIRST, VALIDATE THE INPUT TEXT. 
+ Format Rules: + {rules_str} -If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else: -{{{{ "format_error": "Input text incompatible with game requirements." }}}} -""" + If the text is completely UNSUITABLE for this game type, you MUST output strictly this JSON and nothing else: + {{{{ "format_error": "Input text incompatible with game requirements." }}}} + """ - prompt = ChatPromptTemplate.from_messages([ - ("system", f"""{game.generated_system_prompt} -{format_rules_section}"""), - ("human", """TEXT TO PROCESS: -{text} + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + f"""{game.generated_system_prompt} + {format_rules_section}""", + ), + ( + "human", + """TEXT TO PROCESS: + {text} + + Generate content in JSON format: + {format_instructions}""", + ), + ] + ) -Generate content in JSON format: -{format_instructions}""") - ]) - if debug: - print(f"\n{'='*50}\n🎮 GENERATE: {game_type}\n{'='*50}") - + print(f"\n{'=' * 50}\n🎮 GENERATE: {game_type}\n{'=' * 50}") + try: - resp = (prompt | self.llm).invoke({ + invoke_params = { "text": text, - "format_instructions": game.format_instructions - }) + "format_instructions": game.format_instructions, + } + resp = (prompt | self.llm).invoke(invoke_params) tracker.add(self._get_usage(resp)) - + + # Track character counts + input_text = " ".join(str(v) for v in invoke_params.values()) + tracker.add_chars(input_text, resp.content) + # 1. Parse as raw JSON first to check for format_error raw_data = None try: @@ -263,21 +519,21 @@ Generate content in JSON format: "format_error": raw_data["format_error"], "errors": [raw_data["format_error"]], "token_usage": tracker.to_dict(), - "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", } parsed_data = raw_data - + # 3. 
Try output_parser for structured validation if present if game.output_parser: try: parsed = game.output_parser.parse(resp.content) parsed_data = parsed.model_dump() except Exception as pe: - if debug: print(f"⚠️ output_parser failed: {pe}") + if debug: + print(f"⚠️ output_parser failed: {pe}") # Keep raw_data if parser fails but we have JSON - # Check format error if parsed_data and parsed_data.get("format_error"): return { @@ -287,173 +543,241 @@ Generate content in JSON format: "format_error": parsed_data["format_error"], "errors": [parsed_data["format_error"]], "token_usage": tracker.to_dict(), - "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", } - + # Post-process items = parsed_data.get("items", []) if parsed_data else [] items = self._post_process(items, game_type) - + if validate and items: - items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] - + items = [ + i + for i in items + if self.validator.validate_quote( + i.get("original_quote", ""), text + ).is_valid + ] + if not items: return { "success": False, "game_type": game_type, "data": None, "format_error": "No items extracted", - "errors": [], + "errors": [], "token_usage": tracker.to_dict(), - "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", } - + if parsed_data: parsed_data["items"] = items - + return { "success": True, "game_type": game_type, "data": parsed_data, "errors": [], "token_usage": tracker.to_dict(), - "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", } except Exception as e: - return {"success": False, "game_type": game_type, "data": None, "errors": [str(e)], "token_usage": tracker.to_dict(), "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}"} - + return { + "success": 
False, + "game_type": game_type, + "data": None, + "errors": [str(e)], + "token_usage": tracker.to_dict(), + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", + } + # ============== PRIVATE METHODS ============== - - def _analyze(self, text: str, available: List[str], min_score: int, tracker: TokenUsage, debug: bool) -> tuple: + + def _analyze( + self, + text: str, + available: List[str], + min_score: int, + tracker: TokenUsage, + debug: bool, + ) -> tuple: """Analyze text để suggest games - với retry""" # Lấy context từ game configs context = get_analyzer_context() - - prompt = ChatPromptTemplate.from_messages([ - ("system", """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements. -GAME REQUIREMENTS: -{context} + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are a game type analyzer. Score each game 0-100 based on how well the text matches the game requirements. -SCORING: -- 70-100: Text matches game requirements well -- 40-69: Partial match -- 0-39: Does not match requirements + GAME REQUIREMENTS: + {context} -IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field. + SCORING: + - 70-100: Text matches game requirements well + - 40-69: Partial match + - 0-39: Does not match requirements -Return valid JSON with scores AND metadata about the content: -{{ - "scores": [ - {{ - "type": "NAME_OF_GAME_TYPE", - "score": 80, - "reason": "..." - }} - ], - "metadata": {{ - "title": "Title from source or create short title", - "description": "One sentence summary", - "grade": 1-5, - "difficulty": 1-5 - }} -}}"""), - ("human", """TEXT TO ANALYZE: -{text} + IMPORTANT: You MUST use the exact game type name (e.g. 'quiz', 'sequence') in the "type" field. + + Return valid JSON with scores AND metadata about the content: + {{ + "scores": [ + {{ + "type": "NAME_OF_GAME_TYPE", + "score": 80, + "reason": "..." 
+ }} + ], + "metadata": {{ + "title": "Title from source or create short title", + "description": "One sentence summary", + "grade": 1-5, + "difficulty": 1-5 + }} + }}""", + ), + ( + "human", + """TEXT TO ANALYZE: + {text} + + Analyze for games: {types} + Return JSON:""", + ), + ] + ) -Analyze for games: {types} -Return JSON:""") - ]) - max_retries = 2 for attempt in range(max_retries): try: - resp = (prompt | self.llm).invoke({ + invoke_params = { "context": context, - "text": text[:800], - "types": ", ".join(available) - }) + "text": text, + "types": ", ".join(available), + } + resp = (prompt | self.llm).invoke(invoke_params) tracker.add(self._get_usage(resp)) - + + # Track character counts + input_text = " ".join(str(v) for v in invoke_params.values()) + tracker.add_chars(input_text, resp.content) + if debug: - print(f"📝 Analyzer raw: {resp.content[:300]}") - + print(f"📝 Analyzer raw: {resp.content}") + # Parse JSON với fallback content = resp.content.strip() if not content: if debug: print(f"⚠️ Empty response, retry {attempt + 1}") continue - + data = self._parse_json(content) - scores = [s for s in data.get("scores", []) if s.get("type") in available and s.get("score", 0) >= min_score] + scores = [ + s + for s in data.get("scores", []) + if s.get("type") in available and s.get("score", 0) >= min_score + ] scores.sort(key=lambda x: x.get("score", 0), reverse=True) - + # Extract metadata from response metadata = data.get("metadata", {}) - + if debug: print(f"🔍 Scores: {scores}") print(f"📋 Metadata: {metadata}") - + return [s["type"] for s in scores], scores, metadata, [] - + except Exception as e: if debug: print(f"⚠️ Analyze attempt {attempt + 1} failed: {e}") if attempt == max_retries - 1: # Final fallback: return all games với low score return available, [], {}, [f"Analyze error: {e}"] - + return available, [], {}, ["Analyze failed after retries"] - - def _generate_multi(self, games: List[str], text: str, max_items: int, tracker: TokenUsage, debug: bool) -> 
tuple: + + def _generate_multi( + self, + games: List[str], + text: str, + max_items: int, + tracker: TokenUsage, + debug: bool, + ) -> tuple: """Generate nhiều games""" if len(games) == 1: - result = self.generate(games[0], text, max_items, validate=False, debug=debug) + result = self.generate( + games[0], text, max_items, validate=False, debug=debug + ) tracker.add(result.get("token_usage", {})) # Fix: generate returns {data: {items: [...]}} not {items: [...]} data = result.get("data") or {} items = data.get("items", []) if isinstance(data, dict) else [] - return {games[0]: {"items": items, "metadata": data.get("metadata")}}, result.get("errors", []) - + return { + games[0]: {"items": items, "metadata": data.get("metadata")} + }, result.get("errors", []) + # Multi-game: Build schema info for each game games_schema = [] for gt in games: game = get_game(gt) if game: games_schema.append(f"""### {gt.upper()} -{game.generated_system_prompt} + {game.generated_system_prompt} + + REQUIRED OUTPUT FORMAT: + {game.format_instructions}""") -REQUIRED OUTPUT FORMAT: -{game.format_instructions}""") - - prompt = ChatPromptTemplate.from_messages([ - ("system", """You are a multi-game content generator. -Generate items for EACH game type following their EXACT schema. -IMPORTANT: Include ALL required fields for each item (image_description, image_keywords, etc.) -RULES: Keep original language, use exact quotes from text."""), - ("human", """GAMES AND THEIR SCHEMAS: -{schemas} + prompt = ChatPromptTemplate.from_messages( + [ + ( + "system", + """You are a multi-game content generator. In ONE response: + 1. Generate items for EACH game type following their EXACT schema + + GENERATION RULES: + - KEEP original language from text + - original_quote = EXACT copy from source text + - ALL content must come from source text only + - Include ALL required fields (image_description, image_keywords, etc.) 
+ - STRICTLY FOLLOW each game's GENERATION RULES defined in their schema below""", + ), + ( + "human", + """GAMES AND THEIR SCHEMAS: + {schemas} -SOURCE TEXT: -{text} + SOURCE TEXT: + {text} + + Generate items for: {types} + Return valid JSON: {{{format}}}""", + ), + ] + ) + + fmt = ", ".join( + [f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games] + ) -Generate items for: {types} -Return valid JSON: {{{format}}}""") - ]) - - fmt = ", ".join([f'"{gt}": {{"items": [...], "metadata": {{...}}}}' for gt in games]) - try: - resp = (prompt | self.llm).invoke({ + invoke_params = { "schemas": "\n\n".join(games_schema), "text": text, "types": ", ".join(games), - "format": fmt - }) + "format": fmt, + } + resp = (prompt | self.llm).invoke(invoke_params) tracker.add(self._get_usage(resp)) - + + # Track character counts + input_text = " ".join(str(v) for v in invoke_params.values()) + tracker.add_chars(input_text, resp.content) + data = self._parse_json(resp.content) results = {} errors = [] @@ -465,42 +789,68 @@ Return valid JSON: {{{format}}}""") results[gt] = {"items": items, "metadata": game_data.get("metadata")} if not items: errors.append(f"No items for {gt}") - + return results, errors except Exception as e: - return {gt: {"items": [], "metadata": None} for gt in games}, [f"Generate error: {e}"] - + return {gt: {"items": [], "metadata": None} for gt in games}, [ + f"Generate error: {e}" + ] + def _validate(self, results: Dict[str, dict], text: str) -> Dict[str, dict]: """Validate items trong results""" validated = {} for gt, data in results.items(): items = data.get("items", []) if isinstance(data, dict) else [] - valid_items = [i for i in items if self.validator.validate_quote(i.get("original_quote", ""), text).is_valid] - validated[gt] = {"items": valid_items, "metadata": data.get("metadata") if isinstance(data, dict) else None} + valid_items = [ + i + for i in items + if self.validator.validate_quote( + i.get("original_quote", ""), text + ).is_valid + ] 
+ validated[gt] = { + "items": valid_items, + "metadata": data.get("metadata") if isinstance(data, dict) else None, + } return validated - + def _post_process(self, items: List, game_type: str) -> List[Dict]: ms = int(time.time() * 1000) result = [] for i, item in enumerate(items): - d = item if isinstance(item, dict) else (item.model_dump() if hasattr(item, 'model_dump') else {}) + d = ( + item + if isinstance(item, dict) + else (item.model_dump() if hasattr(item, "model_dump") else {}) + ) d["id"] = f"{game_type[:2].upper()}-{ms}-{i}" d["game_type"] = game_type result.append(d) return result - + def _parse_json(self, content: str) -> Dict: if "```" in content: content = content.split("```")[1].replace("json", "").strip() return json.loads(content) - + def _get_usage(self, resp) -> Dict: - if hasattr(resp, 'response_metadata'): + if hasattr(resp, "response_metadata"): meta = resp.response_metadata - return meta.get('usage', meta.get('usage_metadata', meta.get('token_usage', {}))) - return getattr(resp, 'usage_metadata', {}) - - def _result(self, success: bool, games: List, results: Dict, errors: List, tracker: TokenUsage, scores: List = None, metadata: Dict = None) -> Dict: + return meta.get( + "usage", meta.get("usage_metadata", meta.get("token_usage", {})) + ) + return getattr(resp, "usage_metadata", {}) + + def _result( + self, + success: bool, + games: List, + results: Dict, + errors: List, + tracker: TokenUsage, + scores: List = None, + metadata: Dict = None, + ) -> Dict: return { "success": success, "games": games, @@ -509,5 +859,69 @@ Return valid JSON: {{{format}}}""") "results": results, "errors": errors, "token_usage": tracker.to_dict(), - "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}" + "llm": f"{self.llm_config.provider}/{self.llm_config.model_name}", } + + # ============== ASYNC WRAPPERS (for concurrent FastAPI handling) ============== + # These methods run the blocking LLM calls in a thread pool + + async def run_fast_async( + 
self, + text: str, + enabled_games: Optional[List[str]] = None, + max_items: int = 3, + min_score: int = 50, + validate: bool = True, + debug: bool = False, + ) -> Dict[str, Any]: + """Async wrapper for run_fast - runs in thread pool to not block event loop""" + import asyncio + + return await asyncio.to_thread( + self.run_fast, text, enabled_games, max_items, min_score, validate, debug + ) + + async def run_single_async( + self, + text: str, + enabled_games: Optional[List[str]] = None, + max_items: int = 3, + validate: bool = True, + debug: bool = False, + ) -> Dict[str, Any]: + """Async wrapper for run_single - runs in thread pool to not block event loop""" + import asyncio + + return await asyncio.to_thread( + self.run_single, text, enabled_games, max_items, validate, debug + ) + + async def run_multi_async( + self, + text: str, + enabled_games: Optional[List[str]] = None, + max_items: int = 3, + validate: bool = True, + debug: bool = False, + ) -> Dict[str, Any]: + """Async wrapper for run_multi - runs in thread pool to not block event loop""" + import asyncio + + return await asyncio.to_thread( + self.run_multi, text, enabled_games, max_items, validate, debug + ) + + async def generate_async( + self, + text: str, + game_types: Union[List[str], str], + max_items: int = 10, + validate: bool = True, + debug: bool = False, + ) -> Dict[str, Any]: + """Async wrapper for generate - runs in thread pool to not block event loop""" + import asyncio + + return await asyncio.to_thread( + self.generate, text, game_types, max_items, validate, debug + ) diff --git a/src/game_registry.py b/src/game_registry.py index b175b7b..de57022 100644 --- a/src/game_registry.py +++ b/src/game_registry.py @@ -2,7 +2,7 @@ game_registry.py - Tự động load games từ thư mục games/ Hệ thống sẽ: -1. Scan thư mục games/ +1. Scan thư mục games/ 2. Load mọi file .py (trừ _template.py và __init__.py) 3. Chỉ load games có active: True 4. 
Đăng ký tự động vào registry @@ -10,6 +10,7 @@ Hệ thống sẽ: THÊM GAME MỚI = TẠO FILE TRONG games/ BẬT/TẮT GAME = SỬA active: True/False trong file game """ + import importlib.util from pathlib import Path from typing import Dict, List, Any, Optional @@ -20,75 +21,78 @@ class GameRegistry: """ Registry tự động load games từ thư mục games/ Chỉ load games có active: True - + Supports lookup by: - game_type (string): "quiz", "sequence" - type_id (int): 1, 2 """ + _instance: Optional["GameRegistry"] = None _all_games: Dict[str, GameType] = {} # Keyed by game_type _id_map: Dict[int, str] = {} # type_id -> game_type _loaded: bool = False - + def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) cls._instance._all_games = {} cls._instance._id_map = {} return cls._instance - + def __init__(self): if not self._loaded: self._load_all_games() self._loaded = True - + def _load_all_games(self): """Scan và load tất cả game definitions từ games/""" games_dir = Path(__file__).parent / "games" - + if not games_dir.exists(): print(f"⚠️ Games directory not found: {games_dir}") return - + for file_path in games_dir.glob("*.py"): # Skip __init__.py và _template.py và base.py if file_path.name.startswith("_") or file_path.name == "base.py": continue - + try: game_def = self._load_game_from_file(file_path) if game_def: self._all_games[game_def.game_type] = game_def - if game_def.type_id > 0: + if game_def.type_id >= 0: # 0=quiz, 1=sequence are valid self._id_map[game_def.type_id] = game_def.game_type status = "✅" if game_def.active else "⏸️" - print(f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})") + print( + f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})" + ) except Exception as e: print(f"❌ Error loading {file_path.name}: {e}") - + def _load_game_from_file(self, file_path: Path) -> Optional[GameType]: """Load 1 game definition từ file""" module_name = f"games.{file_path.stem}" 
- + spec = importlib.util.spec_from_file_location(module_name, file_path) if spec is None or spec.loader is None: return None - + module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) - + config = getattr(module, "GAME_CONFIG", None) examples = getattr(module, "EXAMPLES", []) - + if config is None: return None - + # Inject examples if not in config if examples and "examples" not in config: config["examples"] = examples - + return create_game_type(config) - + def reload(self): """Reload tất cả games""" self._all_games.clear() @@ -96,55 +100,57 @@ class GameRegistry: self._loaded = False self._load_all_games() self._loaded = True - + # ============== PUBLIC API ============== - + def get_game(self, game_type: str) -> Optional[GameType]: """Lấy game by game_type (chỉ active)""" game = self._all_games.get(game_type) return game if game and game.active else None - + def get_game_by_id(self, type_id: int) -> Optional[GameType]: """Lấy game by type_id (chỉ active)""" game_type = self._id_map.get(type_id) if game_type: return self.get_game(game_type) return None - + def get_game_type_by_id(self, type_id: int) -> Optional[str]: """Convert type_id -> game_type""" return self._id_map.get(type_id) - + def get_id_by_game_type(self, game_type: str) -> int: - """Convert game_type -> type_id""" + """Convert game_type -> type_id. 
Returns -1 if not found.""" game = self._all_games.get(game_type) - return game.type_id if game else 0 - + return game.type_id if game else -1 # -1 = not found + def get_all_games(self) -> Dict[str, GameType]: """Lấy tất cả games ACTIVE""" return {k: v for k, v in self._all_games.items() if v.active} - + def get_all_games_including_inactive(self) -> Dict[str, GameType]: """Lấy tất cả games (kể cả inactive)""" return self._all_games.copy() - + def get_game_types(self) -> List[str]: """Lấy danh sách game types ACTIVE""" return [k for k, v in self._all_games.items() if v.active] - + def get_type_ids(self) -> List[int]: """Lấy danh sách type_ids ACTIVE""" - return [v.type_id for v in self._all_games.values() if v.active and v.type_id > 0] - + return [ + v.type_id for v in self._all_games.values() if v.active and v.type_id > 0 + ] + def get_analyzer_context(self) -> str: """Tạo context cho Analyzer (chỉ từ active games)""" context_parts = [] - + for game_type, game in self._all_games.items(): if not game.active: continue - - hints = game.analyzer_rules # New field name + + hints = game.analyzer_rules # New field name if hints: hints_text = "\n - ".join(hints) context_parts.append( @@ -152,9 +158,9 @@ class GameRegistry: f" Description: {game.description}\n" f" Suitable when:\n - {hints_text}" ) - + return "\n\n".join(context_parts) - + def is_active(self, game_type: str) -> bool: """Kiểm tra game có active không""" game = self._all_games.get(game_type) diff --git a/src/games/_template.py b/src/games/_template.py index 444d81e..6fb5f42 100644 --- a/src/games/_template.py +++ b/src/games/_template.py @@ -4,88 +4,180 @@ games/_template.py - TEMPLATE CHO GAME MỚI THÊM GAME MỚI CHỈ CẦN: 1. Copy file này 2. Rename thành .py (ví dụ: matching.py) -3. Sửa nội dung bên trong +3. Sửa nội dung bên trong theo hướng dẫn 4. DONE! Hệ thống tự động nhận diện. Không cần sửa bất kỳ file nào khác! 
""" -from typing import List, Optional +from typing import List, Literal, Optional from pydantic import BaseModel, Field +from langchain_core.output_parsers import PydanticOutputParser -# ============== 1. SCHEMA ============== +# ============== 1. ITEM SCHEMA ============== # Định nghĩa structure của 1 item trong game -# BẮT BUỘC phải có: original_quote và explanation +# BẮT BUỘC phải có: original_quote class YourGameItem(BaseModel): """Schema cho 1 item của game""" - # Các trường BẮT BUỘC (để chống hallucination) + # === TRƯỜNG BẮT BUỘC === original_quote: str = Field( - description="Trích dẫn NGUYÊN VĂN từ văn bản gốc" + description="EXACT quote from source text - dùng để verify không hallucinate" ) - explanation: str = Field(description="Giải thích") - # Thêm các trường riêng của game ở đây + # === TRƯỜNG RIÊNG CỦA GAME === + # Thêm các trường cần thiết cho game của bạn # Ví dụ: - # question: str = Field(description="Câu hỏi") - # answer: str = Field(description="Đáp án") + question: str = Field(description="The question") + answer: str = Field(description="The correct answer") + + # === TRƯỜNG HÌNH ẢNH (Khuyến nghị) === + image_description: str = Field(default="", description="Visual description in English") + image_keywords: List[str] = Field(default=[], description="2-3 English keywords for image search") + image_is_complex: bool = Field(default=False, description="True if needs precise quantities/humans/complex scene") -# ============== 2. CONFIG ============== -# Cấu hình cho game +# ============== 2. 
METADATA SCHEMA ============== +# Metadata mô tả nội dung được generate + +class YourGameMetadata(BaseModel): + """Metadata đánh giá nội dung""" + title: str = Field(description="Title from source or short descriptive title") + description: str = Field(description="One sentence summary") + grade: int = Field(description="Grade level 1-5 (1=easy, 5=advanced)") + type: Literal["your_game"] = Field(default="your_game", description="Game type - MUST match game_type below") + difficulty: int = Field(description="Difficulty 1-5 for that grade") + + +# ============== 3. OUTPUT SCHEMA ============== +# Wrapper chứa danh sách items và metadata + +class YourGameOutput(BaseModel): + """Output wrapper - BẮT BUỘC phải có""" + items: List[YourGameItem] = Field(description="List of game items") + metadata: YourGameMetadata = Field(description="Metadata about the content") + + +# Output parser - tự động từ output schema +output_parser = PydanticOutputParser(pydantic_object=YourGameOutput) + + +# ============== 4. CONFIG ============== +# Cấu hình cho game - ĐÂY LÀ PHẦN QUAN TRỌNG NHẤT GAME_CONFIG = { - # Key duy nhất cho game (dùng trong API) - "game_type": "your_game", + # === REQUIRED FIELDS === + + # Key duy nhất cho game (dùng trong API) - PHẢI unique + "game_type": "your_game", + + # ID số nguyên unique - PHẢI khác các game khác + # Quiz=1, Sequence=2, ... tiếp tục từ 3 + "type_id": 99, # TODO: Đổi thành số unique # Tên hiển thị - "display_name": "Tên Game", + "display_name": "Your Game Name", # Mô tả ngắn - "description": "Mô tả game của bạn", + "description": "Description of your game", - # Số lượng items - "max_items": 5, - - # Trỏ đến schema class + # Schema classes - BẮT BUỘC "schema": YourGameItem, + "output_schema": YourGameOutput, + "output_parser": output_parser, - # Prompt cho LLM - "system_prompt": """Bạn là chuyên gia tạo [tên game]. - -NHIỆM VỤ: [Mô tả nhiệm vụ] - -QUY TẮC: -1. original_quote PHẢI là trích dẫn NGUYÊN VĂN -2. [Quy tắc khác] -3. 
[Quy tắc khác]""", + # === OPTIONAL FIELDS (có default) === + + # Game có active không + "active": True, + + # Số lượng items tối đa + "max_items": 10, + + # Rules validate input trước khi generate (Direct Mode) + "input_format_rules": [ + "Text should contain ... suitable for this game.", + "Text MUST have ...", + ], + + # Rules cho Analyzer nhận diện game phù hợp + "analyzer_rules": [ + "Text MUST contain ...", + "NOT suitable if text is ...", + ], + + # Rules cho Generator tạo nội dung + "generation_rules": [ + "KEEP ORIGINAL LANGUAGE - Do NOT translate", + "original_quote = EXACT quote from source text", + "ALL content must come from source only - do NOT invent", + + # Thêm rules riêng cho game của bạn + "Your specific rule 1", + "Your specific rule 2", + + # Visual fields + "image_description: MUST be visual description in ENGLISH", + "image_keywords: MUST provide 2-3 English keywords", + "NEVER leave image fields empty!", + ], + + # Examples - giúp LLM học format + "examples": [] # Sẽ định nghĩa bên dưới } -# ============== 3. EXAMPLES ============== -# Ví dụ input/output để: -# - Analyzer học khi nào nên suggest game này -# - Generator dùng làm few-shot +# ============== 5. EXAMPLES ============== +# Ví dụ input/output để LLM học pattern EXAMPLES = [ { # Input text mẫu - "input": "Văn bản mẫu ở đây...", + "input": "Sample text for your game...", - # Output mong đợi + # Output mong đợi - PHẢI match schema "output": { "items": [ { - "original_quote": "Trích dẫn từ văn bản", - "explanation": "Giải thích", - # Các trường khác của schema... 
+ "original_quote": "Exact quote from input", + "question": "Sample question?", + "answer": "Sample answer", + "image_description": "Visual description", + "image_keywords": ["keyword1", "keyword2"], + "image_is_complex": False } - ] + ], + "metadata": { + "title": "Sample Title", + "description": "Sample description", + "grade": 2, + "type": "your_game", + "difficulty": 2 + } }, - # Analyzer học từ trường này - "why_suitable": "Giải thích tại sao văn bản này phù hợp với game này" + # Giải thích tại sao phù hợp - Analyzer học từ đây + "why_suitable": "Explain why this input is suitable for this game" }, - # Thêm 1-2 examples nữa... + # Thêm 1-2 examples nữa để LLM học tốt hơn... ] + +# Gán examples vào config +GAME_CONFIG["examples"] = EXAMPLES + + +# ============== 6. POST PROCESS (Optional) ============== +# Function xử lý output sau khi LLM generate + +def post_process_your_game(items: List[dict]) -> List[dict]: + """Clean up hoặc transform items sau khi generate""" + for item in items: + # Ví dụ: clean up text + if item.get("answer"): + item["answer"] = item["answer"].strip() + return items + + +# Đăng ký handler (optional) +# GAME_CONFIG["post_process_handler"] = post_process_your_game diff --git a/src/games/quiz.py b/src/games/quiz.py index c4ed989..acdf552 100644 --- a/src/games/quiz.py +++ b/src/games/quiz.py @@ -1,139 +1,172 @@ """ -games/quiz.py - Quiz Game - Multiple choice questions +games/quiz.py - Optimized for LLM Performance while keeping System Integrity """ -from typing import List, Literal -import re -from pydantic import BaseModel, Field + +from typing import List, Literal, Optional +from pydantic import BaseModel, Field, field_validator from langchain_core.output_parsers import PydanticOutputParser +import re -# ============== SCHEMA ============== +# ========================================== +# 1. 
# ==========================================
# 1. SCHEMA
# ==========================================
class QuizItem(BaseModel):
    """One multiple-choice item; raw LLM output is sanitized by the validator below."""

    question: str = Field(description="Question text. Use ____ for blanks.")
    # Instruction type shown to the player; downstream logic may overwrite it.
    request: str = Field(
        default="Choose the correct answer", description="Instruction type"
    )
    answer: str = Field(description="Correct answer text")
    options: List[str] = Field(description="List of options")
    original_quote: str = Field(description="Exact source sentence")
    # Image hints for the media pipeline (optional).
    image_description: str = Field(
        default="", description="Visual description (if needed)"
    )
    # Use default_factory for mutable defaults (idiomatic; avoids shared state).
    image_keywords: List[str] = Field(default_factory=list)
    image_is_complex: bool = Field(default=False)

    @field_validator("answer", "options", mode="before")
    @classmethod
    def clean_prefixes(cls, v):
        """Strip enumeration markers (A., (B), 1., (12), Q1:) from LLM output."""

        def clean_str(text):
            # Leading markers. '\(\d+\)' and '\d+\.' also cover multi-digit
            # prefixes such as '(12)' / '12.' — the pre-refactor clean_prefix()
            # handled those, and the single-char classes used before this fix
            # silently dropped that coverage.
            text = re.sub(
                r"^(\(\d+\)|\([A-Za-z]\)|\d+\.|[A-Za-z]\.|Q\d*:)\s*",
                "",
                str(text),
                flags=re.IGNORECASE,
            )
            # Trailing "(1)" / "(A)" style markers only; deliberately narrow so
            # legitimate trailing parentheticals (e.g. "(H2O)") survive.
            text = re.sub(r"\s*(\(\d+\)|\([A-Za-z]\))$", "", text)
            return text.strip()

        if isinstance(v, list):
            return [clean_str(item) for item in v]
        return clean_str(v)


class QuizMetadata(BaseModel):
    """Quiz-level metadata estimated by the LLM."""

    title: str = Field(description="Short content title")
    description: str = Field(description="Summary")
    grade: int = Field(description="Level 1-5")
    type: Literal["quiz"] = "quiz"
    difficulty: int = Field(description="Level 1-5")


class QuizOutput(BaseModel):
    """Top-level structure the output parser expects: items + metadata."""

    items: List[QuizItem]
    metadata: QuizMetadata


output_parser = PydanticOutputParser(pydantic_object=QuizOutput)
# ==========================================
# 2. COMPACT CONFIG
# ==========================================

# Short hints surfaced in the UI before generation.
_INPUT_FORMAT_RULES = [
    "Text must contain specific facts or Q&A content.",
    "Suitable for multiple choice extraction.",
]

# Cheap pre-checks used by the analyzer stage.
_ANALYZER_RULES = [
    "Contains questions with options OR factual statements.",
    "Not just a list of unconnected words.",
]

# Instructions passed verbatim to the generator LLM.
_GENERATION_RULES = [
    "MODE: STRICT EXTRACTION & LOCALITY PRIORITIZED.",
    "1. MANDATORY OPTIONS & LOCALITY: Only create a quiz item if 2-4 options are EXPLICITLY present and located immediately after/below the question. SKIP if options are shared in a 'Word Box' or 'Word Bank' tại đầu/cuối trang.",
    "2. ANSWER PRIORITY: Use the provided key if available. If the marker is empty, solve it yourself using grammar rules. Do not redefine existing keys.",
    "3. ZERO FABRICATION: Do NOT invent distractors. Only extract what is explicitly present.",
    "4. LOGICAL AMBIGUITY: If a question is grammatically correct with multiple options but lacks context, SKIP IT.",
    "5. SEMANTIC OPTION EXTRACTION: Extract ONLY the meaningful word/phrase. Strip away ALL labels like (1), (A), or OCR noise.",
    "6. SMART FILL-IN-THE-BLANK: If the question is a 'Fill in the blank' type, you MUST analyze the sentence structure and place the '____' at the grammatically correct position (e.g., 'Blood ____ oozing'). DO NOT blindly put it at the end. If the sentence is already a complete question (not a blank type), do not add '____'.",
    "7. METADATA: Fill metadata accurately based on content. Do not leave empty."
]

# Single few-shot example kept to anchor the output format.
_EXAMPLES = [
    {
        "input": "The giraffe has a long neck. Options: neck, leg, tail.",
        "output": {
            "items": [
                {
                    "question": "The giraffe has a long ____.",
                    "request": "Fill in the blank",
                    "answer": "neck",
                    "options": ["neck", "leg", "tail"],
                    "original_quote": "The giraffe has a long neck.",
                    "image_description": "A giraffe",
                    "image_keywords": ["giraffe"],
                    "image_is_complex": False,
                }
            ],
            "metadata": {
                "title": "Animals",
                "description": "Giraffe anatomy",
                "grade": 2,
                "type": "quiz",
                "difficulty": 1,
            },
        },
        "why_suitable": "Valid extraction: Text has Fact + Options.",
    }
]

GAME_CONFIG = {
    # --- System fields consumed by the game registry ---
    "game_type": "quiz",
    "display_name": "Quiz",
    "description": "Multiple choice questions",
    # NOTE(review): type_id is 0 here while api.py's request docstring still
    # says "1=quiz" — confirm the id scheme is consistent across the project.
    "type_id": 0,
    "active": True,
    "max_items": 10,
    "schema": QuizItem,
    "output_schema": QuizOutput,
    "output_parser": output_parser,
    # --- Prompting configuration ---
    "input_format_rules": _INPUT_FORMAT_RULES,
    "analyzer_rules": _ANALYZER_RULES,
    "generation_rules": _GENERATION_RULES,
    "examples": _EXAMPLES,
}
-def post_process_quiz(items: List[dict]) -> List[dict]: - """Clean up answers and options prefixes""" - for item in items: - # Clean answers - if item.get("answers"): - item["answers"] = clean_prefix(item["answers"]) - - # Clean options - if item.get("options") and isinstance(item["options"], list): - item["options"] = [clean_prefix(opt) for opt in item["options"]] - - return items - - -# Register handler -GAME_CONFIG["post_process_handler"] = post_process_quiz - - -# ============== EXAMPLES ============== -EXAMPLES = [ - { - "input": "The Sun is a star at the center of the Solar System.", - "output": { - "items": [{ - "question": "Where is the Sun located?", - "answers": "At the center of the Solar System", - "options": ["At the center of the Solar System", "At the edge of the Solar System", "Near the Moon", "Outside the universe"], - "original_quote": "The Sun is a star at the center of the Solar System.", - "image_description": "The sun in the middle of planets", - "image_keywords": ["sun", "planets"], - "image_is_complex": False - }] - }, - "why_suitable": "Has clear facts" - } -] +# # Đăng ký handler +# GAME_CONFIG["post_process_handler"] = post_process_quiz diff --git a/src/games/sequence.py b/src/games/sequence.py index f93526a..9415cb6 100644 --- a/src/games/sequence.py +++ b/src/games/sequence.py @@ -1,6 +1,6 @@ """ games/sequence.py - Arrange Sequence Game (Sentences OR Words) -type_id = 2 +type_id = 1 LLM tự quyết định dựa vào ngữ nghĩa: - "good morning", "apple", "happy" → WORD - "Hi, I'm Lisa", "The sun rises" → SENTENCE @@ -38,7 +38,7 @@ class SequenceMetadata(BaseModel): description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences" ) difficulty: int = Field( - description="Difficulty 1-5 for that grade." + description="Difficulty 1-3 for that grade." 
) @@ -52,59 +52,7 @@ class SequenceOutput(BaseModel): output_parser = PydanticOutputParser(pydantic_object=SequenceOutput) -# ============== CONFIG ============== -# ============== CONFIG ============== -GAME_CONFIG = { - "game_type": "sequence", - "display_name": "Arrange Sequence", - "description": "Arrange sentences or words in order", - "type_id": 2, - - "active": True, - - "max_items": 10, - "schema": SequenceItem, - "output_schema": SequenceOutput, - "output_parser": output_parser, - - "input_format_rules": [ - "Text MUST be a list of items (words, phrases, sentences) to be ordered.", - "Do NOT generate sequence from multiple choice questions (A/B/C/D).", - "Do NOT generate sequence if the text is a quiz or test format.", - ], - - # 1. Recognition Rules (for Analyzer) - "analyzer_rules": [ - "Text is a list of words, phrases, or sentences suitable for ordering", - "Items are separated by commas, semicolons, or newlines", - "Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'", - "NO questions required - just a list of items", - "Text is NOT a long essay or complex dialogue", - ], - - # 2. Rules tạo nội dung (cho Generator) - "generation_rules": [ - "KEEP ORIGINAL LANGUAGE - Do NOT translate", - "Analyze text semantically to extract meaningful items", - "For each item, decide type: WORD/PHRASE or SENTENCE", - "- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field", - "- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field", - "NEVER fill both fields for the same item", - "Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)", - "Clean up OCR noise, numbering (e.g. '1. 
# ============== CONFIG ==============
GAME_CONFIG = {
    "game_type": "sequence",
    "display_name": "Arrange Sequence",
    "description": "Arrange sentences or words in order",
    # NOTE(review): type_id changed to 1, but api.py's request docstring still
    # maps 2=sequence_sentence / 3=sequence_word — confirm the id scheme.
    "type_id": 1,

    "active": True,

    "max_items": 10,
    "schema": SequenceItem,
    "output_schema": SequenceOutput,
    "output_parser": output_parser,

    # Constraints checked against raw input before generation (Direct Mode).
    "input_format_rules": [
        "Text MUST be a list of items (words, phrases, sentences) to be ordered.",
        "Do NOT generate sequence from multiple choice questions (A/B/C/D).",
        "Do NOT generate sequence if the text is a quiz or test format.",
    ],

    # 1. Recognition rules (for the Analyzer)
    "analyzer_rules": [
        "Text is a list of words, phrases, or sentences suitable for ordering",
        "Items are separated by commas, semicolons, or newlines",
        "Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
        "NO questions required - just a list of items",
        "Text is NOT a long essay or complex dialogue",
    ],

    # 2. Content-generation rules (for the Generator)
    "generation_rules": [
        "KEEP ORIGINAL LANGUAGE - Do NOT translate",
        "Analyze text semantically to extract meaningful items",
        "For each item, decide type: WORD/PHRASE or SENTENCE",
        "- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
        "- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
        "NEVER fill both fields for the same item",
        "Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
        "Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",

        # CONSISTENCY RULES
        "CRITICAL: All extracted items MUST be of the SAME type.",
        "Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
        "If input has mixed types, pick the MAJORITY type and ignore the others.",

        # VISUAL FIELD COMPULSORY
        "image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
        "image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
    ],

    # EXAMPLES is defined earlier in this module, so reference it directly.
    # The previous `EXAMPLES if 'EXAMPLES' in globals() else []` guard was dead
    # code here and would have silently masked a missing-definition bug.
    "examples": EXAMPLES,
}
Set via env or config.api_key") return ChatGoogleGenerativeAI( model=config.model_name, temperature=config.temperature, - google_api_key=api_key + google_api_key=api_key, + version="v1", + additional_headers={ + "User-Agent": "PostmanRuntime/7.43.0", + "Accept": "*/*" + } ) elif provider == "openai": @@ -136,7 +142,8 @@ def get_llm(config: ModelConfig) -> BaseChatModel: return ChatOpenAI( model=config.model_name, temperature=config.temperature, - api_key=api_key + api_key=api_key, + base_url=config.base_url or None ) else: