This commit is contained in:
vuongps38770
2026-01-13 09:33:10 +07:00
parent 29544da4c6
commit 7c41ddaa82
9 changed files with 1362 additions and 599 deletions

1
.env
View File

@@ -0,0 +1 @@
PORT=2088

362
api.py
View File

@@ -5,12 +5,19 @@ from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from pathlib import Path from pathlib import Path
import re import re
from dotenv import load_dotenv
load_dotenv()
from src import ( from src import (
GameCore, get_registry, reload_games, GameCore,
get_active_game_types, get_active_type_ids, get_registry,
get_game_by_id, id_to_type, type_to_id, reload_games,
ModelConfig get_active_game_types,
get_active_type_ids,
get_game_by_id,
id_to_type,
type_to_id,
ModelConfig,
) )
@@ -18,7 +25,7 @@ from src import (
app = FastAPI( app = FastAPI(
title="Game Generator API", title="Game Generator API",
description="API tạo game giáo dục từ văn bản", description="API tạo game giáo dục từ văn bản",
version="2.0.0" version="2.0.0",
) )
app.add_middleware( app.add_middleware(
@@ -31,31 +38,43 @@ app.add_middleware(
# ============== REQUEST/RESPONSE MODELS ============== # ============== REQUEST/RESPONSE MODELS ==============
class LLMConfigRequest(BaseModel): class LLMConfigRequest(BaseModel):
provider: str = Field(default="gemini", description="ollama, gemini, openai") provider: str = Field(default="gemini", description="ollama, gemini, openai")
model_name: str = Field(default="gemini-2.0-flash-lite") model_name: str = Field(default="gemini-2.0-flash-lite")
api_key: Optional[str] = Field(default=None, description="API key (None = lấy từ env)") api_key: Optional[str] = Field(
default=None, description="API key (None = lấy từ env)"
)
temperature: float = Field(default=0.1) temperature: float = Field(default=0.1)
base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama") base_url: Optional[str] = Field(default=None, description="Base URL cho Ollama")
class GenerateRequest(BaseModel): class GenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10) text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(default=None, description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)") enabled_game_ids: Optional[List[int]] = Field(
default=None,
description="List of type_ids (1=quiz, 2=sequence_sentence, 3=sequence_word)",
)
run_analyzer: bool = Field(default=True) run_analyzer: bool = Field(default=True)
run_validator: bool = Field(default=True) run_validator: bool = Field(default=True)
max_items: Optional[int] = Field(default=3) max_items: Optional[int] = Field(default=100)
min_score: int = Field(default=50, description="Minimum score (0-100) for analyzer to include a game") min_score: int = Field(
default=50, description="Minimum score (0-100) for analyzer to include a game"
)
debug: bool = Field(default=False, description="Print prompts to server log") debug: bool = Field(default=False, description="Print prompts to server log")
# LLM config (optional - override global) # LLM config (optional - override global)
llm_config: Optional[LLMConfigRequest] = Field(default=None, description="Override LLM config") llm_config: Optional[LLMConfigRequest] = Field(
default=None, description="Override LLM config"
)
class TokenUsageResponse(BaseModel): class TokenUsageResponse(BaseModel):
prompt_tokens: int = 0 prompt_tokens: int = 0
completion_tokens: int = 0 completion_tokens: int = 0
total_tokens: int = 0 total_tokens: int = 0
input_chars: int = 0 # Character count sent to LLM
output_chars: int = 0 # Character count received from LLM
class GameScoreInfo(BaseModel): class GameScoreInfo(BaseModel):
@@ -66,12 +85,14 @@ class GameScoreInfo(BaseModel):
class GameResultData(BaseModel): class GameResultData(BaseModel):
"""Structure thống nhất cho mỗi game result""" """Structure thống nhất cho mỗi game result"""
items: List[Dict[str, Any]] = [] items: List[Dict[str, Any]] = []
metadata: Optional[Dict[str, Any]] = None metadata: Optional[Dict[str, Any]] = None
class CommonMetadataResponse(BaseModel): class CommonMetadataResponse(BaseModel):
"""Metadata chung cho toàn bộ kết quả generate""" """Metadata chung cho toàn bộ kết quả generate"""
title: str = "" title: str = ""
description: str = "" description: str = ""
grade: int = 0 grade: int = 0
@@ -92,7 +113,7 @@ class GenerateResponse(BaseModel):
class GameInfo(BaseModel): class GameInfo(BaseModel):
type_id: int type_id: int
game_type: str # Keep for reference game_type: str
display_name: str display_name: str
description: str description: str
active: bool active: bool
@@ -127,7 +148,7 @@ _current_config: Optional[ModelConfig] = None
def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore: def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore:
"""Get or create GameCore with optional config override""" """Get or create GameCore with optional config override"""
global _core, _current_config global _core, _current_config
if config_override: if config_override:
# Create new core with override config # Create new core with override config
config = ModelConfig( config = ModelConfig(
@@ -135,69 +156,77 @@ def get_core(config_override: Optional[LLMConfigRequest] = None) -> GameCore:
model_name=config_override.model_name, model_name=config_override.model_name,
api_key=config_override.api_key, api_key=config_override.api_key,
temperature=config_override.temperature, temperature=config_override.temperature,
base_url=config_override.base_url base_url=config_override.base_url,
) )
return GameCore(llm_config=config) return GameCore(llm_config=config)
if _core is None: if _core is None:
# Default: tự detect từ env # Default: tự detect từ env
_core = GameCore() _core = GameCore()
_current_config = _core.llm_config _current_config = _core.llm_config
return _core return _core
# ============== ENDPOINTS ============== # ============== ENDPOINTS ==============
@app.post("/generate", response_model=GenerateResponse) @app.post("/generate", response_model=GenerateResponse)
async def generate_games(request: GenerateRequest): async def generate_games(request: GenerateRequest):
"""Generate games from text with scoring""" """Generate games from text with scoring"""
try: try:
core = get_core(request.llm_config) core = get_core(request.llm_config)
# Convert type_ids to game_types # Convert type_ids to game_types
if request.enabled_game_ids: if request.enabled_game_ids:
games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else: else:
games = get_active_game_types() games = get_active_game_types()
result = core.run_multi( result = await core.run_multi_async(
text=request.text, text=request.text,
enabled_games=games, enabled_games=games,
max_items=request.max_items or 3, max_items=request.max_items or 100,
min_score=request.min_score,
validate=request.run_validator, validate=request.run_validator,
debug=request.debug debug=request.debug,
) )
# Convert game_types to type_ids in response # Convert game_types to type_ids in response
game_ids = [type_to_id(g) for g in result.get("games", [])] game_ids = [type_to_id(g) for g in result.get("games", [])]
# Convert game_scores # Convert game_scores
game_scores = [] game_scores = []
for s in result.get("game_scores", []): for s in result.get("game_scores", []):
game_scores.append(GameScoreInfo( game_scores.append(
type_id=type_to_id(s.get("type", "")), GameScoreInfo(
score=s.get("score", 0), type_id=type_to_id(s.get("type", "")),
reason=s.get("reason", "") score=s.get("score", 0),
)) reason=s.get("reason", ""),
)
)
# Convert results keys to type_ids # Convert results keys to type_ids
results_by_id = {} results_by_id = {}
for game_type, items in result.get("results", {}).items(): for game_type, items in result.get("results", {}).items():
tid = type_to_id(game_type) tid = type_to_id(game_type)
if tid > 0: if tid >= 0: # 0=quiz, 1=sequence are valid
results_by_id[tid] = items results_by_id[tid] = items
# Get common metadata from analyzer # Get common metadata from analyzer
core_meta = result.get("metadata", {}) core_meta = result.get("metadata", {})
common_metadata = CommonMetadataResponse( common_metadata = (
title=core_meta.get("title", ""), CommonMetadataResponse(
description=core_meta.get("description", ""), title=core_meta.get("title", ""),
grade=core_meta.get("grade", 0), description=core_meta.get("description", ""),
difficulty=core_meta.get("difficulty", 0) grade=core_meta.get("grade", 0),
) if core_meta else None difficulty=core_meta.get("difficulty", 0),
)
if core_meta
else None
)
return GenerateResponse( return GenerateResponse(
success=result.get("success", False), success=result.get("success", False),
games=game_ids, games=game_ids,
@@ -206,25 +235,120 @@ async def generate_games(request: GenerateRequest):
results=results_by_id, results=results_by_id,
llm=result.get("llm"), llm=result.get("llm"),
token_usage=result.get("token_usage"), token_usage=result.get("token_usage"),
errors=result.get("errors", []) errors=result.get("errors", []),
) )
except Exception as e: except Exception as e:
return GenerateResponse( return GenerateResponse(
success=False, success=False, games=[], game_scores=[], results={}, errors=[str(e)]
games=[], )
game_scores=[],
results={},
errors=[str(e)] # ============== FAST GENERATE (1 API call - OPTIMIZED) ==============
# Request body accepted by the POST /generate/fast handler (generate_fast).
class FastGenerateRequest(BaseModel):
# Source text the games are generated from; values shorter than 10 characters are rejected by validation.
text: str = Field(description="Input text", min_length=10)
# Optional whitelist of game type_ids. When it is None or empty, generate_fast falls back to get_active_game_types().
enabled_game_ids: Optional[List[int]] = Field(
default=None, description="Limit type_ids"
)
# Forwarded unchanged to core.run_fast_async(max_items=...).
max_items: int = Field(default=100, description="Max items per game")
# Forwarded unchanged to core.run_fast_async(min_score=...). The 0-100 range is documented only, not enforced here.
min_score: int = Field(default=50, description="Min score 0-100 to include game")
# Forwarded to core.run_fast_async as the `validate` argument.
run_validator: bool = Field(default=True)
# Forwarded to core.run_fast_async as the `debug` argument.
debug: bool = Field(default=False)
# Optional per-request LLM override; handed to get_core(), which builds a dedicated GameCore when it is set.
llm_config: Optional[LLMConfigRequest] = Field(default=None)
@app.post("/generate/fast", response_model=GenerateResponse)
async def generate_fast(request: FastGenerateRequest):
    """
    OPTIMIZED: one API call to analyze the text and generate ALL suitable games.

    Compared with /generate (2+ calls):
    - Only 1 API call
    - Saves quota/tokens
    - Faster

    Compared with /generate/single:
    - Returns MANY games (not just 1)

    Args:
        request: Input text plus the optional game filter, limits and
            per-request LLM override.

    Returns:
        A GenerateResponse. Failures are reported in-band
        (``success=False`` with the message in ``errors``) instead of
        being raised to the client.
    """
    try:
        core = get_core(request.llm_config)

        # Convert type_ids to game_types. Each id is resolved exactly once;
        # ids that do not resolve to a game type are dropped.
        if request.enabled_game_ids:
            games = [
                game_type
                for game_type in map(id_to_type, request.enabled_game_ids)
                if game_type
            ]
        else:
            games = get_active_game_types()

        result = await core.run_fast_async(
            text=request.text,
            enabled_games=games,
            max_items=request.max_items,
            min_score=request.min_score,
            validate=request.run_validator,
            debug=request.debug,
        )

        # Convert to response format (same as /generate)
        game_ids = [type_to_id(g) for g in result.get("games", [])]

        game_scores = [
            GameScoreInfo(
                type_id=type_to_id(s.get("type", "")),
                score=s.get("score", 0),
                reason=s.get("reason", ""),
            )
            for s in result.get("game_scores", [])
        ]

        results_by_id = {}
        for game_type, data in result.get("results", {}).items():
            tid = type_to_id(game_type)
            # type_id 0 is accepted as a valid id here.
            # NOTE(review): the request models document ids starting at 1
            # (1=quiz, ...); confirm what type_to_id returns for an unknown
            # game type so unknown types cannot slip through this check.
            if tid >= 0:
                results_by_id[tid] = data

        # Metadata shared by the whole result; omitted when the core
        # returned none.
        core_meta = result.get("metadata", {})
        common_metadata = None
        if core_meta:
            common_metadata = CommonMetadataResponse(
                title=core_meta.get("title", ""),
                description=core_meta.get("description", ""),
                grade=core_meta.get("grade", 0),
                difficulty=core_meta.get("difficulty", 0),
            )

        return GenerateResponse(
            success=result.get("success", False),
            games=game_ids,
            game_scores=game_scores,
            metadata=common_metadata,
            results=results_by_id,
            api_calls=1,  # Always 1 for fast
            llm=result.get("llm"),
            token_usage=result.get("token_usage"),
            errors=result.get("errors", []),
        )

    except Exception as e:
        # Top-level API boundary: report the failure in the response body.
        return GenerateResponse(
            success=False, games=[], game_scores=[], results={}, errors=[str(e)]
        )
# ============== SINGLE BEST (1 PROMPT) ============== # ============== SINGLE BEST (1 PROMPT) ==============
class SingleGenerateRequest(BaseModel): class SingleGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10) text: str = Field(description="Input text", min_length=10)
enabled_game_ids: Optional[List[int]] = Field(default=None, description="Limit type_ids to choose from") enabled_game_ids: Optional[List[int]] = Field(
max_items: int = Field(default=3, description="Max items to generate") default=None, description="Limit type_ids to choose from"
)
max_items: int = Field(default=100, description="Max items to generate")
run_validator: bool = Field(default=True) run_validator: bool = Field(default=True)
debug: bool = Field(default=False) debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None) llm_config: Optional[LLMConfigRequest] = Field(default=None)
@@ -244,32 +368,34 @@ class SingleGenerateResponse(BaseModel):
async def generate_single_game(request: SingleGenerateRequest): async def generate_single_game(request: SingleGenerateRequest):
""" """
Generate 1 game phù hợp nhất trong 1 prompt duy nhất. Generate 1 game phù hợp nhất trong 1 prompt duy nhất.
- Analyze text để chọn game type tốt nhất - Analyze text để chọn game type tốt nhất
- Generate items cho game đó - Generate items cho game đó
- Tất cả trong 1 API call - Tất cả trong 1 API call
""" """
try: try:
core = get_core(request.llm_config) core = get_core(request.llm_config)
# Convert type_ids to game_types # Convert type_ids to game_types
if request.enabled_game_ids: if request.enabled_game_ids:
games = [id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)] games = [
id_to_type(tid) for tid in request.enabled_game_ids if id_to_type(tid)
]
else: else:
games = None games = None
result = core.run_single( result = core.run_single(
text=request.text, text=request.text,
enabled_games=games, enabled_games=games,
max_items=request.max_items, max_items=request.max_items,
debug=request.debug, debug=request.debug,
validate=request.run_validator validate=request.run_validator,
) )
# Convert game_type to type_id # Convert game_type to type_id
game_type = result.get("game_type") game_type = result.get("game_type")
tid = type_to_id(game_type) if game_type else None tid = type_to_id(game_type) if game_type else None
return SingleGenerateResponse( return SingleGenerateResponse(
success=result.get("success", False), success=result.get("success", False),
type_id=tid, type_id=tid,
@@ -277,21 +403,19 @@ async def generate_single_game(request: SingleGenerateRequest):
items=result.get("items", []), items=result.get("items", []),
token_usage=result.get("token_usage"), token_usage=result.get("token_usage"),
llm=result.get("llm"), llm=result.get("llm"),
errors=result.get("errors", []) errors=result.get("errors", []),
) )
except Exception as e: except Exception as e:
return SingleGenerateResponse( return SingleGenerateResponse(success=False, errors=[str(e)])
success=False,
errors=[str(e)]
)
# ============== DIRECT GENERATE (1 game cụ thể, không analyze) ============== # ============== DIRECT GENERATE (1 game cụ thể, không analyze) ==============
class DirectGenerateRequest(BaseModel): class DirectGenerateRequest(BaseModel):
text: str = Field(description="Input text", min_length=10) text: str = Field(description="Input text", min_length=10)
max_items: int = Field(default=3, description="Max items to generate") max_items: int = Field(default=100, description="Max items to generate")
run_validator: bool = Field(default=True) run_validator: bool = Field(default=True)
debug: bool = Field(default=False) debug: bool = Field(default=False)
llm_config: Optional[LLMConfigRequest] = Field(default=None) llm_config: Optional[LLMConfigRequest] = Field(default=None)
@@ -299,6 +423,7 @@ class DirectGenerateRequest(BaseModel):
class DirectGenerateResponse(BaseModel): class DirectGenerateResponse(BaseModel):
"""Response thống nhất, giống GenerateResponse nhưng cho 1 game""" """Response thống nhất, giống GenerateResponse nhưng cho 1 game"""
success: bool success: bool
games: List[int] = [] # Single type_id in list games: List[int] = [] # Single type_id in list
results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse results: Dict[int, GameResultData] = {} # Same structure as GenerateResponse
@@ -322,28 +447,28 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
return DirectGenerateResponse( return DirectGenerateResponse(
success=False, success=False,
games=[type_id], games=[type_id],
errors=[f"Game with type_id={type_id} not found"] errors=[f"Game with type_id={type_id} not found"],
) )
core = get_core(request.llm_config) core = get_core(request.llm_config)
result = core.generate( result = core.generate(
game_type=game_type, game_type=game_type,
text=request.text, text=request.text,
max_items=request.max_items, max_items=request.max_items,
validate=request.run_validator, validate=request.run_validator,
debug=request.debug debug=request.debug,
) )
format_error = result.get("format_error") format_error = result.get("format_error")
data = result.get("data") or {} data = result.get("data") or {}
# Build results với structure thống nhất # Build results với structure thống nhất
game_result = GameResultData( game_result = GameResultData(
items=data.get("items", []) if isinstance(data, dict) else [], items=data.get("items", []) if isinstance(data, dict) else [],
metadata=data.get("metadata") if isinstance(data, dict) else None metadata=data.get("metadata") if isinstance(data, dict) else None,
) )
return DirectGenerateResponse( return DirectGenerateResponse(
success=result.get("success", False), success=result.get("success", False),
games=[type_id], games=[type_id],
@@ -352,15 +477,11 @@ async def generate_direct(type_id: int, request: DirectGenerateRequest):
format_error=format_error, format_error=format_error,
token_usage=result.get("token_usage"), token_usage=result.get("token_usage"),
llm=result.get("llm"), llm=result.get("llm"),
errors=result.get("errors", []) errors=result.get("errors", []),
) )
except Exception as e: except Exception as e:
return DirectGenerateResponse( return DirectGenerateResponse(success=False, games=[type_id], errors=[str(e)])
success=False,
games=[type_id],
errors=[str(e)]
)
@app.get("/games", response_model=GamesListResponse) @app.get("/games", response_model=GamesListResponse)
@@ -368,29 +489,29 @@ async def list_games():
"""Lấy danh sách games""" """Lấy danh sách games"""
registry = get_registry() registry = get_registry()
all_games = registry.get_all_games_including_inactive() all_games = registry.get_all_games_including_inactive()
games_list = [] games_list = []
active_count = 0 active_count = 0
for game_type, game in all_games.items(): for game_type, game in all_games.items():
games_list.append(GameInfo( games_list.append(
type_id=game.type_id, GameInfo(
game_type=game.game_type, type_id=game.type_id,
display_name=game.display_name, game_type=game.game_type,
description=game.description, display_name=game.display_name,
active=game.active, description=game.description,
max_items=game.max_items, active=game.active,
)) max_items=game.max_items,
)
)
if game.active: if game.active:
active_count += 1 active_count += 1
# Sort by type_id # Sort by type_id
games_list.sort(key=lambda g: g.type_id) games_list.sort(key=lambda g: g.type_id)
return GamesListResponse( return GamesListResponse(
total=len(games_list), total=len(games_list), active_count=active_count, games=games_list
active_count=active_count,
games=games_list
) )
@@ -409,28 +530,28 @@ async def deactivate_game(game_type: str):
def _set_game_active(game_type: str, active: bool) -> ActionResponse:
    """Rewrite the ``"active"`` flag in a game's definition file and reload.

    The game file ``src/games/<game_type>.py`` is edited in place: every
    ``"active": True|False`` entry is set to the requested value, then
    ``reload_games()`` is called so the change takes effect.

    Args:
        game_type: Name of the game module (file stem inside ``src/games``).
        active: Desired value of the ``"active"`` flag.

    Returns:
        An ActionResponse describing the change.

    Raises:
        HTTPException: 404 when the game file does not exist or the name
            would resolve outside the games directory; 400 when the file
            has no ``"active"`` field to rewrite.
    """
    games_dir = Path(__file__).parent / "src" / "games"
    game_file = games_dir / f"{game_type}.py"

    # game_type is caller-supplied (presumably straight from the request
    # path), and this function WRITES to the resulting file. Refuse any name
    # that escapes the games directory, e.g. one containing "../".
    inside_games_dir = game_file.resolve().parent == games_dir.resolve()
    if not inside_games_dir or not game_file.exists():
        raise HTTPException(404, f"Game '{game_type}' not found")

    content = game_file.read_text(encoding="utf-8")

    pattern = r'("active"\s*:\s*)(True|False)'
    new_value = "True" if active else "False"

    # One pass both detects the field and performs the replacement.
    new_content, replaced = re.subn(pattern, rf"\g<1>{new_value}", content)
    if not replaced:
        raise HTTPException(400, f"Cannot find 'active' field in {game_type}.py")

    game_file.write_text(new_content, encoding="utf-8")

    reload_games()

    action = "activated" if active else "deactivated"
    return ActionResponse(
        success=True,
        message=f"Game '{game_type}' has been {action}",
        game_type=game_type,
        active=active,
    )
@@ -438,16 +559,16 @@ def _set_game_active(game_type: str, active: bool) -> ActionResponse:
async def get_llm_config():
    """Show the LLM config currently in use."""
    global _current_config

    # Nothing cached yet: take the config from the shared core and cache it.
    if _current_config is None:
        _current_config = get_core().llm_config

    cfg = _current_config
    return LLMConfigResponse(
        provider=cfg.provider,
        model_name=cfg.model_name,
        temperature=cfg.temperature,
        base_url=cfg.base_url,
    )
@@ -455,50 +576,43 @@ async def get_llm_config():
async def set_llm_config(config: LLMConfigRequest):
    """Change the global LLM config."""
    global _core, _current_config

    requested = ModelConfig(
        provider=config.provider,
        model_name=config.model_name,
        api_key=config.api_key,
        temperature=config.temperature,
        base_url=config.base_url,
    )

    try:
        # The globals are only replaced once the new core was built.
        replacement = GameCore(llm_config=requested)
        _core = replacement
        _current_config = requested

        summary = f"LLM changed to {config.provider}/{config.model_name}"
        return ActionResponse(success=True, message=summary)
    except Exception as exc:
        # Failure is reported in the response body; the previous core stays.
        failure = f"Failed to change LLM: {str(exc)}"
        return ActionResponse(success=False, message=failure)
@app.post("/reload", response_model=ActionResponse)
async def reload_all_games():
    """Reload games"""
    global _core

    reload_games()
    # Drop the cached core so it is rebuilt on next use.
    _core = None

    active_types = get_active_game_types()
    return ActionResponse(
        success=True,
        message=f"Reloaded. Active games: {active_types}",
    )
@app.get("/health")
async def health_check():
    # Fixed "healthy" status plus whatever get_active_game_types() reports.
    payload = {"status": "healthy"}
    payload["active_games"] = get_active_game_types()
    return payload
# ============== STARTUP ============== # ============== STARTUP ==============
@@ -510,4 +624,8 @@ async def startup():
if __name__ == "__main__":
    # Imported locally so the script entry point does not depend on a
    # module-level `import os` being present.
    import os

    import uvicorn

    # The listening port must come from the environment (e.g. the .env file
    # loaded at import time); there is deliberately no hard-coded fallback.
    port = os.getenv("PORT")
    if not port:
        raise ValueError("Missing required environment variable: PORT")
    try:
        port_number = int(port)
    except ValueError:
        raise ValueError(
            f"Environment variable PORT must be an integer, got {port!r}"
        ) from None

    uvicorn.run(app, host="0.0.0.0", port=port_number)

View File

@@ -1,23 +1,57 @@
""" """
games/match.py - Match Game - Match sentences with images games/match.py - Match Game - Match words/phrases with images
type_id = 3
Input: Danh sách từ hoặc cụm từ
Output: Mỗi item gồm từ/cụm từ và mô tả hình ảnh tương ứng
""" """
from typing import List
from typing import List, Literal
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser from langchain_core.output_parsers import PydanticOutputParser
# ============== SCHEMA ============== # ============== SCHEMA ==============
class MatchItem(BaseModel): class MatchItem(BaseModel):
word: str = Field(description="The sentence to be matched (EXACT copy from source)") """Schema cho 1 item của Match game"""
match_with: str = Field(description="Short keyword for reference")
original_quote: str = Field(description="EXACT quote from source text") word: str = Field(
image_description: str = Field(default="", description="Detailed visual description for image generation/search") description="The word or phrase to be matched (EXACT copy from source, cleaned of numbering)"
image_is_complex: bool = Field(default=False, description="True if image needs precise quantities, humans, or multiple detailed objects") )
original_quote: str = Field(
description="EXACT quote from source text before any cleaning"
)
image_description: str = Field(
description="Detailed visual description for image generation in ENGLISH. Must be specific and visual."
)
image_keywords: List[str] = Field(
default=[], description="2-3 English keywords for image search"
)
image_is_complex: bool = Field(
default=False,
description="True if image needs precise quantities, humans, or multiple detailed objects",
)
class MatchMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary of the content")
grade: int = Field(
description="Estimated grade level 1-5 (1=easy/young, 5=advanced)"
)
type: Literal["match"] = Field(default="match", description="Game type")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
class MatchOutput(BaseModel): class MatchOutput(BaseModel):
"""Output wrapper for match items""" """Output wrapper for match items"""
items: List[MatchItem] = Field(description="List of match items generated from source text")
items: List[MatchItem] = Field(
description="List of match items generated from source text"
)
metadata: MatchMetadata = Field(description="Metadata about the content")
# Output parser # Output parser
@@ -26,56 +60,110 @@ output_parser = PydanticOutputParser(pydantic_object=MatchOutput)
# ============== CONFIG ============== # ============== CONFIG ==============
GAME_CONFIG = { GAME_CONFIG = {
# === REQUIRED ===
"game_type": "match", "game_type": "match",
"type_id": 3,
"display_name": "Match with Image", "display_name": "Match with Image",
"description": "Match sentences with images", "description": "Match words or phrases with their corresponding images",
"active": True,
"min_items": 2,
"max_items": 10,
"schema": MatchItem, "schema": MatchItem,
"output_schema": MatchOutput, "output_schema": MatchOutput,
"output_parser": output_parser, "output_parser": output_parser,
# === OPTIONAL ===
"system_prompt": """Extract sentences and create image descriptions for matching game. "active": True,
The game will show images and players must match them with the correct sentences. "max_items": 10,
# Input validation rules
YOUR TASK: "input_format_rules": [
1. Extract meaningful sentences from the source text "Text MUST be a list of words or phrases separated by commas, semicolons, or newlines",
2. Create a DETAILED image_description that clearly represents the sentence "NOT suitable for long sentences or paragraphs",
3. The image should be distinct enough to match with its sentence "Each item should be a concrete noun/concept that can be visualized",
],
CRITICAL RULES: # Analyzer rules - khi nào nên chọn game này
1. KEEP THE ORIGINAL LANGUAGE - Do NOT translate the source text "analyzer_rules": [
2. original_quote MUST be an EXACT copy from source text "Text is a list of words or short phrases",
3. image_description must be DETAILED and SPECIFIC to the sentence content "Words represent concrete objects/concepts that can be visualized",
4. Each image should be visually distinguishable from others""", "Examples: 'apple, banana, orange' or 'cat; dog; bird'",
"NOT suitable for abstract concepts or long sentences",
],
# Generation rules - cách tạo nội dung
"generation_rules": [
"KEEP ORIGINAL LANGUAGE for 'word' field - Do NOT translate",
"original_quote = EXACT copy from source before cleaning",
"Clean numbering like '1.', 'a)', '' from word field",
"Each word/phrase should represent a visualizable concept",
# Image rules
"image_description: MUST be DETAILED visual description in ENGLISH",
"image_description: Describe colors, shapes, actions, context",
"image_keywords: 2-3 English keywords for search",
"image_is_complex: TRUE for humans, precise counts, complex scenes",
"NEVER leave image_description empty!",
# Quality rules
"Each image should be visually DISTINCT from others",
"Avoid generic descriptions - be specific",
],
"examples": [], # Defined below
} }
# ============== EXAMPLES ============== # ============== EXAMPLES ==============
EXAMPLES = [ EXAMPLES = [
{ {
"input": "The Sun is a star. The Moon orbits Earth.", "input": "apple; banana;",
"output": { "output": {
"items": [ "items": [
{ {
"word": "The Sun is a star.", "word": "apple",
"match_with": "sun", "original_quote": "apple",
"original_quote": "The Sun is a star.", "image_description": "A shiny red apple with a green leaf on top",
"image_description": "A bright glowing yellow sun with solar flares", "image_keywords": ["apple", "fruit", "red"],
"image_is_complex": False "image_is_complex": False,
}, },
{ {
"word": "The Moon orbits Earth.", "word": "banana",
"match_with": "moon", "original_quote": "banana",
"original_quote": "The Moon orbits Earth.", "image_description": "A curved yellow banana",
"image_description": "A grey moon circling around the blue Earth planet", "image_keywords": ["banana", "fruit", "yellow"],
"image_is_complex": False "image_is_complex": False,
} },
] ],
"metadata": {
"title": "Fruits",
"description": "Common fruits vocabulary",
"grade": 1,
"type": "match",
"difficulty": 1,
},
}, },
"why_suitable": "Has distinct concepts that can be visualized and matched" "why_suitable": "Simple words representing concrete objects that can be visualized",
} },
{
"input": "1. elephant\n2. giraffe\n",
"output": {
"items": [
{
"word": "elephant",
"original_quote": "1. elephant",
"image_description": "A large grey elephant with big ears and long trunk",
"image_keywords": ["elephant", "animal", "africa"],
"image_is_complex": False,
},
{
"word": "giraffe",
"original_quote": "2. giraffe",
"image_description": "A tall giraffe with brown spots and long neck",
"image_keywords": ["giraffe", "tall", "spots"],
"image_is_complex": False,
},
],
"metadata": {
"title": "African Animals",
"description": "Safari animals vocabulary",
"grade": 2,
"type": "match",
"difficulty": 1,
},
},
"why_suitable": "Numbered list of animals - numbering will be cleaned",
},
] ]
GAME_CONFIG["examples"] = EXAMPLES

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@
game_registry.py - Tự động load games từ thư mục games/ game_registry.py - Tự động load games từ thư mục games/
Hệ thống sẽ: Hệ thống sẽ:
1. Scan thư mục games/ 1. Scan thư mục games/
2. Load mọi file .py (trừ _template.py và __init__.py) 2. Load mọi file .py (trừ _template.py và __init__.py)
3. Chỉ load games có active: True 3. Chỉ load games có active: True
4. Đăng ký tự động vào registry 4. Đăng ký tự động vào registry
@@ -10,6 +10,7 @@ Hệ thống sẽ:
THÊM GAME MỚI = TẠO FILE TRONG games/ THÊM GAME MỚI = TẠO FILE TRONG games/
BẬT/TẮT GAME = SỬA active: True/False trong file game BẬT/TẮT GAME = SỬA active: True/False trong file game
""" """
import importlib.util import importlib.util
from pathlib import Path from pathlib import Path
from typing import Dict, List, Any, Optional from typing import Dict, List, Any, Optional
@@ -20,75 +21,78 @@ class GameRegistry:
""" """
Registry tự động load games từ thư mục games/ Registry tự động load games từ thư mục games/
Chỉ load games có active: True Chỉ load games có active: True
Supports lookup by: Supports lookup by:
- game_type (string): "quiz", "sequence" - game_type (string): "quiz", "sequence"
- type_id (int): 1, 2 - type_id (int): 1, 2
""" """
_instance: Optional["GameRegistry"] = None _instance: Optional["GameRegistry"] = None
_all_games: Dict[str, GameType] = {} # Keyed by game_type _all_games: Dict[str, GameType] = {} # Keyed by game_type
_id_map: Dict[int, str] = {} # type_id -> game_type _id_map: Dict[int, str] = {} # type_id -> game_type
_loaded: bool = False _loaded: bool = False
def __new__(cls): def __new__(cls):
if cls._instance is None: if cls._instance is None:
cls._instance = super().__new__(cls) cls._instance = super().__new__(cls)
cls._instance._all_games = {} cls._instance._all_games = {}
cls._instance._id_map = {} cls._instance._id_map = {}
return cls._instance return cls._instance
def __init__(self): def __init__(self):
if not self._loaded: if not self._loaded:
self._load_all_games() self._load_all_games()
self._loaded = True self._loaded = True
def _load_all_games(self):
    """Scan the ``games/`` directory next to this module and register every game found.

    Files whose name starts with ``_`` (``__init__.py``, ``_template.py``) and
    ``base.py`` are skipped. Each remaining file is loaded through
    ``self._load_game_from_file``; a definition that loads is stored in
    ``self._all_games`` under its ``game_type`` and, when its ``type_id`` is
    non-negative, in ``self._id_map``.

    Loading is best-effort: a file that raises is reported and skipped so one
    broken game cannot prevent the others from being registered.
    """
    games_dir = Path(__file__).parent / "games"
    if not games_dir.exists():
        print(f"⚠️ Games directory not found: {games_dir}")
        return

    # Path.glob() yields files in filesystem order; sorting makes the load
    # order (and therefore the winner of any id collision) deterministic.
    for file_path in sorted(games_dir.glob("*.py")):
        # Skip __init__.py, _template.py and base.py
        if file_path.name.startswith("_") or file_path.name == "base.py":
            continue

        try:
            game_def = self._load_game_from_file(file_path)
            if not game_def:
                continue

            self._all_games[game_def.game_type] = game_def

            if game_def.type_id >= 0:  # 0=quiz, 1=sequence are valid
                previous = self._id_map.get(game_def.type_id)
                # Two games sharing an id would otherwise overwrite each
                # other silently; keep last-wins behavior but say so.
                if previous is not None and previous != game_def.game_type:
                    print(
                        f"⚠️ Duplicate type_id {game_def.type_id}: "
                        f"{game_def.game_type} overrides {previous}"
                    )
                self._id_map[game_def.type_id] = game_def.game_type

            status = "" if game_def.active else "⏸️"
            print(
                f"{status} Loaded: {game_def.game_type} (id={game_def.type_id}, active={game_def.active})"
            )
        except Exception as e:
            print(f"❌ Error loading {file_path.name}: {e}")
def _load_game_from_file(self, file_path: Path) -> Optional[GameType]: def _load_game_from_file(self, file_path: Path) -> Optional[GameType]:
"""Load 1 game definition từ file""" """Load 1 game definition từ file"""
module_name = f"games.{file_path.stem}" module_name = f"games.{file_path.stem}"
spec = importlib.util.spec_from_file_location(module_name, file_path) spec = importlib.util.spec_from_file_location(module_name, file_path)
if spec is None or spec.loader is None: if spec is None or spec.loader is None:
return None return None
module = importlib.util.module_from_spec(spec) module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module) spec.loader.exec_module(module)
config = getattr(module, "GAME_CONFIG", None) config = getattr(module, "GAME_CONFIG", None)
examples = getattr(module, "EXAMPLES", []) examples = getattr(module, "EXAMPLES", [])
if config is None: if config is None:
return None return None
# Inject examples if not in config # Inject examples if not in config
if examples and "examples" not in config: if examples and "examples" not in config:
config["examples"] = examples config["examples"] = examples
return create_game_type(config) return create_game_type(config)
def reload(self): def reload(self):
"""Reload tất cả games""" """Reload tất cả games"""
self._all_games.clear() self._all_games.clear()
@@ -96,55 +100,57 @@ class GameRegistry:
self._loaded = False self._loaded = False
self._load_all_games() self._load_all_games()
self._loaded = True self._loaded = True
# ============== PUBLIC API ============== # ============== PUBLIC API ==============
def get_game(self, game_type: str) -> Optional[GameType]: def get_game(self, game_type: str) -> Optional[GameType]:
"""Lấy game by game_type (chỉ active)""" """Lấy game by game_type (chỉ active)"""
game = self._all_games.get(game_type) game = self._all_games.get(game_type)
return game if game and game.active else None return game if game and game.active else None
def get_game_by_id(self, type_id: int) -> Optional[GameType]: def get_game_by_id(self, type_id: int) -> Optional[GameType]:
"""Lấy game by type_id (chỉ active)""" """Lấy game by type_id (chỉ active)"""
game_type = self._id_map.get(type_id) game_type = self._id_map.get(type_id)
if game_type: if game_type:
return self.get_game(game_type) return self.get_game(game_type)
return None return None
def get_game_type_by_id(self, type_id: int) -> Optional[str]: def get_game_type_by_id(self, type_id: int) -> Optional[str]:
"""Convert type_id -> game_type""" """Convert type_id -> game_type"""
return self._id_map.get(type_id) return self._id_map.get(type_id)
def get_id_by_game_type(self, game_type: str) -> int: def get_id_by_game_type(self, game_type: str) -> int:
"""Convert game_type -> type_id""" """Convert game_type -> type_id. Returns -1 if not found."""
game = self._all_games.get(game_type) game = self._all_games.get(game_type)
return game.type_id if game else 0 return game.type_id if game else -1 # -1 = not found
def get_all_games(self) -> Dict[str, GameType]: def get_all_games(self) -> Dict[str, GameType]:
"""Lấy tất cả games ACTIVE""" """Lấy tất cả games ACTIVE"""
return {k: v for k, v in self._all_games.items() if v.active} return {k: v for k, v in self._all_games.items() if v.active}
def get_all_games_including_inactive(self) -> Dict[str, GameType]: def get_all_games_including_inactive(self) -> Dict[str, GameType]:
"""Lấy tất cả games (kể cả inactive)""" """Lấy tất cả games (kể cả inactive)"""
return self._all_games.copy() return self._all_games.copy()
def get_game_types(self) -> List[str]: def get_game_types(self) -> List[str]:
"""Lấy danh sách game types ACTIVE""" """Lấy danh sách game types ACTIVE"""
return [k for k, v in self._all_games.items() if v.active] return [k for k, v in self._all_games.items() if v.active]
def get_type_ids(self) -> List[int]:
    """Return the type_ids of all ACTIVE games.

    Ids start at 0 (0 = quiz, 1 = sequence), so 0 is a valid id. Only
    negative ids are excluded, matching the ``type_id >= 0`` rule applied
    when the id map is built.
    """
    return [
        game.type_id
        for game in self._all_games.values()
        if game.active and game.type_id >= 0
    ]
def get_analyzer_context(self) -> str: def get_analyzer_context(self) -> str:
"""Tạo context cho Analyzer (chỉ từ active games)""" """Tạo context cho Analyzer (chỉ từ active games)"""
context_parts = [] context_parts = []
for game_type, game in self._all_games.items(): for game_type, game in self._all_games.items():
if not game.active: if not game.active:
continue continue
hints = game.analyzer_rules # New field name hints = game.analyzer_rules # New field name
if hints: if hints:
hints_text = "\n - ".join(hints) hints_text = "\n - ".join(hints)
context_parts.append( context_parts.append(
@@ -152,9 +158,9 @@ class GameRegistry:
f" Description: {game.description}\n" f" Description: {game.description}\n"
f" Suitable when:\n - {hints_text}" f" Suitable when:\n - {hints_text}"
) )
return "\n\n".join(context_parts) return "\n\n".join(context_parts)
def is_active(self, game_type: str) -> bool: def is_active(self, game_type: str) -> bool:
"""Kiểm tra game có active không""" """Kiểm tra game có active không"""
game = self._all_games.get(game_type) game = self._all_games.get(game_type)

View File

@@ -4,88 +4,180 @@ games/_template.py - TEMPLATE CHO GAME MỚI
THÊM GAME MỚI CHỈ CẦN: THÊM GAME MỚI CHỈ CẦN:
1. Copy file này 1. Copy file này
2. Rename thành <game_type>.py (ví dụ: matching.py) 2. Rename thành <game_type>.py (ví dụ: matching.py)
3. Sửa nội dung bên trong 3. Sửa nội dung bên trong theo hướng dẫn
4. DONE! Hệ thống tự động nhận diện. 4. DONE! Hệ thống tự động nhận diện.
Không cần sửa bất kỳ file nào khác! Không cần sửa bất kỳ file nào khác!
""" """
from typing import List, Optional from typing import List, Literal, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
# ============== 1. SCHEMA ============== # ============== 1. ITEM SCHEMA ==============
# Định nghĩa structure của 1 item trong game # Định nghĩa structure của 1 item trong game
# BẮT BUỘC phải có: original_quote và explanation # BẮT BUỘC phải có: original_quote
class YourGameItem(BaseModel): class YourGameItem(BaseModel):
"""Schema cho 1 item của game""" """Schema cho 1 item của game"""
# Các trường BẮT BUỘC (để chống hallucination) # === TRƯỜNG BẮT BUỘC ===
original_quote: str = Field( original_quote: str = Field(
description="Trích dẫn NGUYÊN VĂN từ văn bản gốc" description="EXACT quote from source text - dùng để verify không hallucinate"
) )
explanation: str = Field(description="Giải thích")
# Thêm các trường riêng của game ở đây # === TRƯỜNG RIÊNG CỦA GAME ===
# Thêm các trường cần thiết cho game của bạn
# Ví dụ: # Ví dụ:
# question: str = Field(description="Câu hỏi") question: str = Field(description="The question")
# answer: str = Field(description="Đáp án") answer: str = Field(description="The correct answer")
# === TRƯỜNG HÌNH ẢNH (Khuyến nghị) ===
image_description: str = Field(default="", description="Visual description in English")
image_keywords: List[str] = Field(default=[], description="2-3 English keywords for image search")
image_is_complex: bool = Field(default=False, description="True if needs precise quantities/humans/complex scene")
# ============== 2. CONFIG ============== # ============== 2. METADATA SCHEMA ==============
# Cấu hình cho game # Metadata mô tả nội dung được generate
class YourGameMetadata(BaseModel):
"""Metadata đánh giá nội dung"""
title: str = Field(description="Title from source or short descriptive title")
description: str = Field(description="One sentence summary")
grade: int = Field(description="Grade level 1-5 (1=easy, 5=advanced)")
type: Literal["your_game"] = Field(default="your_game", description="Game type - MUST match game_type below")
difficulty: int = Field(description="Difficulty 1-5 for that grade")
# ============== 3. OUTPUT SCHEMA ==============
# Wrapper chứa danh sách items và metadata
class YourGameOutput(BaseModel):
"""Output wrapper - BẮT BUỘC phải có"""
items: List[YourGameItem] = Field(description="List of game items")
metadata: YourGameMetadata = Field(description="Metadata about the content")
# Output parser - tự động từ output schema
output_parser = PydanticOutputParser(pydantic_object=YourGameOutput)
# ============== 4. CONFIG ==============
# Cấu hình cho game - ĐÂY LÀ PHẦN QUAN TRỌNG NHẤT
GAME_CONFIG = { GAME_CONFIG = {
# Key duy nhất cho game (dùng trong API) # === REQUIRED FIELDS ===
"game_type": "your_game",
# Key duy nhất cho game (dùng trong API) - PHẢI unique
"game_type": "your_game",
# ID số nguyên unique - PHẢI khác các game khác
# Quiz=1, Sequence=2, ... tiếp tục từ 3
"type_id": 99, # TODO: Đổi thành số unique
# Tên hiển thị # Tên hiển thị
"display_name": "Tên Game", "display_name": "Your Game Name",
# Mô tả ngắn # Mô tả ngắn
"description": "Mô tả game của bạn", "description": "Description of your game",
# Số lượng items # Schema classes - BẮT BUỘC
"max_items": 5,
# Trỏ đến schema class
"schema": YourGameItem, "schema": YourGameItem,
"output_schema": YourGameOutput,
"output_parser": output_parser,
# Prompt cho LLM # === OPTIONAL FIELDS (có default) ===
"system_prompt": """Bạn là chuyên gia tạo [tên game].
# Game có active không
NHIỆM VỤ: [Mô tả nhiệm vụ] "active": True,
QUY TẮC: # Số lượng items tối đa
1. original_quote PHẢI là trích dẫn NGUYÊN VĂN "max_items": 10,
2. [Quy tắc khác]
3. [Quy tắc khác]""", # Rules validate input trước khi generate (Direct Mode)
"input_format_rules": [
"Text should contain ... suitable for this game.",
"Text MUST have ...",
],
# Rules cho Analyzer nhận diện game phù hợp
"analyzer_rules": [
"Text MUST contain ...",
"NOT suitable if text is ...",
],
# Rules cho Generator tạo nội dung
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"original_quote = EXACT quote from source text",
"ALL content must come from source only - do NOT invent",
# Thêm rules riêng cho game của bạn
"Your specific rule 1",
"Your specific rule 2",
# Visual fields
"image_description: MUST be visual description in ENGLISH",
"image_keywords: MUST provide 2-3 English keywords",
"NEVER leave image fields empty!",
],
# Examples - giúp LLM học format
"examples": [] # Sẽ định nghĩa bên dưới
} }
# ============== 3. EXAMPLES ============== # ============== 5. EXAMPLES ==============
# Ví dụ input/output để: # Ví dụ input/output để LLM học pattern
# - Analyzer học khi nào nên suggest game này
# - Generator dùng làm few-shot
EXAMPLES = [ EXAMPLES = [
{ {
# Input text mẫu # Input text mẫu
"input": "Văn bản mẫu ở đây...", "input": "Sample text for your game...",
# Output mong đợi # Output mong đợi - PHẢI match schema
"output": { "output": {
"items": [ "items": [
{ {
"original_quote": "Trích dẫn từ văn bản", "original_quote": "Exact quote from input",
"explanation": "Giải thích", "question": "Sample question?",
# Các trường khác của schema... "answer": "Sample answer",
"image_description": "Visual description",
"image_keywords": ["keyword1", "keyword2"],
"image_is_complex": False
} }
] ],
"metadata": {
"title": "Sample Title",
"description": "Sample description",
"grade": 2,
"type": "your_game",
"difficulty": 2
}
}, },
# Analyzer học từ trường này # Giải thích tại sao phù hợp - Analyzer học từ đây
"why_suitable": "Giải thích tại sao văn bản này phù hợp với game này" "why_suitable": "Explain why this input is suitable for this game"
}, },
# Thêm 1-2 examples nữa... # Thêm 1-2 examples nữa để LLM học tốt hơn...
] ]
# Gán examples vào config
GAME_CONFIG["examples"] = EXAMPLES
# ============== 6. POST PROCESS (Optional) ==============
# Function xử lý output sau khi LLM generate
def post_process_your_game(items: List[dict]) -> List[dict]:
    """Clean up or transform items after the LLM has generated them.

    Strips surrounding whitespace from each item's ``answer``. The items are
    modified in place and the same list object is returned.

    Args:
        items: Generated items as plain dicts.

    Returns:
        The list that was passed in, with string answers stripped.
    """
    for item in items:
        answer = item.get("answer")
        # The LLM may emit a number (or null) for "answer"; only strings can
        # be stripped, anything else is left untouched instead of raising.
        if isinstance(answer, str) and answer:
            item["answer"] = answer.strip()
    return items
# Đăng ký handler (optional)
# GAME_CONFIG["post_process_handler"] = post_process_your_game

View File

@@ -1,139 +1,172 @@
""" """
games/quiz.py - Quiz Game - Multiple choice questions games/quiz.py - Optimized for LLM Performance while keeping System Integrity
""" """
from typing import List, Literal
import re from typing import List, Literal, Optional
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, field_validator
from langchain_core.output_parsers import PydanticOutputParser from langchain_core.output_parsers import PydanticOutputParser
import re
# ============== SCHEMA ============== # ==========================================
# 1. OPTIMIZED SCHEMA (Thông minh hơn)
# ==========================================
class QuizItem(BaseModel):
    """One multiple-choice quiz item as returned by the LLM.

    ``answer`` and ``options`` are normalised before validation: list labels
    such as ``A.``, ``(1)`` or ``Q1:`` at the start and a trailing ``(x)``
    marker are removed, so the model only has to produce raw text.
    """

    # The LLM only produces raw data; cleaning is done in code.
    question: str = Field(description="Question text. Use ____ for blanks.")
    # Instruction shown with the question; has a default.
    request: str = Field(
        default="Choose the correct answer", description="Instruction type"
    )
    answer: str = Field(description="Correct answer text")
    options: List[str] = Field(description="List of options")
    original_quote: str = Field(description="Exact source sentence")

    # Image fields, grouped to keep the prompt short.
    image_description: str = Field(
        default="", description="Visual description (if needed)"
    )
    image_keywords: List[str] = Field(default=[])
    image_is_complex: bool = Field(default=False)

    @field_validator("answer", "options", mode="before")
    @classmethod
    def clean_prefixes(cls, v):
        """Strip list labels (``A.``, ``(B)``, ``1.``, ``Q2:``) from answer/options.

        Runs before type validation. A list is cleaned element by element;
        any other value is cleaned as a single entry.
        """

        def clean_str(text):
            # Leave None alone so validation reports a missing value instead
            # of accepting the literal string "None".
            if text is None:
                return text
            # Leading "(A)", "1.", "Q:" labels. The (?!\d) guard keeps
            # decimals intact: "3.14" must not be reduced to "14".
            text = re.sub(
                r"^(\([A-Za-z0-9]\)|[A-Za-z0-9]\.(?!\d)|Q\d*:)\s*",
                "",
                str(text),
                flags=re.IGNORECASE,
            )
            # Trailing "(1)" style marker.
            text = re.sub(r"\s*\([A-Za-z0-9]\)$", "", text)
            return text.strip()

        if isinstance(v, list):
            return [clean_str(item) for item in v]
        return clean_str(v)
class QuizMetadata(BaseModel): class QuizMetadata(BaseModel):
"""Metadata đánh giá nội dung""" title: str = Field(description="Short content title")
title: str = Field( description: str = Field(description="Summary")
description="Title for this content. Prefer title from source document if available and suitable, otherwise create a short descriptive title." grade: int = Field(description="Level 1-5")
) type: Literal["quiz"] = "quiz"
description: str = Field( difficulty: int = Field(description="Level 1-5")
description="Short description summarizing the content/topic of the quiz."
)
grade: int = Field(
description="Estimated grade level 1-5 (1=easy/young, 5=advanced/older). Judge by vocabulary, concepts, required knowledge."
)
type: Literal["quiz"] = Field(default="quiz", description="Game type (always 'quiz')")
difficulty: int = Field(
description="Difficulty 1-5 for that grade (1=very easy, 5=very hard). Judge by question complexity, number of options, abstract concepts."
)
class QuizOutput(BaseModel): class QuizOutput(BaseModel):
"""Output wrapper for quiz items""" items: List[QuizItem]
items: List[QuizItem] = Field(description="List of quiz items generated from source text") metadata: QuizMetadata
metadata: QuizMetadata = Field(description="Metadata about the quiz content")
# Output parser
output_parser = PydanticOutputParser(pydantic_object=QuizOutput) output_parser = PydanticOutputParser(pydantic_object=QuizOutput)
# ==========================================
# 2. COMPACT CONFIG (Giữ đủ key, giảm nội dung)
# ==========================================
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = { GAME_CONFIG = {
# --- SYSTEM FIELDS (Giữ nguyên không đổi) ---
"game_type": "quiz", "game_type": "quiz",
"display_name": "Quiz", "display_name": "Quiz",
"description": "Multiple choice questions", "description": "Multiple choice questions",
"type_id": 1, "type_id": 0,
"active": True, "active": True,
"max_items": 10, "max_items": 10,
"schema": QuizItem, "schema": QuizItem,
"output_schema": QuizOutput, "output_schema": QuizOutput,
"output_parser": output_parser, "output_parser": output_parser,
# --- USER UI HINTS (Rút gọn văn bản hiển thị) ---
"input_format_rules": [ "input_format_rules": [
"Text should contain facts or questions suitable for a quiz.", "Text must contain specific facts or Q&A content.",
"Prefer extracting existing multiple choice questions if available.", "Suitable for multiple choice extraction.",
"Text MUST contain questions with multiple choice options",
], ],
# --- PRE-CHECK LOGIC (Rút gọn) ---
# 1. Recognition Rules (for Analyzer)
"analyzer_rules": [ "analyzer_rules": [
"Text MUST contain questions with multiple choice options", "Contains questions with options OR factual statements.",
"NOT suitable if text is just a list of words with no questions", "Not just a list of unconnected words.",
], ],
# --- LLM INSTRUCTIONS ---
# 2. Rules tạo nội dung (cho Generator) "generation_rules": [
"generation_rules": [ "MODE: STRICT EXTRACTION & LOCALITY PRIORITIZED.",
"KEEP ORIGINAL LANGUAGE - Do NOT translate", "1. MANDATORY OPTIONS & LOCALITY: Only create a quiz item if 2-4 options are EXPLICITLY present and located immediately after/below the question. SKIP if options are shared in a 'Word Box' or 'Word Bank' tại đầu/cuối trang.",
"original_quote = EXACT quote from source text (full question block)", "2. ANSWER PRIORITY: Use the provided key if available. If the marker is empty, solve it yourself using grammar rules. Do not redefine existing keys.",
"ALL content must come from source only - do NOT invent", "3. ZERO FABRICATION: Do NOT invent distractors. Only extract what is explicitly present.",
"REMOVE unnecessary numbering: 'Question 1:', '(1)', '(2)', 'A.', 'B.' from question/options/answers", "4. LOGICAL AMBIGUITY: If a question is grammatically correct with multiple options but lacks context, SKIP IT.",
"STRICTLY CLEAN OUTPUT for 'answers': MUST contain ONLY the text content of the correct option.", "5. SEMANTIC OPTION EXTRACTION: Extract ONLY the meaningful word/phrase. Strip away ALL labels like (1), (A), or OCR noise.",
"FORBIDDEN in 'answers': Prefixes like '(1)', '(2)', 'A.', 'B.', '1.' - REMOVE THEM.", "6. SMART FILL-IN-THE-BLANK: If the question is a 'Fill in the blank' type, you MUST analyze the sentence structure and place the '____' at the grammatically correct position (e.g., 'Blood ____ oozing'). DO NOT blindly put it at the end. If the sentence is already a complete question (not a blank type), do not add '____'.",
"IMPORTANT: The 'answers' field MUST EXACTLY MATCH one of the 'options' values text-wise.", "7. METADATA: Fill metadata accurately based on content. Do not leave empty."
],
# VISUAL FIELD COMPULSORY # --- EXAMPLES (Chỉ giữ 1 cái tốt nhất để làm mẫu format) ---
"image_description: MUST be a visual description relevant to the question in ENGLISH.", "examples": [
"image_keywords: MUST provide 2-3 English keywords for search.", {
"image_is_complex: FALSE for simple/static objects, TRUE for quantities/humans/complex scenes", "input": "The giraffe has a long neck. Options: neck, leg, tail.",
"NEVER leave image fields empty!", "output": {
"items": [
{
"question": "The giraffe has a long ____.",
"request": "Fill in the blank",
"answer": "neck",
"options": ["neck", "leg", "tail"],
"original_quote": "The giraffe has a long neck.",
"image_description": "A giraffe",
"image_keywords": ["giraffe"],
"image_is_complex": False,
}
],
"metadata": {
"title": "Animals",
"description": "Giraffe anatomy",
"grade": 2,
"type": "quiz",
"difficulty": 1,
},
},
"why_suitable": "Valid extraction: Text has Fact + Options.",
}
], ],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
} }
def clean_prefix(text: str) -> str: # # ==========================================
"""Remove prefixes like (1), (A), 1., A. from text""" # # 3. HANDLER (Logic hậu xử lý gọn nhẹ)
if not text: return text # # ==========================================
# Regex: Start with ( (number/letter) ) OR number/letter dot. Followed by spaces. # def post_process_quiz(items: List[dict]) -> List[dict]:
return re.sub(r'^(\(\d+\)|\([A-Za-z]\)|\d+\.|[A-Za-z]\.)\s*', '', text).strip() # valid_items = []
# for item in items:
# options = item.get("options", [])
# answer = item.get("answer", "")
# if len(options) < 2:
# continue
# # Nếu có answer từ input, thì so khớp để làm sạch
# if answer:
# matched_option = next(
# (opt for opt in options if opt.lower() == answer.lower()), None
# )
# if matched_option:
# item["answer"] = matched_option
# # Nếu có answer mà không khớp option nào thì mới cân nhắc loại (hoặc để AI tự đoán lại)
# # Nếu answer rỗng (do ngoặc trống), ta vẫn giữ câu này lại
# # (với điều kiện LLM đã được dặn là phải tự điền vào trường answer)
# if not item.get("answer"):
# # Bạn có thể chọn loại bỏ hoặc tin tưởng vào đáp án LLM tự suy luận
# pass
# item["request"] = (
# "Fill in the blank"
# if "____" in item.get("question", "")
# else "Choose the correct answer"
# )
# valid_items.append(item)
# return valid_items
def post_process_quiz(items: List[dict]) -> List[dict]: # # Đăng ký handler
"""Clean up answers and options prefixes""" # GAME_CONFIG["post_process_handler"] = post_process_quiz
for item in items:
# Clean answers
if item.get("answers"):
item["answers"] = clean_prefix(item["answers"])
# Clean options
if item.get("options") and isinstance(item["options"], list):
item["options"] = [clean_prefix(opt) for opt in item["options"]]
return items
# Register handler
GAME_CONFIG["post_process_handler"] = post_process_quiz
# ============== EXAMPLES ==============
EXAMPLES = [
{
"input": "The Sun is a star at the center of the Solar System.",
"output": {
"items": [{
"question": "Where is the Sun located?",
"answers": "At the center of the Solar System",
"options": ["At the center of the Solar System", "At the edge of the Solar System", "Near the Moon", "Outside the universe"],
"original_quote": "The Sun is a star at the center of the Solar System.",
"image_description": "The sun in the middle of planets",
"image_keywords": ["sun", "planets"],
"image_is_complex": False
}]
},
"why_suitable": "Has clear facts"
}
]

View File

@@ -1,6 +1,6 @@
""" """
games/sequence.py - Arrange Sequence Game (Sentences OR Words) games/sequence.py - Arrange Sequence Game (Sentences OR Words)
type_id = 2 type_id = 1
LLM tự quyết định dựa vào ngữ nghĩa: LLM tự quyết định dựa vào ngữ nghĩa:
- "good morning", "apple", "happy" → WORD - "good morning", "apple", "happy" → WORD
- "Hi, I'm Lisa", "The sun rises" → SENTENCE - "Hi, I'm Lisa", "The sun rises" → SENTENCE
@@ -38,7 +38,7 @@ class SequenceMetadata(BaseModel):
description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences" description="LLM decides: 'word' for words/phrases, 'sentence' for complete sentences"
) )
difficulty: int = Field( difficulty: int = Field(
description="Difficulty 1-5 for that grade." description="Difficulty 1-3 for that grade."
) )
@@ -52,59 +52,7 @@ class SequenceOutput(BaseModel):
output_parser = PydanticOutputParser(pydantic_object=SequenceOutput) output_parser = PydanticOutputParser(pydantic_object=SequenceOutput)
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
"game_type": "sequence",
"display_name": "Arrange Sequence",
"description": "Arrange sentences or words in order",
"type_id": 2,
"active": True,
"max_items": 10,
"schema": SequenceItem,
"output_schema": SequenceOutput,
"output_parser": output_parser,
"input_format_rules": [
"Text MUST be a list of items (words, phrases, sentences) to be ordered.",
"Do NOT generate sequence from multiple choice questions (A/B/C/D).",
"Do NOT generate sequence if the text is a quiz or test format.",
],
# 1. Recognition Rules (for Analyzer)
"analyzer_rules": [
"Text is a list of words, phrases, or sentences suitable for ordering",
"Items are separated by commas, semicolons, or newlines",
"Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
"NO questions required - just a list of items",
"Text is NOT a long essay or complex dialogue",
],
# 2. Rules tạo nội dung (cho Generator)
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"Analyze text semantically to extract meaningful items",
"For each item, decide type: WORD/PHRASE or SENTENCE",
"- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
"- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
"NEVER fill both fields for the same item",
"Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
"Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
# CONSISTENCY RULES
"CRITICAL: All extracted items MUST be of the SAME type.",
"Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
"If input has mixed types, pick the MAJORITY type and ignore the others.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
"image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}
# ============== EXAMPLES ============== # ============== EXAMPLES ==============
@@ -171,3 +119,59 @@ EXAMPLES = [
"why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field" "why": "These are PHRASES/GREETINGS, not complete sentences → use 'word' field"
} }
] ]
# ============== CONFIG ==============
# ============== CONFIG ==============
GAME_CONFIG = {
"game_type": "sequence",
"display_name": "Arrange Sequence",
"description": "Arrange sentences or words in order",
"type_id": 1,
"active": True,
"max_items": 10,
"schema": SequenceItem,
"output_schema": SequenceOutput,
"output_parser": output_parser,
"input_format_rules": [
"Text MUST be a list of items (words, phrases, sentences) to be ordered.",
"Do NOT generate sequence from multiple choice questions (A/B/C/D).",
"Do NOT generate sequence if the text is a quiz or test format.",
],
# 1. Recognition Rules (for Analyzer)
"analyzer_rules": [
"Text is a list of words, phrases, or sentences suitable for ordering",
"Items are separated by commas, semicolons, or newlines",
"Example: 'apple, banana, orange' or 'Sentence 1; Sentence 2'",
"NO questions required - just a list of items",
"Text is NOT a long essay or complex dialogue",
],
# 2. Rules tạo nội dung (cho Generator)
"generation_rules": [
"KEEP ORIGINAL LANGUAGE - Do NOT translate",
"Analyze text semantically to extract meaningful items",
"For each item, decide type: WORD/PHRASE or SENTENCE",
"- If item is a WORD/PHRASE (label, noun, greeting) -> Fill 'word' field",
"- If item is a COMPLETE SENTENCE (subject+verb) -> Fill 'sentence' field",
"NEVER fill both fields for the same item",
"Set metadata.sub_type = 'word' or 'sentence' (all items should match sub_type)",
"Clean up OCR noise, numbering (e.g. '1. Apple' -> 'Apple')",
# CONSISTENCY RULES
"CRITICAL: All extracted items MUST be of the SAME type.",
"Choose ONE type for the whole list: either ALL 'word' OR ALL 'sentence'.",
"If input has mixed types, pick the MAJORITY type and ignore the others.",
# VISUAL FIELD COMPULSORY
"image_description: MUST be a visual description of the item in ENGLISH. Example: 'A red apple', 'Two people shaking hands'",
"image_keywords: MUST provide 2-3 English keywords for search. Example: ['apple', 'fruit', 'red']",
],
"examples": EXAMPLES if 'EXAMPLES' in globals() else []
}

View File

@@ -74,7 +74,7 @@ DEFAULT_CONFIGS = {
"openai": ModelConfig( "openai": ModelConfig(
provider="openai", provider="openai",
model_name="gpt-4o-mini", model_name="gpt-4o-mini",
temperature=0.1 temperature=0.1,
), ),
"openai_light": ModelConfig( "openai_light": ModelConfig(
provider="openai", provider="openai",
@@ -117,13 +117,19 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
from langchain_google_genai import ChatGoogleGenerativeAI from langchain_google_genai import ChatGoogleGenerativeAI
api_key = config.api_key or os.getenv("GOOGLE_API_KEY") api_key = config.api_key or os.getenv("GOOGLE_API_KEY")
print("Using GOOGLE_API_KEY:", api_key)
if not api_key: if not api_key:
raise ValueError("GOOGLE_API_KEY required for Gemini. Set via env or config.api_key") raise ValueError("GOOGLE_API_KEY required for Gemini. Set via env or config.api_key")
return ChatGoogleGenerativeAI( return ChatGoogleGenerativeAI(
model=config.model_name, model=config.model_name,
temperature=config.temperature, temperature=config.temperature,
google_api_key=api_key google_api_key=api_key,
version="v1",
additional_headers={
"User-Agent": "PostmanRuntime/7.43.0",
"Accept": "*/*"
}
) )
elif provider == "openai": elif provider == "openai":
@@ -136,7 +142,8 @@ def get_llm(config: ModelConfig) -> BaseChatModel:
return ChatOpenAI( return ChatOpenAI(
model=config.model_name, model=config.model_name,
temperature=config.temperature, temperature=config.temperature,
api_key=api_key api_key=api_key,
base_url=config.base_url or None
) )
else: else: