From 0f325c81890334654e9789ef4234475b534fa6f3 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Tue, 13 Jan 2026 23:20:01 +0800
Subject: [PATCH 1/5] fix: add repetition_penalty

---
 graphgen/models/llm/local/vllm_wrapper.py | 35 +++++++++++------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/graphgen/models/llm/local/vllm_wrapper.py b/graphgen/models/llm/local/vllm_wrapper.py
index 2f01e511..2d3a27ee 100644
--- a/graphgen/models/llm/local/vllm_wrapper.py
+++ b/graphgen/models/llm/local/vllm_wrapper.py
@@ -1,4 +1,5 @@
 import math
+from tracemalloc import stop
 import uuid
 from typing import Any, List, Optional
 import asyncio
@@ -42,20 +43,17 @@ def __init__(
         )
         self.engine = AsyncLLMEngine.from_engine_args(engine_args)
         self.timeout = float(timeout)
+        self.tokenizer = self.engine.engine.tokenizer.tokenizer
 
-    @staticmethod
-    def _build_inputs(prompt: str, history: Optional[List[str]] = None) -> str:
-        msgs = history or []
-        lines = []
-        for m in msgs:
-            if isinstance(m, dict):
-                role = m.get("role", "")
-                content = m.get("content", "")
-                lines.append(f"{role}: {content}")
-            else:
-                lines.append(str(m))
-        lines.append(prompt)
-        return "\n".join(lines)
+    def _build_inputs(self, prompt: str, history: Optional[List[dict]] = None) -> Any:
+        messages = history or []
+        messages.append({"role": "user", "content": prompt})
+
+        return self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
 
     async def _consume_generator(self, generator):
         final_output = None
@@ -70,14 +68,14 @@ async def generate_answer(
         request_id = f"graphgen_req_{uuid.uuid4()}"
 
         sp = self.SamplingParams(
-            temperature=self.temperature if self.temperature > 0 else 1.0,
-            top_p=self.top_p if self.temperature > 0 else 1.0,
+            temperature=self.temperature if self.temperature >= 0 else 1.0,
+            top_p=self.top_p if self.top_p >= 0 else 1.0,
             max_tokens=extra.get("max_new_tokens", 2048),
+            repetition_penalty=extra.get("repetition_penalty", 1.05),
         )
 
-        result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
-
         try:
+            result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
             final_output = await asyncio.wait_for(
                 self._consume_generator(result_generator),
                 timeout=self.timeout
@@ -105,9 +103,8 @@ async def generate_topk_per_token(
             logprobs=self.top_k,
         )
 
-        result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
-
         try:
+            result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
             final_output = await asyncio.wait_for(
                 self._consume_generator(result_generator),
                 timeout=self.timeout

From 3387988fe1ee16ffc476e4a9f9b10e3ad71be992 Mon Sep 17 00:00:00 2001
From: chenzihong <58508660+ChenZiHong-Gavin@users.noreply.github.com>
Date: Tue, 13 Jan 2026 23:35:55 +0800
Subject: [PATCH 2/5] Potential fix for pull request finding 'Unused import'

Co-authored-by: Copilot Autofix powered by AI <223894421+github-code-quality[bot]@users.noreply.github.com>
---
 graphgen/models/llm/local/vllm_wrapper.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/graphgen/models/llm/local/vllm_wrapper.py b/graphgen/models/llm/local/vllm_wrapper.py
index 2d3a27ee..d992be62 100644
--- a/graphgen/models/llm/local/vllm_wrapper.py
+++ b/graphgen/models/llm/local/vllm_wrapper.py
@@ -1,5 +1,4 @@
 import math
-from tracemalloc import stop
 import uuid
 from typing import Any, List, Optional
 import asyncio
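PATCH 1/5 swaps the hand-rolled "role: content" prompt concatenation in _build_inputs for the tokenizer's own chat template. Below is a minimal standalone sketch of the same apply_chat_template call using a Hugging Face tokenizer loaded directly; the model name and messages are illustrative only, and the wrapper itself pulls the tokenizer out of the running vLLM engine instead:

    # Standalone sketch of the chat-template formatting adopted in PATCH 1/5.
    # Illustrative only: model name and messages are placeholders.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize the extracted entities."},
    ]

    # Render the conversation into the model's prompt format and append the
    # assistant-turn header so generation starts at the reply position.
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    print(prompt)

The rendered string is what the wrapper then hands to engine.generate() as the prompt, so sampling runs against the layout the instruction-tuned model expects.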
From 5e57ac4df67ec6cde1536dd3549e4a269449bed2 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Wed, 14 Jan 2026 00:02:36 +0800
Subject: [PATCH 3/5] fix: catch timeouterror

---
 graphgen/models/llm/local/vllm_wrapper.py | 31 ++++++++++------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/graphgen/models/llm/local/vllm_wrapper.py b/graphgen/models/llm/local/vllm_wrapper.py
index 2d3a27ee..23c94eae 100644
--- a/graphgen/models/llm/local/vllm_wrapper.py
+++ b/graphgen/models/llm/local/vllm_wrapper.py
@@ -1,5 +1,4 @@
 import math
-from tracemalloc import stop
 import uuid
 from typing import Any, List, Optional
 import asyncio
@@ -55,12 +54,6 @@ def _build_inputs(self, prompt: str, history: Optional[List[dict]] = None) -> An
             add_generation_prompt=True
         )
 
-    async def _consume_generator(self, generator):
-        final_output = None
-        async for request_output in generator:
-            final_output = request_output
-        return final_output
-
     async def generate_answer(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
     ) -> str:
@@ -76,10 +69,12 @@ async def generate_answer(
 
         try:
             result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
-            final_output = await asyncio.wait_for(
-                self._consume_generator(result_generator),
-                timeout=self.timeout
-            )
+            final_output = None
+            async with asyncio.timeout(self.timeout):
+                async for request_output in result_generator:
+                    if request_output.finished:
+                        final_output = request_output
+                        break
 
             if not final_output or not final_output.outputs:
                 return ""
@@ -87,7 +82,7 @@ async def generate_answer(
             result_text = final_output.outputs[0].text
             return result_text
 
-        except (Exception, asyncio.CancelledError):
+        except (Exception, asyncio.CancelledError, asyncio.TimeoutError):
             await self.engine.abort(request_id)
             raise
 
@@ -105,10 +100,12 @@ async def generate_topk_per_token(
 
         try:
             result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
-            final_output = await asyncio.wait_for(
-                self._consume_generator(result_generator),
-                timeout=self.timeout
-            )
+            final_output = None
+            async with asyncio.timeout(self.timeout):
+                async for request_output in result_generator:
+                    if request_output.finished:
+                        final_output = request_output
+                        break
 
         if (
             not final_output
@@ -138,7 +135,7 @@ async def generate_topk_per_token(
                 return [main_token]
             return []
 
-        except (Exception, asyncio.CancelledError):
+        except (Exception, asyncio.CancelledError, asyncio.TimeoutError):
             await self.engine.abort(request_id)
             raise
 
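PATCH 3/5 bounds the request with async with asyncio.timeout(...), but asyncio.timeout() only exists on Python 3.11 and later; on older interpreters it raises AttributeError, which is presumably the "no attribute timeout" error that PATCH 4/5 works around by returning to asyncio.wait_for. A minimal self-contained sketch of the portable wait_for pattern, with a dummy async generator standing in for the vLLM output stream (all names here are illustrative):

    # Portable timeout handling for an async-generator consumer.
    # fake_stream() is a stand-in for the vLLM RequestOutput stream.
    import asyncio


    async def fake_stream():
        for i in range(3):
            await asyncio.sleep(0.1)
            yield i


    async def consume(gen):
        last = None
        async for item in gen:
            last = item  # keep only the final output, as the wrapper does
        return last


    async def main():
        try:
            result = await asyncio.wait_for(consume(fake_stream()), timeout=5.0)
            print("finished with", result)
        except asyncio.TimeoutError:
            # the wrapper aborts the vLLM request at this point before re-raising
            print("generation timed out")


    asyncio.run(main())

asyncio.wait_for raises asyncio.TimeoutError on expiry, which matches PATCH 3/5 widening the except clause to include it before aborting the engine request.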
From 4198cde64c8ce97fb39c65e9ca854b9f1b91f212 Mon Sep 17 00:00:00 2001
From: chenzihong <522023320011@smail.nju.edu.cn>
Date: Wed, 14 Jan 2026 00:40:08 +0800
Subject: [PATCH 4/5] fix: fix no attribute timeout

---
 graphgen/models/llm/local/vllm_wrapper.py | 29 +++++++++++++----------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/graphgen/models/llm/local/vllm_wrapper.py b/graphgen/models/llm/local/vllm_wrapper.py
index 23c94eae..cdcc4561 100644
--- a/graphgen/models/llm/local/vllm_wrapper.py
+++ b/graphgen/models/llm/local/vllm_wrapper.py
@@ -54,6 +54,14 @@ def _build_inputs(self, prompt: str, history: Optional[List[dict]] = None) -> An
             add_generation_prompt=True
         )
 
+    async def _consume_generator(self, generator):
+        final_output = None
+        async for request_output in generator:
+            if request_output.finished:
+                final_output = request_output
+                break
+        return final_output
+
     async def generate_answer(
         self, text: str, history: Optional[List[str]] = None, **extra: Any
     ) -> str:
@@ -69,12 +77,10 @@ async def generate_answer(
 
         try:
             result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
-            final_output = None
-            async with asyncio.timeout(self.timeout):
-                async for request_output in result_generator:
-                    if request_output.finished:
-                        final_output = request_output
-                        break
+            final_output = await asyncio.wait_for(
+                self._consume_generator(result_generator),
+                timeout=self.timeout
+            )
 
             if not final_output or not final_output.outputs:
                 return ""
@@ -100,12 +106,11 @@ async def generate_topk_per_token(
 
         try:
             result_generator = self.engine.generate(full_prompt, sp, request_id=request_id)
-            final_output = None
-            async with asyncio.timeout(self.timeout):
-                async for request_output in result_generator:
-                    if request_output.finished:
-                        final_output = request_output
-                        break
+            final_output = await asyncio.wait_for(
+                self._consume_generator(result_generator),
+                timeout=self.timeout
+            )
+
 
         if (
             not final_output

From b03a9d1059ab2fc6bc57c1940c027bb293308c4b Mon Sep 17 00:00:00 2001
From: chenzihong_gavin <522023320011@smail.nju.edu.cn>
Date: Wed, 14 Jan 2026 12:15:32 +0800
Subject: [PATCH 5/5] fix: change vllm timeout from 300 to 600

---
 graphgen/models/llm/local/vllm_wrapper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphgen/models/llm/local/vllm_wrapper.py b/graphgen/models/llm/local/vllm_wrapper.py
index cdcc4561..cafe6529 100644
--- a/graphgen/models/llm/local/vllm_wrapper.py
+++ b/graphgen/models/llm/local/vllm_wrapper.py
@@ -20,7 +20,7 @@ def __init__(
         temperature: float = 0.6,
         top_p: float = 1.0,
         top_k: int = 5,
-        timeout: float = 300,
+        timeout: float = 600,
         **kwargs: Any,
     ):
         super().__init__(temperature=temperature, top_p=top_p, top_k=top_k, **kwargs)
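Taken together, the series leaves the wrapper building chat-template prompts, consuming the engine stream under a 600-second asyncio.wait_for, and passing repetition_penalty (default 1.05) through to vLLM's SamplingParams. A condensed sketch of that sampling configuration using vLLM's synchronous offline API, which is simpler to run than the AsyncLLMEngine path the wrapper actually uses; the model name and prompt are illustrative placeholders:

    # Sketch of the sampling configuration the patch series converges on,
    # shown with the synchronous vLLM API for brevity. Model and prompt are
    # placeholders; the wrapper drives AsyncLLMEngine with the same parameters.
    from vllm import LLM, SamplingParams

    sampling_params = SamplingParams(
        temperature=0.6,
        top_p=1.0,
        max_tokens=2048,
        repetition_penalty=1.05,  # values above 1.0 penalize already-seen tokens
    )

    llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")
    outputs = llm.generate(["Explain what a knowledge graph is."], sampling_params)
    print(outputs[0].outputs[0].text)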