diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index 7033e3800..8ecf73446 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -1380,7 +1380,15 @@ def generate_schema( ) # Extract and return schema - return json.loads(response.choices[0].message.content) + # Clean markdown code blocks that LLMs sometimes wrap JSON in + content = response.choices[0].message.content + # Remove markdown code block markers if present + if "```json" in content: + content = content.replace("```json\n", "").replace("\n```", "") + elif "```" in content: + content = content.replace("```\n", "").replace("\n```", "") + content = content.strip() + return json.loads(content) except Exception as e: raise Exception(f"Failed to generate schema: {str(e)}")