Variable SCRAPER_PYTHON_CODE `Const`

SCRAPER_PYTHON_CODE: "\nimport html\nimport json\nimport sys\nimport traceback\nfrom typing import Any, Optional, List, Dict\nfrom urllib.parse import urlparse\n\nDEBUG = True\n\ndef _log(level: str, message: str) -> None:\n if DEBUG or level in ('ERROR', 'WARN'):\n print(f\"[PyScraper:{level}] {message}\", file=sys.stderr)\n\ndef _log_debug(message: str) -> None:\n _log('DEBUG', message)\n\ndef _log_info(message: str) -> None:\n _log('INFO', message)\n\ndef _log_warn(message: str) -> None:\n _log('WARN', message)\n\ndef _log_error(message: str, exc: Optional[Exception] = None) -> None:\n _log('ERROR', message)\n if exc and DEBUG:\n _log('ERROR', f\"Traceback:\\n{traceback.format_exc()}\")\n\n# Import dependencies\ntry:\n from recipe_scrapers import scrape_html, SCRAPERS\n _log_info(f\"recipe_scrapers imported successfully ({len(SCRAPERS)} scrapers available)\")\nexcept ImportError as e:\n _log_error(f\"Failed to import recipe_scrapers: {e}\", e)\n raise\n\nAUTH_URL_PATTERNS = ['/login', '/signin', '/sign-in', '/auth', '/connexion', '/account/login', '/user/login']\nAUTH_TITLE_KEYWORDS = ['login', 'sign in', 'connexion', 'se connecter', 'log in', 'anmelden', 'iniciar sesión']\n\nRECIPE_SCHEMA_INDICATORS = [\n '\"@type\":\"recipe\"',\n '\"@type\": \"recipe\"',\n \"'@type':'recipe'\",\n \"'@type': 'recipe'\",\n 'itemtype=\"http://schema.org/recipe\"',\n 'itemtype=\"https://schema.org/recipe\"',\n \"itemtype='http://schema.org/recipe'\",\n \"itemtype='https://schema.org/recipe'\",\n]\n\nclass AuthenticationRequiredError(Exception):\n def __init__(self, host: str, message: str = \"This recipe requires authentication\"):\n self.host = host\n self.message = message\n super().__init__(message)\n\nclass NoRecipeFoundError(Exception):\n def __init__(self, message: str = \"No recipe found on this page\"):\n self.message = message\n super().__init__(message)\n\ndef _has_recipe_schema(html: str) -> bool:\n lower_html = html.lower()\n for indicator in 
RECIPE_SCHEMA_INDICATORS:\n if indicator in lower_html:\n return True\n return False\n\ndef scrape_recipe_from_html(html: str, url: str, wild_mode: bool = True, final_url: Optional[str] = None) -> str:\n _log_info(f\"scrape_recipe_from_html called: url={url}, wild_mode={wild_mode}, html_len={len(html)}\")\n try:\n auth_error = _detect_auth_required(html, final_url or url, url)\n if auth_error:\n _log_warn(f\"Auth required detected for host: {auth_error.host}\")\n raise auth_error\n\n if wild_mode and not _has_recipe_schema(html):\n _log_warn(f\"No recipe schema found in HTML from {url}\")\n raise NoRecipeFoundError()\n\n _log_debug(f\"Calling scrape_html with supported_only={not wild_mode}...\")\n scraper = scrape_html(html=html, org_url=url, supported_only=not wild_mode)\n _log_debug(\"scrape_html succeeded, extracting data...\")\n\n data = _extract_all_data(scraper)\n _log_info(f\"Scrape successful: title='{data.get('title', 'N/A')}', ingredients={len(data.get('ingredients', []))}\")\n\n return json.dumps({\n \"success\": True,\n \"data\": data\n }, ensure_ascii=False)\n except AuthenticationRequiredError as e:\n _log_warn(f\"AuthenticationRequiredError: {e.message}\")\n return json.dumps({\n \"success\": False,\n \"error\": {\"type\": \"AuthenticationRequired\", \"message\": e.message, \"host\": e.host}\n }, ensure_ascii=False)\n except NoRecipeFoundError as e:\n return json.dumps({\n \"success\": False,\n \"error\": {\"type\": \"NoRecipeFoundError\", \"message\": e.message}\n }, ensure_ascii=False)\n except Exception as e:\n _log_error(f\"scrape_recipe_from_html failed: {type(e).__name__}: {e}\", e)\n return json.dumps({\n \"success\": False,\n \"error\": {\"type\": type(e).__name__, \"message\": str(e)}\n }, ensure_ascii=False)\n\ndef get_supported_hosts() -> str:\n try:\n hosts = list(SCRAPERS.keys())\n return json.dumps({\n \"success\": True,\n \"data\": hosts\n }, ensure_ascii=False)\n except Exception as e:\n return json.dumps({\n \"success\": False,\n 
\"error\": {\"type\": type(e).__name__, \"message\": str(e)}\n }, ensure_ascii=False)\n\ndef is_host_supported(host: str) -> str:\n try:\n supported = host.lower() in (h.lower() for h in SCRAPERS.keys())\n return json.dumps({\n \"success\": True,\n \"data\": supported\n }, ensure_ascii=False)\n except Exception as e:\n return json.dumps({\n \"success\": False,\n \"error\": {\"type\": type(e).__name__, \"message\": str(e)}\n }, ensure_ascii=False)\n\ndef _unescape(value):\n if value is None:\n return None\n if isinstance(value, str):\n return html.unescape(value)\n if isinstance(value, list):\n return [html.unescape(item) if isinstance(item, str) else item for item in value]\n return value\n\ndef _extract_all_data(scraper) -> Dict[str, Any]:\n ingredients = _safe_call(scraper.ingredients) or []\n\n return {\n \"title\": _unescape(_safe_call(scraper.title)),\n \"description\": _unescape(_safe_call(scraper.description)),\n \"ingredients\": _unescape(ingredients),\n \"parsedIngredients\": None,\n \"ingredientGroups\": _safe_call_ingredient_groups(scraper),\n \"instructions\": _unescape(_safe_call(scraper.instructions)),\n \"instructionsList\": _unescape(_safe_call(scraper.instructions_list)),\n \"parsedInstructions\": None,\n \"totalTime\": _safe_call_numeric(scraper.total_time),\n \"prepTime\": _safe_call_numeric(scraper.prep_time),\n \"cookTime\": _safe_call_numeric(scraper.cook_time),\n \"yields\": _safe_call(scraper.yields),\n \"image\": _safe_call(scraper.image),\n \"host\": _safe_call(scraper.host),\n \"canonicalUrl\": _safe_call(scraper.canonical_url),\n \"siteName\": _safe_call(scraper.site_name),\n \"author\": _safe_call(scraper.author),\n \"language\": _safe_call(scraper.language),\n \"category\": _safe_call(scraper.category),\n \"cuisine\": _safe_call(scraper.cuisine),\n \"cookingMethod\": _safe_call(scraper.cooking_method),\n \"keywords\": _safe_call(scraper.keywords),\n \"dietaryRestrictions\": _safe_call(scraper.dietary_restrictions),\n \"ratings\": 
_safe_call(scraper.ratings),\n \"ratingsCount\": _safe_call_numeric(scraper.ratings_count),\n \"nutrients\": _safe_call(scraper.nutrients),\n \"equipment\": _safe_call(scraper.equipment),\n \"links\": _safe_call(scraper.links),\n }\n\ndef _safe_call(method) -> Optional[Any]:\n try:\n result = method()\n if result is None:\n return None\n if result == 0 and not isinstance(result, bool):\n return None\n if result == \"\":\n return None\n return result\n except Exception:\n return None\n\ndef _safe_call_numeric(method) -> Optional[int]:\n try:\n result = method()\n if result is None:\n return None\n if isinstance(result, (int, float)):\n return int(result)\n return None\n except Exception:\n return None\n\ndef _safe_call_ingredient_groups(scraper) -> Optional[List[Dict[str, Any]]]:\n try:\n groups = scraper.ingredient_groups()\n if not groups:\n return None\n return [\n {\n \"purpose\": _unescape(getattr(group, 'purpose', None)),\n \"ingredients\": _unescape(getattr(group, 'ingredients', []))\n }\n for group in groups\n ]\n except Exception:\n return None\n\ndef _detect_auth_required(html: str, final_url: str, original_url: str) -> Optional[AuthenticationRequiredError]:\n try:\n host = urlparse(original_url).netloc.replace('www.', '')\n except Exception:\n host = ''\n\n try:\n final_path = urlparse(final_url).path.lower()\n for pattern in AUTH_URL_PATTERNS:\n if pattern in final_path:\n return AuthenticationRequiredError(host)\n except Exception:\n pass\n\n import re\n title_match = re.search(r'<title[^>]*>([^<]+)</title>', html, re.IGNORECASE)\n if title_match:\n title = title_match.group(1).lower()\n for keyword in AUTH_TITLE_KEYWORDS:\n if keyword in title:\n return AuthenticationRequiredError(host)\n\n return None\n\n# Signal that scraper is ready\n_log_info(\"Scraper module initialized and ready\")\n" = ...

Python scraper code for Pyodide execution.

This module contains the Python recipe scraping code as a string, ready to be executed in Pyodide. It's derived from the shared python/scraper.py but adapted for the Pyodide environment.

Variable SCRAPER_PYTHON_CODE `Const`

Variable SCRAPER_PYTHON_CODE `Const`