From c5b6e2a0aac1f67c76b1abc9a11e5edb502a22a8 Mon Sep 17 00:00:00 2001 From: Paul Klein Date: Sat, 29 Jun 2024 14:34:35 -0700 Subject: [PATCH] Add Browserbase Support for Agent-E, with integrated captcha solving and proxies --- .env-example | 3 ++- ae/core/playwright_manager.py | 36 +++++++++++++++++++---------------- requirements.txt | 17 ++++++++++++++--- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/.env-example b/.env-example index e5fda22..85e3c58 100644 --- a/.env-example +++ b/.env-example @@ -2,5 +2,6 @@ AUTOGEN_MODEL_NAME=PUT YOUR MODEL NAME HERE, for example: gpt-4-turbo-preview or AUTOGEN_MODEL_API_KEY=PUT YOUR MODEL API KEY HERE AUTOGEN_MODEL_BASE_URL=IF YOU ARE USING OpenAI, remove this line, otherwise add the base URL, for example: https://api.groq.com/openai/v1 OPENAI_API_KEY=If you are using the testing/benchmarking, put your openai key here otherwise remove this line - +BROWSERBASE_PROJECT_ID=Browserbase Project ID +BROWSERBASE_API_KEY=Browserbase API Key BROWSER_STORAGE_DIR=/Users/macuser/Library/Application Support/Google/Chrome/Profile 5 \ No newline at end of file diff --git a/ae/core/playwright_manager.py b/ae/core/playwright_manager.py index 6fb8b06..3e686a2 100644 --- a/ae/core/playwright_manager.py +++ b/ae/core/playwright_manager.py @@ -14,6 +14,8 @@ from ae.utils.js_helper import escape_js_message from ae.utils.logger import logger +from browserbase import Browserbase, CreateSessionOptions + # Enusres that playwright does not wait for font loading when taking screenshots. 
Reference: https://github.com/microsoft/playwright/issues/28995 os.environ["PW_TEST_SCREENSHOT_NO_FONTS_READY"] = "1" @@ -133,26 +135,28 @@ async def create_browser_context(self): if self.browser_type == "chromium": logger.info(f"User dir: {user_dir}") try: - PlaywrightManager._browser_context = await PlaywrightManager._playwright.chromium.launch_persistent_context(user_dir, - channel= "chrome", headless=self.isheadless, - args=["--disable-blink-features=AutomationControlled", - "--disable-session-crashed-bubble", # disable the restore session bubble - "--disable-infobars", # disable informational popups, - ], - no_viewport=True - ) + browserbase = Browserbase(os.environ["BROWSERBASE_API_KEY"]) + options = CreateSessionOptions() + session = browserbase.create_session(options) + browser = await PlaywrightManager._playwright.chromium.connect_over_cdp(browserbase.get_connect_url(session.id)) + PlaywrightManager._browser_context = browser.contexts[0] + + debug_urls = browserbase.get_debug_connection_urls(session.id) + logger.info(f"Browser is connected and running here: {debug_urls.debuggerUrl}") + except Exception as e: if "Target page, context or browser has been closed" in str(e): new_user_dir = tempfile.mkdtemp() logger.error(f"Failed to launch persistent context with user dir {user_dir}: {e} Trying to launch with a new user dir {new_user_dir}") - PlaywrightManager._browser_context = await PlaywrightManager._playwright.chromium.launch_persistent_context(new_user_dir, - channel= "chrome", headless=self.isheadless, - args=["--disable-blink-features=AutomationControlled", - "--disable-session-crashed-bubble", # disable the restore session bubble - "--disable-infobars", # disable informational popups, - ], - no_viewport=True - ) + browserbase = Browserbase(os.environ["BROWSERBASE_API_KEY"]) + options = CreateSessionOptions() + session = browserbase.create_session(options) + browser = await 
PlaywrightManager._playwright.chromium.connect_over_cdp(browserbase.get_connect_url(session.id)) + PlaywrightManager._browser_context = browser.contexts[0] + + debug_urls = browserbase.get_debug_connection_urls(session.id) + logger.info(f"Browser is connected and running here: {debug_urls.debuggerUrl}") + elif "Chromium distribution 'chrome' is not found " in str(e): raise ValueError("Chrome is not installed on this device. Install Google Chrome or install playwright using 'playwright install chrome'. Refer to the readme for more information.") from None else: diff --git a/requirements.txt b/requirements.txt index d0a6fdf..2593d3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ annotated-types==0.6.0 # via pydantic anthropic==0.23.1 + # via agent-e (pyproject.toml) anyio==4.3.0 # via # anthropic @@ -58,6 +59,7 @@ google-auth==2.29.0 google-auth-httplib2==0.2.0 # via google-api-python-client google-generativeai==0.5.1 + # via agent-e (pyproject.toml) googleapis-common-protos==1.63.0 # via # google-api-core @@ -92,6 +94,7 @@ idna==3.6 joblib==1.3.2 # via nltk nltk==3.8.1 + # via agent-e (pyproject.toml) numpy==1.26.4 # via # flaml @@ -105,9 +108,11 @@ packaging==23.2 pdfminer-six==20231228 # via pdfplumber pdfplumber==0.11.1 + # via agent-e (pyproject.toml) pillow==10.3.0 # via pdfplumber playwright==1.44.0 + # via agent-e (pyproject.toml) proto-plus==1.23.0 # via # google-ai-generativelanguage @@ -127,15 +132,17 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pyautogen==0.2.27 + # via agent-e (pyproject.toml) pycparser==2.22 # via cffi -pydantic==2.6.2 +pydantic==2.7.1 # via + # agent-e (pyproject.toml) # anthropic # google-generativeai # openai # pyautogen -pydantic-core==2.16.3 +pydantic-core==2.18.2 # via pydantic pyee==11.1.0 # via playwright @@ -144,7 +151,9 @@ pyparsing==3.1.2 pypdfium2==4.30.0 # via pdfplumber python-dotenv==1.0.0 - # via pyautogen + # via + # agent-e (pyproject.toml) + # pyautogen pyyaml==6.0.1 # via
huggingface-hub regex==2023.12.25 @@ -166,6 +175,7 @@ sniffio==1.3.1 # httpx # openai tabulate==0.9.0 + # via agent-e (pyproject.toml) termcolor==2.4.0 # via pyautogen tiktoken==0.6.0 @@ -193,3 +203,4 @@ urllib3==2.2.1 # via # docker # requests +browserbase==0.3.0 \ No newline at end of file