Skip to main content

Browser API Reference

Complete API documentation for AG-Kit browser automation tools, including method signatures, input/output schemas, and practical usage examples.

API Overview

The browser API provides web automation capabilities through multiple components with consistent interfaces for different execution environments.

Core Interfaces

UnifiedBrowserClient

Main browser client interface for session management and automation.
/**
 * Main browser client interface for session management and automation.
 *
 * Signature listing only: no method bodies are shown. Every method returns a
 * `Promise`, and every method except `createSession` takes the target
 * session's id as its first argument.
 */
class UnifiedBrowserClient {
  /** @param config Client-wide settings, typed as `BrowserClientConfig`. */
  constructor(config: BrowserClientConfig);
  
  // Session management
  /** Creates a session from `options` and resolves to its `BrowserSession`. */
  createSession(options: SessionOptions): Promise<BrowserSession>;
  /** Resolves to the session, or to `null` (presumably when the id is unknown; confirm). */
  getSession(sessionId: string): Promise<BrowserSession | null>;
  /** Closes the session; resolves with no value. */
  closeSession(sessionId: string): Promise<void>;
  
  // Navigation
  // All four navigation calls resolve to a `NavigationResult`.
  navigate(sessionId: string, url: string): Promise<NavigationResult>;
  goBack(sessionId: string): Promise<NavigationResult>;
  goForward(sessionId: string): Promise<NavigationResult>;
  reload(sessionId: string): Promise<NavigationResult>;
  
  // Element interaction
  // `selector` identifies the target element; each call resolves to an `ActionResult`.
  click(sessionId: string, selector: string): Promise<ActionResult>;
  type(sessionId: string, selector: string, text: string): Promise<ActionResult>;
  select(sessionId: string, selector: string, value: string): Promise<ActionResult>;
  
  // Data extraction
  // Both calls resolve to an `ExtractionResult`.
  extractText(sessionId: string, selector: string): Promise<ExtractionResult>;
  extractAttribute(sessionId: string, selector: string, attribute: string): Promise<ExtractionResult>;
  
  // Page operations
  /** Captures the page; `options` may be omitted. */
  screenshot(sessionId: string, options?: ScreenshotOptions): Promise<ScreenshotResult>;
  /** Runs `code`, supplied as source text, and resolves to an `EvaluationResult`. */
  evaluate(sessionId: string, code: string): Promise<EvaluationResult>;
}

UnifiedBrowserTool

High-level browser tool with unified action interface.
/**
 * High-level browser tool with a unified action interface.
 *
 * Signature listing only. `_invoke` is `protected`; the usage examples in this
 * reference call `invoke(...)` instead, which is presumably inherited from
 * `BaseTool` (confirm).
 */
class UnifiedBrowserTool extends BaseTool<BrowserToolSchema, any, BrowserToolResult> {
  /** @param config Tool settings, typed as `BrowserToolConfig`. */
  constructor(config: BrowserToolConfig);
  
  /**
   * Handles one action request.
   *
   * @param input Action name, session id and the action's arguments.
   * @param context Optional execution context.
   * @returns The `BrowserToolResult` envelope for the request.
   */
  protected async _invoke(
    input: TBrowserToolInput,
    context?: ToolExecutionContext
  ): Promise<BrowserToolResult>;
}

Input Schemas

TBrowserToolInput

/**
 * Single input shape shared by every browser tool action.
 *
 * `action` and `sessionId` are always required. The remaining fields are
 * optional at the type level; which ones an action actually reads is shown by
 * the per-action examples in this reference.
 */
interface TBrowserToolInput {
  /** Which operation to perform. */
  action: BrowserActionType;
  /** Browser session identifier. */
  sessionId: string;
  /** Target URL; set in the 'navigate' example. */
  url?: string;
  /** CSS selector for the target element(s). */
  selector?: string;
  /** Text to type; set in the 'type' example. */
  text?: string;
  /** Option value to select; set in the 'select' example. */
  value?: string;
  /** Attribute name to extract; set in the 'extract_attribute' example. */
  attribute?: string;
  /** JavaScript source to execute; set in the 'evaluate' example. */
  code?: string;
  /** Timeout in milliseconds. */
  timeout?: number;
  /** Extraction configurations; set in the 'extract_data' example. */
  extractors?: DataExtractor[];
  /** Extra per-action settings. */
  options?: ActionOptions;
}

BrowserActionType

/**
 * Name of the operation requested through a tool input's `action` field.
 * Members are snake_case string literals, grouped below by category.
 */
type BrowserActionType =
  // Navigation
  | 'navigate'
  | 'go_back'
  | 'go_forward'
  | 'reload'
  
  // Element interaction
  | 'click'
  | 'type'
  | 'select'
  | 'hover'
  | 'focus'
  | 'blur'
  
  // Data extraction
  | 'extract_text'
  | 'extract_attribute'
  | 'extract_data'
  
  // Page operations
  | 'screenshot'
  | 'scroll'
  | 'wait_for_element'
  | 'wait_for_navigation'
  
  // JavaScript execution
  | 'evaluate'
  | 'inject_script'
  
  // Session management
  | 'create_session'
  | 'close_session'
  | 'get_session_info';

Configuration Interfaces

/**
 * Settings accepted by the `UnifiedBrowserClient` constructor.
 * Only `apiKey` is required.
 */
interface BrowserClientConfig {
  /** Required. The sandbox factory example reads it from `process.env.E2B_API_KEY`. */
  apiKey: string;
  /** The sandbox factory example reads it from `process.env.E2B_DOMAIN`. */
  domain?: string;
  /** NOTE(review): not described on this page; presumably a sandbox template name. Confirm. */
  template?: string;
  /** Presumably milliseconds, like the per-call `timeout`. TODO confirm. */
  defaultTimeout?: number;
  headless?: boolean;
  /** Viewport dimensions (presumably pixels; confirm). */
  viewport?: { width: number; height: number };
  userAgent?: string;
}

/**
 * Settings accepted by the `UnifiedBrowserTool` constructor: every
 * `BrowserClientConfig` field plus the tool-level options below.
 */
interface BrowserToolConfig extends BrowserClientConfig {
  /** Presumably an upper bound on concurrently open sessions. Confirm. */
  maxSessions?: number;
  /** NOTE(review): unit is not documented here; presumably milliseconds. Confirm. */
  sessionTimeout?: number;
  debug?: boolean;
}

/**
 * Argument to `UnifiedBrowserClient.createSession`. Every field is optional.
 *
 * `viewport`, `userAgent` and `headless` have the same names and types as the
 * matching `BrowserClientConfig` fields; presumably they override them for one
 * session (confirm).
 */
interface SessionOptions {
  /** Presumably lets the caller choose the id instead of having one generated. Confirm. */
  sessionId?: string;
  viewport?: { width: number; height: number };
  userAgent?: string;
  headless?: boolean;
}

Output Schemas

BrowserToolResult

/**
 * Envelope returned by a browser tool invocation.
 *
 * Callers check `success` first; on failure `error` holds the message and
 * `error_type` the category.
 */
interface BrowserToolResult {
  success: boolean;
  /** Action-specific payload. */
  data?: BrowserActionResult;
  /** Human-readable failure message. */
  error?: string;
  /**
   * Failure category. Lists all seven documented error kinds, so handlers that
   * branch on 'timeout', 'element_not_found' or 'session_not_found' type-check.
   */
  error_type?:
    | 'validation'
    | 'execution'
    | 'permission'
    | 'network'
    | 'timeout'
    | 'element_not_found'
    | 'session_not_found';
  /** NOTE(review): unit is not documented here; presumably milliseconds. Confirm. */
  executionTime?: number;
  sessionId?: string;
}

Action Results

/** Result of a navigation call (`navigate`, `goBack`, `goForward`, `reload`). */
interface NavigationResult {
  url: string;
  title: string;
  /** Presumably the HTTP status code of the page response. Confirm. */
  status: number;
  /** NOTE(review): unit is not documented here; presumably milliseconds. Confirm. */
  loadTime: number;
}

/** Result of an element interaction (`click`, `type`, `select`). */
interface ActionResult {
  success: boolean;
  /** Optional description of an element, typed as `ElementInfo`. */
  element?: ElementInfo;
  message?: string;
}

/** Result of a data-extraction call (`extractText`, `extractAttribute`). */
interface ExtractionResult {
  /** Either one string or a list of strings, so narrow before use. */
  data: string | string[];
  selector: string;
  /** Presumably the number of elements the selector matched. Confirm. */
  count: number;
}

/** Result of a `screenshot` call. */
interface ScreenshotResult {
  data: string; // Base64 encoded image
  format: 'png' | 'jpeg';
  /** Image dimensions (presumably pixels; confirm). */
  size: { width: number; height: number };
}

/** Result of an `evaluate` call. */
interface EvaluationResult {
  /** Typed `any`, so the compiler does not check how callers use it. */
  result: any;
  /** NOTE(review): presumably a type tag for `result`; its values are not listed here. Confirm. */
  type: string;
  error?: string;
}

Method Reference

Navigation Methods

navigate()

Navigate to a URL.
// Navigate session 'session-1' to the URL, with a 30000 ms navigation timeout.
await browserTool.invoke({
  action: 'navigate',
  sessionId: 'session-1',
  url: 'https://example.com',
  timeout: 30000
});
Parameters:
  • sessionId (string): Browser session identifier
  • url (string): Target URL to navigate to
  • timeout (number, optional): Navigation timeout in milliseconds
Returns: NavigationResult

goBack()

Navigate back in browser history.
// Navigate back in the history of session 'session-1'; only the session id is passed.
await browserTool.invoke({
  action: 'go_back',
  sessionId: 'session-1'
});

goForward()

Navigate forward in browser history.
// Navigate forward in the history of session 'session-1'; only the session id is passed.
await browserTool.invoke({
  action: 'go_forward',
  sessionId: 'session-1'
});

reload()

Reload the current page.
// Reload the current page of session 'session-1'; only the session id is passed.
await browserTool.invoke({
  action: 'reload',
  sessionId: 'session-1'
});

Element Interaction Methods

click()

Click on an element.
// Click the element matched by the CSS selector `button#submit`.
await browserTool.invoke({
  action: 'click',
  sessionId: 'session-1',
  selector: 'button#submit'
});
Parameters:
  • sessionId (string): Browser session identifier
  • selector (string): CSS selector for target element
Returns: ActionResult

type()

Type text into an input element.
// Type `text` into the input matched by the CSS selector.
await browserTool.invoke({
  action: 'type',
  sessionId: 'session-1',
  selector: 'input[name="username"]',
  text: 'myusername'
});
Parameters:
  • sessionId (string): Browser session identifier
  • selector (string): CSS selector for input element
  • text (string): Text to type
Returns: ActionResult

select()

Select an option from a dropdown.
// Select the option whose value is 'US' in the dropdown matched by the selector.
await browserTool.invoke({
  action: 'select',
  sessionId: 'session-1',
  selector: 'select#country',
  value: 'US'
});
Parameters:
  • sessionId (string): Browser session identifier
  • selector (string): CSS selector for select element
  • value (string): Option value to select
Returns: ActionResult

Data Extraction Methods

extractText()

Extract text content from elements.
// Extract text content from the elements matched by `.article-title`.
await browserTool.invoke({
  action: 'extract_text',
  sessionId: 'session-1',
  selector: '.article-title'
});
Parameters:
  • sessionId (string): Browser session identifier
  • selector (string): CSS selector for target elements
Returns: ExtractionResult

extractAttribute()

Extract attribute values from elements.
// Extract the `src` attribute from the elements matched by `img.hero`.
await browserTool.invoke({
  action: 'extract_attribute',
  sessionId: 'session-1',
  selector: 'img.hero',
  attribute: 'src'
});
Parameters:
  • sessionId (string): Browser session identifier
  • selector (string): CSS selector for target elements
  • attribute (string): Attribute name to extract
Returns: ExtractionResult

extractData()

Extract multiple data points using extractors.
// Extract several data points in one call. Each extractor has a `name`, a
// `selector`, and an `attribute` to extract.
await browserTool.invoke({
  action: 'extract_data',
  sessionId: 'session-1',
  extractors: [
    // 'text' is one of the two special values listed in `DataExtractor['attribute']`.
    { name: 'title', selector: 'h1', attribute: 'text' },
    { name: 'links', selector: 'a', attribute: 'href' },
    { name: 'images', selector: 'img', attribute: 'src' }
  ]
});
Parameters:
  • sessionId (string): Browser session identifier
  • extractors (DataExtractor[]): Array of extraction configurations
/** One entry of the `extractors` array used by the 'extract_data' action. */
interface DataExtractor {
  /** Label for this data point. */
  name: string;
  /** Selector for the elements to read from. */
  selector: string;
  /**
   * What to read from each match. 'text' and 'html' are called out as special
   * values (presumably text content and markup; confirm). Because the union
   * also includes `string`, the compiler accepts any string here.
   */
  attribute: 'text' | 'html' | string;
}

Page Operation Methods

screenshot()

Capture a screenshot of the page.
// Capture a PNG screenshot with `fullPage` enabled.
await browserTool.invoke({
  action: 'screenshot',
  sessionId: 'session-1',
  options: {
    fullPage: true,
    format: 'png'
  }
});
Parameters:
  • sessionId (string): Browser session identifier
  • options (ScreenshotOptions, optional): Screenshot configuration
/** Optional screenshot configuration; every field may be omitted. */
interface ScreenshotOptions {
  fullPage?: boolean;
  format?: 'png' | 'jpeg';
  quality?: number; // 0-100 for JPEG
  /** Rectangle given as origin plus size; presumably limits the capture to that region. Confirm. */
  clip?: { x: number; y: number; width: number; height: number };
}
Returns: ScreenshotResult

scroll()

Scroll the page.
// Scroll the page: direction 'down', amount 500 (`pixels`).
await browserTool.invoke({
  action: 'scroll',
  sessionId: 'session-1',
  options: {
    direction: 'down',
    pixels: 500
  }
});

waitForElement()

Wait for an element to appear.
// Wait for an element matching `.loading-complete` to appear; `timeout` is 10000.
await browserTool.invoke({
  action: 'wait_for_element',
  sessionId: 'session-1',
  selector: '.loading-complete',
  timeout: 10000
});

JavaScript Execution Methods

evaluate()

Execute JavaScript code in the page context.
// Execute JavaScript in the page context. The code is passed as a string.
// NOTE(review): the snippet uses a top-level `return`, which only parses if the
// tool wraps the string in a function body before running it. Confirm.
await browserTool.invoke({
  action: 'evaluate',
  sessionId: 'session-1',
  code: `
    return {
      title: document.title,
      url: window.location.href,
      userAgent: navigator.userAgent
    };
  `
});
Parameters:
  • sessionId (string): Browser session identifier
  • code (string): JavaScript code to execute
Returns: EvaluationResult

injectScript()

Inject a JavaScript file into the page.
// Inject a JavaScript file into the page; the file is named by `options.scriptPath`.
await browserTool.invoke({
  action: 'inject_script',
  sessionId: 'session-1',
  options: {
    scriptPath: '/path/to/script.js'
  }
});

Session Management Methods

createSession()

Create a new browser session.
// Create a session with id 'new-session', a 1920x1080 viewport, and `headless`
// turned off. The `options` keys use the same names as `SessionOptions` fields.
await browserTool.invoke({
  action: 'create_session',
  sessionId: 'new-session',
  options: {
    viewport: { width: 1920, height: 1080 },
    headless: false
  }
});

closeSession()

Close a browser session.
// Close session 'session-1'; only the session id is passed.
await browserTool.invoke({
  action: 'close_session',
  sessionId: 'session-1'
});

getSessionInfo()

Get information about a session.
// Get information about session 'session-1'; only the session id is passed.
await browserTool.invoke({
  action: 'get_session_info',
  sessionId: 'session-1'
});

Error Handling

Error Types

/**
 * Categories of failure a browser operation can report. The trailing comment
 * on each member gives its meaning.
 */
type BrowserErrorType = 
  | 'validation'    // Invalid input parameters
  | 'execution'     // Browser operation failed
  | 'permission'    // Access denied
  | 'network'       // Network/connection issues
  | 'timeout'       // Operation timed out
  | 'element_not_found' // Element not found
  | 'session_not_found'; // Session doesn't exist

Error Examples

// Issue a click against the session id 'invalid-session' (presumably one that
// was never created) to demonstrate the failure path.
const result = await browserTool.invoke({
  action: 'click',
  sessionId: 'invalid-session',
  selector: '#button'
});

// A failed call carries a category in `error_type` and a message in `error`:
// branch on the category, log the message.
if (!result.success) {
  const { error, error_type: errorType } = result;

  if (errorType === 'session_not_found') {
    console.error('Session not found:', error);
  } else if (errorType === 'element_not_found') {
    console.error('Element not found:', error);
  } else if (errorType === 'timeout') {
    console.error('Operation timed out:', error);
  } else {
    // Every other category, including a missing `error_type`, lands here.
    console.error('Unknown error:', error);
  }
}

Factory Functions

createLocalBrowserTool()

Create a browser tool for local Playwright execution.
import { createLocalBrowserTool } from '@ag-kit/tools';

// Browser tool for local Playwright execution. No `apiKey` is passed here,
// unlike the sandbox factory example.
const browserTool = createLocalBrowserTool({
  headless: false,
  defaultTimeout: 30000,
  viewport: { width: 1280, height: 720 }
});

createSandboxBrowserTool()

Create a browser tool for E2B sandbox execution.
import { createSandboxBrowserTool } from '@ag-kit/tools';

// Browser tool for E2B sandbox execution; credentials come from the environment.
// NOTE(review): `process.env.*` values are `string | undefined`. Confirm the
// factory accepts `undefined`, or check that both variables are set first.
const browserTool = createSandboxBrowserTool({
  apiKey: process.env.E2B_API_KEY,
  domain: process.env.E2B_DOMAIN,
  defaultTimeout: 30000,
  headless: false
});

Type Definitions

Complete Type Reference

// Main tool input type
/**
 * One request to the browser tool. `action` and `sessionId` are required;
 * every other field is optional at the type level.
 */
interface TBrowserToolInput {
  action: BrowserActionType;
  sessionId: string;
  /** Target URL to navigate to. */
  url?: string;
  /** CSS selector for the target element(s). */
  selector?: string;
  /** Text to type. */
  text?: string;
  /** Option value to select. */
  value?: string;
  /** Attribute name to extract. */
  attribute?: string;
  /** JavaScript code to execute. */
  code?: string;
  /** Timeout in milliseconds. */
  timeout?: number;
  /** Array of extraction configurations. */
  extractors?: DataExtractor[];
  /** Extra per-action settings. */
  options?: ActionOptions;
}

// Action options
/**
 * Per-action settings carried in the `options` field of a tool input.
 *
 * Every field is optional; fields are grouped by the action that uses them.
 * The screenshot group includes `clip`, and a session group (`viewport`,
 * `userAgent`, `headless`) is declared, so the documented 'screenshot' and
 * 'create_session' calls pass excess-property checking.
 */
interface ActionOptions {
  // Navigation options
  waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';

  // Screenshot options
  fullPage?: boolean;
  format?: 'png' | 'jpeg';
  quality?: number; // 0-100 for JPEG
  clip?: { x: number; y: number; width: number; height: number };

  // Scroll options
  direction?: 'up' | 'down' | 'left' | 'right';
  pixels?: number;

  // Script options
  scriptPath?: string;
  scriptContent?: string;

  // Session options (as passed with 'create_session')
  viewport?: { width: number; height: number };
  userAgent?: string;
  headless?: boolean;
}

// Session information
/** Description of one browser session, as returned by `createSession` and `getSession`. */
interface BrowserSession {
  sessionId: string;
  createdAt: Date;
  lastActivity: Date;
  /** Typed as the full tool configuration, not `SessionOptions`. */
  config: BrowserToolConfig;
  status: 'active' | 'idle' | 'closed';
  /** Presumably the URL of the page currently loaded, when there is one. Confirm. */
  url?: string;
  /** Presumably the title of the page currently loaded, when there is one. Confirm. */
  title?: string;
}

// Element information
/** Description of a page element, attached to `ActionResult.element`. */
interface ElementInfo {
  tagName: string;
  /** Attribute name mapped to attribute value. */
  attributes: Record<string, string>;
  /** Optional position and size (presumably pixels; confirm). */
  boundingBox?: {
    x: number;
    y: number;
    width: number;
    height: number;
  };
}