IR Format API

Complete API reference for the Intermediate Representation (IR)

IR (Intermediate Representation) is at the heart of Amux: a unified format for all adapter conversions. It captures LLM functionality in a provider-agnostic way, giving every adapter a common language and enabling seamless conversion between any pair of provider formats.

Core Types

LLMRequestIR

The unified request format.

interface LLMRequestIR {
  messages: Message[]              // Required: Conversation messages
  model?: string                   // Model identifier
  tools?: Tool[]                   // Available tools/functions
  toolChoice?: ToolChoice          // Tool usage control
  stream?: boolean                 // Enable streaming
  generation?: GenerationConfig    // Generation parameters
  system?: string                  // System prompt
  metadata?: Record<string, unknown>
  extensions?: Record<string, unknown>
  raw?: unknown
}

Only messages is required. All other fields are optional and depend on the use case.

Example:

const request: LLMRequestIR = {
  messages: [
    { role: 'user', content: 'What is the weather in Tokyo?' }
  ],
  model: 'gpt-4',
  tools: [{
    type: 'function',
    function: {
      name: 'get_weather',
      description: 'Get weather for a location',
      parameters: {
        type: 'object',
        properties: {
          location: { type: 'string' }
        },
        required: ['location']
      }
    }
  }],
  toolChoice: 'auto',
  generation: {
    temperature: 0.7,
    maxTokens: 1000
  }
}

LLMResponseIR

The unified response format.

interface LLMResponseIR {
  id: string                       // Required: Response ID
  model: string                    // Required: Model used
  choices: Choice[]                // Required: Generated responses
  usage?: Usage                    // Token usage stats
  metadata?: Record<string, unknown>
  extensions?: Record<string, unknown>
  raw?: unknown
}

Example:

const response: LLMResponseIR = {
  id: 'resp_123',
  model: 'gpt-4',
  choices: [{
    index: 0,
    message: {
      role: 'assistant',
      content: 'The weather in Tokyo is sunny, 22°C.'
    },
    finishReason: 'stop'
  }],
  usage: {
    promptTokens: 20,
    completionTokens: 15,
    totalTokens: 35
  }
}
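
Choice and Usage are used throughout but not defined in this section. Inferred from the examples, their shapes are approximately:

// Inferred from the examples in this reference; not authoritative definitions
interface Choice {
  index: number                    // Position in the choices array
  message: Message                 // The generated message
  finishReason?: FinishReason      // Why generation stopped (see LLMStreamEvent below)
}

interface Usage {
  promptTokens: number
  completionTokens: number
  totalTokens: number
}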

Message

Represents a single message in the conversation.

interface Message {
  role: Role                       // Required: Message role
  content: MessageContent          // Required: Message content
  name?: string                    // Sender name (for tool messages)
  toolCallId?: string              // Tool call ID (for tool responses)
  toolCalls?: ToolCall[]           // Tool calls (for assistant)
  reasoningContent?: string        // Reasoning/thinking content (DeepSeek, Qwen, Anthropic)
}

type Role = 'system' | 'user' | 'assistant' | 'tool'
type MessageContent = string | ContentPart[]
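
ToolCall is referenced above but not defined on this page; inferred from the examples that follow, its shape is approximately:

// Inferred from the examples below; not an authoritative definition
interface ToolCall {
  id: string                       // Matches toolCallId on the corresponding tool result
  type: 'function'
  function: {
    name: string
    arguments: string              // JSON-encoded argument string
  }
}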

Examples:

// Simple text message
{ role: 'user', content: 'Hello!' }

// Multimodal message
{
  role: 'user',
  content: [
    { type: 'text', text: 'What is in this image?' },
    {
      type: 'image',
      source: { type: 'url', url: 'https://example.com/image.jpg' }
    }
  ]
}

// Assistant with tool calls
{
  role: 'assistant',
  content: 'Let me check the weather.',
  toolCalls: [{
    id: 'call_123',
    type: 'function',
    function: {
      name: 'get_weather',
      arguments: '{"location":"Tokyo"}'
    }
  }]
}

// Tool result
{
  role: 'tool',
  content: '{"temperature":22,"condition":"sunny"}',
  toolCallId: 'call_123',
  name: 'get_weather'
}

// Assistant with reasoning content
{
  role: 'assistant',
  content: 'The answer is 42.',
  reasoningContent: 'Let me think about this step by step...'
}

Tool

Function/tool definition.

interface Tool {
  type: 'function'
  function: FunctionDefinition
}

interface FunctionDefinition {
  name: string                     // Required: Function name
  description?: string             // Function description
  parameters?: JSONSchema          // Parameter schema
  strict?: boolean                 // OpenAI structured outputs
}

Example:

const tool: Tool = {
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Get current weather for a location',
    parameters: {
      type: 'object',
      properties: {
        location: {
          type: 'string',
          description: 'City name'
        },
        unit: {
          type: 'string',
          enum: ['celsius', 'fahrenheit'],
          description: 'Temperature unit'
        }
      },
      required: ['location']
    }
  }
}

ToolChoice

Controls how the model uses tools.

type ToolChoice =
  | 'auto'      // Let model decide
  | 'none'      // Don't use tools
  | 'required'  // Must use a tool
  | {           // Use specific tool
      type: 'function'
      function: { name: string }
    }
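
For example, letting the model decide versus forcing a specific tool:

// Let the model decide whether and which tool to call
const auto: ToolChoice = 'auto'

// Force a call to get_weather specifically
const forced: ToolChoice = {
  type: 'function',
  function: { name: 'get_weather' }
}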

LLMStreamEvent

A single event emitted during a streaming response.

interface LLMStreamEvent {
  type: StreamEventType
  id?: string
  model?: string
  content?: ContentDelta
  reasoning?: ReasoningDelta        // Reasoning/thinking content delta
  toolCall?: ToolCallDelta
  finishReason?: FinishReason
  message?: Message
  usage?: Usage                     // Usage statistics (for end event)
  error?: { message: string; code?: string }
  raw?: unknown
}

type StreamEventType =
  | 'start'      // Stream started
  | 'content'    // Content delta
  | 'reasoning'  // Reasoning/thinking content delta
  | 'tool_call'  // Tool call delta
  | 'end'        // Stream ended
  | 'error'      // Error occurred

interface ReasoningDelta {
  type: 'reasoning'
  delta: string
  index?: number
}
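
ContentDelta and FinishReason are referenced above but not defined in this section; judging from ReasoningDelta and the example flows below, plausible shapes are:

// Inferred shapes; not authoritative definitions
interface ContentDelta {
  type: 'content'
  delta: string
  index?: number
}

// Only 'stop' and 'tool_calls' appear in this reference; other values likely exist
type FinishReason = 'stop' | 'tool_calls'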

Example Flow:

// 1. Start event
{ type: 'start', id: 'stream_123', model: 'gpt-4' }

// 2. Content deltas
{ type: 'content', content: { type: 'content', delta: 'Hello' } }
{ type: 'content', content: { type: 'content', delta: ' world' } }
{ type: 'content', content: { type: 'content', delta: '!' } }

// 3. End event
{
  type: 'end',
  finishReason: 'stop',
  message: {
    role: 'assistant',
    content: 'Hello world!'
  }
}

Flow with Reasoning (DeepSeek, Qwen, Anthropic):

// 1. Start event
{ type: 'start', id: 'stream_456', model: 'deepseek-reasoner' }

// 2. Reasoning content deltas
{ type: 'reasoning', reasoning: { type: 'reasoning', delta: 'Let me analyze this problem...' } }
{ type: 'reasoning', reasoning: { type: 'reasoning', delta: ' First, I need to consider...' } }

// 3. Content deltas
{ type: 'content', content: { type: 'content', delta: 'Based on my analysis, ' } }
{ type: 'content', content: { type: 'content', delta: 'the answer is 42.' } }

// 4. End event
{
  type: 'end',
  finishReason: 'stop',
  message: {
    role: 'assistant',
    content: 'Based on my analysis, the answer is 42.',
    reasoningContent: 'Let me analyze this problem... First, I need to consider...'
  }
}

LLMErrorIR

Unified error format.

interface LLMErrorIR {
  type: ErrorType
  message: string
  code?: string
  status?: number
  retryable?: boolean
  details?: Record<string, unknown>
  raw?: unknown
}

type ErrorType =
  | 'network'        // Network error
  | 'api'            // API error
  | 'validation'     // Validation error
  | 'rate_limit'     // Rate limit error
  | 'authentication' // Authentication error
  | 'permission'     // Permission error
  | 'not_found'      // Resource not found
  | 'server'         // Server error
  | 'unknown'        // Unknown error

Example:

{
  type: 'rate_limit',
  message: 'Rate limit exceeded',
  code: 'rate_limit_exceeded',
  status: 429,
  retryable: true,
  details: {
    retryAfter: 60
  }
}
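
A minimal retry-decision sketch built on these fields (the 3-attempt cap and exponential backoff are illustrative, not part of the API):

// Returns a delay in milliseconds, or null if the request should not be retried
function retryDelayMs(error: LLMErrorIR, attempt: number): number | null {
  if (!error.retryable || attempt >= 3) return null
  const retryAfter = error.details?.retryAfter        // e.g. seconds from a rate-limit response
  if (typeof retryAfter === 'number') return retryAfter * 1000
  return 2 ** attempt * 1000                          // 1s, 2s, 4s, ...
}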

Message Content Types

Messages can contain different types of content for multimodal interactions.

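The ContentPart union itself is not spelled out in this reference; from MessageContent and the types below, it is presumably:

// Presumed union; only text and image parts appear in this reference
type ContentPart = TextContent | ImageContent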

TextContent

interface TextContent {
  type: 'text'
  text: string
}

Example:

{
  type: 'text',
  text: 'What is in this image?'
}

ImageContent

interface ImageContent {
  type: 'image'
  source: ImageSource
}

type ImageSource =
  | { type: 'url'; url: string }
  | { type: 'base64'; mediaType: string; data: string }

Examples:

// URL image
{
  type: 'image',
  source: {
    type: 'url',
    url: 'https://example.com/image.jpg'
  }
}

// Base64 image
{
  type: 'image',
  source: {
    type: 'base64',
    mediaType: 'image/jpeg',
    data: '/9j/4AAQSkZJRg...'
  }
}

Note on Tool Calls: Tool calls are represented using the toolCalls field on the Message object (OpenAI-style), not as ContentPart types. See the Tool Calling Flow section for details.

Generation Parameters

Control how the model generates responses.

interface GenerationConfig {
  temperature?: number          // 0-2: Randomness
  topP?: number                // 0-1: Nucleus sampling
  topK?: number                // >0: Top-k sampling
  maxTokens?: number           // >0: Max tokens to generate
  stopSequences?: string[]     // Stop at these sequences
  presencePenalty?: number     // -2 to 2: Penalize presence
  frequencyPenalty?: number    // -2 to 2: Penalize frequency
  n?: number                   // >0: Number of completions
  seed?: number                // Deterministic generation
  responseFormat?: ResponseFormat  // Response format configuration
  thinking?: ThinkingConfig    // Thinking/reasoning configuration
  enableSearch?: boolean       // Enable web search (Qwen)
  logprobs?: boolean           // Log probabilities
  topLogprobs?: number         // Number of top log probabilities
}

interface ResponseFormat {
  type: 'text' | 'json_object' | 'json_schema'
  jsonSchema?: {
    name: string
    description?: string
    schema: Record<string, unknown>
    strict?: boolean
  }
}

interface ThinkingConfig {
  enabled: boolean
  budgetTokens?: number        // Budget tokens for thinking (Anthropic)
}

Temperature

Controls randomness in generation (0-2, typically 0-1).

generation: {
  temperature: 0.7  // Balanced creativity
}

Guidelines:

  • 0.0-0.3: Focused, deterministic (good for factual tasks)
  • 0.4-0.7: Balanced (good for general use)
  • 0.8-1.0: Creative (good for brainstorming)
  • 1.0+: Very creative (experimental)

Sampling Methods

generation: {
  topP: 0.9,    // Nucleus sampling
  topK: 40      // Top-k sampling
}

Top-P (Nucleus Sampling):

  • Considers tokens with cumulative probability ≤ P
  • 0.9 is a good default
  • Alternative to temperature; common guidance is to adjust one or the other, not both

Top-K:

  • Considers only top K tokens
  • 40-50 is typical
  • Can be combined with top-P

Token Limits

generation: {
  maxTokens: 1000,
  stopSequences: ['\n\n', 'END', '---']
}

Max Tokens:

  • Maximum tokens to generate
  • Does not include prompt tokens
  • Model-specific limits apply

Stop Sequences:

  • Generation stops when any sequence is encountered
  • Useful for structured output

Penalties

generation: {
  presencePenalty: 0.1,   // Encourage new topics
  frequencyPenalty: 0.1   // Reduce repetition
}

Presence Penalty (-2 to 2):

  • Positive: Encourage new tokens
  • Negative: Encourage existing tokens
  • Good for topic diversity

Frequency Penalty (-2 to 2):

  • Positive: Reduce repetition
  • Negative: Encourage repetition
  • Good for reducing redundancy

Response Format

Configure structured output format.

generation: {
  responseFormat: {
    type: 'json_object'  // or 'json_schema', 'text'
  }
}

Format Types:

  • 'text': Plain text response (default)
  • 'json_object': Valid JSON object
  • 'json_schema': Structured output with schema validation

JSON Schema Example:

generation: {
  responseFormat: {
    type: 'json_schema',
    jsonSchema: {
      name: 'weather_response',
      description: 'Weather information',
      schema: {
        type: 'object',
        properties: {
          temperature: { type: 'number' },
          condition: { type: 'string' }
        },
        required: ['temperature', 'condition']
      },
      strict: true  // OpenAI structured outputs
    }
  }
}

Thinking/Reasoning Mode

Enable thinking mode for reasoning models.

generation: {
  thinking: {
    enabled: true,
    budgetTokens: 1000  // Optional: for Anthropic
  }
}

Supported Providers:

  • DeepSeek: deepseek-reasoner model
  • Qwen: QwQ model
  • Anthropic: Extended thinking mode (with budget)

How it works:

  • Model generates internal reasoning steps
  • Reasoning content available in reasoningContent field
  • Final answer in content field

Advanced Features

generation: {
  // Web search (Qwen specific)
  enableSearch: true,

  // Log probabilities
  logprobs: true,
  topLogprobs: 5  // Top 5 alternatives per token
}

enableSearch (Qwen only):

  • Enable real-time web search
  • Model can access current information
  • Useful for fact-checking and recent events

logprobs / topLogprobs:

  • Get probability distribution over tokens
  • Useful for uncertainty estimation
  • Available in response logprobs field

Tool Calling Flow

Unified Tool Call Format: IR uses OpenAI-style tool calling as the unified format. When converting from Anthropic format, the adapter automatically converts tool_use content blocks to toolCalls and tool_result content blocks to tool role messages.
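
For illustration, here is a sketch of that mapping (the Anthropic side follows Anthropic's public Messages API; the exact adapter output is an approximation):

// An Anthropic tool_use content block...
const anthropicBlock = {
  type: 'tool_use',
  id: 'toolu_123',
  name: 'get_weather',
  input: { location: 'Tokyo' }
}

// ...is converted to an OpenAI-style tool call on an IR assistant message
const irToolCalls = [{
  id: 'toolu_123',
  type: 'function',
  function: {
    name: 'get_weather',
    arguments: JSON.stringify(anthropicBlock.input)   // input object becomes a JSON string
  }
}]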

Tool Calling in IR

// 1. User message
{
  role: 'user',
  content: 'What is the weather in Tokyo?'
}

// 2. Assistant with tool call
{
  role: 'assistant',
  content: 'Let me check the weather for you.',
  toolCalls: [{
    id: 'call_abc123',
    type: 'function',
    function: {
      name: 'get_weather',
      arguments: '{"location":"Tokyo"}'
    }
  }]
}

// 3. Tool result
{
  role: 'tool',
  content: '{"temperature":22,"condition":"sunny"}',
  toolCallId: 'call_abc123',
  name: 'get_weather'
}

// 4. Final assistant response
{
  role: 'assistant',
  content: 'The weather in Tokyo is sunny with a temperature of 22°C.'
}

Complete Examples

Basic chat:

const request: LLMRequestIR = {
  messages: [
    {
      role: 'system',
      content: 'You are a helpful assistant.'
    },
    {
      role: 'user',
      content: 'Explain quantum computing in simple terms.'
    }
  ],
  model: 'gpt-4',
  generation: {
    temperature: 0.7,
    maxTokens: 500
  }
}

const response: LLMResponseIR = {
  id: 'resp_123',
  model: 'gpt-4',
  choices: [{
    index: 0,
    message: {
      role: 'assistant',
      content: 'Quantum computing uses quantum mechanics...'
    },
    finishReason: 'stop'
  }],
  usage: {
    promptTokens: 25,
    completionTokens: 150,
    totalTokens: 175
  }
}

Tool calling:

const request: LLMRequestIR = {
  messages: [
    {
      role: 'user',
      content: 'What is the weather in Tokyo and Paris?'
    }
  ],
  model: 'gpt-4',
  tools: [{
    type: 'function',
    function: {
      name: 'get_weather',
      description: 'Get weather for a location',
      parameters: {
        type: 'object',
        properties: {
          location: { type: 'string' }
        },
        required: ['location']
      }
    }
  }],
  toolChoice: 'auto'
}

const response: LLMResponseIR = {
  id: 'resp_456',
  model: 'gpt-4',
  choices: [{
    index: 0,
    message: {
      role: 'assistant',
      content: '',               // empty string: no text alongside the tool calls
      toolCalls: [
        {
          id: 'call_1',
          type: 'function',
          function: {
            name: 'get_weather',
            arguments: '{"location":"Tokyo"}'
          }
        },
        {
          id: 'call_2',
          type: 'function',
          function: {
            name: 'get_weather',
            arguments: '{"location":"Paris"}'
          }
        }
      ]
    },
    finishReason: 'tool_calls'
  }]
}

Multimodal:

const request: LLMRequestIR = {
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'What objects are in this image?'
        },
        {
          type: 'image',
          source: {
            type: 'url',
            url: 'https://example.com/image.jpg'
          }
        }
      ]
    }
  ],
  model: 'gpt-4-vision'
}

const response: LLMResponseIR = {
  id: 'resp_789',
  model: 'gpt-4-vision',
  choices: [{
    index: 0,
    message: {
      role: 'assistant',
      content: 'I can see a cat, a laptop, and a coffee mug in the image.'
    },
    finishReason: 'stop'
  }]
}

Streaming:

const request: LLMRequestIR = {
  messages: [
    { role: 'user', content: 'Tell me a short story.' }
  ],
  model: 'gpt-4',
  stream: true
}

// Stream events:
const events: LLMStreamEvent[] = [
  {
    type: 'start',
    id: 'stream_123',
    model: 'gpt-4'
  },
  {
    type: 'content',
    content: { type: 'content', delta: 'Once' }
  },
  {
    type: 'content',
    content: { type: 'content', delta: ' upon' }
  },
  {
    type: 'content',
    content: { type: 'content', delta: ' a' }
  },
  {
    type: 'content',
    content: { type: 'content', delta: ' time' }
  },
  // ... more content deltas
  {
    type: 'end',
    finishReason: 'stop',
    message: {
      role: 'assistant',
      content: 'Once upon a time...'
    }
  }
]
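
A consumer loop over these events might look like the following sketch; how the AsyncIterable of LLMStreamEvent is obtained is adapter-specific and assumed here:

async function collectText(stream: AsyncIterable<LLMStreamEvent>): Promise<string> {
  let text = ''
  for await (const event of stream) {
    switch (event.type) {
      case 'content':
        text += event.content?.delta ?? ''            // Accumulate content deltas
        break
      case 'error':
        throw new Error(event.error?.message ?? 'stream error')
      case 'end':
        // Prefer the complete message when the adapter provides one
        if (typeof event.message?.content === 'string') return event.message.content
        return text
    }
  }
  return text
}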
