Chat Completions - Perceptron Docs

cURL

# Sample Chat Completions request: POSTs the JSON body below to the endpoint
# with bearer-token authentication. Replace <token> and the <string>
# placeholders with real values before running.
curl --request POST \
  --url https://api.perceptron.inc/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "messages": [
    {
      "content": "<string>",
      "role": "system"
    }
  ],
  "model": "<string>",
  "frequency_penalty": 0,
  "max_completion_tokens": 1,
  "presence_penalty": 0,
  "regex": "<string>",
  "response_format": {
    "type": "text"
  },
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "temperature": 1,
  "top_k": 1,
  "top_p": 0,
  "vision_config": {
    "enable_thinking": true,
    "internal_tools": {
      "focus": true
    }
  }
}
'

import requests

url = "https://api.perceptron.inc/v1/chat/completions"

# Request body for the Chat Completions endpoint. The "<string>" values are
# placeholders to replace with a real model name and message content.
payload = {
    "messages": [
        {
            "content": "<string>",
            "role": "system"
        }
    ],
    "model": "<string>",
    "frequency_penalty": 0,
    "max_completion_tokens": 1,
    "presence_penalty": 0,
    "regex": "<string>",
    "response_format": { "type": "text" },
    "stream": False,
    "stream_options": { "include_usage": True },
    "temperature": 1,
    "top_k": 1,
    "top_p": 0,
    "vision_config": {
        "enable_thinking": True,
        "internal_tools": { "focus": True }
    }
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is passed, so a stalled connection
# would block this script forever. 30 seconds matches the PHP sample.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw body; error responses are JSON as well, so they stay readable.
print(response.text)

// Request body for the Chat Completions endpoint, kept as a plain object so
// it can be edited before it is serialized.
const payload = {
  messages: [{content: '<string>', role: 'system'}],
  model: '<string>',
  frequency_penalty: 0,
  max_completion_tokens: 1,
  presence_penalty: 0,
  regex: '<string>',
  response_format: {type: 'text'},
  stream: false,
  stream_options: {include_usage: true},
  temperature: 1,
  top_k: 1,
  top_p: 0,
  vision_config: {enable_thinking: true, internal_tools: {focus: true}}
};

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Sends the request and logs the parsed JSON reply; any failure along the way
// (network error, unparsable body) is logged to the error console instead.
async function sendChatCompletion() {
  try {
    const reply = await fetch('https://api.perceptron.inc/v1/chat/completions', options);
    const parsed = await reply.json();
    console.log(parsed);
  } catch (failure) {
    console.error(failure);
  }
}

sendChatCompletion();

<?php

// Request body for the Chat Completions endpoint; encoded to JSON below.
$payload = [
  'messages' => [
    [
      'content' => '<string>',
      'role' => 'system'
    ]
  ],
  'model' => '<string>',
  'frequency_penalty' => 0,
  'max_completion_tokens' => 1,
  'presence_penalty' => 0,
  'regex' => '<string>',
  'response_format' => [
    'type' => 'text'
  ],
  'stream' => false,
  'stream_options' => [
    'include_usage' => true
  ],
  'temperature' => 1,
  'top_k' => 1,
  'top_p' => 0,
  'vision_config' => [
    'enable_thinking' => true,
    'internal_tools' => [
      'focus' => true
    ]
  ]
];

$headers = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json"
];

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.perceptron.inc/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error when one was reported, otherwise the raw response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a sample Chat Completions request and prints the raw response
// body. Each step's error is checked and reported before returning early, so a
// failed request never reaches the response handling with a nil
// *http.Response.
func main() {
	url := "https://api.perceptron.inc/v1/chat/completions"

	// Raw string literal: the JSON is sent exactly as written. Replace the
	// <string> placeholders with real values.
	payload := strings.NewReader(`{
  "messages": [
    {
      "content": "<string>",
      "role": "system"
    }
  ],
  "model": "<string>",
  "frequency_penalty": 0,
  "max_completion_tokens": 1,
  "presence_penalty": 0,
  "regex": "<string>",
  "response_format": {
    "type": "text"
  },
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "temperature": 1,
  "top_k": 1,
  "top_p": 0,
  "vision_config": {
    "enable_thinking": true,
    "internal_tools": {
      "focus": true
    }
  }
}`)

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Set("Authorization", "Bearer <token>")
	req.Header.Set("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	// Error responses carry a JSON body too, so report the status and still
	// print whatever the server returned.
	if res.StatusCode >= http.StatusBadRequest {
		fmt.Println("unexpected status:", res.Status)
	}
	fmt.Println(string(body))
}

// Sample Chat Completions request built with Unirest's fluent API: POSTs the
// JSON body with bearer-token authentication and reads the response as a String.
// Replace <token> and the <string> placeholders with real values.
HttpResponse<String> response = Unirest.post("https://api.perceptron.inc/v1/chat/completions")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.perceptron.inc/v1/chat/completions")

# JSON request body. The squiggly heredoc strips the two-space margin, and
# chomp drops the newline the heredoc appends after the closing brace.
json_body = <<~'JSON'.chomp
  {
    "messages": [
      {
        "content": "<string>",
        "role": "system"
      }
    ],
    "model": "<string>",
    "frequency_penalty": 0,
    "max_completion_tokens": 1,
    "presence_penalty": 0,
    "regex": "<string>",
    "response_format": {
      "type": "text"
    },
    "stream": false,
    "stream_options": {
      "include_usage": true
    },
    "temperature": 1,
    "top_k": 1,
    "top_p": 0,
    "vision_config": {
      "enable_thinking": true,
      "internal_tools": {
        "focus": true
      }
    }
  }
JSON

# Build the POST with its headers supplied up front, then attach the body.
request = Net::HTTP::Post.new(
  url,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
request.body = json_body

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

response = http.request(request)
puts response.read_body

{
  "choices": [
    {
      "index": 1,
      "message": {
        "content": "<string>",
        "reasoning_content": "<string>"
      }
    }
  ],
  "created": 1,
  "id": "<string>",
  "model": "<string>",
  "object": "<string>",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 1,
    "total_tokens": 1
  }
}

{
  "error": {
    "code": null,
    "message": "Model 'test' does not support video input",
    "param": null,
    "type": "invalid_request_error"
  }
}

{
  "error": {
    "code": null,
    "message": "Invalid API key",
    "param": null,
    "type": "authentication_error"
  }
}

{
  "error": {
    "code": "rate_limit_exceeded",
    "message": "Organization rate limit exceeded (300 requests/minute). Please retry after 30 seconds.",
    "param": null,
    "type": "rate_limit_error"
  }
}

{
  "error": {
    "code": null,
    "message": "The server had an error while processing your request.",
    "param": null,
    "type": "server_error"
  }
}

POST

chat

completions

cURL

# Sample Chat Completions request: POSTs the JSON body below to the endpoint
# with bearer-token authentication. Replace <token> and the <string>
# placeholders with real values before running.
curl --request POST \
  --url https://api.perceptron.inc/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "messages": [
    {
      "content": "<string>",
      "role": "system"
    }
  ],
  "model": "<string>",
  "frequency_penalty": 0,
  "max_completion_tokens": 1,
  "presence_penalty": 0,
  "regex": "<string>",
  "response_format": {
    "type": "text"
  },
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "temperature": 1,
  "top_k": 1,
  "top_p": 0,
  "vision_config": {
    "enable_thinking": true,
    "internal_tools": {
      "focus": true
    }
  }
}
'

import requests

url = "https://api.perceptron.inc/v1/chat/completions"

# Request body for the Chat Completions endpoint. The "<string>" values are
# placeholders to replace with a real model name and message content.
payload = {
    "messages": [
        {
            "content": "<string>",
            "role": "system"
        }
    ],
    "model": "<string>",
    "frequency_penalty": 0,
    "max_completion_tokens": 1,
    "presence_penalty": 0,
    "regex": "<string>",
    "response_format": { "type": "text" },
    "stream": False,
    "stream_options": { "include_usage": True },
    "temperature": 1,
    "top_k": 1,
    "top_p": 0,
    "vision_config": {
        "enable_thinking": True,
        "internal_tools": { "focus": True }
    }
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

# requests applies no timeout unless one is passed, so a stalled connection
# would block this script forever. 30 seconds matches the PHP sample.
response = requests.post(url, json=payload, headers=headers, timeout=30)

# Print the raw body; error responses are JSON as well, so they stay readable.
print(response.text)

// Request body for the Chat Completions endpoint, kept as a plain object so
// it can be edited before it is serialized.
const payload = {
  messages: [{content: '<string>', role: 'system'}],
  model: '<string>',
  frequency_penalty: 0,
  max_completion_tokens: 1,
  presence_penalty: 0,
  regex: '<string>',
  response_format: {type: 'text'},
  stream: false,
  stream_options: {include_usage: true},
  temperature: 1,
  top_k: 1,
  top_p: 0,
  vision_config: {enable_thinking: true, internal_tools: {focus: true}}
};

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify(payload)
};

// Sends the request and logs the parsed JSON reply; any failure along the way
// (network error, unparsable body) is logged to the error console instead.
async function sendChatCompletion() {
  try {
    const reply = await fetch('https://api.perceptron.inc/v1/chat/completions', options);
    const parsed = await reply.json();
    console.log(parsed);
  } catch (failure) {
    console.error(failure);
  }
}

sendChatCompletion();

<?php

// Request body for the Chat Completions endpoint; encoded to JSON below.
$payload = [
  'messages' => [
    [
      'content' => '<string>',
      'role' => 'system'
    ]
  ],
  'model' => '<string>',
  'frequency_penalty' => 0,
  'max_completion_tokens' => 1,
  'presence_penalty' => 0,
  'regex' => '<string>',
  'response_format' => [
    'type' => 'text'
  ],
  'stream' => false,
  'stream_options' => [
    'include_usage' => true
  ],
  'temperature' => 1,
  'top_k' => 1,
  'top_p' => 0,
  'vision_config' => [
    'enable_thinking' => true,
    'internal_tools' => [
      'focus' => true
    ]
  ]
];

$headers = [
  "Authorization: Bearer <token>",
  "Content-Type: application/json"
];

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.perceptron.inc/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode($payload),
  CURLOPT_HTTPHEADER => $headers,
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

// Print the cURL error when one was reported, otherwise the raw response body.
echo $err ? ("cURL Error #:" . $err) : $response;

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a sample Chat Completions request and prints the raw response
// body. Each step's error is checked and reported before returning early, so a
// failed request never reaches the response handling with a nil
// *http.Response.
func main() {
	url := "https://api.perceptron.inc/v1/chat/completions"

	// Raw string literal: the JSON is sent exactly as written. Replace the
	// <string> placeholders with real values.
	payload := strings.NewReader(`{
  "messages": [
    {
      "content": "<string>",
      "role": "system"
    }
  ],
  "model": "<string>",
  "frequency_penalty": 0,
  "max_completion_tokens": 1,
  "presence_penalty": 0,
  "regex": "<string>",
  "response_format": {
    "type": "text"
  },
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "temperature": 1,
  "top_k": 1,
  "top_p": 0,
  "vision_config": {
    "enable_thinking": true,
    "internal_tools": {
      "focus": true
    }
  }
}`)

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Set("Authorization", "Bearer <token>")
	req.Header.Set("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response:", err)
		return
	}

	// Error responses carry a JSON body too, so report the status and still
	// print whatever the server returned.
	if res.StatusCode >= http.StatusBadRequest {
		fmt.Println("unexpected status:", res.Status)
	}
	fmt.Println(string(body))
}

// Sample Chat Completions request built with Unirest's fluent API: POSTs the
// JSON body with bearer-token authentication and reads the response as a String.
// Replace <token> and the <string> placeholders with real values.
HttpResponse<String> response = Unirest.post("https://api.perceptron.inc/v1/chat/completions")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.perceptron.inc/v1/chat/completions")

# JSON request body. The squiggly heredoc strips the two-space margin, and
# chomp drops the newline the heredoc appends after the closing brace.
json_body = <<~'JSON'.chomp
  {
    "messages": [
      {
        "content": "<string>",
        "role": "system"
      }
    ],
    "model": "<string>",
    "frequency_penalty": 0,
    "max_completion_tokens": 1,
    "presence_penalty": 0,
    "regex": "<string>",
    "response_format": {
      "type": "text"
    },
    "stream": false,
    "stream_options": {
      "include_usage": true
    },
    "temperature": 1,
    "top_k": 1,
    "top_p": 0,
    "vision_config": {
      "enable_thinking": true,
      "internal_tools": {
        "focus": true
      }
    }
  }
JSON

# Build the POST with its headers supplied up front, then attach the body.
request = Net::HTTP::Post.new(
  url,
  "Authorization" => 'Bearer <token>',
  "Content-Type" => 'application/json'
)
request.body = json_body

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

response = http.request(request)
puts response.read_body

{
  "choices": [
    {
      "index": 1,
      "message": {
        "content": "<string>",
        "reasoning_content": "<string>"
      }
    }
  ],
  "created": 1,
  "id": "<string>",
  "model": "<string>",
  "object": "<string>",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 1,
    "total_tokens": 1
  }
}

{
  "error": {
    "code": null,
    "message": "Model 'test' does not support video input",
    "param": null,
    "type": "invalid_request_error"
  }
}

{
  "error": {
    "code": null,
    "message": "Invalid API key",
    "param": null,
    "type": "authentication_error"
  }
}

{
  "error": {
    "code": "rate_limit_exceeded",
    "message": "Organization rate limit exceeded (300 requests/minute). Please retry after 30 seconds.",
    "param": null,
    "type": "rate_limit_error"
  }
}

{
  "error": {
    "code": null,
    "message": "The server had an error while processing your request.",
    "param": null,
    "type": "server_error"
  }
}

Overview

The Chat Completions API is fully compatible with OpenAI’s chat completions specification, supporting both text-only and multimodal (vision) requests. Use it to generate responses from Perceptron Mk1. On Perceptron Mk1, thinking and structured grounding are enabled through the typed vision_config field in the request body.

`vision_config`

For perceptron-mk1, pass a top-level vision_config object alongside messages:

Field	Values	Purpose
`annotation_format`	`"point"` / `"box"` / `"polygon"` / `"clip"`	Grounded output format. `clip` is video-only.
`enable_thinking`	`true` / `false`	Chain-of-thought reasoning.
`internal_tools.focus`	`true` / `false`	Enable the focus tool — model can zoom into regions. Image only.

When to `enable_thinking`

On for text Q&A, captioning, OCR, and video clipping (annotation_format: "clip").
Off for spatial detection (annotation_format in "point", "box", "polygon").

Example: Grounded detection

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "perceptron-mk1",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text",
            "text": "Find every worker wearing PPE." }
        ]
      }
    ],
    "vision_config": { "annotation_format": "box" }
  }'

Example: Video clipping

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "perceptron-mk1",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "video_url",
            "video_url": { "url": "<video-url>" } },
          { "type": "text",
            "text": "Clip the moment the worker scans the package." }
        ]
      }
    ],
    "vision_config": { "annotation_format": "clip", "enable_thinking": true }
  }'

Example: Video from pre-decoded frames

If you’ve already sampled frames client-side, pass them inline with a video_frames content part instead of a single video_url. Each frame carries an image_url (an HTTP(S) URL or a base64 data URL) and a timestamp_ms offset from the start of the clip. Provide between 2 and 256 frames, ordered by non-decreasing timestamp_ms. For optimal performance, follow a uniform sampling strategy aligned with the Video Token Counting guide.

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "perceptron-mk1",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "video_frames",
            "video_frames": {
              "frames": [
                { "image_url": { "url": "<frame-url>" }, "timestamp_ms": 0 },
                { "image_url": { "url": "<frame-url>" }, "timestamp_ms": 500 }
              ]
            }
          },
          { "type": "text",
            "text": "What changes between these frames?" }
        ]
      }
    ],
    "vision_config": { "enable_thinking": true }
  }'

Example: Image reasoning with focus

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "perceptron-mk1",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text",
            "text": "What is the serial number on the device in the corner?" }
        ]
      }
    ],
    "vision_config": {
      "enable_thinking": true,
      "internal_tools": { "focus": true }
    }
  }'

Streaming

Set "stream": true to receive Server-Sent Events (SSE). To get token usage, also set stream_options.include_usage: true — when enabled, usage is attached to the final chunk (the one with finish_reason: "stop"), immediately before data: [DONE].

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "perceptron-mk1",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text", "text": "Describe this scene in detail." }
        ]
      }
    ],
    "vision_config": { "enable_thinking": true },
    "stream": true,
    "stream_options": { "include_usage": true }
  }'

Best Practices

Thinking pairs well with text and clipping; not with spatial detection. Turn enable_thinking on for text Q&A, captioning, OCR, and annotation_format: "clip". Turn it off for "point", "box", and "polygon".
Leave temperature unset. The default is 0.0 (deterministic). Only set a non-zero value if you want more varied outputs.
Media formats: HTTP(S) URLs and base64 data URLs are both supported. Supported MIME types: image/png, image/jpeg, image/webp, video/mp4, video/webm.
Inline frames vs. video_url: Send a whole clip with video_url, or — if you’ve already sampled frames — pass them inline with video_frames (2 to 256 frames, ordered by non-decreasing timestamp_ms). Inline frames give you precise control over exactly which frames the model sees; for optimal performance, sample uniformly in line with the Video Token Counting guide.
Token limits: 32K context, 8K output.

Limits

Limit	Value
Requests	300/min
Request body size	20 MB
Media upload	20 GB per 48 hours

For large images, resize client-side before uploading. See the Tokenization guide for optimization tips.

Authorizations

Authorization

string

header

required

Bearer token authentication using your Perceptron API key

Body

application/json

messages

object[]

required

Conversation history listed in order. Supported roles: system, user, assistant.

Author role of the message as defined by the OpenAI Chat Completions spec.

Option 1
Option 2
Option 3

Show child attributes

model

string

required

The model to invoke. Available options: isaac-0.1, isaac-0.2-1b, isaac-0.2-2b-preview, perceptron-mk1.

frequency_penalty

number<float> | null

Positive values discourage the model from repeating previously used tokens.

Required range: -2 <= x <= 2

max_completion_tokens

integer<int32> | null

Maximum number of completion tokens to generate.

Model-specific limits:

Isaac 0.1: The combined total of input tokens and output tokens must not exceed 8192 tokens.

Required range: x >= 0

presence_penalty

number<float> | null

Positive values encourage the model to introduce new concepts.

Required range: -2 <= x <= 2

regex

string | null

Regex pattern for constrained generation.

response_format

object

An object specifying the format that the model must output. Setting to { "type": "json_schema", "json_schema": {...} } enables Structured Outputs which ensures the model will match your supplied JSON schema.

Option 1
Option 2

Show child attributes

stream

boolean | null

default:false

Set to true for SSE streaming. When omitted, the API returns a single JSON response.

stream_options

null | object

Optional streaming flags. Token usage is always reported in the final chunk of a streaming response.

Show child attributes

temperature

number<float> | null

Sampling temperature. Lower values yield deterministic replies; higher values explore more creative outputs.

Model-specific recommendations:

Isaac 0.1: Default and recommended value is 0.0.

Required range: 0 <= x <= 2

top_k

integer<int32> | null

Top-k sampling. The model samples from the top k most likely tokens.

Required range: x >= 0

top_p

number<float> | null

Nucleus sampling probability. The model samples from the smallest token set whose cumulative probability exceeds this threshold.

Required range: x <= 1

vision_config

null | object

Perceptron vision-model controls (thinking, spatial output format, internal-tool toggles). Only supported on Perceptron-owned models.

Show child attributes

Response

Chat completion generated successfully.

Non-streaming response body when stream=false.

choices

object[]

required

Show child attributes

created

integer<int64>

required

Required range: x >= 0

id

string

required

model

string

required

object

string

required

usage

null | object

Token accounting emitted with every completion.

Show child attributes

Detect

​Overview

​vision_config

​When to enable_thinking

​Example: Grounded detection

​Example: Video clipping

​Example: Video from pre-decoded frames

​Example: Image reasoning with focus

​Streaming

​Best Practices

​Limits

Authorizations

Body

Response

Overview

`vision_config`

When to `enable_thinking`

Example: Grounded detection

Example: Video clipping

Example: Video from pre-decoded frames

Example: Image reasoning with focus

Streaming

Best Practices

Limits