Chat Completions - Perceptron Docs

cURL

curl --request POST \
  --url https://api.perceptron.inc/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "messages": [
    {
      "content": "<string>",
      "role": "system"
    }
  ],
  "model": "<string>",
  "frequency_penalty": 0,
  "max_completion_tokens": 1,
  "presence_penalty": 0,
  "regex": "<string>",
  "response_format": {
    "type": "text"
  },
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "temperature": 1,
  "top_k": 1,
  "top_p": 0,
  "vision_config": {
    "enable_thinking": true,
    "internal_tools": {
      "focus": true
    }
  }
}
'

import requests

url = "https://api.perceptron.inc/v1/chat/completions"

payload = {
    "messages": [
        {
            "content": "<string>",
            "role": "system"
        }
    ],
    "model": "<string>",
    "frequency_penalty": 0,
    "max_completion_tokens": 1,
    "presence_penalty": 0,
    "regex": "<string>",
    "response_format": { "type": "text" },
    "stream": False,
    "stream_options": { "include_usage": True },
    "temperature": 1,
    "top_k": 1,
    "top_p": 0,
    "vision_config": {
        "enable_thinking": True,
        "internal_tools": { "focus": True }
    }
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    messages: [{content: '<string>', role: 'system'}],
    model: '<string>',
    frequency_penalty: 0,
    max_completion_tokens: 1,
    presence_penalty: 0,
    regex: '<string>',
    response_format: {type: 'text'},
    stream: false,
    stream_options: {include_usage: true},
    temperature: 1,
    top_k: 1,
    top_p: 0,
    vision_config: {enable_thinking: true, internal_tools: {focus: true}}
  })
};

fetch('https://api.perceptron.inc/v1/chat/completions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.perceptron.inc/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'messages' => [
        [
                'content' => '<string>',
                'role' => 'system'
        ]
    ],
    'model' => '<string>',
    'frequency_penalty' => 0,
    'max_completion_tokens' => 1,
    'presence_penalty' => 0,
    'regex' => '<string>',
    'response_format' => [
        'type' => 'text'
    ],
    'stream' => false,
    'stream_options' => [
        'include_usage' => true
    ],
    'temperature' => 1,
    'top_k' => 1,
    'top_p' => 0,
    'vision_config' => [
        'enable_thinking' => true,
        'internal_tools' => [
                'focus' => true
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample chat-completions request to the Perceptron API and
// prints the raw response body to standard output. Any failure while building
// the request, sending it, or reading the reply is printed and ends the run.
func main() {
	url := "https://api.perceptron.inc/v1/chat/completions"

	payload := strings.NewReader("{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On a transport error the response is normally nil, so deferring
		// res.Body.Close() before this check would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	// Error responses from the API are JSON too, so the body is printed
	// regardless of the HTTP status code.
	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.post("https://api.perceptron.inc/v1/chat/completions")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.perceptron.inc/v1/chat/completions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "choices": [
    {
      "index": 1,
      "message": {
        "content": "<string>",
        "reasoning_content": "<string>"
      }
    }
  ],
  "created": 1,
  "id": "<string>",
  "model": "<string>",
  "object": "<string>",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 1,
    "total_tokens": 1
  }
}

{
  "error": {
    "code": null,
    "message": "Model 'test' does not support video input",
    "param": null,
    "type": "invalid_request_error"
  }
}

{
  "error": {
    "code": null,
    "message": "Invalid API key",
    "param": null,
    "type": "authentication_error"
  }
}

{
  "error": {
    "code": "rate_limit_exceeded",
    "message": "Organization rate limit exceeded (300 requests/minute). Please retry after 30 seconds.",
    "param": null,
    "type": "rate_limit_error"
  }
}

{
  "error": {
    "code": null,
    "message": "The server had an error while processing your request.",
    "param": null,
    "type": "server_error"
  }
}

POST

chat

completions

cURL

curl --request POST \
  --url https://api.perceptron.inc/v1/chat/completions \
  --header 'Authorization: Bearer <token>' \
  --header 'Content-Type: application/json' \
  --data '
{
  "messages": [
    {
      "content": "<string>",
      "role": "system"
    }
  ],
  "model": "<string>",
  "frequency_penalty": 0,
  "max_completion_tokens": 1,
  "presence_penalty": 0,
  "regex": "<string>",
  "response_format": {
    "type": "text"
  },
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "temperature": 1,
  "top_k": 1,
  "top_p": 0,
  "vision_config": {
    "enable_thinking": true,
    "internal_tools": {
      "focus": true
    }
  }
}
'

import requests

url = "https://api.perceptron.inc/v1/chat/completions"

payload = {
    "messages": [
        {
            "content": "<string>",
            "role": "system"
        }
    ],
    "model": "<string>",
    "frequency_penalty": 0,
    "max_completion_tokens": 1,
    "presence_penalty": 0,
    "regex": "<string>",
    "response_format": { "type": "text" },
    "stream": False,
    "stream_options": { "include_usage": True },
    "temperature": 1,
    "top_k": 1,
    "top_p": 0,
    "vision_config": {
        "enable_thinking": True,
        "internal_tools": { "focus": True }
    }
}
headers = {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {Authorization: 'Bearer <token>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    messages: [{content: '<string>', role: 'system'}],
    model: '<string>',
    frequency_penalty: 0,
    max_completion_tokens: 1,
    presence_penalty: 0,
    regex: '<string>',
    response_format: {type: 'text'},
    stream: false,
    stream_options: {include_usage: true},
    temperature: 1,
    top_k: 1,
    top_p: 0,
    vision_config: {enable_thinking: true, internal_tools: {focus: true}}
  })
};

fetch('https://api.perceptron.inc/v1/chat/completions', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.perceptron.inc/v1/chat/completions",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'messages' => [
        [
                'content' => '<string>',
                'role' => 'system'
        ]
    ],
    'model' => '<string>',
    'frequency_penalty' => 0,
    'max_completion_tokens' => 1,
    'presence_penalty' => 0,
    'regex' => '<string>',
    'response_format' => [
        'type' => 'text'
    ],
    'stream' => false,
    'stream_options' => [
        'include_usage' => true
    ],
    'temperature' => 1,
    'top_k' => 1,
    'top_p' => 0,
    'vision_config' => [
        'enable_thinking' => true,
        'internal_tools' => [
                'focus' => true
        ]
    ]
  ]),
  CURLOPT_HTTPHEADER => [
    "Authorization: Bearer <token>",
    "Content-Type: application/json"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main posts a sample chat-completions request to the Perceptron API and
// prints the raw response body to standard output. Any failure while building
// the request, sending it, or reading the reply is printed and ends the run.
func main() {
	url := "https://api.perceptron.inc/v1/chat/completions"

	payload := strings.NewReader("{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println("building request:", err)
		return
	}

	req.Header.Add("Authorization", "Bearer <token>")
	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// On a transport error the response is normally nil, so deferring
		// res.Body.Close() before this check would panic.
		fmt.Println("sending request:", err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println("reading response body:", err)
		return
	}

	// Error responses from the API are JSON too, so the body is printed
	// regardless of the HTTP status code.
	fmt.Println(string(body))
}

HttpResponse<String> response = Unirest.post("https://api.perceptron.inc/v1/chat/completions")
  .header("Authorization", "Bearer <token>")
  .header("Content-Type", "application/json")
  .body("{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.perceptron.inc/v1/chat/completions")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["Authorization"] = 'Bearer <token>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"messages\": [\n    {\n      \"content\": \"<string>\",\n      \"role\": \"system\"\n    }\n  ],\n  \"model\": \"<string>\",\n  \"frequency_penalty\": 0,\n  \"max_completion_tokens\": 1,\n  \"presence_penalty\": 0,\n  \"regex\": \"<string>\",\n  \"response_format\": {\n    \"type\": \"text\"\n  },\n  \"stream\": false,\n  \"stream_options\": {\n    \"include_usage\": true\n  },\n  \"temperature\": 1,\n  \"top_k\": 1,\n  \"top_p\": 0,\n  \"vision_config\": {\n    \"enable_thinking\": true,\n    \"internal_tools\": {\n      \"focus\": true\n    }\n  }\n}"

response = http.request(request)
puts response.read_body

{
  "choices": [
    {
      "index": 1,
      "message": {
        "content": "<string>",
        "reasoning_content": "<string>"
      }
    }
  ],
  "created": 1,
  "id": "<string>",
  "model": "<string>",
  "object": "<string>",
  "usage": {
    "completion_tokens": 1,
    "prompt_tokens": 1,
    "total_tokens": 1
  }
}

{
  "error": {
    "code": null,
    "message": "Model 'test' does not support video input",
    "param": null,
    "type": "invalid_request_error"
  }
}

{
  "error": {
    "code": null,
    "message": "Invalid API key",
    "param": null,
    "type": "authentication_error"
  }
}

{
  "error": {
    "code": "rate_limit_exceeded",
    "message": "Organization rate limit exceeded (300 requests/minute). Please retry after 30 seconds.",
    "param": null,
    "type": "rate_limit_error"
  }
}

{
  "error": {
    "code": null,
    "message": "The server had an error while processing your request.",
    "param": null,
    "type": "server_error"
  }
}

Overview

The Chat Completions API is fully compatible with OpenAI’s chat completions specification, supporting both text-only and multimodal (image) requests. Use it to generate responses from Isaac 0.2. Isaac 0.2 triggers thinking and structured grounding through <hint>...</hint> tags inside a system-role message.

`<hint>` system messages

Place hint values inside a system-role message. Multiple hints can share one <hint> tag, separated by spaces.

Hint	Output
`<hint>BOX</hint>`	Bounding boxes
`<hint>POINT</hint>`	Points / keypoints
`<hint>POLYGON</hint>`	Polygon masks
`<hint>THINK</hint>`	Chain-of-thought reasoning
`<hint>FOCUS</hint>`	Enable internal focus tool

Example: Grounded detection

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "isaac-0.2-2b-preview",
    "messages": [
      { "role": "system", "content": "<hint>BOX</hint>" },
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text",
            "text": "Find every worker wearing PPE." }
        ]
      }
    ]
  }'

Example: Counting with grounding

For counting tasks or multi-step spatial reasoning, combining THINK with BOX (or POINT) is helpful on Isaac 0.2. For pure detection without counting, use the spatial hint alone.

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "isaac-0.2-2b-preview",
    "messages": [
      { "role": "system", "content": "<hint>BOX THINK</hint>" },
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text",
            "text": "Count the safety violations and box each one. Explain your reasoning." }
        ]
      }
    ]
  }'

Example: OCR without hints

For free-form text tasks like OCR, no hint is needed — just send your prompt.

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "isaac-0.2-2b-preview",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text",
            "text": "Extract each produce label along with its listed price." }
        ]
      }
    ]
  }'

Streaming

Set "stream": true to receive Server-Sent Events (SSE). To get token usage, also set stream_options.include_usage: true — when enabled, usage is attached to the final chunk (the one with finish_reason: "stop"), immediately before data: [DONE].

curl https://api.perceptron.inc/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $PERCEPTRON_API_KEY" \
  -d '{
    "model": "isaac-0.2-2b-preview",
    "messages": [
      { "role": "user",
        "content": [
          { "type": "image_url",
            "image_url": { "url": "<image-url>" } },
          { "type": "text", "text": "Describe this scene in detail." }
        ]
      }
    ],
    "stream": true,
    "stream_options": { "include_usage": true }
  }'

Best Practices

Combining THINK with BOX/POINT is helpful for counting. Use the spatial hint alone for pure detection; add THINK when you need step-by-step reasoning alongside the bounding boxes.
Leave temperature unset. The default is 0.0 (deterministic). Only set a non-zero value if you want more varied outputs.
Image format: HTTP(S) URLs and base64 data URLs are both supported. MIME types: image/png, image/jpeg, image/webp.
Token limits: 8K context.

Limits

Limit	Value
Requests	300/min
Request body size	20 MB
Media upload	20 GB per 48 hours

For large images, resize client-side before uploading. See the Tokenization guide for optimization tips.

Authorizations

Authorization

string

header

required

Bearer token authentication using your Perceptron API key

Body

application/json

messages

object[]

required

Conversation history listed in order. Supported roles: system, user, assistant.

Author role of the message as defined by the OpenAI Chat Completions spec.

Option 1
Option 2
Option 3

Show child attributes

model

string

required

The model to invoke. Available options: isaac-0.1, isaac-0.2-1b, isaac-0.2-2b-preview, perceptron-mk1.

frequency_penalty

number<float> | null

Positive values discourage the model from repeating previously used tokens.

Required range: -2 <= x <= 2

max_completion_tokens

integer<int32> | null

Maximum number of completion tokens to generate.

Model-specific limits:

Isaac 0.1: The combined total of input tokens and output tokens must not exceed 8192 tokens.

Required range: x >= 0

presence_penalty

number<float> | null

Positive values encourage the model to introduce new concepts.

Required range: -2 <= x <= 2

regex

string | null

Regex pattern for constrained generation.

response_format

object

An object specifying the format that the model must output. Setting it to { "type": "json_schema", "json_schema": {...} } enables Structured Outputs, which ensures the model's output matches your supplied JSON schema.

Option 1
Option 2

Show child attributes

stream

boolean | null

default:false

Set to true for SSE streaming. When omitted, the API returns a single JSON response.

stream_options

null | object

Optional streaming flags. Token usage is always reported in the final chunk of a streaming response.

Show child attributes

temperature

number<float> | null

Sampling temperature. Lower values yield deterministic replies; higher values explore more creative outputs.

Model-specific recommendations:

Isaac 0.1: Default and recommended value is 0.0.

Required range: 0 <= x <= 2

top_k

integer<int32> | null

Top-k sampling. The model samples from the top k most likely tokens.

Required range: x >= 0

top_p

number<float> | null

Nucleus sampling probability. The model samples from the smallest token set whose cumulative probability exceeds this threshold.

Required range: x <= 1

vision_config

null | object

Perceptron vision-model controls (thinking, spatial output format, internal-tool toggles). Only supported on Perceptron-owned models.

Show child attributes

Response

Chat completion generated successfully.

Non-streaming response body when stream=false.

choices

object[]

required

Show child attributes

created

integer<int64>

required

Required range: x >= 0

id

string

required

model

string

required

object

string

required

usage

null | object

Token accounting emitted with every completion.

Show child attributes

Detect

Overview

<hint> system messages

Example: Grounded detection

Example: Counting with grounding

Example: OCR without hints

Streaming

Best Practices

Limits

Authorizations

Body

Response

Overview

`<hint>` system messages

Example: Grounded detection

Example: Counting with grounding

Example: OCR without hints

Streaming

Best Practices

Limits