In-context learning (Image)

Single-image — Run in Colab

Step through this example interactively

Multi-image — Run in Colab

Step through this example interactively

In-context learning (ICL) lets you attach annotated reference frames so Isaac 0.1 can recognize the annotated concept in a new target image. Use a single exemplar for quick specialization, or several exemplars to disambiguate between classes.

Single-image ICL

Find a mixer in a kitchen using a single annotated exemplar.

from pathlib import Path
from urllib.request import urlretrieve

from perceptron import annotate_image, bbox, configure, detect, image
from PIL import Image as PILImage, ImageDraw

# Select the Perceptron provider and the Isaac 0.1 model for later calls.
# "YOUR_API_KEY" is a placeholder; substitute a real key before running.
configure(provider="perceptron", model="isaac-0.1", api_key="YOUR_API_KEY")

# Remote assets: the frame used to build the exemplar and the scene to search.
EXEMPLAR_URL = "https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/single/cake_mixer_example.webp"
TARGET_URL = "https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/single/find_kitchen_item.webp"

# Local filenames for the downloaded assets and for the rendered output.
EXEMPLAR_PATH = Path("cake_mixer_example.webp")
TARGET_PATH = Path("find_kitchen_item.webp")
ANNOTATED_PATH = Path("find_kitchen_item_annotated.png")

# Download each asset once; files already present on disk are left untouched.
downloads = {EXEMPLAR_PATH: EXEMPLAR_URL, TARGET_PATH: TARGET_URL}
for local_file, remote_url in downloads.items():
    if local_file.exists():
        continue
    urlretrieve(remote_url, local_file)

# Bootstrap the exemplar via detection + annotation: detect the mixer in the
# exemplar frame, then reuse the first returned box as the annotation.
bootstrap = detect(
    image(str(EXEMPLAR_PATH)),
    classes=["mixer"],
    expects="box",
)

# `boxes` can be falsy or empty when nothing was detected. Fail with an
# actionable message rather than an opaque IndexError from `[][0]`.
candidate_boxes = bootstrap.boxes or []
if not candidate_boxes:
    raise RuntimeError(
        f"No 'mixer' box detected in {EXEMPLAR_PATH}; "
        "annotate the exemplar by hand with bbox() instead."
    )
first_box = candidate_boxes[0]

# Wrap the detected corners in a bbox() labelled "mixer" and attach it to the
# exemplar image. Coordinates are truncated to integers before being passed on.
exemplar = annotate_image(
    str(EXEMPLAR_PATH),
    {
        "mixer": [
            bbox(
                int(first_box.top_left.x),
                int(first_box.top_left.y),
                int(first_box.bottom_right.x),
                int(first_box.bottom_right.y),
                mention="mixer",
            )
        ]
    },
)

# Run detection on the target scene, supplying the annotated exemplar as the
# in-context example.
target_frame = image(str(TARGET_PATH))
result = detect(target_frame, classes=["mixer"], expects="box", examples=[exemplar])
print(result.text)

# Draw the detections onto an RGB copy of the target frame.
img = PILImage.open(TARGET_PATH).convert("RGB")
draw = ImageDraw.Draw(img)

# Convert the boxes to pixel coordinates for this image size; a falsy result
# is treated as "no detections".
pixel_boxes = result.boxes_to_pixels(width=img.width, height=img.height) or []

for detection in pixel_boxes:
    left = int(detection.top_left.x)
    top = int(detection.top_left.y)
    right = int(detection.bottom_right.x)
    bottom = int(detection.bottom_right.y)
    draw.rectangle([left, top, right, bottom], outline="magenta", width=3)

    # Put the label 18 px above the box, clamped so it never leaves the image.
    label_origin = (left, max(top - 18, 0))
    draw.text(label_origin, detection.mention or "mixer", fill="magenta")

img.save(ANNOTATED_PATH)
print(f"Saved annotated kitchen frame to {ANNOTATED_PATH}")

Multi-image ICL

Separate cats and dogs by providing an annotated exemplar for each class.

from pathlib import Path
from urllib.request import urlretrieve

from perceptron import annotate_image, bbox, configure, detect, image
from PIL import Image as PILImage, ImageDraw

# Select the Perceptron provider and the Isaac 0.1 model for later calls.
# "YOUR_API_KEY" is a placeholder; substitute a real key before running.
configure(provider="perceptron", model="isaac-0.1", api_key="YOUR_API_KEY")

# (remote URL, local filename) pairs: one exemplar frame per class plus the
# mixed scene to run detection on.
ASSETS = [
    ("https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/multi/classA.jpg", "classA.jpg"),
    ("https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/multi/classB.webp", "classB.webp"),
    ("https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/multi/cat_dog_input.png", "cat_dog_input.png"),
]

# Download each asset once; files already present on disk are left untouched.
for remote_url, local_name in ASSETS:
    destination = Path(local_name)
    if destination.exists():
        continue
    urlretrieve(remote_url, destination)

# Build one hand-annotated exemplar per class. Each bbox() carries the class
# name as its mention and is filed under the matching label.
cat_box = bbox(316, 136, 703, 906, mention="cat")
cat_example = annotate_image("classA.jpg", {"cat": [cat_box]})

dog_box = bbox(161, 48, 666, 980, mention="dog")
dog_example = annotate_image("classB.webp", {"dog": [dog_box]})

# Detect both classes in the mixed scene, passing both exemplars as
# in-context examples.
mixed_scene = image("cat_dog_input.png")
result = detect(
    mixed_scene,
    classes=["cat", "dog"],
    expects="box",
    examples=[cat_example, dog_example],
)
print(result.text)

# Draw the detections onto an RGB copy of the mixed scene.
img = PILImage.open("cat_dog_input.png").convert("RGB")
draw = ImageDraw.Draw(img)

# Convert the boxes to pixel coordinates for this image size; a falsy result
# is treated as "no detections".
pixel_boxes = result.boxes_to_pixels(width=img.width, height=img.height) or []

for detection in pixel_boxes:
    left = int(detection.top_left.x)
    top = int(detection.top_left.y)
    right = int(detection.bottom_right.x)
    bottom = int(detection.bottom_right.y)
    draw.rectangle([left, top, right, bottom], outline="lime", width=3)

    # Put the label 18 px above the box, clamped so it never leaves the image.
    label_origin = (left, max(top - 18, 0))
    draw.text(label_origin, detection.mention or "class", fill="lime")

img.save("cat_dog_input_annotated.png")
print("Saved annotated output to cat_dog_input_annotated.png")

Key parameters (single or multi-image)

Parameter	Type	Purpose
`classes`	`list[str]`	Logical labels shared by exemplars and target detections
`examples`	`list[dict]`	Annotated frames generated via `annotate_image()`
`expects`	`str`	Geometry to return (`"box"`, `"point"`, `"polygon"`)

Annotate each exemplar with `bbox()` coordinates on the normalized 0–1000 grid. Create the exemplar annotations by hand, or reuse accurate detections from a prior dry run.

Best practices

Use clean exemplars; quality beats quantity.

Run through the full Jupyter notebook here. Reach out to Perceptron support if you have questions.

Single-image — Run in Colab

Multi-image — Run in Colab

Single-image ICL

Multi-image ICL

Key parameters (single or multi-image)

Best practices

Single-image ICL

Multi-image ICL

Key parameters (single or multi-image)

Best practices