In-context learning (ICL) lets you attach annotated reference frames so Isaac can recognize the same concepts in a new target image. Use a single exemplar for quick specialization, or several exemplars to disambiguate between classes.
Single-image ICL
Find a mixer in a kitchen using a single annotated exemplar.
```python
import os
from pathlib import Path
from urllib.request import urlretrieve

from perceptron import annotate_image, bbox, configure, detect
from PIL import Image, ImageDraw

configure(
    provider="perceptron",
    api_key=os.getenv("PERCEPTRON_API_KEY", "<your_api_key_here>"),
)

EXEMPLAR_URL = "https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/single/cake_mixer_example.webp"
TARGET_URL = "https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/single/find_kitchen_item.webp"
EXEMPLAR_PATH = Path("cake_mixer_example.webp")
TARGET_PATH = Path("find_kitchen_item.webp")
ANNOTATED_PATH = Path("find_kitchen_item_annotated.png")

# Download the exemplar and target frames if they are not already present
for url, path in [(EXEMPLAR_URL, EXEMPLAR_PATH), (TARGET_URL, TARGET_PATH)]:
    if not path.exists():
        urlretrieve(url, path)

# Bootstrap the exemplar via detection + annotation
bootstrap = detect(
    image_path=str(EXEMPLAR_PATH),
    classes=["mixer"],
    expects="box",
)
if not bootstrap.points:
    raise RuntimeError("No mixer detected in the exemplar image; annotate it by hand instead.")
first_box = bootstrap.points[0]

exemplar = annotate_image(
    str(EXEMPLAR_PATH),
    {
        "mixer": [
            bbox(
                int(first_box.top_left.x),
                int(first_box.top_left.y),
                int(first_box.bottom_right.x),
                int(first_box.bottom_right.y),
                mention="mixer",
            )
        ]
    },
)

# Apply the exemplar to the new scene
result = detect(
    image_path=str(TARGET_PATH),
    classes=["mixer"],
    expects="box",
    examples=[exemplar],
)
print(result.text)

# Draw the returned boxes on the target frame
img = Image.open(TARGET_PATH).convert("RGB")
draw = ImageDraw.Draw(img)
pixel_boxes = result.points_to_pixels(width=img.width, height=img.height) or []
for box in pixel_boxes:
    draw.rectangle(
        [
            int(box.top_left.x),
            int(box.top_left.y),
            int(box.bottom_right.x),
            int(box.bottom_right.y),
        ],
        outline="magenta",
        width=3,
    )
    draw.text((int(box.top_left.x), max(int(box.top_left.y) - 18, 0)), box.mention or "mixer", fill="magenta")

img.save(ANNOTATED_PATH)
print(f"Saved annotated kitchen frame to {ANNOTATED_PATH}")
```
Multi-image ICL
Separate cats and dogs by providing multiple annotated exemplars for each class.
```python
import os
from pathlib import Path
from urllib.request import urlretrieve

from perceptron import annotate_image, bbox, configure, detect
from PIL import Image, ImageDraw

configure(
    provider="perceptron",
    api_key=os.getenv("PERCEPTRON_API_KEY", "<your_api_key_here>"),
)

ASSETS = [
    ("https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/multi/classA.jpg", "classA.jpg"),
    ("https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/multi/classB.webp", "classB.webp"),
    ("https://raw.githubusercontent.com/perceptron-ai-inc/perceptron/main/cookbook/_shared/assets/in-context-learning/multi/cat_dog_input.png", "cat_dog_input.png"),
]

for url, filename in ASSETS:
    path = Path(filename)
    if not path.exists():
        urlretrieve(url, path)

# Annotate exemplars
cat_example = annotate_image(
    "classA.jpg",
    {"cat": [bbox(316, 136, 703, 906, mention="cat")]},
)
dog_example = annotate_image(
    "classB.webp",
    {"dog": [bbox(161, 48, 666, 980, mention="dog")]},
)

# Detect on the mixed scene
result = detect(
    "cat_dog_input.png",
    classes=["cat", "dog"],
    expects="box",
    examples=[cat_example, dog_example],
)
print(result.text)

img = Image.open("cat_dog_input.png").convert("RGB")
draw = ImageDraw.Draw(img)
pixel_boxes = result.points_to_pixels(width=img.width, height=img.height) or []
for box in pixel_boxes:
    draw.rectangle(
        [
            int(box.top_left.x),
            int(box.top_left.y),
            int(box.bottom_right.x),
            int(box.bottom_right.y),
        ],
        outline="lime",
        width=3,
    )
    draw.text((int(box.top_left.x), max(int(box.top_left.y) - 18, 0)), box.mention or "class", fill="lime")

img.save("cat_dog_input_annotated.png")
print("Saved annotated output to cat_dog_input_annotated.png")
```
Key parameters (single or multi-image)
| Parameter | Type | Purpose |
|---|---|---|
| `classes` | `list[str]` | Logical labels shared by exemplars and target detections |
| `examples` | `list[dict]` | Annotated frames generated via `annotate_image()` |
| `expects` | `str` | Geometry to return (`"box"`, `"point"`, or `"polygon"`) |
Annotate each exemplar with `bbox()` coordinates on the normalized 0–1000 grid. Bootstrap exemplars by hand or reuse accurate detections from a prior dry run.
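For hand-built exemplars, the only extra step is scaling pixel coordinates onto that 0–1000 grid. Here is a minimal sketch; the pixel values and the `to_grid` helper are illustrative assumptions, not part of the SDK:

```python
from PIL import Image
from perceptron import annotate_image, bbox

EXEMPLAR = "cake_mixer_example.webp"
# Hypothetical box measured by hand in an image editor: (left, top, right, bottom) in pixels
PIXEL_BOX = (412, 230, 978, 1015)

img = Image.open(EXEMPLAR)

def to_grid(value: float, extent: int) -> int:
    """Scale a pixel coordinate onto the normalized 0-1000 grid."""
    return int(round(value / extent * 1000))

exemplar = annotate_image(
    EXEMPLAR,
    {
        "mixer": [
            bbox(
                to_grid(PIXEL_BOX[0], img.width),
                to_grid(PIXEL_BOX[1], img.height),
                to_grid(PIXEL_BOX[2], img.width),
                to_grid(PIXEL_BOX[3], img.height),
                mention="mixer",
            )
        ]
    },
)
```

The resulting exemplar plugs into `examples=[exemplar]` exactly as in the single-image recipe above.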
Best practices
- Use clean exemplars; quality beats quantity.
- Balance classes when using multiple examples; include varied angles or lighting.
- Call out contrasts in text or mentions (“highlight the red mixer, ignore toasters”; “striped cat vs. harnessed dog”).
- Teach one concept per call when possible; issue separate calls for additional objects (see the sketch after this list).
- Validate outputs with follow-up Q&A or OCR when you need certainty.
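As a concrete illustration of the one-concept-per-call guideline, a minimal sketch that reuses the `cat_example` and `dog_example` exemplars built in the multi-image recipe, but queries each class in its own `detect()` call:

```python
# Assumes cat_example and dog_example from the multi-image recipe are in scope.
from perceptron import detect

cat_result = detect(
    "cat_dog_input.png",
    classes=["cat"],
    expects="box",
    examples=[cat_example],
)
dog_result = detect(
    "cat_dog_input.png",
    classes=["dog"],
    expects="box",
    examples=[dog_example],
)
print(cat_result.text)
print(dog_result.text)
```

Each call then carries a single concept, which can reduce cross-class confusion at the cost of an extra request; the combined multi-class call above remains the baseline to compare against.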