I know that we can call “results.show()” directly to display the image with the detected objects marked. But what if I only want to show the objects whose confidence is higher than a threshold? Then we need to fetch the results one by one manually:

import torch
import cv2

# Load the custom-trained YOLOv5 weights ('best.pt') from the local repository checkout.
model = torch.hub.load('.', 'custom', path='best.pt', source='local')

# Detections whose confidence is not strictly above this value are not drawn.
CONF_THRESHOLD = 0.581

# Box/label colour (OpenCV uses BGR order) for each class id we want to display.
colors = {
    14: (0, 255, 0),
    80: (0, 0, 255),
}

# Label text for each class id we want to display.
names = {
    14: "bird",
    80: "squirrel",
}

for suffix in ["1.jpeg", "2.jpeg", "4.jpeg", "7.jpeg", "3.webp", "5.webp", "6.webp", "8.png"]:
    img_name = f"squirrel_bird{suffix}"
    image = cv2.imread(img_name)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising, so check before handing the image to the model.
        print(f"Could not read {img_name}, skipping")
        continue

    # OpenCV loads images as BGR, while the YOLOv5 hub wrapper documents RGB
    # for array inputs; convert a copy for inference and keep drawing on the
    # original BGR image so the displayed colours stay correct.
    results = model(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), size=960)

    # results.pred[0] holds one row per detection for the first (only) image:
    # x1, y1, x2, y2, confidence, class id.
    for obj in results.pred[0]:
        # tolist() works for tensors on any device; numpy() fails on CUDA.
        x1, y1, x2, y2, conf, cat = obj.tolist()
        x1, y1, x2, y2, cat = int(x1), int(y1), int(x2), int(y2), int(cat)
        print(x1, y1, x2, y2, conf, cat)
        if conf > CONF_THRESHOLD and cat in colors:
            cv2.rectangle(image, (x1, y1), (x2, y2), colors[cat], 2)
            cv2.putText(
                image,
                f"{names[cat]},{conf:.2f}",
                (x1, y1+12),  # place the label just inside the top-left corner
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                colors[cat],
                thickness=1,
                lineType=2,
            )

    cv2.imshow("yolov5", image)
    # imshow only paints the window once the GUI event loop runs; block here
    # until a key is pressed so each image is actually visible.
    cv2.waitKey(0)

cv2.destroyAllWindows()

The key is to get the tensors from “results.pred[0]” and to extract the coordinates, confidence, and category from each one.