2 Star 1 Fork 0

OAKChina/Camera-StartUpCodeSamples

加入 Gitee
与超过 1400万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
depth_align.py 10.52 KB
一键复制 编辑 原始数据 按行查看 历史
OAKChina 提交于 2024-12-18 13:31 +08:00 . OAK-D-Pro
#!/usr/bin/env python3
# coding=utf-8
import cv2
import depthai as dai
import numpy as np
# Weights to use when blending depth/rgb image (should equal 1.0)
rgbWeight = 0.4
depthWeight = 0.6
# Initial spatial-calculator ROI corners, in normalized (0..1) image
# coordinates; mutated at runtime by the WASD keys and mouse selection.
topLeft = dai.Point2f(0.4, 0.4)
bottomRight = dai.Point2f(0.6, 0.6)
def updateBlendWeights(percent_rgb):
    """
    Update the rgb and depth weights used to blend depth/rgb image.

    @param[in] percent_rgb The rgb weight expressed as a percentage (0..100)
    """
    global rgbWeight, depthWeight
    # Trackbar delivers an integer percentage; the two weights always sum to 1.
    rgbWeight = float(percent_rgb) / 100.0
    depthWeight = 1.0 - rgbWeight
def create_pipeline(device):
    """Build a DepthAI pipeline: ISP-scaled RGB, stereo depth aligned to the
    RGB camera, and an on-device SpatialLocationCalculator with one ROI.

    @param[in] device Connected dai.Device; its calibration is read so the
                      RGB focus can be fixed at the lens position used during
                      calibration (required for good depth alignment).
    @return (pipeline, maxDisparity) where maxDisparity is the stereo
            profile's maximum disparity, used to normalize frames for display.
    """
    monoResolution = dai.MonoCameraProperties.SensorResolution.THE_720_P

    # Create pipeline
    pipeline = dai.Pipeline()

    # Define sources and outputs
    camRgb = pipeline.create(dai.node.ColorCamera)
    left = pipeline.create(dai.node.MonoCamera)
    right = pipeline.create(dai.node.MonoCamera)
    stereo = pipeline.create(dai.node.StereoDepth)
    spatialLocationCalculator = pipeline.create(dai.node.SpatialLocationCalculator)

    rgbOut = pipeline.create(dai.node.XLinkOut)
    disparityOut = pipeline.create(dai.node.XLinkOut)
    xoutSpatialData = pipeline.create(dai.node.XLinkOut)
    xinSpatialCalcConfig = pipeline.create(dai.node.XLinkIn)

    rgbOut.setStreamName("rgb")
    disparityOut.setStreamName("disp")
    xoutSpatialData.setStreamName("spatialData")
    xinSpatialCalcConfig.setStreamName("spatialCalcConfig")

    # Properties
    camRgb.setBoardSocket(dai.CameraBoardSocket.RGB)
    camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
    camRgb.setFps(30)
    camRgb.setIspScale(2, 3)  # 1080p * 2/3 -> 720p, matching the mono pair

    # For now, RGB needs fixed focus to properly align with depth.
    # This value was used during calibration.  Errors reading calibration
    # propagate to the caller; the original wrapped these lines in a
    # bare `try: ... except: raise`, which is a no-op and was removed.
    calibData = device.readCalibration2()
    lensPosition = calibData.getLensPosition(dai.CameraBoardSocket.RGB)
    if lensPosition:
        camRgb.initialControl.setManualFocus(lensPosition)

    left.setResolution(monoResolution)
    left.setBoardSocket(dai.CameraBoardSocket.LEFT)
    left.setFps(30)
    right.setResolution(monoResolution)
    right.setBoardSocket(dai.CameraBoardSocket.RIGHT)
    right.setFps(30)

    stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
    # LR-check is required for depth alignment
    stereo.setLeftRightCheck(True)
    stereo.setDepthAlign(dai.CameraBoardSocket.RGB)

    # Initial spatial-calculator config: ROI plus valid depth range (mm).
    config = dai.SpatialLocationCalculatorConfigData()
    config.depthThresholds.lowerThreshold = 100
    config.depthThresholds.upperThreshold = 10000
    config.roi = dai.Rect(topLeft, bottomRight)
    spatialLocationCalculator.inputConfig.setWaitForMessage(False)
    spatialLocationCalculator.initialConfig.addROI(config)

    # Linking
    camRgb.isp.link(rgbOut.input)
    left.out.link(stereo.left)
    right.out.link(stereo.right)
    stereo.disparity.link(disparityOut.input)
    stereo.depth.link(spatialLocationCalculator.inputDepth)
    spatialLocationCalculator.out.link(xoutSpatialData.input)
    xinSpatialCalcConfig.out.link(spatialLocationCalculator.inputConfig)

    return pipeline, stereo.initialConfig.getMaxDisparity()
def check_input(roi, frame, DELTA=5):
    """
    Check if input is ROI or point. If point, convert to ROI.

    @param[in] roi   A pair of corner points ((x0, y0), (x1, y1)), a bare
                     point (x, y), or a flat (x0, y0, x1, y1) box, in pixels.
    @param[in] frame Image the ROI refers to; only its shape is used.
    @param[in] DELTA Half-size (px) of the ROI grown around a bare point,
                     and the margin kept from the frame border.
    @return 2x2 float array [[x0, y0], [x1, y1]] normalized to 0..1.
    """
    pad = [[-DELTA, -DELTA], [DELTA, DELTA]]
    if len(roi) == 2:
        try:
            is_rect = len(roi[0]) == 2
        except TypeError:
            # roi is a bare (x, y) point of scalars (no len()); the
            # original crashed here with a TypeError.
            is_rect = False
        if is_rect:
            roi = np.array(roi) + pad
        else:
            # Grow the point into a 2*DELTA square.
            roi = np.array([roi, roi]) + pad
    elif len(roi) == 4:
        # Flat (x0, y0, x1, y1) box; reshape to (2, 2) so the padding
        # broadcasts (the original added a (2,2) pad to a (4,) array,
        # which raises a broadcast ValueError).
        roi = np.array(roi).reshape(2, 2) + pad
    # Limit the point so ROI won't be outside the frame.
    # BUGFIX: ndarray.clip returns a NEW array; the original discarded the
    # result, so the ROI was never actually clipped.
    roi = np.clip(roi, [DELTA, DELTA], [frame.shape[1] - DELTA, frame.shape[0] - DELTA])
    # Normalize by (width, height).
    return roi / frame.shape[1::-1]
def click_and_crop(event, x, y, flags, param):
    """OpenCV mouse callback: drag with the left button to select an ROI.

    Stores the press position in the global ``refPt`` and, on release,
    publishes the axis-aligned selection rectangle via ``click_roi``.
    """
    global refPt, click_roi

    if event == cv2.EVENT_LBUTTONDOWN:
        # Drag started: remember the anchor corner.
        refPt = [(x, y)]
    elif event == cv2.EVENT_LBUTTONUP:
        # Drag finished: record the second corner, then normalize the
        # pair into a [[xmin, ymin], [xmax, ymax]] rectangle regardless
        # of the drag direction.
        refPt.append((x, y))
        refPt = np.array(refPt)
        click_roi = np.array([refPt.min(axis=0), refPt.max(axis=0)])
def run():
    """Connect to a device, stream RGB and disparity, and display them
    separately plus blended, overlaying spatial (XYZ, mm) measurements
    for a movable ROI.

    The ROI can be drag-selected with the mouse in any window or nudged
    with the WASD keys; 'q' quits.
    """
    # Mouse-selection state shared with click_and_crop().
    global refPt, click_roi
    # Connect to device and start pipeline
    with dai.Device() as device:
        pipeline, maxDisparity = create_pipeline(device)
        device.startPipeline(pipeline)
        frameRgb = None      # latest RGB frame awaiting blending
        frameDisp = None     # latest colorized disparity frame
        depthDatas = []      # latest spatial-calculator results
        stepSize = 0.01      # normalized ROI step for WASD nudging
        newConfig = False    # set when the ROI changed and must be re-sent
        # Configure windows; trackbar adjusts blending ratio of rgb/depth
        rgbWindowName = "rgb"
        depthWindowName = "depth"
        blendedWindowName = "rgb-depth"
        cv2.namedWindow(rgbWindowName)
        cv2.namedWindow(depthWindowName)
        cv2.namedWindow(blendedWindowName)
        cv2.createTrackbar(
            "RGB Weight %",
            blendedWindowName,
            int(rgbWeight * 100),
            100,
            updateBlendWeights,
        )
        # The ROI can be drag-selected in any of the three windows.
        cv2.setMouseCallback(rgbWindowName, click_and_crop)
        cv2.setMouseCallback(depthWindowName, click_and_crop)
        cv2.setMouseCallback(blendedWindowName, click_and_crop)
        print("Use WASD keys to move ROI!")
        spatialCalcConfigInQueue = device.getInputQueue("spatialCalcConfig")
        imageQueue = device.getOutputQueue("rgb")
        dispQueue = device.getOutputQueue("disp")
        spatialDataQueue = device.getOutputQueue("spatialData")

        def drawText(frame, text, org, color=(255, 255, 255)):
            # Draw the text twice (thick black, then thin colored) so the
            # label stays readable on any background.
            cv2.putText(
                frame,
                text,
                org,
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                (0, 0, 0),
                4,
                cv2.LINE_AA,
            )
            cv2.putText(
                frame, text, org, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA
            )

        def drawSpatialLocations(frame, spatialLocations):
            # Draw each measured ROI and its X/Y/Z coordinates in mm.
            for depthData in spatialLocations:
                roi = depthData.config.roi
                roi = roi.denormalize(width=frame.shape[1], height=frame.shape[0])
                xmin = int(roi.topLeft().x)
                ymin = int(roi.topLeft().y)
                xmax = int(roi.bottomRight().x)
                ymax = int(roi.bottomRight().y)
                # Outlined rectangle: thick black under thin white.
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 0), 4)
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (255, 255, 255), 1)
                drawText(
                    frame,
                    f"X: {int(depthData.spatialCoordinates.x)} mm",
                    (xmin + 10, ymin + 20),
                )
                drawText(
                    frame,
                    f"Y: {int(depthData.spatialCoordinates.y)} mm",
                    (xmin + 10, ymin + 35),
                )
                drawText(
                    frame,
                    f"Z: {int(depthData.spatialCoordinates.z)} mm",
                    (xmin + 10, ymin + 50),
                )

        while not device.isClosed():
            # Non-blocking reads: each stream may or may not have new data.
            imageData = imageQueue.tryGet()
            dispData = dispQueue.tryGet()
            spatialData = spatialDataQueue.tryGet()
            if spatialData is not None:
                depthDatas = spatialData.getSpatialLocations()
            if imageData is not None:
                frameRgb = imageData.getCvFrame()
                drawSpatialLocations(frameRgb, depthDatas)
                cv2.imshow(rgbWindowName, frameRgb)
            if dispData is not None:
                frameDisp = dispData.getFrame()
                # Scale raw disparity to 0..255 before color mapping.
                frameDisp = (frameDisp * (255 / maxDisparity)).astype(np.uint8)
                frameDisp = cv2.applyColorMap(frameDisp, cv2.COLORMAP_JET)
                frameDisp = np.ascontiguousarray(frameDisp)
                drawSpatialLocations(frameDisp, depthDatas)
                cv2.imshow(depthWindowName, frameDisp)
            # Blend when both received
            if frameRgb is not None and frameDisp is not None:
                # Need to have both frames in BGR format before blending
                if len(frameDisp.shape) < 3:
                    frameDisp = cv2.cvtColor(frameDisp, cv2.COLOR_GRAY2BGR)
                blended = cv2.addWeighted(
                    frameRgb, rgbWeight, frameDisp, depthWeight, 0
                )
                drawSpatialLocations(blended, depthDatas)
                if click_roi is not None:
                    # A mouse selection finished: convert it to normalized
                    # ROI corners and schedule a device config update.
                    [topLeft.x, topLeft.y], [bottomRight.x, bottomRight.y] = check_input(click_roi, blended)
                    click_roi = None
                    newConfig = True
                cv2.imshow(blendedWindowName, blended)
                # Reset so the next blend waits for a fresh pair.
                frameRgb = None
                frameDisp = None
                depthDatas = []
            key = cv2.waitKey(1)
            if key == ord("q"):
                break
            elif key == ord("w"):
                # Move ROI up, keeping it inside the normalized 0..1 range.
                if topLeft.y - stepSize >= 0:
                    topLeft.y -= stepSize
                    bottomRight.y -= stepSize
                    newConfig = True
            elif key == ord("a"):
                if topLeft.x - stepSize >= 0:
                    topLeft.x -= stepSize
                    bottomRight.x -= stepSize
                    newConfig = True
            elif key == ord("s"):
                if bottomRight.y + stepSize <= 1:
                    topLeft.y += stepSize
                    bottomRight.y += stepSize
                    newConfig = True
            elif key == ord("d"):
                if bottomRight.x + stepSize <= 1:
                    topLeft.x += stepSize
                    bottomRight.x += stepSize
                    newConfig = True
            if newConfig:
                # Push the updated ROI to the on-device calculator.
                config = dai.SpatialLocationCalculatorConfigData()
                config.depthThresholds.lowerThreshold = 100
                config.depthThresholds.upperThreshold = 10000
                config.roi = dai.Rect(topLeft, bottomRight)
                config.calculationAlgorithm = (
                    dai.SpatialLocationCalculatorAlgorithm.AVERAGE
                )
                cfg = dai.SpatialLocationCalculatorConfig()
                cfg.addROI(config)
                spatialCalcConfigInQueue.send(cfg)
                newConfig = False
if __name__ == "__main__":
    # Globals consumed by the click_and_crop() mouse callback:
    # refPt holds in-progress drag corners, click_roi the finished box.
    refPt = None
    click_roi = None
    run()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/oakchina/camera-start-up-code-samples.git
git@gitee.com:oakchina/camera-start-up-code-samples.git
oakchina
camera-start-up-code-samples
Camera-StartUpCodeSamples
main

搜索帮助