File: js_object_detection_with_camera.html

package info (click to toggle)
opencv 4.6.0%2Bdfsg-12
links: PTS, VCS
area: main
in suites: bookworm
size: 276,172 kB
sloc: cpp: 1,079,020; xml: 682,526; python: 43,885; lisp: 30,943; java: 25,642; ansic: 7,968; javascript: 5,956; objc: 2,039; sh: 1,017; cs: 601; perl: 494; makefile: 179
file content (402 lines) | stat: -rw-r--r-- 14,555 bytes
parent folder | download | duplicates (3)
<!DOCTYPE html>
<html>

<head>
    <!-- Document metadata: encoding, page title and the shared tutorial stylesheet. -->
    <meta charset="utf-8">
    <title>Object Detection Example with Camera</title>
    <link rel="stylesheet" href="js_example_style.css">
</head>

<body>
<h2>Object Detection Example with Camera </h2>
<p>
    This tutorial shows you how to write an object detection example with camera.<br>
    To try the example you should click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
    You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
    Then you should change the parameters in the first code snippet according to the uploaded model.
    Finally click <b>Start/Stop</b> button to start or stop the camera capture.<br>
</p>

<!-- Start/Stop toggle; stays disabled until the script enables it after OpenCV has loaded. -->
<div class="control"><button id="startAndStop" type="button" disabled>Start</button></div>
<div>
    <!-- Layout-only table: camera preview next to the annotated output, file pickers below. -->
    <table role="presentation" cellpadding="0" cellspacing="0" width="0" border="0">
        <tr>
            <td>
                <video id="videoInput" width="400" height="400"></video>
            </td>
            <td>
                <!-- Hidden until the first result is shown; visibility is toggled from the script. -->
                <canvas id="canvasOutput" style="visibility: hidden;" width="400" height="400"></canvas>
            </td>
        </tr>
        <tr>
            <td>
                <div class="caption">
                    videoInput
                </div>
            </td>
            <td>
                <p id="status" align="left"></p>
            </td>
        </tr>
        <tr>
            <td>
                <div class="caption">
                    <label for="modelFile">modelFile</label> <input type="file" id="modelFile" name="file">
                </div>
            </td>
        </tr>
        <tr>
            <td>
                <div class="caption">
                    <label for="configFile">configFile</label> <input type="file" id="configFile">
                </div>
            </td>
        </tr>
    </table>
</div>

<!-- Error output area: its id is handed to `new Utils('errorMessage')` in the script below;
     presumably utils.printError / utils.clearError write to and clear this element (confirm in utils.js). -->
<div>
    <p class="err" id="errorMessage"></p>
</div>

<!-- Editable code snippets. Each textarea is filled by the script at load time and is named
     for assistive technology by the caption paragraph directly above it. -->
<div>
    <h3>Helper functions</h3>
    <p id="codeEditorCaption">1. The parameters for model inference, which you can modify to try other models.</p>
    <textarea class="code" rows="15" cols="100" id="codeEditor" spellcheck="false" aria-labelledby="codeEditorCaption"></textarea>
    <p id="codeEditor1Caption">2. The function that captures video from the camera, and the main loop that runs inference once per frame.</p>
    <textarea class="code" rows="34" cols="100" id="codeEditor1" spellcheck="false" aria-labelledby="codeEditor1Caption"></textarea>
    <p id="codeEditor2Caption">3. Load labels from a text file and convert them into an array.</p>
    <textarea class="code" rows="7" cols="100" id="codeEditor2" spellcheck="false" aria-labelledby="codeEditor2Caption"></textarea>
    <p id="codeEditor3Caption">4. Get a blob from the image as input for the net, and standardize it with <b>mean</b> and <b>std</b>.</p>
    <textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false" aria-labelledby="codeEditor3Caption"></textarea>
    <p id="codeEditor4Caption">5. Fetch the model file and save it to the Emscripten file system once a file is chosen with the input button.</p>
    <textarea class="code" rows="17" cols="100" id="codeEditor4" spellcheck="false" aria-labelledby="codeEditor4Caption"></textarea>
    <p id="codeEditor5Caption">6. The post-processing, which extracts boxes from the output and draws them onto the image.</p>
    <textarea class="code" rows="35" cols="100" id="codeEditor5" spellcheck="false" aria-labelledby="codeEditor5Caption"></textarea>
</div>

<!-- Target of the "model info" link in the introduction. The script below calls
     drawInfoTable(jsonUrl, 'appendix'); presumably that helper renders the model table into this element. -->
<div id="appendix">
    <h2>Model Info:</h2>
</div>

<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>

<script id="codeSnippet" type="text/code-snippet">
// Parameters for model inference. They are read by the other snippets
// (main and postProcess), so adjust them to match the uploaded model.
// NOTE(review): the values are assigned without let/const, presumably on
// purpose so that they stay visible to the separately executed snippets.

// Input size handed to getBlobFromImage
// (presumably [width, height] - confirm in js_dnn_example_helper.js)
inputSize = [300, 300];
// Mean and scale handed to getBlobFromImage; 0.007843 is approximately 1 / 127.5
mean = [127.5, 127.5, 127.5];
std = 0.007843;
// Handed to getBlobFromImage; presumably swaps the red and blue channels when true
swapRB = false;
// Detections whose confidence is not above this value are dropped in postProcess
confThreshold = 0.5;
// YOLO only: a box whose IOU with a box sorted ahead of it reaches this value is not drawn
nmsThreshold = 0.4;

// Layout of the network output that postProcess decodes: "YOLO" or "SSD"
outType = "SSD";

// URL of the label file that main passes to loadLables; may be a local path or an Internet URL
labelsUrl = "https://raw.githubusercontent.com/opencv/opencv/4.x/samples/data/dnn/object_detection_classes_pascal_voc.txt";
</script>

<script id="codeSnippet1" type="text/code-snippet">
// Buffer that receives each camera frame (RGBA, same size as the video element)
let frame = new cv.Mat(videoInput.height, videoInput.width, cv.CV_8UC4);
let cap = new cv.VideoCapture(videoInput);

// Runs one inference pass on `frame`, shows the annotated result and schedules
// the next capture. Every OpenCV object created here is released before the
// function finishes, even when a step fails.
main = async function(frame) {
    let input = null;
    let net = null;
    let result = null;
    let output = null;
    try {
        const labels = await loadLables(labelsUrl);
        input = getBlobFromImage(inputSize, mean, std, swapRB, frame);
        net = cv.readNet(configPath, modelPath);
        net.setInput(input);
        const start = performance.now();
        result = net.forward();
        const time = performance.now() - start;
        output = postProcess(result, labels, frame);

        updateResult(output, time);
        setTimeout(processVideo, 0);
    } catch (err) {
        // main is async, so the try/catch in processVideo never sees these
        // errors; report them here instead of leaving an unhandled rejection.
        // The capture loop stops because no further frame is scheduled.
        utils.printError(err);
    } finally {
        // OpenCV.js objects live on the WebAssembly heap and are not garbage
        // collected; `output` has already been copied to the canvas.
        for (const obj of [output, result, net, input]) {
            if (obj) {
                obj.delete();
            }
        }
    }
}

// Grabs the next camera frame and hands it to main; ends the loop once
// streaming has been switched off.
function processVideo() {
    try {
        if (!streaming) {
            // The loop ends here, so the capture buffer is no longer needed.
            frame.delete();
            return;
        }
        cap.read(frame);
        main(frame);
    } catch (err) {
        utils.printError(err);
    }
}

setTimeout(processVideo, 0);
</script>

<script id="codeSnippet5" type="text/code-snippet">
// Turns the raw network output into bounding boxes and draws the surviving ones.
//   result: output of net.forward(); read through result.data32F
//   labels: array of class names, indexed by class id
//   frame:  RGBA cv.Mat the detection was run on
// Returns a new RGB cv.Mat (a copy of `frame` with the boxes drawn on it).
// The caller owns the returned Mat and has to delete() it.
postProcess = function(result, labels, frame) {
    // Boxes are drawn on a copy of `frame`, so relative coordinates have to be
    // scaled to the frame size (the canvas may still have a different size).
    const outputWidth = frame.cols;
    const outputHeight = frame.rows;
    const resultData = result.data32F;

    // Get the boxes(with class and confidence) from the output
    let boxes = [];
    switch(outType) {
        case "YOLO": {
            // One row per candidate: [center_x, center_y, width, height, <unused here>, class scores...]
            // with the box geometry relative to the image size.
            const vecNum = result.matSize[0];
            const vecLength = result.matSize[1];
            const classNum = vecLength - 5;

            for (let i = 0; i < vecNum; ++i) {
                let vector = resultData.slice(i*vecLength, (i+1)*vecLength);
                let scores = vector.slice(5, vecLength);
                let classId = scores.indexOf(Math.max(...scores));
                let confidence = scores[classId];
                if (confidence > confThreshold) {
                    let center_x = Math.round(vector[0] * outputWidth);
                    let center_y = Math.round(vector[1] * outputHeight);
                    let width = Math.round(vector[2] * outputWidth);
                    let height = Math.round(vector[3] * outputHeight);
                    let left = Math.round(center_x - width / 2);
                    let top = Math.round(center_y - height / 2);

                    let box = {
                        scores: scores,
                        classId: classId,
                        confidence: confidence,
                        bounding: [left, top, width, height],
                        toDraw: true
                    }
                    boxes.push(box);
                }
            }

            // NMS(Non Maximum Suppression) algorithm: for every class, sort the
            // boxes by their score for that class and hide each box that
            // overlaps too much with a box sorted ahead of it.
            let boxNum = boxes.length;
            let tmp_boxes = [];
            let sorted_boxes = [];
            for (let c = 0; c < classNum; ++c) {
                for (let i = 0; i < boxes.length; ++i) {
                    // Remember the original index so the flag can be set on `boxes`
                    tmp_boxes[i] = [boxes[i], i];
                }
                sorted_boxes = tmp_boxes.sort((a, b) => { return (b[0].scores[c] - a[0].scores[c]); });
                for (let i = 0; i < boxNum; ++i) {
                    if (sorted_boxes[i][0].scores[c] === 0) continue;
                    else {
                        for (let j = i + 1; j < boxNum; ++j) {
                            if (IOU(sorted_boxes[i][0], sorted_boxes[j][0]) >= nmsThreshold) {
                                boxes[sorted_boxes[j][1]].toDraw = false;
                            }
                        }
                    }
                }
            }
        } break;
        case "SSD": {
            // 7 values per detection: [<unused here>, classId, confidence, left, top, right, bottom]
            const vecNum = result.matSize[2];
            const vecLength = 7;

            for (let i = 0; i < vecNum; ++i) {
                let vector = resultData.slice(i*vecLength, (i+1)*vecLength);
                let confidence = vector[2];
                if (confidence > confThreshold) {
                    let left, top, right, bottom, width, height;
                    left = Math.round(vector[3]);
                    top = Math.round(vector[4]);
                    right = Math.round(vector[5]);
                    bottom = Math.round(vector[6]);
                    width = right - left + 1;
                    height = bottom - top + 1;
                    // A box this small means the coordinates were relative
                    // (0..1) rather than pixels, so scale them to the image.
                    if (width <= 2 || height <= 2) {
                        left = Math.round(vector[3] * outputWidth);
                        top = Math.round(vector[4] * outputHeight);
                        right = Math.round(vector[5] * outputWidth);
                        bottom = Math.round(vector[6] * outputHeight);
                        width = right - left + 1;
                        height = bottom - top + 1;
                    }
                    let box = {
                        classId: vector[1] - 1,
                        confidence: confidence,
                        bounding: [left, top, width, height],
                        toDraw: true
                    }
                    boxes.push(box);
                }
            }
        } break;
        default:
            console.error(`Unsupported output type ${outType}`)
    }

    // Draw the saved box into the image. cv.Mat takes (rows, cols, type),
    // i.e. height before width.
    let output = new cv.Mat(outputHeight, outputWidth, cv.CV_8UC3);
    try {
        cv.cvtColor(frame, output, cv.COLOR_RGBA2RGB);
        for (const box of boxes) {
            if (box.toDraw) {
                drawBox(box);
            }
        }
    } catch (err) {
        // Nobody else holds a reference to `output` yet, so release it here.
        output.delete();
        throw err;
    }

    return output;


    // Calculate the IOU(Intersection over Union) of two boxes whose
    // `bounding` is [left, top, width, height]. Returns a value in [0, 1].
    function IOU(box1, box2) {
        const [left1, top1, width1, height1] = box1.bounding;
        const [left2, top2, width2, height2] = box2.bounding;

        const overlapW = calOverlap([left1, left1 + width1], [left2, left2 + width2]);
        const overlapH = calOverlap([top1, top1 + height1], [top2, top2 + height2]);
        const overlapS = overlapW * overlapH;

        // The overlap is part of both areas, so subtract it once for the union
        const unionS = width1 * height1 + width2 * height2 - overlapS;
        return unionS > 0 ? overlapS / unionS : 0;
    }

    // Calculate the length of the overlap of two ranges given as [min, max];
    // 0 when they do not overlap. Also covers identical and nested ranges.
    function calOverlap(range1, range2) {
        const start = Math.max(range1[0], range2[0]);
        const end = Math.min(range1[1], range2[1]);
        return Math.max(0, end - start);
    }

    // Draw one predict box into the output image: a green outline plus a
    // white strip at the top carrying "<label>: <confidence>".
    function drawBox(box) {
        const [left, top, width, height] = box.bounding;

        cv.rectangle(output, new cv.Point(left, top), new cv.Point(left + width, top + height),
                             new cv.Scalar(0, 255, 0));
        cv.rectangle(output, new cv.Point(left, top), new cv.Point(left + width, top + 15),
                             new cv.Scalar(255, 255, 255), cv.FILLED);
        let text = `${labels[box.classId]}: ${box.confidence.toFixed(4)}`;
        cv.putText(output, text, new cv.Point(left, top + 10), cv.FONT_HERSHEY_SIMPLEX, 0.3,
                                 new cv.Scalar(0, 0, 0));
    }
}
</script>

<script type="text/javascript">
    // Render the model table into the appendix section
    let jsonUrl = "js_object_detection_model_info.json";
    drawInfoTable(jsonUrl, 'appendix');

    // Copy the snippet sources into their editors
    let utils = new Utils('errorMessage');
    utils.loadCode('codeSnippet', 'codeEditor');
    utils.loadCode('codeSnippet1', 'codeEditor1');

    // The helpers from js_dnn_example_helper.js are shown as editable source text
    let loadLablesCode = 'loadLables = ' + loadLables.toString();
    document.getElementById('codeEditor2').value = loadLablesCode;
    let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
    document.getElementById('codeEditor3').value = getBlobFromImageCode;
    let loadModelCode = 'loadModel = ' + loadModel.toString();
    document.getElementById('codeEditor4').value = loadModelCode;

    utils.loadCode('codeSnippet5', 'codeEditor5');

    // Start/Stop toggles the camera; onVideoStarted runs once the stream is up
    let videoInput = document.getElementById('videoInput');
    let streaming = false;
    let startAndStop = document.getElementById('startAndStop');
    startAndStop.addEventListener('click', () => {
        if (!streaming) {
            utils.clearError();
            utils.startCamera('qvga', onVideoStarted, 'videoInput');
        } else {
            utils.stopCamera();
            onVideoStopped();
        }
    });

    // Path returned by loadModel for the optional config file ("" when none is loaded)
    let configPath = "";
    let configFile = document.getElementById('configFile');
    configFile.addEventListener('change', async (e) => {
        initStatus();
        try {
            configPath = await loadModel(e);
            // textContent: the path must be shown literally, never parsed as markup
            document.getElementById('status').textContent = `The config file '${configPath}' is created successfully.`;
        } catch (err) {
            utils.printError(err);
        }
    });

    // Path returned by loadModel for the model file
    let modelPath = "";
    let modelFile = document.getElementById('modelFile');
    modelFile.addEventListener('change', async (e) => {
        initStatus();
        try {
            modelPath = await loadModel(e);
            document.getElementById('status').textContent = `The model file '${modelPath}' is created successfully.`;
            // A new model invalidates the previously chosen config file
            configPath = "";
            configFile.value = "";
        } catch (err) {
            utils.printError(err);
        }
    });

    utils.loadOpenCv(() => {
        startAndStop.removeAttribute('disabled');
    });

    // Placeholders; the real implementations are assigned when the snippets are executed
    var main = async function(frame) {};
    var postProcess = function(result, labels, frame) {};

    // codeEditor1 is deliberately not executed here: it creates cv objects and
    // starts the capture loop, so it only runs from onVideoStarted, once OpenCV
    // and the camera are available.
    utils.executeCode('codeEditor2');
    utils.executeCode('codeEditor3');
    utils.executeCode('codeEditor4');
    utils.executeCode('codeEditor5');

    // Called once the camera stream is running: flips the UI into the
    // "streaming" state, sizes the video element to the stream and runs the
    // parameter and capture snippets.
    function onVideoStarted() {
        streaming = true;
        startAndStop.innerText = 'Stop';

        // Match the element size to the actual stream resolution
        const { videoWidth, videoHeight } = videoInput;
        videoInput.width = videoWidth;
        videoInput.height = videoHeight;

        for (const editorId of ['codeEditor', 'codeEditor1']) {
            utils.executeCode(editorId);
        }
    }

    // Called after the camera has been stopped: leaves the "streaming" state,
    // relabels the button and resets the status area.
    function onVideoStopped() {
        startAndStop.innerText = 'Start';
        streaming = false;
        initStatus();
    }

    // Shows the annotated frame on the output canvas and reports the model
    // path and the inference time (in ms) in the status line.
    //   output: cv.Mat to display
    //   time:   inference duration in milliseconds
    // Failures are logged to the console and do not propagate to the caller.
    function updateResult(output, time) {
        try{
            let canvasOutput = document.getElementById('canvasOutput');
            canvasOutput.style.visibility = "visible";
            cv.imshow('canvasOutput', output);

            // Build the status line from DOM nodes so that the model path is
            // always treated as plain text, never as markup.
            const modelCaption = document.createElement('b');
            modelCaption.textContent = 'Model:';
            const timeCaption = document.createElement('b');
            timeCaption.textContent = 'Inference time:';
            const timeText = ` ${time.toFixed(2)} ms`;

            const status = document.getElementById('status');
            status.textContent = '';
            status.append(modelCaption, ` ${modelPath}`, document.createElement('br'),
                          timeCaption, timeText);
        } catch(e) {
            console.log(e);
        }
    }

    // Resets the result area: empties the status line, hides the output
    // canvas and clears any displayed error.
    function initStatus() {
        const statusLine = document.getElementById('status');
        statusLine.innerHTML = '';

        const outputCanvas = document.getElementById('canvasOutput');
        outputCanvas.style.visibility = "hidden";

        utils.clearError();
    }

</script>

</body>

</html>