https://github.com/spmallick/learnopencv/tree/master/ObjectDetection-YOLO

硬件信息
8 Intel® Core™ i7-4790 CPU @ 3.60GHz

每张图像检测时间约 280ms

opencv 中的GPU 目前只支持 intel GPU

yolov3.cpp

// This code is written at BigVision LLC. It is based on the OpenCV project. It is subject to the license terms in the LICENSE file found in this distribution and at http://opencv.org/license.html

// Usage example:  ./object_detection_yolo.out --video=run.mp4
//                 ./object_detection_yolo.out --image=bird.jpg
#include <fstream>
#include <sstream>
#include <iostream>

#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

const char* keys =
\"{help h usage ? | | Usage examples: \\n\\t\\t./ _detection_yolo.out --image=dog.jpg \\n\\t\\t./ _detection_yolo.out --video=run_sm.mp4}\"
\"{image i        |<none>| input image   }\"
\"{video v       |<none>| input video   }\"
;
using namespace cv;
using namespace dnn;
using namespace std;

// Tunable detection parameters shared by main(), postprocess() and drawPred()
float confThreshold = 0.5f; // Minimum class score for a detection to be kept
float nmsThreshold = 0.4f;  // Threshold passed to non-maximum suppression
int inpWidth = 416;         // Width of the blob fed to the network
int inpHeight = 416;        // Height of the blob fed to the network
vector<string> classes;     // Class names, one per line of the names file read in main()

// Remove the bounding boxes with low confidence using non-maxima suppression
void postprocess(Mat&  , const vector<Mat>& out);

// Draw the predicted bounding box
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat&  );

// Get the names of the output  s
vector<String> getOutputsNames(const Net& net);

int main(int argc, char** argv)
{
    CommandLineParser parser(argc, argv, keys);
    parser.about(\"Use this   to run   detection using YOLO3 in OpenCV.\");
    if (parser.has(\"help\"))
    {
        parser.printMessage();
        return 0;
    }
    // Load names of classes
    string classesFile = \"coco.names\";
    ifstream ifs(classesFile.c_str());
    string line;
    while (getline(ifs, line)) classes.push_back(line);
    
    // Give the configuration and weight files for the model
    String modelConfiguration = \"yolov3.cfg\";
    String modelWeights = \"yolov3.weights\";

    // Load the network
    Net net = readNetFromDarknet(modelConfiguration, modelWeights);
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);  //DNN_TARGET_OPENCL
    
    // Open a video file or an image file or a camera stream.
    string str, outputFile;
    VideoCapture cap;
    VideoWriter video;
    Mat  , blob;
    
    try {
        
        outputFile = \"yolo_out_cpp.avi\";
        if (parser.has(\"image\"))
        {
            // Open the image file
            str = parser.get<String>(\"image\");
            ifstream ifile(str);
            if (!ifile) throw(\"error\");
            cap.open(str);
            str.replace(str.end()-4, str.end(), \"_yolo_out_cpp.jpg\");
            outputFile = str;
        }
        else if (parser.has(\"video\"))
        {
            // Open the video file
            str = parser.get<String>(\"video\");
            ifstream ifile(str);
            if (!ifile) throw(\"error\");
            cap.open(str);
            str.replace(str.end()-4, str.end(), \"_yolo_out_cpp.avi\");
            outputFile = str;
        }
        // Open the webcaom
        else cap.open(parser.get<int>(\"device\"));
        
    }
    catch(...) {
        cout << \"Could not open the input image/video stream\" << endl;
        return 0;
    }
    
    // Get the video writer initialized to save the output video
    if (!parser.has(\"image\")) {
        video.open(outputFile, VideoWriter::fourcc(\'M\',\'J\',\'P\',\'G\'), 28, Size(cap.get(CAP_PROP_ _WIDTH), cap.get(CAP_PROP_ _HEIGHT)));
    }
    
    // Create a window
    static const string kWinName = \"Deep learning   detection in OpenCV\";
    namedWindow(kWinName, WINDOW_NORMAL);

    // Process  s.
    while (waitKey(1) < 0)
    {
        // get   from the video
        cap >>  ;

        // Stop the program if reached end of video
        if ( .empty()) {
            cout << \"Done processing !!!\" << endl;
            cout << \"Output file is stored as \" << outputFile << endl;
            waitKey(3000);
            break;
        }
        // Create a 4D blob from a  .
        blobFromImage( , blob, 1/255.0, cv::Size(inpWidth, inpHeight), Scalar(0,0,0), true, false);
        
        //Sets the input to the network
        net.setInput(blob);
        
        // Runs the forward pass to get output of the output  s
        vector<Mat> outs;
        net.forward(outs, getOutputsNames(net));
        
        // Remove the bounding boxes with low confidence
        postprocess( , outs);
        
        // Put efficiency information. The function getPerfProfile returns the overall time for inference(t) and the timings for each of the  s(in  sTimes)
        vector<double>  sTimes;
        double freq = getTickFrequency() / 1000;
        double t = net.getPerfProfile( sTimes) / freq;
        string label = format(\"Inference time for a   : %.2f ms\", t);
        putText( , label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));
        
        // Write the   with the detection boxes
        Mat detected ;
         .convertTo(detected , CV_8U);
        if (parser.has(\"image\")) imwrite(outputFile, detected );
        else video.write(detected );
        
        imshow(kWinName,  );
        
    }
    
    cap.release();
    if (!parser.has(\"image\")) video.release();

    return 0;
}

// Remove the bounding boxes with low confidence using non-maxima suppression
void postprocess(Mat&  , const vector<Mat>& outs)
{
    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;
    
    for (size_t i = 0; i < outs.size(); ++i)
    {
        // Scan through all the bounding boxes output from the network and keep only the
        // ones with high confidence scores. Assign the box\'s class label as the class
        // with the highest score for the box.
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
        {
            Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            Point classIdPoint;
            double confidence;
            // Get the value and location of the maximum score
            minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
            if (confidence > confThreshold)
            {
                int centerX = (int)(data[0] *  .cols);
                int centerY = (int)(data[1] *  .rows);
                int width = (int)(data[2] *  .cols);
                int height = (int)(data[3] *  .rows);
                int left = centerX - width / 2;
                int top = centerY - height / 2;
                
                classIds.push_back(classIdPoint.x);
                confidences.push_back((float)confidence);
                boxes.push_back(Rect(left, top, width, height));
            }
        }
    }
    
    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    vector<int> indices;
    NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        Rect box = boxes[idx];
        drawPred(classIds[idx], confidences[idx], box.x, box.y,
                 box.x + box.width, box.y + box.height,  );
    }
}

// Draw the predicted bounding box
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat&  )
{
    //Draw a rectangle displaying the bounding box
    rectangle( , Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);
    
    //Get the label for the class name and its confidence
    string label = format(\"%.2f\", conf);
    if (!classes.empty())
    {
        CV_Assert(classId < (int)classes.size());
        label = classes[classId] + \":\" + label;
    }
    
    //Display the label at the top of the bounding box
    int  Line;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, & Line);
    top = max(top, labelSize.height);
    rectangle( , Point(left, top - round(1.5*labelSize.height)), Point(left + round(1.5*labelSize.width), top +  Line), Scalar(255, 255, 255), FILLED);
    putText( , label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0,0,0),1);
}

// Get the names of the output layers.
// The result is computed on the first call and cached in a function-local
// static, so later calls return the cached names without querying `net`
// again.
vector<String> getOutputsNames(const Net& net)
{
    static vector<String> names;
    if (names.empty())
    {
        //Get the indices of the output layers, i.e. the layers with unconnected outputs
        vector<int> outLayers = net.getUnconnectedOutLayers();

        //get the names of all the layers in the network
        vector<String> layersNames = net.getLayerNames();

        // Get the names of the output layers in names
        // (the returned ids are used as 1-based positions in layersNames)
        names.resize(outLayers.size());
        for (size_t i = 0; i < outLayers.size(); ++i)
            names[i] = layersNames[outLayers[i] - 1];
    }
    return names;
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.4)
project( opencv_yolov3 )

# Build as C++11 without overwriting any compiler flags supplied by the user.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Directory containing OpenCVConfig.cmake. The path below is the author's
# local build tree; override it with -DOpenCV_DIR=/path/to/opencv/build.
if(NOT DEFINED OpenCV_DIR)
  set(OpenCV_DIR "/home/zhangjun/SoftWare/opencv-4.0.0/build")
endif()
find_package( OpenCV REQUIRED )
message(STATUS "OpenCV_VERSION:" ${OpenCV_VERSION})

add_executable( opencv_yolov3 yolov3.cpp )

target_link_libraries( opencv_yolov3 ${OpenCV_LIBS} )

cmake .
make
./opencv_yolov3 --image=12.jpg
./opencv_yolov3 --video=run.mp4

收藏 打印