#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp> // resize, rectangle, putText
#include <opencv2/videoio.hpp> // VideoCapture
#include <opencv2/highgui.hpp>
#include <opencv2/tracking/tracking_by_matching.hpp>
#include <iostream>
#ifdef HAVE_OPENCV_DNN
#include <opencv2/dnn.hpp>
using namespace std;
using namespace cv;
using namespace cv::detail::tracking;
using namespace cv::detail::tracking::tbm;
static const char* keys =
{ "{video_name | | video name }"
"{start_frame |0| Start frame }"
"{frame_step |1| Frame step }"
"{detector_model | | Path to detector's Caffe model }"
"{detector_weights | | Path to detector's Caffe weights }"
"{desired_class_id |-1| The desired class that should be tracked }"
};
static void help()
{
cout << "\nThis example shows the functionality of \"Tracking-by-Matching\" approach:"
" detector is used to detect objects on frames, \n"
"matching is used to find correspondences between new detections and tracked objects.\n"
"Detection is made by DNN detection network every `--frame_step` frame.\n"
"Point a .prototxt file of the network as the parameter `--detector_model`, and a .caffemodel file"
" as the parameter `--detector_weights`.\n"
"(As an example of such detection network is a popular MobileNet_SSD network trained on VOC dataset.)\n"
"If `--desired_class_id` parameter is set, the detection result is filtered by class id,"
" returned by the detection network.\n"
"(That is, if a detection net was trained on VOC dataset, then to track pedestrians point --desired_class_id=15)\n"
"Example of <video_name> is in opencv_extra/testdata/cv/tracking/\n"
"Call:\n"
"./example_tracking_tracking_by_matching --video_name=<video_name> --detector_model=<detector_model_path> --detector_weights=<detector_weights_path> \\\n"
" [--start_frame=<start_frame>] \\\n"
" [--frame_step=<frame_step>] \\\n"
" [--desired_class_id=<desired_class_id>]\n"
<< endl;
cout << "\n\nHot keys: \n"
"\tq - quit the program\n"
"\tp - pause/resume video\n";
}
cv::Ptr<ITrackerByMatching> createTrackerByMatchingWithFastDescriptor();
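// Wraps a Caffe SSD-style detection network loaded via the cv::dnn module and
// converts its raw output into tbm::TrackedObjects that the tracker consumes.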
class DnnObjectDetector
{
public:
DnnObjectDetector(const String& net_caffe_model_path, const String& net_caffe_weights_path,
int desired_class_id=-1,
                      float confidence_threshold = 0.2f,
//the following parameters are default for popular MobileNet_SSD caffe model
const String& net_input_name="data",
const String& net_output_name="detection_out",
double net_scalefactor=0.007843,
const Size& net_size = Size(300,300),
const Scalar& net_mean = Scalar(127.5, 127.5, 127.5),
bool net_swapRB=false)
:desired_class_id(desired_class_id),
confidence_threshold(confidence_threshold),
net_input_name(net_input_name),
net_output_name(net_output_name),
net_scalefactor(net_scalefactor),
net_size(net_size),
net_mean(net_mean),
net_swapRB(net_swapRB)
{
net = dnn::readNetFromCaffe(net_caffe_model_path, net_caffe_weights_path);
if (net.empty())
CV_Error(Error::StsError, "Cannot read Caffe net");
}
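    // Run the detector on a single frame and return the detections that pass
    // the confidence and (optional) class-id filters.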
TrackedObjects detect(const cv::Mat& frame, int frame_idx)
{
        // blobFromImage resizes the input to net_size itself, so no manual
        // resize is needed beforehand
        Mat inputBlob = cv::dnn::blobFromImage(frame, net_scalefactor, net_size, net_mean, net_swapRB);
net.setInput(inputBlob, net_input_name);
Mat detection = net.forward(net_output_name);
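        // an SSD-style "detection_out" blob has shape 1x1xNx7; view it as an
        // Nx7 matrix with one row per detection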
Mat detection_as_mat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
TrackedObjects res;
for (int i = 0; i < detection_as_mat.rows; i++)
{
float cur_confidence = detection_as_mat.at<float>(i, 2);
int cur_class_id = static_cast<int>(detection_as_mat.at<float>(i, 1));
            // row layout: [image_id, label, confidence, xmin, ymin, xmax, ymax],
            // coordinates normalized to [0, 1] with y growing downward
            int x_left = static_cast<int>(detection_as_mat.at<float>(i, 3) * frame.cols);
            int y_top = static_cast<int>(detection_as_mat.at<float>(i, 4) * frame.rows);
            int x_right = static_cast<int>(detection_as_mat.at<float>(i, 5) * frame.cols);
            int y_bottom = static_cast<int>(detection_as_mat.at<float>(i, 6) * frame.rows);
            Rect cur_rect(x_left, y_top, (x_right - x_left), (y_bottom - y_top));
if (cur_confidence < confidence_threshold)
continue;
if ((desired_class_id >= 0) && (cur_class_id != desired_class_id))
continue;
//clipping by frame size
cur_rect = cur_rect & Rect(Point(), frame.size());
if (cur_rect.empty())
continue;
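            // object_id is -1 here: track ids are assigned later by the
            // tracker during matching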
TrackedObject cur_obj(cur_rect, cur_confidence, frame_idx, -1);
res.push_back(cur_obj);
}
return res;
}
private:
cv::dnn::Net net;
int desired_class_id;
float confidence_threshold;
String net_input_name;
String net_output_name;
double net_scalefactor;
Size net_size;
Scalar net_mean;
bool net_swapRB;
};
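// Creates a tracker that matches detections by appearance using a cheap "fast"
// descriptor: each object patch is resized to a fixed 16x32 thumbnail, and
// thumbnails are compared with a template-matching distance.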
cv::Ptr<ITrackerByMatching>
createTrackerByMatchingWithFastDescriptor() {
tbm::TrackerParams params;
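    // the default matching parameters are used here; the fields of
    // tbm::TrackerParams (e.g. affinity thresholds and the track-forgetting
    // delay) can be tuned before the tracker is created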
cv::Ptr<ITrackerByMatching> tracker = createTrackerByMatching(params);
std::shared_ptr<IImageDescriptor> descriptor_fast =
std::make_shared<ResizedImageDescriptor>(
cv::Size(16, 32), cv::InterpolationFlags::INTER_LINEAR);
std::shared_ptr<IDescriptorDistance> distance_fast =
std::make_shared<MatchTemplateDistance>();
tracker->setDescriptorFast(descriptor_fast);
tracker->setDistanceFast(distance_fast);
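    // a "strong" descriptor/distance pair (e.g. one backed by a
    // re-identification network) can additionally be set via
    // setDescriptorStrong()/setDistanceStrong()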
return tracker;
}
int main( int argc, char** argv ){
CommandLineParser parser( argc, argv, keys );
cv::Ptr<ITrackerByMatching> tracker = createTrackerByMatchingWithFastDescriptor();
String video_name = parser.get<String>("video_name");
int start_frame = parser.get<int>("start_frame");
int frame_step = parser.get<int>("frame_step");
String detector_model = parser.get<String>("detector_model");
String detector_weights = parser.get<String>("detector_weights");
int desired_class_id = parser.get<int>("desired_class_id");
if( video_name.empty() || detector_model.empty() || detector_weights.empty() )
{
help();
return -1;
}
    //open the capture and seek to the requested start frame
    VideoCapture cap;
    cap.open( video_name );
    if( !cap.isOpened() )
    {
        help();
        cout << "***Could not initialize capturing...***\n";
        cout << "Current parameters' values: \n";
        parser.printMessage();
        return -1;
    }
    cap.set( CAP_PROP_POS_FRAMES, start_frame );
// If you use the popular MobileNet_SSD detector, the default parameters may be used.
// Otherwise, set your own parameters (net_mean, net_scalefactor, etc).
DnnObjectDetector detector(detector_model, detector_weights, desired_class_id);
Mat frame;
    namedWindow( "Tracking by Matching", WINDOW_AUTOSIZE );
    // CAP_PROP_POS_FRAMES is the 0-based index of the frame to be decoded next,
    // so this also stays correct when the backend ignores the seek above
    int frame_counter = static_cast<int>(cap.get(CAP_PROP_POS_FRAMES)) - 1;
int64 time_total = 0;
bool paused = false;
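    // main loop: read frames, run the detector on every frame_step-th frame,
    // and feed the detections to the tracker; frames skipped by frame_step
    // are not processed at all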
for ( ;; )
{
if( paused )
{
char c = (char) waitKey(30);
if (c == 'p')
paused = !paused;
if (c == 'q')
break;
continue;
}
cap >> frame;
if(frame.empty()){
break;
}
frame_counter++;
if (frame_counter < start_frame)
continue;
if (frame_counter % frame_step != 0)
continue;
int64 frame_time = getTickCount();
TrackedObjects detections = detector.detect(frame, frame_counter);
        // timestamp in milliseconds, assuming a fixed input rate of 30 fps
        uint64_t cur_timestamp = static_cast<uint64_t>(1000.0 / 30 * frame_counter);
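        // process() matches the new detections against the currently tracked
        // objects and updates the set of active tracks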
tracker->process(frame, detections, cur_timestamp);
frame_time = getTickCount() - frame_time;
time_total += frame_time;
        // draw the colored "worms" (accumulated track trajectories)
        frame = tracker->drawActiveTracks(frame);
        // draw all raw detections on the frame in BLUE
for (const auto &detection : detections) {
cv::rectangle(frame, detection.rect, cv::Scalar(255, 0, 0), 3);
}
        // draw tracked detections in RED and print the track ID and the
        // detection confidence
for (const auto &detection : tracker->trackedDetections()) {
cv::rectangle(frame, detection.rect, cv::Scalar(0, 0, 255), 3);
std::string text = std::to_string(detection.object_id) +
" conf: " + std::to_string(detection.confidence);
cv::putText(frame, text, detection.rect.tl(), cv::FONT_HERSHEY_COMPLEX,
1.0, cv::Scalar(0, 0, 255), 3);
}
imshow( "Tracking by Matching", frame );
char c = (char) waitKey( 2 );
if (c == 'q')
break;
if (c == 'p')
paused = !paused;
}
    // rough throughput estimate: frames read divided by the total detection
    // and tracking time
    if (time_total > 0)
    {
        double fps = frame_counter / (time_total / getTickFrequency());
        printf("FPS: %f\n", fps);
    }
return 0;
}
#else // #ifdef HAVE_OPENCV_DNN
int main(int, char**){
    CV_Error(cv::Error::StsNotImplemented, "The 'tracking_by_matching' sample requires OpenCV to be built with the opencv_dnn module.");
}
#endif // #ifdef HAVE_OPENCV_DNN