1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
|
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/objdetect.hpp>
#include <iostream>
using namespace cv;
using namespace std;
/**
 * Prints the detection results to stdout and draws them onto `input`.
 *
 * Each row of `faces` is read as 15 float values:
 *   - columns 0..3  : bounding box (top-left x, top-left y, width, height)
 *   - columns 4..13 : five landmark points stored as (x, y) pairs
 *   - column  14    : detection score
 *
 * @param input     Image that the boxes, landmarks and FPS label are drawn on (modified in place).
 * @param frame     Frame index printed in front of the FPS value; pass a negative value to omit it.
 * @param faces     Detection results, one face per row (see layout above). Only read here.
 * @param fps       Frames-per-second value to report.
 * @param thickness Line thickness used for the box and the landmark circles.
 */
static
void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
{
    const std::string fpsString = cv::format("FPS : %.2f", (float)fps);

    if (frame >= 0)
        cout << "Frame " << frame << ", ";
    // fpsString already carries its own "FPS : " label, so print it as is
    // instead of prefixing a second "FPS: " in front of it.
    cout << fpsString << endl;

    // One colour per landmark, in the order the landmarks are stored in a row.
    const Scalar landmarkColors[5] = {
        Scalar(255, 0, 0),
        Scalar(0, 0, 255),
        Scalar(0, 255, 0),
        Scalar(255, 0, 255),
        Scalar(0, 255, 255)
    };

    for (int i = 0; i < faces.rows; i++)
    {
        // Shorthand for reading one value of the current face row.
        auto val = [&faces, i](int col) { return faces.at<float>(i, col); };

        // Print results
        cout << "Face " << i
             << ", top-left coordinates: (" << val(0) << ", " << val(1) << "), "
             << "box width: " << val(2) << ", box height: " << val(3) << ", "
             << "score: " << cv::format("%.2f", val(14))
             << endl;

        // Draw bounding box
        const Rect2i box(static_cast<int>(val(0)), static_cast<int>(val(1)),
                         static_cast<int>(val(2)), static_cast<int>(val(3)));
        rectangle(input, box, Scalar(0, 255, 0), thickness);

        // Draw landmarks: point k lives in columns (4 + 2k, 5 + 2k)
        for (int k = 0; k < 5; k++)
        {
            const Point2i center(static_cast<int>(val(4 + 2 * k)),
                                 static_cast<int>(val(5 + 2 * k)));
            circle(input, center, 2, landmarkColors[k], thickness);
        }
    }

    // FPS label in the top-left corner of the image
    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
}
int main(int argc, char** argv)
{
CommandLineParser parser(argc, argv,
"{help h | | Print this message}"
"{image1 i1 | | Path to the input image1. Omit for detecting through VideoCapture}"
"{image2 i2 | | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
"{video v | 0 | Path to the input video}"
"{scale sc | 1.0 | Scale factor used to resize input video frames}"
"{fd_model fd | face_detection_yunet_2021dec.onnx| Path to the model. Download yunet.onnx in https://github.com/opencv/opencv_zoo/tree/master/models/face_detection_yunet}"
"{fr_model fr | face_recognition_sface_2021dec.onnx | Path to the face recognition model. Download the model at https://github.com/opencv/opencv_zoo/tree/master/models/face_recognition_sface}"
"{score_threshold | 0.9 | Filter out faces of score < score_threshold}"
"{nms_threshold | 0.3 | Suppress bounding boxes of iou >= nms_threshold}"
"{top_k | 5000 | Keep top_k bounding boxes before NMS}"
"{save s | false | Set true to save results. This flag is invalid when using camera}"
);
if (parser.has("help"))
{
parser.printMessage();
return 0;
}
String fd_modelPath = parser.get<String>("fd_model");
String fr_modelPath = parser.get<String>("fr_model");
float scoreThreshold = parser.get<float>("score_threshold");
float nmsThreshold = parser.get<float>("nms_threshold");
int topK = parser.get<int>("top_k");
bool save = parser.get<bool>("save");
float scale = parser.get<float>("scale");
double cosine_similar_thresh = 0.363;
double l2norm_similar_thresh = 1.128;
//! [initialize_FaceDetectorYN]
// Initialize FaceDetectorYN
Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
//! [initialize_FaceDetectorYN]
TickMeter tm;
// If input is an image
if (parser.has("image1"))
{
String input1 = parser.get<String>("image1");
Mat image1 = imread(samples::findFile(input1));
if (image1.empty())
{
std::cerr << "Cannot read image: " << input1 << std::endl;
return 2;
}
int imageWidth = int(image1.cols * scale);
int imageHeight = int(image1.rows * scale);
resize(image1, image1, Size(imageWidth, imageHeight));
tm.start();
//! [inference]
// Set input size before inference
detector->setInputSize(image1.size());
Mat faces1;
detector->detect(image1, faces1);
if (faces1.rows < 1)
{
std::cerr << "Cannot find a face in " << input1 << std::endl;
return 1;
}
//! [inference]
tm.stop();
// Draw results on the input image
visualize(image1, -1, faces1, tm.getFPS());
// Save results if save is true
if (save)
{
cout << "Saving result.jpg...\n";
imwrite("result.jpg", image1);
}
// Visualize results
imshow("image1", image1);
pollKey(); // handle UI events to show content
if (parser.has("image2"))
{
String input2 = parser.get<String>("image2");
Mat image2 = imread(samples::findFile(input2));
if (image2.empty())
{
std::cerr << "Cannot read image2: " << input2 << std::endl;
return 2;
}
tm.reset();
tm.start();
detector->setInputSize(image2.size());
Mat faces2;
detector->detect(image2, faces2);
if (faces2.rows < 1)
{
std::cerr << "Cannot find a face in " << input2 << std::endl;
return 1;
}
tm.stop();
visualize(image2, -1, faces2, tm.getFPS());
if (save)
{
cout << "Saving result2.jpg...\n";
imwrite("result2.jpg", image2);
}
imshow("image2", image2);
pollKey();
//! [initialize_FaceRecognizerSF]
// Initialize FaceRecognizerSF
Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
//! [initialize_FaceRecognizerSF]
//! [facerecognizer]
// Aligning and cropping facial image through the first face of faces detected.
Mat aligned_face1, aligned_face2;
faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
// Run feature extraction with given aligned_face
Mat feature1, feature2;
faceRecognizer->feature(aligned_face1, feature1);
feature1 = feature1.clone();
faceRecognizer->feature(aligned_face2, feature2);
feature2 = feature2.clone();
//! [facerecognizer]
//! [match]
double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
//! [match]
if (cos_score >= cosine_similar_thresh)
{
std::cout << "They have the same identity;";
}
else
{
std::cout << "They have different identities;";
}
std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
if (L2_score <= l2norm_similar_thresh)
{
std::cout << "They have the same identity;";
}
else
{
std::cout << "They have different identities.";
}
std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
}
cout << "Press any key to exit..." << endl;
waitKey(0);
}
else
{
int frameWidth, frameHeight;
VideoCapture capture;
std::string video = parser.get<string>("video");
if (video.size() == 1 && isdigit(video[0]))
capture.open(parser.get<int>("video"));
else
capture.open(samples::findFileOrKeep(video)); // keep GStreamer pipelines
if (capture.isOpened())
{
frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
cout << "Video " << video
<< ": width=" << frameWidth
<< ", height=" << frameHeight
<< endl;
}
else
{
cout << "Could not initialize video capturing: " << video << "\n";
return 1;
}
detector->setInputSize(Size(frameWidth, frameHeight));
cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
int nFrame = 0;
for (;;)
{
// Get frame
Mat frame;
if (!capture.read(frame))
{
cerr << "Can't grab frame! Stop\n";
break;
}
resize(frame, frame, Size(frameWidth, frameHeight));
// Inference
Mat faces;
tm.start();
detector->detect(frame, faces);
tm.stop();
Mat result = frame.clone();
// Draw results on the input image
visualize(result, nFrame, faces, tm.getFPS());
// Visualize results
imshow("Live", result);
int key = waitKey(1);
bool saveFrame = save;
if (key == ' ')
{
saveFrame = true;
key = 0; // handled
}
if (saveFrame)
{
std::string frame_name = cv::format("frame_%05d.png", nFrame);
std::string result_name = cv::format("result_%05d.jpg", nFrame);
cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
imwrite(frame_name, frame);
imwrite(result_name, result);
}
++nFrame;
if (key > 0)
break;
}
cout << "Processed " << nFrame << " frames" << endl;
}
cout << "Done." << endl;
return 0;
}
|