We used the following script for image classification and to produce a dump file for the 10K ImageNet 2012 validation images. In order to get the exact results reported in the paper, I think we need to figure out what is missing in this script.
#include <time.h>
#include <sys/time.h>
#include <algorithm>
#include <string>
#include <cstdlib>
#include <stdio.h>
#include "float.h"
#include <cmath>
#include <stdlib.h>
#include <fstream>
#include <sstream>
#include <iostream>
#include <iomanip>
#include <unistd.h>
#include "opencv2/opencv.hpp"
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <vector>
using namespace cv;
using namespace dnn;
using namespace std;
// Command-line option table handed to cv::CommandLineParser in main().
// Each "{ name alias | default | help }" entry declares one option; an empty
// default means the option has no value unless the user supplies one.
// Only "backend" and "target" carry a default (0).
const char* keys =
"{ help h | | Print help message. }"
"{ input i | | Path to input test image file. }"
"{ model m | | Path to a binary file of model contains trained weights. }"
"{ config c | | Path to a text file of model contains network configuration.}"
"{ scale | | scale factor. }"
"{ mean | | mean rgb values. }"
"{ width | | input width. }"
"{ height | | input height. }"
"{ classes | | Optional path to a text file with names of classes. }"
"{ backend | 0 | Choose one of computation backends: "
"0: automatically (by default, only this option is available now, to use other backend you have to rebuild opencv), "
"1: Halide language (http://halide-lang.org/), "
"2: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
"3: OpenCV implementation }"
"{ target | 0 | Choose one of target computation devices: "
"0: CPU target (by default, only this option is available now, to use other backend you have to rebuild opencv), "
"1: OpenCL, "
"2: OpenCL fp16 (half-float precision), "
"3: VPU }";
// Returns the current gettimeofday() reading as fractional seconds
// (whole seconds plus microseconds scaled by 1e-6).
// Returns 0 when gettimeofday() reports a failure.
double what_time_is_it_now()
{
    struct timeval now;
    const int status = gettimeofday(&now, NULL);
    if (status != 0)
    {
        return 0;
    }

    const double whole_seconds = (double)now.tv_sec;
    const double microseconds = (double)now.tv_usec;
    return whole_seconds + microseconds * .000001;
}
// Replaces every occurrence of `toSearch` inside `data` with `replaceStr`,
// modifying `data` in place.
//
// data       - string to edit.
// toSearch   - substring to look for; when empty, `data` is left untouched.
// replaceStr - text substituted for each match (may be empty to delete).
//
// Matches are scanned left to right and the search resumes just past the
// inserted replacement, so text produced by a replacement is never rescanned.
void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
    // An empty needle matches at every position, which would make the loop
    // below insert forever; treat it as "nothing to replace".
    if (toSearch.empty())
    {
        return;
    }

    for (size_t pos = data.find(toSearch);
         pos != std::string::npos;
         pos = data.find(toSearch, pos + replaceStr.size()))
    {
        data.replace(pos, toSearch.size(), replaceStr);
    }
}
// Runs "/usr/bin/vcgencmd measure_temp" and returns the number found in the
// first line of its output after stripping the "temp=" and "'C" markers.
// Returns 0 when the command cannot be started or prints nothing parseable.
//
// The output is read through a pipe instead of a scratch file, so no
// "temp.txt" in the working directory is deleted or created and the
// directory does not need to be writable.
// NOTE(review): the reply is presumably of the form "temp=48.3'C" (degrees
// Celsius on a Raspberry Pi) -- confirm on the target device.
float get_temp()
{
    FILE *pipe = popen("/usr/bin/vcgencmd measure_temp", "r");
    if (pipe == NULL)
    {
        return 0.0f;
    }

    char buffer[128];
    std::string line;
    if (fgets(buffer, sizeof(buffer), pipe) != NULL)
    {
        line = buffer;
    }
    pclose(pipe);

    findAndReplaceAll(line, "temp=", "");
    findAndReplaceAll(line, "'C", "");
    // atof stops at the first non-numeric character, so a trailing newline
    // is harmless; an empty or non-numeric line yields 0.
    return (float)atof(line.c_str());
}
// Returns the last path component of `path_name` with its final extension
// removed. Both '/' and '\\' are treated as directory separators; only the
// text from the last '.' of that component onward is dropped.
std::string get_file_name_from_path(std::string path_name)
{
    // Strip everything up to and including the last separator, if any.
    const std::string::size_type last_sep = path_name.find_last_of("/\\");
    if (last_sep != std::string::npos)
    {
        path_name.erase(0, last_sep + 1);
    }

    // Strip the trailing ".ext", if the remaining name has one.
    const std::string::size_type last_dot = path_name.rfind('.');
    if (last_dot != std::string::npos)
    {
        path_name.erase(last_dot);
    }
    return path_name;
}
// Class names, one entry per line of the file passed via --classes.
// Filled by main(); stays empty when the option is not given.
std::vector<std::string> classes;
int main(int argc, char **argv)
{
CommandLineParser parser(argc, argv, keys);
parser.about("Use this script to run object detection deep learning networks using OpenCV.");
if (argc == 1 || parser.has("help"))
{
parser.printMessage();
return 0;
}
cout << "OpenCV version : " << CV_VERSION << endl;
String model = parser.get<String>("model");
String config = parser.get<String>("config");
float scale = parser.get<float>("scale");
Scalar mean = parser.get<Scalar>("mean");
//cout << scale << mean << endl;
int backendId = parser.get<int>("backend");
int targetId = parser.get<int>("target");
Net net = readNet(model, config);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
String image_dir = parser.get<String>("input");
int inpWidth = parser.get<int>("width");
int inpHeight = parser.get<int>("height");
// Open file with classes names.
if (parser.has("classes"))
{
std::string file = parser.get<String>("classes");
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
{
classes.push_back(line);
}
}
// Create a 4D blob from a frame.
Size inpSize(inpWidth, inpHeight);
std::vector<cv::String> imagenames;
cv::glob(image_dir, imagenames);
// dump predictions in file
std::string file_without_extension = get_file_name_from_path(model);
std::string filename = file_without_extension + "_imagenet.txt";
ofstream myfile(filename.c_str(), std::ios_base::out);
Mat frame_org, blob;
for(int i=0; i<imagenames.size(); i++)
//for(int i=0; i<10; i++)
{
if(i%50 == 0 && get_temp() > 55.0) sleep(5);
frame_org = cv::imread(imagenames[i]);
imagenames[i] = get_file_name_from_path(imagenames[i]);
blobFromImage(frame_org, blob, scale, inpSize, Scalar(127.5, 127.5, 127.5), true, false);
// run a model.
net.setInput(blob);
Mat out = net.forward().reshape(1, 1);
/*Point classIdPoint;
double confidence;
minMaxLoc(out, 0, &confidence, 0, &classIdPoint);
int classId = classIdPoint.x;
imagenames[i] = get_file_name_from_path(imagenames[i]);
std::string label = format("%s: %s, %.4f", imagenames[i].c_str(), (classes.empty() ? format("Class #%d", classId).c_str() : classes[classId].c_str()), confidence);
// print top 1 result
cout << label << endl;
*/
myfile << imagenames[i] << " : ";
float *data = (float*)out.data;
for (int i = 0; i < out.size().width; i++) {
myfile << data[i] << " ";
}
myfile << endl;
if((i+1)%100==0) cout << "Completed: " << i+1 << "/" << imagenames.size() << endl;
}
myfile.close();
return 0;
}