220 lines
7.0 KiB
C++
220 lines
7.0 KiB
C++
/*!
 * \file main.cpp
 * \brief Main application file
 *
 * \author
 *    Christos Choutouridis AEM:8997
 *    <cchoutou@ece.auth.gr>
 */
|
|
|
|
#include <iostream>
|
|
#include <string>
|
|
#include <exception>
|
|
#include <unistd.h>
|
|
#include <cstdio>
|
|
|
|
#include "matrix.hpp"
|
|
|
|
#include "v0.hpp"
|
|
#include "v1.hpp"
|
|
#include "utils.hpp"
|
|
#include "config.h"
|
|
|
|
// Global session data
// These three objects are shared by the functions of this file.
|
|
// Run-time settings: written by get_options(), read by loadMtx(), storeMtx() and main().
session_t session;
|
|
// Log sink: main() streams its progress messages into it (operator<< and logger.endl).
Log logger;
|
|
// Timing helper: main() brackets the knnsearch call with start()/stop() and reports via print_dt().
Timing timer;
|
|
|
|
/*!
|
|
* A small command line argument parser
|
|
* \return The status of the operation
|
|
*/
|
|
bool get_options(int argc, char* argv[]){
|
|
bool status =true;
|
|
|
|
// iterate over the passed arguments
|
|
for (int i=1 ; i<argc ; ++i) {
|
|
std::string arg(argv[i]); // get current argument
|
|
|
|
if (arg == "-c" || arg == "--corpus") {
|
|
if (i+2 < argc) {
|
|
session.corpusMtxFile = std::string(argv[++i]);
|
|
session.corpusDataSet = std::string(argv[++i]);
|
|
}
|
|
else
|
|
status = false;
|
|
}
|
|
else if (arg == "-o" || arg == "--output") {
|
|
if (i+3 < argc) {
|
|
session.outMtxFile = std::string(argv[++i]);
|
|
session.outMtxIdxDataSet = std::string(argv[++i]);
|
|
session.outMtxDstDataSet = std::string(argv[++i]);
|
|
}
|
|
else
|
|
status = false;
|
|
}
|
|
else if (arg == "-q" || arg == "--query") {
|
|
if (i+2 < argc) {
|
|
session.queryMtxFile = std::string(argv[++i]);
|
|
session.queryDataSet = std::string(argv[++i]);
|
|
session.queryMtx = true;
|
|
}
|
|
else
|
|
status = false;
|
|
}
|
|
else if (arg == "-k") {
|
|
session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
|
|
}
|
|
else if (arg == "-n" || arg == "--max_threads") {
|
|
session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
|
|
}
|
|
else if (arg == "-s" || arg == "--slices") {
|
|
session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices;
|
|
}
|
|
else if (arg == "-a" || arg == "--accuracy") {
|
|
session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy;
|
|
}
|
|
else if (arg == "-t" || arg == "--timing")
|
|
session.timing = true;
|
|
else if (arg == "-v" || arg == "--verbose")
|
|
session.verbose = true;
|
|
else if (arg == "-h" || arg == "--help") {
|
|
std::cout << "annsearch - an aproximation knnsearch utility\n\n";
|
|
std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
|
|
std::cout << '\n';
|
|
std::cout << "Options:\n\n";
|
|
std::cout << " -c | --corpus <file> <dataset>\n";
|
|
std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n";
|
|
std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n";
|
|
std::cout << " Path to <file> to store the data and the names of the datasets.\n\n";
|
|
std::cout << " -q | --query <file> <dataset>\n";
|
|
std::cout << " Path to hdf5 file to open and name of the dataset to load\n";
|
|
std::cout << " If not defined, the corpus is used\n\n";
|
|
std::cout << " -k <number>\n";
|
|
std::cout << " Set the number of closest neighbors to find. \n\n";
|
|
std::cout << " -n | --max_trheads <threads>\n";
|
|
std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n";
|
|
std::cout << " -s | --slices <slices/threads>\n";
|
|
std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n";
|
|
std::cout << " <threads> should be less or equal to available CPUs\n\n";
|
|
std::cout << " -a | --accuracy <accuracy>\n";
|
|
std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n";
|
|
std::cout << " -t | --timing\n";
|
|
std::cout << " Request timing measurements output to stdout.\n\n";
|
|
std::cout << " -v | --verbose\n";
|
|
std::cout << " Request a more verbose output to stdout.\n\n";
|
|
std::cout << " -h | --help <size>\n";
|
|
std::cout << " Prints this and exit.\n\n";
|
|
std::cout << "Examples:\n\n";
|
|
std::cout << " ...Example case...:\n";
|
|
std::cout << " > ./annsearch -i <MFILE> ... \n\n";
|
|
|
|
exit(0);
|
|
}
|
|
else { // parse error
|
|
std::cout << "Invocation error. Try -h for details.\n";
|
|
status = false;
|
|
}
|
|
}
|
|
|
|
return status;
|
|
}
|
|
|
|
/*!
|
|
* Matrix load
|
|
*
|
|
* \fn void loadMtx(MatrixDst&, MatrixDst&)
|
|
* \param Corpus matrix to load to
|
|
* \param Query matrix to load to
|
|
*/
|
|
void loadMtx(MatrixDst& Corpus, MatrixDst& Query) {
|
|
if (access(session.outMtxFile.c_str(), F_OK) == 0)
|
|
std::remove(session.outMtxFile.c_str());
|
|
|
|
// timer.start();
|
|
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
|
|
if (session.queryMtx)
|
|
Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
|
|
// timer.stop();
|
|
// timer.print_dt("Load hdf5 files");
|
|
}
|
|
|
|
|
|
/*!
|
|
* Matrix store
|
|
*
|
|
* \fn void storeMtx(MatrixIdx&, MatrixDst&)
|
|
* \param Idx Index part(neighbors) of the matrix to store
|
|
* \param Dst Distances part of the matrix to store
|
|
*/
|
|
void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) {
|
|
// timer.start();
|
|
Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
|
|
Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
|
|
// timer.stop();
|
|
// timer.print_dt("Store hdf5 files");
|
|
}
|
|
|
|
|
|
#ifndef TESTING
|
|
int main(int argc, char* argv[]) try {
|
|
// Instantiate matrixes
|
|
MatrixDst Corpus;
|
|
MatrixDst Query;
|
|
MatrixIdx Idx;
|
|
MatrixDst Dst;
|
|
|
|
#if CODE_VERSION == V0
|
|
using namespace v0;
|
|
#else
|
|
using namespace v1;
|
|
#endif
|
|
|
|
// try to read command line
|
|
if (!get_options(argc, argv))
|
|
exit(1);
|
|
|
|
init_workers();
|
|
|
|
// Load data
|
|
loadMtx(Corpus, Query);
|
|
|
|
// Prepare output memory
|
|
Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
|
|
Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
|
|
|
|
// Do the search
|
|
logger << "Start knnsearch ...";
|
|
timer.start();
|
|
size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0));
|
|
knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst);
|
|
timer.stop();
|
|
logger << " Done" << logger.endl;
|
|
timer.print_dt("knnsearch");
|
|
|
|
// Store data
|
|
storeMtx(Idx, Dst);
|
|
|
|
return 0;
|
|
}
|
|
catch (std::exception& e) {
|
|
//we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
|
|
std::cerr << "Error: " << e.what() << '\n';
|
|
exit(1);
|
|
}
|
|
|
|
#elif defined TESTING
|
|
|
|
#include <gtest/gtest.h>
|
|
#include <exception>
|
|
|
|
GTEST_API_ int main(int argc, char **argv) try {
|
|
testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|
|
catch (std::exception& e) {
|
|
std::cout << "Exception: " << e.what() << '\n';
|
|
}
|
|
|
|
#endif
|