/*! * \file main.cpp * \brief Main application file * * \author * Christos Choutouridis AEM:8997 * <cchoutou@ece.auth.gr> */ #include <iostream> #include <string> #include <exception> #include <unistd.h> #include <cstdio> #include "matrix.hpp" #include "v0.hpp" #include "v1.hpp" #include "utils.hpp" #include "config.h" // Global session data session_t session; Log logger; Timing timer; /*! * A small command line argument parser * \return The status of the operation */ bool get_options(int argc, char* argv[]){ bool status =true; // iterate over the passed arguments for (int i=1 ; i<argc ; ++i) { std::string arg(argv[i]); // get current argument if (arg == "-c" || arg == "--corpus") { if (i+2 < argc) { session.corpusMtxFile = std::string(argv[++i]); session.corpusDataSet = std::string(argv[++i]); } else status = false; } else if (arg == "-o" || arg == "--output") { if (i+3 < argc) { session.outMtxFile = std::string(argv[++i]); session.outMtxIdxDataSet = std::string(argv[++i]); session.outMtxDstDataSet = std::string(argv[++i]); } else status = false; } else if (arg == "-q" || arg == "--query") { if (i+2 < argc) { session.queryMtxFile = std::string(argv[++i]); session.queryDataSet = std::string(argv[++i]); session.queryMtx = true; } else status = false; } else if (arg == "-k") { session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k; } else if (arg == "-n" || arg == "--max_threads") { session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads; } else if (arg == "-s" || arg == "--slices") { session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices; } else if (arg == "-a" || arg == "--accuracy") { session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy; } else if (arg == "-t" || arg == "--timing") session.timing = true; else if (arg == "-v" || arg == "--verbose") session.verbose = true; else if (arg == "-h" || arg == "--help") { std::cout << "annsearch - an aproximation knnsearch utility\n\n"; std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n"; std::cout << '\n'; std::cout << "Options:\n\n"; std::cout << " -c | --corpus <file> <dataset>\n"; std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n"; std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n"; std::cout << " Path to <file> to store the data and the names of the datasets.\n\n"; std::cout << " -q | --query <file> <dataset>\n"; std::cout << " Path to hdf5 file to open and name of the dataset to load\n"; std::cout << " If not defined, the corpus is used\n\n"; std::cout << " -k <number>\n"; std::cout << " Set the number of closest neighbors to find. \n\n"; std::cout << " -n | --max_trheads <threads>\n"; std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n"; std::cout << " -s | --slices <slices/threads>\n"; std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n"; std::cout << " <threads> should be less or equal to available CPUs\n\n"; std::cout << " -a | --accuracy <accuracy>\n"; std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n"; std::cout << " -t | --timing\n"; std::cout << " Request timing measurements output to stdout.\n\n"; std::cout << " -v | --verbose\n"; std::cout << " Request a more verbose output to stdout.\n\n"; std::cout << " -h | --help <size>\n"; std::cout << " Prints this and exit.\n\n"; std::cout << "Examples:\n\n"; std::cout << " ...Example case...:\n"; std::cout << " > ./annsearch -i <MFILE> ... \n\n"; exit(0); } else { // parse error std::cout << "Invocation error. Try -h for details.\n"; status = false; } } return status; } /*! * Matrix load * * \fn void loadMtx(MatrixDst&, MatrixDst&) * \param Corpus matrix to load to * \param Query matrix to load to */ void loadMtx(MatrixDst& Corpus, MatrixDst& Query) { if (access(session.outMtxFile.c_str(), F_OK) == 0) std::remove(session.outMtxFile.c_str()); // timer.start(); Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus); if (session.queryMtx) Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query); // timer.stop(); // timer.print_dt("Load hdf5 files"); } /*! * Matrix store * * \fn void storeMtx(MatrixIdx&, MatrixDst&) * \param Idx Index part(neighbors) of the matrix to store * \param Dst Distances part of the matrix to store */ void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) { // timer.start(); Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx); Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst); // timer.stop(); // timer.print_dt("Store hdf5 files"); } #ifndef TESTING int main(int argc, char* argv[]) try { // Instantiate matrixes MatrixDst Corpus; MatrixDst Query; MatrixIdx Idx; MatrixDst Dst; #if CODE_VERSION == V0 using namespace v0; #else using namespace v1; #endif // try to read command line if (!get_options(argc, argv)) exit(1); init_workers(); // Load data loadMtx(Corpus, Query); // Prepare output memory Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k); Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k); // Do the search logger << "Start knnsearch ..."; timer.start(); size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0)); knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst); timer.stop(); logger << " Done" << logger.endl; timer.print_dt("knnsearch"); // Store data storeMtx(Idx, Dst); return 0; } catch (std::exception& e) { //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it. std::cerr << "Error: " << e.what() << '\n'; exit(1); } #elif defined TESTING #include <gtest/gtest.h> #include <exception> GTEST_API_ int main(int argc, char **argv) try { testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } catch (std::exception& e) { std::cout << "Exception: " << e.what() << '\n'; } #endif