|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- /*!
- * \file main.cpp
- * \brief Main application file
- *
- * \author
- * Christos Choutouridis AEM:8997
- * <cchoutou@ece.auth.gr>
- */
-
- #include <iostream>
- #include <string>
- #include <exception>
- #include <unistd.h>
- #include <cstdio>
-
- #include "matrix.hpp"
-
- #include "v0.hpp"
- #include "v1.hpp"
- #include "utils.hpp"
- #include "config.h"
-
- // Global session data
- session_t session;   // run configuration; get_options() fills its fields from the command line
- Log logger;          // message sink main() uses for the "Start knnsearch ... Done" progress output
- Timing timer;        // stopwatch main() starts/stops around knnsearch and prints with print_dt()
-
- /*!
- * A small command line argument parser
- * \return The status of the operation
- */
- bool get_options(int argc, char* argv[]){
- bool status =true;
-
- // iterate over the passed arguments
- for (int i=1 ; i<argc ; ++i) {
- std::string arg(argv[i]); // get current argument
-
- if (arg == "-c" || arg == "--corpus") {
- if (i+2 < argc) {
- session.corpusMtxFile = std::string(argv[++i]);
- session.corpusDataSet = std::string(argv[++i]);
- }
- else
- status = false;
- }
- else if (arg == "-o" || arg == "--output") {
- if (i+3 < argc) {
- session.outMtxFile = std::string(argv[++i]);
- session.outMtxIdxDataSet = std::string(argv[++i]);
- session.outMtxDstDataSet = std::string(argv[++i]);
- }
- else
- status = false;
- }
- else if (arg == "-q" || arg == "--query") {
- if (i+2 < argc) {
- session.queryMtxFile = std::string(argv[++i]);
- session.queryDataSet = std::string(argv[++i]);
- session.queryMtx = true;
- }
- else
- status = false;
- }
- else if (arg == "-k") {
- session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
- }
- else if (arg == "-n" || arg == "--max_threads") {
- session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
- }
- else if (arg == "-s" || arg == "--slices") {
- session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices;
- }
- else if (arg == "-a" || arg == "--accuracy") {
- session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy;
- }
- else if (arg == "-t" || arg == "--timing")
- session.timing = true;
- else if (arg == "-v" || arg == "--verbose")
- session.verbose = true;
- else if (arg == "-h" || arg == "--help") {
- std::cout << "annsearch - an aproximation knnsearch utility\n\n";
- std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
- std::cout << '\n';
- std::cout << "Options:\n\n";
- std::cout << " -c | --corpus <file> <dataset>\n";
- std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n";
- std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n";
- std::cout << " Path to <file> to store the data and the names of the datasets.\n\n";
- std::cout << " -q | --query <file> <dataset>\n";
- std::cout << " Path to hdf5 file to open and name of the dataset to load\n";
- std::cout << " If not defined, the corpus is used\n\n";
- std::cout << " -k <number>\n";
- std::cout << " Set the number of closest neighbors to find. \n\n";
- std::cout << " -n | --max_trheads <threads>\n";
- std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n";
- std::cout << " -s | --slices <slices/threads>\n";
- std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n";
- std::cout << " <threads> should be less or equal to available CPUs\n\n";
- std::cout << " -a | --accuracy <accuracy>\n";
- std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n";
- std::cout << " -t | --timing\n";
- std::cout << " Request timing measurements output to stdout.\n\n";
- std::cout << " -v | --verbose\n";
- std::cout << " Request a more verbose output to stdout.\n\n";
- std::cout << " -h | --help <size>\n";
- std::cout << " Prints this and exit.\n\n";
- std::cout << "Examples:\n\n";
- std::cout << " ...Example case...:\n";
- std::cout << " > ./annsearch -i <MFILE> ... \n\n";
-
- exit(0);
- }
- else { // parse error
- std::cout << "Invocation error. Try -h for details.\n";
- status = false;
- }
- }
-
- return status;
- }
-
- /*!
- * Matrix load
- *
- * \fn void loadMtx(MatrixDst&, MatrixDst&)
- * \param Corpus matrix to load to
- * \param Query matrix to load to
- */
- void loadMtx(MatrixDst& Corpus, MatrixDst& Query) {
- if (access(session.outMtxFile.c_str(), F_OK) == 0)
- std::remove(session.outMtxFile.c_str());
-
- // timer.start();
- Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
- if (session.queryMtx)
- Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
- // timer.stop();
- // timer.print_dt("Load hdf5 files");
- }
-
-
- /*!
- * Matrix store
- *
- * \fn void storeMtx(MatrixIdx&, MatrixDst&)
- * \param Idx Index part(neighbors) of the matrix to store
- * \param Dst Distances part of the matrix to store
- */
- void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) {
- // timer.start();
- Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
- Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
- // timer.stop();
- // timer.print_dt("Store hdf5 files");
- }
-
-
- #ifndef TESTING
- int main(int argc, char* argv[]) try {
- // Instantiate matrixes
- MatrixDst Corpus;
- MatrixDst Query;
- MatrixIdx Idx;
- MatrixDst Dst;
-
- #if CODE_VERSION == V0
- using namespace v0;
- #else
- using namespace v1;
- #endif
-
- // try to read command line
- if (!get_options(argc, argv))
- exit(1);
-
- init_workers();
-
- // Load data
- loadMtx(Corpus, Query);
-
- // Prepare output memory
- Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
- Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
-
- // Do the search
- logger << "Start knnsearch ...";
- timer.start();
- size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0));
- knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst);
- timer.stop();
- logger << " Done" << logger.endl;
- timer.print_dt("knnsearch");
-
- // Store data
- storeMtx(Idx, Dst);
-
- return 0;
- }
- catch (std::exception& e) {
- //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
- std::cerr << "Error: " << e.what() << '\n';
- exit(1);
- }
-
- #elif defined TESTING
-
- #include <gtest/gtest.h>
- #include <exception>
-
- GTEST_API_ int main(int argc, char **argv) try {
- testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
- }
- catch (std::exception& e) {
- std::cout << "Exception: " << e.what() << '\n';
- }
-
- #endif
|