PDS/homework_1/src/main.cpp

220 lines
7.0 KiB
C++

/*!
* \file main.cpp
* \brief Main application file
*
* \author
* Christos Choutouridis AEM:8997
* <cchoutou@ece.auth.gr>
*/
#include <iostream>
#include <string>
#include <exception>
#include <unistd.h>
#include <cstdio>
#include "matrix.hpp"
#include "v0.hpp"
#include "v1.hpp"
#include "utils.hpp"
#include "config.h"
// Global session data
// Run configuration: get_options() fills it from the command line and
// loadMtx(), storeMtx() and main() read it.
session_t session;
// Stream-style logger; main() uses it to print progress messages.
Log logger;
// Timer object; main() uses it (start/stop/print_dt) to time the knnsearch call.
Timing timer;
/*!
 * A small command line argument parser.
 *
 * Walks the argument vector and stores every recognised option in the global
 * \c session object. Parsing does not stop at the first error; every argument
 * is visited and the overall result is reported through the return value.
 *
 * \param argc    Number of entries in \p argv, as received by main()
 * \param argv    Argument vector, as received by main(). argv[0] is skipped
 * \return        The status of the operation
 *    \arg true   Every argument was recognised and had its operands
 *    \arg false  An unknown argument was found, or -c/-o/-q lacked operands
 *
 * \note  -h | --help prints the usage text and terminates the process with exit(0).
 * \note  Numeric values are converted with std::atoi, so non-numeric text yields 0.
 * \note  A trailing -k/-n/-s/-a without a value leaves the current setting untouched.
 */
bool get_options(int argc, char* argv[]){
   bool status =true;

   // iterate over the passed arguments
   for (int i=1 ; i<argc ; ++i) {
      std::string arg(argv[i]);     // get current argument

      // True when at least `count` more entries follow the current argument.
      auto has_operands = [&](int count) { return i + count < argc; };

      // Report an option whose operands are missing and mark the parse as failed.
      // Without this, a trailing "-c" made the program exit with no explanation.
      auto missing_operands = [&](int count) {
         std::cout << "Option " << arg << " requires " << count << " value(s). Try -h for details.\n";
         status = false;
      };

      if (arg == "-c" || arg == "--corpus") {
         if (has_operands(2)) {
            session.corpusMtxFile = std::string(argv[++i]);
            session.corpusDataSet = std::string(argv[++i]);
         }
         else
            missing_operands(2);
      }
      else if (arg == "-o" || arg == "--output") {
         if (has_operands(3)) {
            session.outMtxFile = std::string(argv[++i]);
            session.outMtxIdxDataSet = std::string(argv[++i]);
            session.outMtxDstDataSet = std::string(argv[++i]);
         }
         else
            missing_operands(3);
      }
      else if (arg == "-q" || arg == "--query") {
         if (has_operands(2)) {
            session.queryMtxFile = std::string(argv[++i]);
            session.queryDataSet = std::string(argv[++i]);
            session.queryMtx = true;   // a separate query matrix was requested
         }
         else
            missing_operands(2);
      }
      else if (arg == "-k") {
         if (has_operands(1))
            session.k = std::atoi(argv[++i]);
      }
      else if (arg == "-n" || arg == "--max_threads") {
         if (has_operands(1))
            session.max_threads = std::atoi(argv[++i]);
      }
      else if (arg == "-s" || arg == "--slices") {
         if (has_operands(1))
            session.slices = std::atoi(argv[++i]);
      }
      else if (arg == "-a" || arg == "--accuracy") {
         if (has_operands(1))
            session.accuracy = std::atoi(argv[++i]);
      }
      else if (arg == "-t" || arg == "--timing")
         session.timing = true;
      else if (arg == "-v" || arg == "--verbose")
         session.verbose = true;
      else if (arg == "-h" || arg == "--help") {
         std::cout << "annsearch - an approximation knnsearch utility\n\n";
         std::cout << "annsearch -c <file> <dataset> [-k <N>] [-o <file> <idx-dataset> <dst-dataset>] [-q <file> <dataset>] [-n <threads>] [-s <slices>] [-a <accuracy>] [-t] [-v]\n";
         std::cout << '\n';
         std::cout << "Options:\n\n";
         std::cout << " -c | --corpus <file> <dataset>\n";
         std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n";
         std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n";
         std::cout << " Path to <file> to store the data and the names of the datasets.\n\n";
         std::cout << " -q | --query <file> <dataset>\n";
         std::cout << " Path to hdf5 file to open and name of the dataset to load\n";
         std::cout << " If not defined, the corpus is used\n\n";
         std::cout << " -k <number>\n";
         std::cout << " Set the number of closest neighbors to find. \n\n";
         std::cout << " -n | --max_threads <threads>\n";
         std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n";
         std::cout << " -s | --slices <slices/threads>\n";
         std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n";
         std::cout << " <threads> should be less or equal to available CPUs\n\n";
         std::cout << " -a | --accuracy <accuracy>\n";
         std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n";
         std::cout << " -t | --timing\n";
         std::cout << " Request timing measurements output to stdout.\n\n";
         std::cout << " -v | --verbose\n";
         std::cout << " Request a more verbose output to stdout.\n\n";
         std::cout << " -h | --help\n";
         std::cout << " Prints this and exit.\n\n";
         std::cout << "Examples:\n\n";
         std::cout << " ...Example case...:\n";
         std::cout << " > ./annsearch -c <MFILE> <DATASET> ... \n\n";
         exit(0);
      }
      else {   // parse error
         std::cout << "Invocation error. Try -h for details.\n";
         status = false;
      }
   }
   return status;
}
/*!
 * Matrix load
 *
 * Deletes a pre-existing output file (session.outMtxFile) and then loads the
 * input matrices. The corpus is always loaded from session.corpusMtxFile /
 * session.corpusDataSet. The query is loaded from session.queryMtxFile /
 * session.queryDataSet, and only when session.queryMtx is set (i.e. -q was
 * given on the command line); otherwise \p Query is left untouched.
 *
 * \fn void loadMtx(MatrixDst&, MatrixDst&)
 * \param Corpus matrix to load to
 * \param Query matrix to load to
 */
void loadMtx(MatrixDst& Corpus, MatrixDst& Query) {
   // Remove a stale output file if one exists.
   // NOTE(review): presumably so storeMtx() starts from a fresh file - confirm.
   if (access(session.outMtxFile.c_str(), F_OK) == 0)
      std::remove(session.outMtxFile.c_str());

   // timer.start();
   Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
   // The query must come from the query file/dataset selected with -q.
   // It was previously (re)loaded from the corpus file, ignoring the -q operands.
   if (session.queryMtx)
      Mtx::load<MatrixDst, DstHDF5Type>(session.queryMtxFile, session.queryDataSet, Query);
   // timer.stop();
   // timer.print_dt("Load hdf5 files");
}
/*!
 * Matrix store
 *
 * Writes both result matrices to the output file named in the global session:
 * first the neighbor indexes under session.outMtxIdxDataSet, then the
 * distances under session.outMtxDstDataSet.
 *
 * \fn void storeMtx(MatrixIdx&, MatrixDst&)
 * \param Idx Index part(neighbors) of the matrix to store
 * \param Dst Distances part of the matrix to store
 */
void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) {
   // Both datasets go to the same output file.
   const auto& outFile      = session.outMtxFile;
   const auto& idxDataSet   = session.outMtxIdxDataSet;
   const auto& dstDataSet   = session.outMtxDstDataSet;

   // Timing of the store step is currently disabled:
   // timer.start();
   Mtx::store<MatrixIdx, IdxHDF5Type>(outFile, idxDataSet, Idx);
   Mtx::store<MatrixDst, DstHDF5Type>(outFile, dstDataSet, Dst);
   // timer.stop();
   // timer.print_dt("Store hdf5 files");
}
#ifndef TESTING
/*!
 * Application entry point.
 *
 * Parses the command line, loads the input matrices, runs knnsearch (from
 * namespace v0 or v1, selected at compile time by CODE_VERSION) and stores
 * the resulting index and distance matrices.
 *
 * \param argc  Number of command line arguments
 * \param argv  Command line arguments
 * \return      0 on success, 1 on invocation error or on a std::exception
 */
int main(int argc, char* argv[]) try {
   // Instantiate matrixes
   MatrixDst  Corpus;
   MatrixDst  Query;
   MatrixIdx  Idx;
   MatrixDst  Dst;

#if CODE_VERSION == V0
   using namespace v0;
#else
   using namespace v1;
#endif

   // try to read command line
   // Return instead of exit() so the local objects are destroyed normally.
   if (!get_options(argc, argv))
      return 1;

   init_workers();

   // Load data
   loadMtx(Corpus, Query);

   // Prepare output memory: one row per query point, k columns.
   // Without a separate query matrix the corpus is searched against itself.
   const auto rows = (session.queryMtx) ? Query.rows() : Corpus.rows();
   Idx.resize(rows, session.k);
   Dst.resize(rows, session.k);

   // Do the search
   logger << "Start knnsearch ...";
   timer.start();
   // session.accuracy is a percentage applied to k; the result is truncated.
   size_t selected_neighbors = static_cast<size_t>(session.k*(session.accuracy/100.0));
   knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst);
   timer.stop();
   logger << " Done" << logger.endl;
   timer.print_dt("knnsearch");

   // Store data
   storeMtx(Idx, Dst);

   return 0;
}
catch (const std::exception& e) {
   //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
   std::cerr << "Error: " << e.what() << '\n';
   return 1;
}
#elif defined TESTING
#include <gtest/gtest.h>
#include <exception>
/*!
 * Test-runner entry point (TESTING builds).
 *
 * Initialises GoogleTest with the command line and runs all registered tests.
 *
 * \param argc  Number of command line arguments
 * \param argv  Command line arguments
 * \return      The value of RUN_ALL_TESTS(), or 1 if a std::exception escapes
 */
GTEST_API_ int main(int argc, char **argv) try {
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
}
catch (const std::exception& e) {
   std::cout << "Exception: " << e.what() << '\n';
   // Falling off the end of this handler would make main() return 0 and
   // report a crashed test run as a success, so fail explicitly.
   return 1;
}
#endif