AUTH's THMMY "Parallel and distributed systems" course assignments.
Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.
 
 
 
 
 
 

220 рядки
7.0 KiB

  1. /*!
  2. * \file main.cpp
  3. * \brief Main application file
  4. *
  5. * \author
  6. * Christos Choutouridis AEM:8997
  7. * <cchoutou@ece.auth.gr>
  8. */
  9. #include <iostream>
  10. #include <string>
  11. #include <exception>
  12. #include <unistd.h>
  13. #include <cstdio>
  14. #include "matrix.hpp"
  15. #include "v0.hpp"
  16. #include "v1.hpp"
  17. #include "utils.hpp"
  18. #include "config.h"
  19. // Global session data
  20. session_t session;
  21. Log logger;
  22. Timing timer;
  23. /*!
  24. * A small command line argument parser
  25. * \return The status of the operation
  26. */
  27. bool get_options(int argc, char* argv[]){
  28. bool status =true;
  29. // iterate over the passed arguments
  30. for (int i=1 ; i<argc ; ++i) {
  31. std::string arg(argv[i]); // get current argument
  32. if (arg == "-c" || arg == "--corpus") {
  33. if (i+2 < argc) {
  34. session.corpusMtxFile = std::string(argv[++i]);
  35. session.corpusDataSet = std::string(argv[++i]);
  36. }
  37. else
  38. status = false;
  39. }
  40. else if (arg == "-o" || arg == "--output") {
  41. if (i+3 < argc) {
  42. session.outMtxFile = std::string(argv[++i]);
  43. session.outMtxIdxDataSet = std::string(argv[++i]);
  44. session.outMtxDstDataSet = std::string(argv[++i]);
  45. }
  46. else
  47. status = false;
  48. }
  49. else if (arg == "-q" || arg == "--query") {
  50. if (i+2 < argc) {
  51. session.queryMtxFile = std::string(argv[++i]);
  52. session.queryDataSet = std::string(argv[++i]);
  53. session.queryMtx = true;
  54. }
  55. else
  56. status = false;
  57. }
  58. else if (arg == "-k") {
  59. session.k = (i+1 < argc) ? std::atoi(argv[++i]) : session.k;
  60. }
  61. else if (arg == "-n" || arg == "--max_threads") {
  62. session.max_threads = (i+1 < argc) ? std::atoi(argv[++i]) : session.max_threads;
  63. }
  64. else if (arg == "-s" || arg == "--slices") {
  65. session.slices = (i+1 < argc) ? std::atoi(argv[++i]) : session.slices;
  66. }
  67. else if (arg == "-a" || arg == "--accuracy") {
  68. session.accuracy = (i+1 < argc) ? std::atoi(argv[++i]) : session.accuracy;
  69. }
  70. else if (arg == "-t" || arg == "--timing")
  71. session.timing = true;
  72. else if (arg == "-v" || arg == "--verbose")
  73. session.verbose = true;
  74. else if (arg == "-h" || arg == "--help") {
  75. std::cout << "annsearch - an aproximation knnsearch utility\n\n";
  76. std::cout << "annsearch -c <file> [-k <N>] [-o <file>] [-q <file>] [-n <threads>] [-t] [-v]\n";
  77. std::cout << '\n';
  78. std::cout << "Options:\n\n";
  79. std::cout << " -c | --corpus <file> <dataset>\n";
  80. std::cout << " Path to hdf5 file to open and name of the dataset to load\n\n";
  81. std::cout << " -o | --output <file> <idx-dataset> <dst-dataset> \n";
  82. std::cout << " Path to <file> to store the data and the names of the datasets.\n\n";
  83. std::cout << " -q | --query <file> <dataset>\n";
  84. std::cout << " Path to hdf5 file to open and name of the dataset to load\n";
  85. std::cout << " If not defined, the corpus is used\n\n";
  86. std::cout << " -k <number>\n";
  87. std::cout << " Set the number of closest neighbors to find. \n\n";
  88. std::cout << " -n | --max_trheads <threads>\n";
  89. std::cout << " Reduce the thread number for the execution to <threads>. <threads> should be less or equal to available CPUs.\n\n";
  90. std::cout << " -s | --slices <slices/threads>\n";
  91. std::cout << " The number of slices to the Corpus matrix. In the parallel version this setting affects the number of threads\n";
  92. std::cout << " <threads> should be less or equal to available CPUs\n\n";
  93. std::cout << " -a | --accuracy <accuracy>\n";
  94. std::cout << " Reduce the accuracy of neighbor finding. The accuracy should be between 1-100 \n\n";
  95. std::cout << " -t | --timing\n";
  96. std::cout << " Request timing measurements output to stdout.\n\n";
  97. std::cout << " -v | --verbose\n";
  98. std::cout << " Request a more verbose output to stdout.\n\n";
  99. std::cout << " -h | --help <size>\n";
  100. std::cout << " Prints this and exit.\n\n";
  101. std::cout << "Examples:\n\n";
  102. std::cout << " ...Example case...:\n";
  103. std::cout << " > ./annsearch -i <MFILE> ... \n\n";
  104. exit(0);
  105. }
  106. else { // parse error
  107. std::cout << "Invocation error. Try -h for details.\n";
  108. status = false;
  109. }
  110. }
  111. return status;
  112. }
  113. /*!
  114. * Matrix load
  115. *
  116. * \fn void loadMtx(MatrixDst&, MatrixDst&)
  117. * \param Corpus matrix to load to
  118. * \param Query matrix to load to
  119. */
  120. void loadMtx(MatrixDst& Corpus, MatrixDst& Query) {
  121. if (access(session.outMtxFile.c_str(), F_OK) == 0)
  122. std::remove(session.outMtxFile.c_str());
  123. // timer.start();
  124. Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Corpus);
  125. if (session.queryMtx)
  126. Mtx::load<MatrixDst, DstHDF5Type>(session.corpusMtxFile, session.corpusDataSet, Query);
  127. // timer.stop();
  128. // timer.print_dt("Load hdf5 files");
  129. }
  130. /*!
  131. * Matrix store
  132. *
  133. * \fn void storeMtx(MatrixIdx&, MatrixDst&)
  134. * \param Idx Index part(neighbors) of the matrix to store
  135. * \param Dst Distances part of the matrix to store
  136. */
  137. void storeMtx(MatrixIdx& Idx, MatrixDst& Dst) {
  138. // timer.start();
  139. Mtx::store<MatrixIdx, IdxHDF5Type>(session.outMtxFile, session.outMtxIdxDataSet, Idx);
  140. Mtx::store<MatrixDst, DstHDF5Type>(session.outMtxFile, session.outMtxDstDataSet, Dst);
  141. // timer.stop();
  142. // timer.print_dt("Store hdf5 files");
  143. }
  144. #ifndef TESTING
  145. int main(int argc, char* argv[]) try {
  146. // Instantiate matrixes
  147. MatrixDst Corpus;
  148. MatrixDst Query;
  149. MatrixIdx Idx;
  150. MatrixDst Dst;
  151. #if CODE_VERSION == V0
  152. using namespace v0;
  153. #else
  154. using namespace v1;
  155. #endif
  156. // try to read command line
  157. if (!get_options(argc, argv))
  158. exit(1);
  159. init_workers();
  160. // Load data
  161. loadMtx(Corpus, Query);
  162. // Prepare output memory
  163. Idx.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
  164. Dst.resize((session.queryMtx) ? Query.rows() : Corpus.rows(), session.k);
  165. // Do the search
  166. logger << "Start knnsearch ...";
  167. timer.start();
  168. size_t selected_neighbors = (size_t)(session.k*(session.accuracy/100.0));
  169. knnsearch(Corpus, (session.queryMtx) ? Query : Corpus, session.slices, session.k, selected_neighbors, Idx, Dst);
  170. timer.stop();
  171. logger << " Done" << logger.endl;
  172. timer.print_dt("knnsearch");
  173. // Store data
  174. storeMtx(Idx, Dst);
  175. return 0;
  176. }
  177. catch (std::exception& e) {
  178. //we probably pollute the user's screen. Comment `cerr << ...` if you don't like it.
  179. std::cerr << "Error: " << e.what() << '\n';
  180. exit(1);
  181. }
  182. #elif defined TESTING
  183. #include <gtest/gtest.h>
  184. #include <exception>
  185. GTEST_API_ int main(int argc, char **argv) try {
  186. testing::InitGoogleTest(&argc, argv);
  187. return RUN_ALL_TESTS();
  188. }
  189. catch (std::exception& e) {
  190. std::cout << "Exception: " << e.what() << '\n';
  191. }
  192. #endif