Traverse directories in a multithreaded environment. First, determine all top-level directories and store them in a vector, which gives the following result: /home/dir1, /home/dir2, ... (more than 1000 entries).
Traverse directories in a multithreaded environment. First, determine all top-level directories and store them in a vector, which gives the following result:
- /home/dir1
- /home/dir2
- ... (more than 1000 entries)
- Create a number of databases.
- Create a number of threads.
- Assign each thread to one directory.
- Traverse and determine the size of each directory.

There is some issue regarding performance, but I feel the code also needs a review; any better suggestions are welcome.
#include<boost/tokenizer.hpp> #include<boost/asio.hpp> #include <boost/bind/bind.hpp> #include "scan_dir.h" //local file using namespace std::chrono; /*
- Process directory fucntion
- Input : Project path, maxdepth */ void process_dir(const std::string &proj, uint64_t &count, std::vector<std::string> &dirs) { std::cout<<"Creating Directory"<<std::endl; //dirs = Util::get_top_dir_depth(proj, 0); dirs = Util::traverse_dir(proj, 1); count = dirs.size(); }
int main(int argc, char *argv[]) { po::options_description desc("DiskAnalyzer Tool"); po::variables_map vm; std::string user, proj; uint64_t f_size, maxdepth=0, dir_size=0; bool show_dir; Dirs d;
desc.add_options() ("help,h", "DiskAnalyzer option") ("proj,p", po::value<string>(),"provide directory path which you would like to search data") ("user,u", po::value<string>(), "display file which is associated/Owner with user") ("dirsize,ds", po::value<uint64_t>()->default_value(1000000), "display dir which dir_size>=size by default 1000000 Byte:1MB") ("showdir,sh", po::value<bool>()->default_value(false), "show only dir which is associated with user") ("maxdepth", po::value<uint64_t>()->default_value(5), "show only dir which is associated with user") ("filesize,fs", po::value<uint64_t>()->default_value(10000), "display file which file_size>=size by default 10000 Byte:10KB"); try { po::store(po::parse_command_line(argc, argv, desc), vm); po::notify(vm); }catch(const std::exception &err) { std::cerr<<err.what()<<std::endl; std::cout<<desc<<std::endl; } catch(...) { std::cout<<"Unkown exception"<<std::endl; } if(vm.count("help")) { std::cout<<"scan -p <proj_name> -u <user_name> -maxdepth <maxdepth> -fs <file_size> -d <debug>\n\n"; std::cout<<desc<<std::endl; return 1; } if(vm.count("user")){ user = vm["user"].as<string>(); } if(vm.count("proj")){ proj = vm["proj"].as<string>(); } if(vm.count("filesize")){ f_size = vm["filesize"].as<uint64_t>(); } if(vm.count("showdir")) { show_dir = vm["showdir"].as<bool>(); } if(vm.count("dirsize")) { dir_size = vm["dirsize"].as<uint64_t>(); } if(vm.count("maxdepth")){ maxdepth = vm["maxdepth"].as<uint64_t>(); } if(show_dir) { d.scan_dir_name(proj, user, dir_size, maxdepth); return 0; } else { uint64_t count = 0; std::vector<std::string> dir; process_dir(proj, count, dir); std::cout<<"createing database["<<proj<<" "<<count<<" ]"<<std::endl; std::string db_name = Command::basename(proj); DataBase db[count]; for (uint64_t i = 0; i<count; i++){ db[i].set_db_name("DiskAnalyzer_"+ std::to_string(i)+"_" +db_name); if(!db[i].prepare_db()){ std::cerr<<"[Error] DataBase operation failed"<<std::endl; return 1; } } std::size_t max_thread = 
dir.size() > 1000 ? 1000 : dir.size(); //max_thread = 10; std::cout<<dir.size()<<std::endl; //contain directory information while(dir.size()){ std::size_t dir_traverse = 0, db_count = 0; boost::asio::io_service io_service; boost::asio::io_service::work work(io_service); boost::thread_group threads; for (std::size_t i = 0; i < max_thread; ++i) threads.create_thread(boost::bind(&boost::asio::io_service::run, &io_service)); for(auto it = dir.begin(); it != dir.end() && dir_traverse <max_thread; ++it){ if(db_count>=count) db_count = 0; try { //this function determine determine size of directory. I had expectation // each directory will go each thread io_service.post(boost::bind(&Dirs::scan_dir, boost::ref(d), *it, db[db_count], user)); } catch(...) { std::cerr<<"got error"<<std::endl; continue; } dir_traverse++; //dir_traverse = dir_traverse + max_thread; db_count++; //boost::this_thread::sleep(boost::posix_time::seconds(1)); } io_service.stop(); threads.join_all(); dir.erase(dir.begin(), dir.begin()+dir_traverse); std::cout<<" [Remaining Processing dir cout "<<dir.size()<<std::endl; } return 0; } std::cout<<desc<<std::endl; return 0;}
There is some issue regarding performance, but I feel the code also needs a review; any better suggestions are welcome.
#include<boost/tokenizer.hpp>
#include<boost/asio.hpp>
#include <boost/bind/bind.hpp>
#include "scan_dir.h" //local file
using namespace std::chrono;
/*
 * Fill `dirs` with the result of traversing a project path.
 *
 * Prints a "Creating Directory" progress line, replaces the contents of
 * `dirs` with whatever Util::traverse_dir(proj, 1) returns, and reports the
 * number of stored entries through `count`.
 *
 * Input  : proj  - project path handed to Util::traverse_dir
 * Output : count - set to dirs.size() after the traversal
 *          dirs  - overwritten with the traversal result
 */
void process_dir(const std::string &proj, uint64_t &count, std::vector<std::string> &dirs) {
    std::cout << "Creating Directory" << std::endl;

    // Earlier variant, kept for reference: Util::get_top_dir_depth(proj, 0)
    dirs = Util::traverse_dir(proj, 1);

    // The count always mirrors the size of the vector that was just stored.
    count = static_cast<uint64_t>(dirs.size());
}
int main(int argc, char *argv[]) {
po::options_description desc("DiskAnalyzer Tool");
po::variables_map vm;
std::string user, proj;
uint64_t f_size, maxdepth=0, dir_size=0;
bool show_dir;
Dirs d;
desc.add_options()
("help,h", "DiskAnalyzer option")
("proj,p", po::value<string>(),"provide directory path which you would like to search data")
("user,u", po::value<string>(), "display file which is associated/Owner with user")
("dirsize,ds", po::value<uint64_t>()->default_value(1000000), "display dir which dir_size>=size by default 1000000 Byte:1MB")
("showdir,sh", po::value<bool>()->default_value(false), "show only dir which is associated with user")
("maxdepth", po::value<uint64_t>()->default_value(5), "show only dir which is associated with user")
("filesize,fs", po::value<uint64_t>()->default_value(10000), "display file which file_size>=size by default 10000 Byte:10KB");
try {
po::store(po::parse_command_line(argc, argv, desc), vm);
po::notify(vm);
}catch(const std::exception &err) {
std::cerr<<err.what()<<std::endl;
std::cout<<desc<<std::endl;
}
catch(...) {
std::cout<<"Unkown exception"<<std::endl;
}
if(vm.count("help")) {
std::cout<<"scan -p <proj_name> -u <user_name> -maxdepth <maxdepth> -fs <file_size> -d <debug>\n\n";
std::cout<<desc<<std::endl;
return 1;
}
if(vm.count("user")){
user = vm["user"].as<string>();
}
if(vm.count("proj")){
proj = vm["proj"].as<string>();
}
if(vm.count("filesize")){
f_size = vm["filesize"].as<uint64_t>();
}
if(vm.count("showdir")) {
show_dir = vm["showdir"].as<bool>();
}
if(vm.count("dirsize")) {
dir_size = vm["dirsize"].as<uint64_t>();
}
if(vm.count("maxdepth")){
maxdepth = vm["maxdepth"].as<uint64_t>();
}
if(show_dir) {
d.scan_dir_name(proj, user, dir_size, maxdepth);
return 0;
} else {
uint64_t count = 0;
std::vector<std::string> dir;
process_dir(proj, count, dir);
std::cout<<"createing database["<<proj<<" "<<count<<" ]"<<std::endl;
std::string db_name = Command::basename(proj);
DataBase db[count];
for (uint64_t i = 0; i<count; i++){
db[i].set_db_name("DiskAnalyzer_"+ std::to_string(i)+"_" +db_name);
if(!db[i].prepare_db()){
std::cerr<<"[Error] DataBase operation failed"<<std::endl;
return 1;
}
}
std::size_t max_thread = dir.size() > 1000 ? 1000 : dir.size();
//max_thread = 10;
std::cout<<dir.size()<<std::endl;
//contain directory information
while(dir.size()){
std::size_t dir_traverse = 0, db_count = 0;
boost::asio::io_service io_service;
boost::asio::io_service::work work(io_service);
boost::thread_group threads;
for (std::size_t i = 0; i < max_thread; ++i)
threads.create_thread(boost::bind(&boost::asio::io_service::run, &io_service));
for(auto it = dir.begin(); it != dir.end() && dir_traverse <max_thread; ++it){
if(db_count>=count)
db_count = 0;
try {
//this function determine determine size of directory. I had expectation
// each directory will go each thread
io_service.post(boost::bind(&Dirs::scan_dir, boost::ref(d), *it, db[db_count], user));
} catch(...) {
std::cerr<<"got error"<<std::endl;
continue;
}
dir_traverse++;
//dir_traverse = dir_traverse + max_thread;
db_count++;
//boost::this_thread::sleep(boost::posix_time::seconds(1));
}
io_service.stop();
threads.join_all();
dir.erase(dir.begin(), dir.begin()+dir_traverse);
std::cout<<" [Remaining Processing dir cout "<<dir.size()<<std::endl;
}
return 0;
}
std::cout<<desc<<std::endl;
return 0;
}