I am working on the "Build Your Own wc Tool" coding challenge in C++. I have a working implementation that seems to match the output of the `wc` command for different inputs. I am looking for feedback on:
- The code logic as well as C++ best practices for structuring the code.
- Making it easier to test the correctness of this code with different inputs.
Dependencies
- CLI11: A command-line parser
Any suggestions are appreciated since I am relatively new to C++.
#include <clocale>
#include <cstddef>
#include <cstring>
#include <cwchar>
#include <exception>
#include <fstream>
#include <iostream>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>

#include <CLI/CLI.hpp>
/// Totals accumulated while scanning an input stream.
///
/// The counters are 64-bit (`long long`) so that inputs larger than 2 GiB do
/// not overflow, which a plain `int` would on common platforms.
struct Text_Counts
{
    long long characters{0}; ///< Bytes or multi-byte characters, depending on the flags used.
    long long lines{0};      ///< Number of lines seen.
    long long words{0};      ///< Number of whitespace-separated words seen.
};
/// Selection of which counts the user asked for on the command line.
struct Text_Count_Flags
{
    bool characters{false};           ///< Count characters as bytes.
    bool lines{false};                ///< Count lines.
    bool words{false};                ///< Count words.
    bool multibyte_characters{false}; ///< Count characters as multi-byte sequences.

    /// @return true when none of the four flags is set.
    ///
    /// Declared `const` because it only reads the flags; this lets it be
    /// called on const objects and through const references.
    bool no_flags_set() const noexcept
    {
        return !(characters || lines || words || multibyte_characters);
    }
};
/// Renders the requested counts as text.
///
/// The enabled counts are emitted in the fixed order lines, words, characters.
/// Each emitted number is followed by a single space, so a non-empty result
/// always ends with a space. The character count is emitted when either the
/// byte flag or the multi-byte flag is set.
///
/// @param counts      The totals to render.
/// @param count_flags Which of the totals to include.
/// @return The formatted counts, or an empty string when no flag is set.
std::string counts_to_string(Text_Counts counts, const Text_Count_Flags &count_flags)
{
    const bool show_characters = count_flags.characters || count_flags.multibyte_characters;

    std::string formatted;
    if (count_flags.lines)
    {
        formatted.append(std::to_string(counts.lines)).append(" ");
    }
    if (count_flags.words)
    {
        formatted.append(std::to_string(counts.words)).append(" ");
    }
    if (show_characters)
    {
        formatted.append(std::to_string(counts.characters)).append(" ");
    }
    return formatted;
}
/// Returns the stream to read from for the given path.
///
/// An empty path selects standard input. Otherwise the named file is opened
/// and a reference to the file stream is returned.
///
/// The file stream has static storage duration so the returned reference
/// stays valid after this function returns. It is re-opened on every call:
/// constructing it with the path (as a function-local static initializer)
/// would only honour the path of the first call and would keep a failed
/// first open around forever. As a consequence, a reference returned by an
/// earlier call with a non-empty path refers to the newly opened file after
/// the next such call.
///
/// @param filepath Path of the file to open, or empty for standard input.
/// @return `std::cin` for an empty path, otherwise the opened file stream.
/// @throws std::runtime_error if the file cannot be opened.
std::istream &get_input_stream(const std::string &filepath)
{
    if (filepath.empty())
    {
        // return stdin stream if filepath is empty
        return std::cin;
    }

    static std::ifstream input_file;
    if (input_file.is_open())
    {
        input_file.close();
    }
    input_file.clear(); // drop eof/fail bits left over from a previous file
    input_file.open(filepath);
    if (!input_file)
    {
        throw std::runtime_error("Could not open file: " + filepath);
    }
    return input_file;
}
/// Counts the multi-byte characters contained in one line of text.
///
/// The whole `std::string` is scanned using its stored length, so embedded
/// NUL bytes do not cut the scan short. Bytes that do not form a valid
/// character in the current C locale are skipped without being counted.
///
/// @param text  The line to scan (without its terminating newline).
/// @param state Conversion state passed to `std::mbrtowc`; it is reset to the
///              initial state after an invalid or incomplete sequence.
/// @return The number of complete characters decoded from `text`.
static std::size_t count_multibyte_characters(const std::string &text, std::mbstate_t &state)
{
    const std::size_t invalid_sequence = static_cast<std::size_t>(-1);
    const std::size_t incomplete_sequence = static_cast<std::size_t>(-2);

    std::size_t character_count = 0;
    const char *cursor = text.data();
    const char *const end = cursor + text.size();
    while (cursor != end)
    {
        wchar_t decoded;
        const std::size_t consumed =
            std::mbrtowc(&decoded, cursor, static_cast<std::size_t>(end - cursor), &state);
        if (consumed == invalid_sequence)
        {
            // The state is unspecified after an encoding error: start over
            // and resynchronise on the next byte.
            state = std::mbstate_t();
            ++cursor;
            continue;
        }
        if (consumed == incomplete_sequence)
        {
            // The line ends in the middle of a character; nothing more to count.
            state = std::mbstate_t();
            break;
        }
        // A return value of 0 means a NUL character was decoded; step over it.
        cursor += (consumed == 0) ? 1 : consumed;
        ++character_count;
    }
    return character_count;
}

/// Reads `input_stream` to its end and gathers the counts selected by `count_flags`.
///
/// Only the counts whose flag is set are computed; the others stay at zero.
/// Lines are counted by their terminating newline, so a final line that is
/// not newline-terminated contributes its characters and words but neither a
/// line nor an extra character for the missing newline.
///
/// @param input_stream Stream to consume; it is read until extraction fails.
/// @param count_flags  Which counts to compute. When `multibyte_characters`
///                     is set, characters are decoded with `std::mbrtowc`
///                     (which uses the current C locale); otherwise each
///                     byte counts as one character.
/// @return The accumulated counts.
Text_Counts get_text_counts(std::istream &input_stream, const Text_Count_Flags &count_flags)
{
    std::mbstate_t state = std::mbstate_t(); // initial state for mbrtowc
    Text_Counts counts;
    std::string line, word;
    const bool count_characters = count_flags.characters || count_flags.multibyte_characters;

    // Iterate through each line of input stream
    while (std::getline(input_stream, line))
    {
        // getline sets eofbit when it runs out of input before finding '\n',
        // which happens only for an unterminated final line.
        const bool newline_terminated = !input_stream.eof();

        if (count_flags.lines && newline_terminated)
        {
            ++counts.lines;
        }
        if (count_characters)
        {
            std::size_t line_characters = count_flags.multibyte_characters
                                              ? count_multibyte_characters(line, state)
                                              : line.size();
            if (newline_terminated)
            {
                ++line_characters; // the newline that getline stripped
            }
            counts.characters += static_cast<decltype(counts.characters)>(line_characters);
        }
        if (count_flags.words)
        {
            std::istringstream line_stream(line);
            while (line_stream >> word)
            {
                ++counts.words;
            }
        }
    }
    return counts;
}
int main(int argc, char *argv[])
{
std::setlocale(LC_ALL, ""); // For multi-byte char counting
CLI::App app{"wc command to count characters, word and lines"};
argv = app.ensure_utf8(argv);
Text_Count_Flags count_flags;
app.add_flag("-c", count_flags.characters, "Count Characters")->take_last();
app.add_flag("-m", count_flags.multibyte_characters, "Count Multi-Byte Characters")->take_last();
app.add_flag("-l", count_flags.lines, "Count Lines")->take_last();
app.add_flag("-w", count_flags.words, "Count Words")->take_last();
std::string filepath;
app.add_option("filepath", filepath, "Input file path");
CLI11_PARSE(app, argc, argv);
// If none of the flags are passed, use all flags
if (count_flags.no_flags_set())
{
count_flags.characters = count_flags.words = count_flags.lines = true;
}
std::istream &input_stream = get_input_stream(filepath);
Text_Counts counts = get_text_counts(input_stream, count_flags);
std::string output_string = counts_to_string(counts, count_flags) + filepath;
std::cout << output_string;
return 0;
}
- … `<CLI/CLI.hpp>`
- `counts.characters += line.length() + 1;` — The last line may not be terminated by a newline character.
- … the file name `-` as the standard input.
- `-` (dash) can be used. It means use `std::cin` or `std::cout` depending on context. But in `wc` it would represent an input file.
- … `strlen` on a `std::string`; they know their own length (in bytes) so you don't have to waste cycles scanning for the terminating zero. There's a `.length()` member function, or you could take a pointer to the `.cend()` element. en.cppreference.com/w/cpp/string/basic_string
- `std::string` is an explicit-length string, unlike traditional C strings.