I'm building a DBMS similar to SQL-based DBMSs (MySQL, for example). So far I have implemented creating a table and reading from a table.
User input is not implemented yet; for testing, I create std::string variables that contain the statements to parse.
Syntax for creating a table: column_type column_name PK, column_type column_name, etc.
Syntax for reading from a table: column_name comparison_operator rvalue) column_name comparison_operator rvalue, etc.
I added a lot of comments and hope they are enough to understand the code.
In short, I have a column struct, which contains a value of the enum types specifying the column's type (currently INT, DOUBLE, CHAR, STR and NILL) and a boolean that tells whether the column is a primary key.
The entry struct is a single entry in the table (an entry is a specific value, not a row). It consists of the type of the entry (which has to be the same as the column's type) and the value the entry holds, which is a std::variant of all the types listed in the enum types aside from NILL.
The table class consists of a string that holds the name of the primary-key column and, most importantly, a std::map whose key is a column and whose value is a std::vector of the entries stored under that column.
My main concerns right now are the readability of the code, the structuring of the code, and optimization.
I'm not trying to optimize the code to perfection, as I'm still learning the ropes of optimization, but any feedback on crucial optimization mistakes would be awesome.
I will add main.cpp here as well, but currently it is mostly for debugging my code.
table.hpp
#include <map>
#include <string>
#include <variant>
#include <vector>
// Free helper functions used when parsing and evaluating statements.
namespace interpreter {
// Splits `statement` at every occurrence of `delim` and returns the pieces in order.
// The delimiter itself is not part of any returned piece.
std::vector<std::string> tokenizer(const std::string &statement, char delim);
// Compares `lvalue` with `rvalue` using the operator spelled in `comp_operator`.
// Recognised spellings are "==", ">", "<", "<=" and ">=".
// Any other spelling prints an error message to std::cout and throws the int 1.
bool compare_values(const std::variant<int, double, char, std::string> &lvalue, const std::variant<int, double, char, std::string> &rvalue, std::string comp_operator);
}
// Value types currently available for columns and entries.
// NILL means "no value": it is the type used by the default constructors of entry and column
// (a default-constructed entry additionally stores the string "NILL" as its value).
enum types {
INT,
DOUBLE,
CHAR,
STR,
NILL
};
// A single value stored in the table (one cell, not a whole row).
// Entries live in the std::vector that the table's map associates with a column.
struct entry {
// type tag describing which kind of value this entry holds
types value_type;
// the stored value; one alternative per value type except NILL
std::variant<int, double, char, std::string> value;
// creates an entry with the given type tag and value (the two are not cross-checked)
entry(types value_type, std::variant<int, double, char, std::string> value);
// the default constructor produces an entry of type NILL holding the string "NILL"
entry();
};
// One column of a table. Columns serve as the keys of the table's std::map,
// whose mapped value is the vector of entries stored under the column.
struct column {
    // value type of the entries stored under this column
    types col_type;
    // column name; the only member that takes part in comparisons
    std::string name;
    // true when this column is the table's primary key
    bool is_primary_key = false;

    column(types col_type, const std::string &name);
    column();

    // Ordering used by std::map: columns are ordered by name alone,
    // col_type and is_primary_key are ignored.
    bool operator<(const column &compare_col) const {
        return name.compare(compare_col.name) < 0;
    }

    // Two columns are considered equal when their names are equal,
    // regardless of col_type and is_primary_key.
    bool operator==(const column &compare_col) const {
        return name.compare(compare_col.name) == 0;
    }
};
// A table: a std::map from each column to the std::vector of entries stored under it,
// together with the name of the column that acts as primary key.
class table {
private:
    // name of the column which is the primary key of this table
    std::string primary_key;
    // the table itself: column -> entries stored under that column
    std::map<column, std::vector<entry>> contents;

public:
    // delimiter separating the conditions of a read statement
    static constexpr char READ_DELIM = ')';
    // delimiter separating the column definitions of a create statement
    static constexpr char CREATE_DELIM = ',';
    // delimiter separating the specifiers inside one column definition
    static constexpr char COL_CREATE_DELIM = ' ';

    // Creates a table from an already built map; mostly for debug purposes.
    // `primary_key` is stored as given and is not checked against `contents`.
    table(const std::string &primary_key, std::map<column, std::vector<entry>> contents);

    // Intended way for a user to create a table. Syntax of `create_statement`:
    //   column_type column_name PK,column_type column_name,...
    // column_type is one of INT, DOUBLE, CHAR, STR. Exactly one definition must end with PK
    // to mark the primary key column. On a malformed statement an error message is printed
    // to std::cout and the int 1 is thrown.
    table(const std::string &create_statement);

    // Returns the name of the primary key column.
    // Defined inline because no out-of-class definition exists in table.cpp.
    std::string get_primary_key() const { return primary_key; }

    // Returns a copy of the whole table.
    std::map<column, std::vector<entry>> get_contents() const;

    // Makes the column named `new_key` the primary key.
    // Returns 0 on success and 1 when no column with that name exists.
    int change_primary_key(const std::string &new_key);

    // Reads from the table. Syntax of `statement`:
    //   column_name comparison_operator rvalue)column_name comparison_operator rvalue
    // Each condition is terminated by ')'.
    // Example: my_double_col >= 4.5)my_char_col > D
    // The result maps each named column to those of its entries that satisfy the condition;
    // columns without a matching entry are left out. An unknown column name prints an error
    // message to std::cout and throws the int 1.
    std::map<column, std::vector<entry>> read_table(const std::string &statement) const;
};
table.cpp
#include "table.hpp"

#include <iostream>
#include <sstream>
#include <utility>
// Builds an entry tagged with `value_type` that holds `value`.
// `value` is taken by value and moved into the member, so a std::string alternative
// is not copied a second time. The tag and the held alternative are not cross-checked.
entry::entry(types value_type, std::variant<int, double, char, std::string> value)
    : value_type(value_type), value(std::move(value)) {
}
// Default entry: tagged NILL and holding the string "NILL".
entry::entry()
    : value_type(types::NILL), value(std::string("NILL")) {
}
column::column(types col_type, const std::string &name) : col_type(col_type), name(name) {
}
// Default column: type NILL and the name "NILL".
column::column()
    : col_type(types::NILL), name("NILL") {
}
// Builds a table directly from an existing map (mostly for debugging).
// `contents` is taken by value and moved into the member so the whole map is not copied twice.
// `primary_key` is stored as given; it is not checked against the columns in `contents`.
table::table(const std::string &primary_key, std::map<column, std::vector<entry>> contents)
    : primary_key(primary_key), contents(std::move(contents)) {
}
// Builds an empty table from a create statement of the form
//   column_type column_name [PK], column_type column_name [PK], ...
// Column definitions are separated by CREATE_DELIM, the specifiers inside a definition by
// COL_CREATE_DELIM. column_type must be INT, DOUBLE, CHAR or STR, and exactly one definition
// must carry the PK marker.
// On any syntax error a message is printed to std::cout and the int 1 is thrown.
// NOTE: a column name that appears twice is silently ignored the second time, because
// std::map::insert keeps the first key.
table::table(const std::string &create_statement) {
    std::stringstream create_statement_stream(create_statement);
    // set once a definition carrying the PK marker has been accepted
    bool is_key_found = false;
    std::string token;
    while (std::getline(create_statement_stream, token, CREATE_DELIM)) {
        // Split one column definition into its specifiers. Empty pieces are dropped so that
        // the space following a ',' (or repeated spaces) is not counted as a specifier;
        // without this "A a PK, B b" would see three specifiers in the second definition.
        std::vector<std::string> params;
        std::stringstream token_stream(token);
        std::string param;
        while (std::getline(token_stream, param, COL_CREATE_DELIM)) {
            if (!param.empty())
                params.push_back(param);
        }

        // a definition is "type name" or "type name PK"
        if (params.size() < 2 || params.size() > 3) {
            std::cout << "invalid amount of specifiers should be 2 for regular column or 3 for PK column \n";
            throw(1);
        }
        // a third specifier is only allowed while no primary key has been seen
        if (params.size() == 3 && is_key_found) {
            std::cout << "key has been specified more than once \n";
            throw(1);
        }

        // first specifier: the column type
        types col_type;
        if (params[0] == "INT")
            col_type = types::INT;
        else if (params[0] == "DOUBLE")
            col_type = types::DOUBLE;
        else if (params[0] == "CHAR")
            col_type = types::CHAR;
        else if (params[0] == "STR")
            col_type = types::STR;
        else {
            std::cout << "invalid column data type: " << params[0] << '\n';
            throw(1);
        }

        // second specifier: the column name
        column current_col(col_type, params[1]);

        // optional third specifier: must be the PK marker
        if (params.size() == 3) {
            if (params[2] != "PK") {
                std::cout << "wrong third param.\n";
                throw(1);
            }
            is_key_found = true;
            primary_key = current_col.name;
            current_col.is_primary_key = true;
        }

        // register the column with an empty vector of entries
        contents.insert({current_col, {}});
    }

    // a table without a primary key is not allowed
    if (!is_key_found) {
        std::cout << "no primary key provided.\n";
        throw(1);
    }
}
std::map<column, std::vector<entry>> table::get_contents() const {
return contents;
}
// changing primary_key a column must be created to search the map if the new provided column name exists
int table::change_primary_key(const std::string &new_key) {
if (contents.find(column(types::NILL, new_key)) == contents.end())
return 1;
primary_key = new_key;
return 0;
}
// Splits `statement` at every `delim` and returns the pieces in their original order.
// Pieces between two adjacent delimiters are returned as empty strings; a delimiter at the
// very end of `statement` does not produce a trailing empty piece, and an empty statement
// yields no pieces at all.
std::vector<std::string> interpreter::tokenizer(const std::string &statement, char delim) {
    std::vector<std::string> pieces;
    std::string::size_type start = 0;
    while (start < statement.size()) {
        const std::string::size_type stop = statement.find(delim, start);
        if (stop == std::string::npos) {
            // no further delimiter: the remainder is the last piece
            pieces.push_back(statement.substr(start));
            break;
        }
        pieces.push_back(statement.substr(start, stop - start));
        start = stop + 1;
    }
    return pieces;
}
// Evaluates `lvalue <comp_operator> rvalue` and returns the result.
// Supported operators are "==", ">", "<", "<=" and ">=".
// Any other operator prints an error message to std::cout and throws the int 1.
// The comparison is the one std::variant itself provides; operands that hold different
// alternatives are therefore not compared by value.
bool interpreter::compare_values(const std::variant<int, double, char, std::string> &lvalue, const std::variant<int, double, char, std::string> &rvalue, std::string comp_operator) {
    // Every recognised operator returns directly, so the function can no longer
    // run off its end without returning a value.
    if (comp_operator == "==")
        return lvalue == rvalue;
    if (comp_operator == ">")
        return lvalue > rvalue;
    if (comp_operator == "<")
        return lvalue < rvalue;
    if (comp_operator == "<=")
        return lvalue <= rvalue;
    if (comp_operator == ">=")
        return lvalue >= rvalue;

    std::cout << "invalid comparison operator '" << comp_operator << "' \n";
    throw(1);
}
std::map<column, std::vector<entry>> table::read_table(const std::string &statement) const {
// result_table is a new table which contains all the entries from the original table that answer the conditions
std::map<column, std::vector<entry>> result_table;
std::vector<std::string> tokens = interpreter::tokenizer(statement, table::READ_DELIM);
for (size_t i = 0; i < tokens.size(); ++i) {
std::stringstream token_stream(tokens[i]);
std::string col_name;
std::string op;
std::string rvalue;
// spliting into 3 components (lvalue) (< or < or == or >= or <=) (rvalue)
std::getline(token_stream, col_name, ' ');
std::getline(token_stream, op, ' ');
std::getline(token_stream, rvalue, ' ');
// creating a column out of the col_name to search in the map
column col_to_compare(types::STR, col_name);
// iterator pointing to the specific column under col_name
auto current_col = contents.find(col_to_compare);
// if didn't find then it is an invalid column name and throw error
if (current_col == contents.end()) {
std::cout << "invalid column name: " << col_name << '\n';
throw(1);
}
// creating a variant to compare the rvalue to the values inside the column
std::variant<int, double, char, std::string> rvalue_converted;
// using the right conversion based on type extracter
switch (current_col->first.col_type) {
case types::INT:
rvalue_converted = std::stoi(rvalue);
break;
case types::DOUBLE:
rvalue_converted = std::stod(rvalue);
break;
case types::CHAR:
rvalue_converted = rvalue[0];
break;
default:
rvalue_converted = rvalue;
break;
}
// creating a new column composed of all the entries in the original column that answer the condition
std::pair<column, std::vector<entry>> result_table_column;
result_table_column.first = current_col->first;
// going through all the entries in current column and comparing to rvalue
for (size_t j = 0; j < current_col->second.size(); ++j) {
if (interpreter::compare_values(current_col->second[j].value, rvalue_converted, op)) {
result_table_column.second.push_back(current_col->second[j]);
}
}
// inserting the newly created column into the result table if entries were found
if (!result_table_column.second.empty())
result_table.insert(result_table_column);
}
return result_table;
}
main.cpp
#include "table.hpp"
#include <iostream>
int main(int, char **) {
entry e1i(types::INT, {1});
entry e2i(types::INT, {5});
entry e1d(types::DOUBLE, {4.5});
entry e2d(types::DOUBLE, {19.34});
entry e1c(types::CHAR, {'H'});
entry e2c(types::CHAR, {'D'});
std::vector<entry> row_i = {e1i, e2i};
std::vector<entry> row_d = {e1d, e2d};
std::vector<entry> row_c = {e1c, e2c};
column ci(types::INT, "int_col");
ci.is_primary_key = true;
column cd(types::DOUBLE, "dob_col");
column cc(types::CHAR, "char_col");
std::map<column, std::vector<entry>> contents{{ci, row_i}, {cd, row_d}, {cc, row_c}};
std::string statement = "INT int_col PK, DOUBLE dob_col, CHAR ch_col, STR str_col";
table t(ci.name, contents);
std::string read_statement = "int_col > 1)dob_col < 10.0)char_col == D";
std::map<column, std::vector<entry>> result = t.read_table(read_statement);
// contents = t.get_contents();
// contents[cd] = {e1d, e2d};
// std::cout << std::get<double>(contents.at(cd)[0].value) << '\n';
std::cout << std::get<int>(result.at(ci)[0].value) << '\n';
return 0;
}