This function is about the fact that a std::wstring was used in another cpp file in order to be able to read strings with German umlauts from the console. Since it is difficult to get wstrings into a text file when a std::ofstream is already accessing the text file, this wstring was converted into a normal std::string using utf8.h. The 16-bit characters that represented umlauts are now 2 cryptic characters (which is logical, I know). A ß becomes ß, an ü becomes ü, as you often see it in everyday life. This is corrected with this .h and .cpp files.
My question is: Could you please review this function and say what you think of the code? I'm asking because the code is copying one vector to a second one a lot, and, as you see in the last lines, I need to get rid of ‘left-over’ null characters. I want to include the header and cpp file more often, so I want the two to be good.
handle_German_umlauts.cpp
#include "handle_German_umlauts.h"
Umlaute_korrigieren::Umlaute_korrigieren()
{
}
Umlaute_korrigieren::~Umlaute_korrigieren()
{
}
std::vector<char> Umlaute_korrigieren::_std__String_to_std__vectorChar_for_ANSI(std::string stdstring)
{
std::vector<char> CString(stdstring.c_str(), stdstring.c_str() + stdstring.size() + 1);
std::vector<char> copy(stdstring.c_str(), stdstring.c_str() + stdstring.size() + 1);
for (size_t i = (size_t)0; i < CString.size() - (size_t)1; i++)
{
if (CString[i] == -61 && CString[i + 1] == -97) // Pseudo-ß gefunden
{
copy[i] = '\xDF'; //ß ist DF(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i+(size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
if (CString[i] == -61 && CString[i + 1] == -68) // Pseudo-ü gefunden
{
copy[i] = '\xFC'; //ü ist FC(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
if (CString[i] == -61 && CString[i + 1] == -92) // Pseudo-ä gefunden
{
copy[i] = '\xE4'; //ä ist E4(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
if (CString[i] == -61 && CString[i + 1] == -74) // Pseudo-ö gefunden
{
copy[i] = '\xF6'; //ö ist F6(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
if (CString[i] == -61 && CString[i + 1] == -124) // Pseudo-Ä gefunden
{
copy[i] = '\xC4'; //Ä ist C4(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
if (CString[i] == -61 && CString[i + 1] == -106) // Pseudo-Ö gefunden
{
copy[i] = '\xD6'; //Ö ist D6(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
if (CString[i] == -61 && CString[i + 1] == -100) // Pseudo-Ü gefunden
{
copy[i] = '\xDC'; //Ü ist DC(hex) in ANSI
for (size_t j = copy.size() - (size_t)1; j > (i + (size_t)1); j--) // umkopieren
{
copy[j - 1] = CString[j];
}
CString = copy;
}
}
// crop unnecessary ‘\0’s
size_t _0Counter = 0;
for (size_t i = (size_t)0; i < CString.size(); i++)
{
if (CString[i] == '\0')
{
_0Counter += (size_t)1;
}
}
size_t original = CString.size() - (size_t)1; // because the vector gets smaller due to the deletion and the for loop is always reevaluating
size_t wie_weit = CString.size() - _0Counter;
for (size_t i = original; i > wie_weit; i--)
{
CString.erase(CString.begin() + i - 1);
}
return CString;
}
The handle_German_umlauts.h
#ifndef HANDLE_GERMAN_UMLAUTS_H_
#define HANDLE_GERMAN_UMLAUTS_H_
#include <vector>
#include <string>
class Umlaute_korrigieren
{
public:
Umlaute_korrigieren();
~Umlaute_korrigieren();
std::vector<char> _std__String_to_std__vectorChar_for_ANSI(std::string);
private:
};
#endif // !HANDLE_GERMAN_UMLAUTS_H_
The function is called as follows:
std::string Strasse_als_stdstring;
utf8::utf16to8(physical_address.street.begin(), physical_address.street.end(), back_inserter(Strasse_als_stdstring));
std::vector<char> korrigierte_Strasse = Uk._std__String_to_std__vectorChar_for_ANSI(Strasse_als_stdstring);
for (size_t h = (size_t)0; h < korrigierte_Strasse.size() - (size_t)3; h++) // write to txt. -3, so that \r\n\0 aren't printed.
{
fs8 << korrigierte_Strasse[h];
}
fs8 << " " << physical_address.house_number << std::endl;
where physical_address.street is the std::wstring (mentioned above), and the for loop serves to write the chars in the textfile (std::ofstream fs8).
