I would normalize the string first and then use the standard functions to convert it to float/double. It is not an easy task, but it is doable. Accounting notation such as (xxx.xxx) denotes a negative number.
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stddef.h>
/* Policy for deciding which source character ('.' or ',') is the decimal
   separator; every other '.' / ',' in the mantissa is dropped as grouping. */
typedef enum
{
    /* Both '.' and ',' present: the one occurring last is the decimal.
       Only one kind present: its last occurrence is the decimal unless it is
       followed by exactly three digits and preceded by at least one digit,
       in which case it is taken for thousands grouping. */
    NF_DECIMAL_AUTO = 0,
    /* '.' is the decimal (first occurrence wins); ',' is grouping. */
    NF_DECIMAL_FORCE_DOT,
    /* ',' is the decimal (first occurrence wins); '.' is grouping. */
    NF_DECIMAL_FORCE_COMMA,
    /* Whichever of '.' / ',' occurs last in the mantissa is the decimal. */
    NF_DECIMAL_RIGHTMOST
} nf_decimal_mode_t;

/* True for the blank characters tolerated around and inside a number:
   space, tab and byte 0xA0 (NBSP in single-byte encodings). */
static int nf_is_space_like(unsigned char c)
{
    return (c == ' ' || c == '\t' || c == 0xA0);
}

/* True for characters that are silently dropped as digit grouping:
   any space-like character, apostrophe or underscore. */
static int nf_is_grouping(unsigned char c)
{
    return nf_is_space_like(c) || c == '\'' || c == '_';
}

/* Normalize a "locale-ish" number to ASCII "[-]digits[.digits][e[+-]digits]".
 *
 * src     NUL-terminated input text.
 * dst     output buffer; always NUL-terminated on return when it is non-NULL
 *         and dst_cap > 0.
 * dst_cap capacity of dst in bytes, including room for the terminating NUL.
 * mode    decimal-separator policy (see nf_decimal_mode_t).
 *
 * Handling: leading blanks are skipped; a leading '+' is dropped; a leading
 * '-' or '(' (accounting notation) yields a '-' prefix; grouping characters
 * and ')' are dropped; copying of the mantissa stops at the first letter.
 * An exponent is kept only if the mantissa produced at least one digit and
 * the exponent itself contains at least one digit. If the mantissa yields no
 * digit at all, a single "0" is emitted instead.
 *
 * Returns the number of bytes written (excluding the NUL), or 0 on error
 * (NULL argument, zero capacity, or output does not fit); on error dst is
 * set to the empty string when it is writable.
 */
size_t normalize_float_string_mode(const char *src, char *dst, size_t dst_cap, nf_decimal_mode_t mode)
{
    const size_t npos = (size_t)-1; /* "no position" sentinel */
    size_t retLen = 0;
    int ok = 1;
    size_t out = 0;

    if(src == NULL || dst == NULL || dst_cap == 0)
    {
        ok = 0;
        goto done;
    }

/* Append one character, always keeping one byte free for the final NUL. */
#define NF_PUTC(ch)                \
    do                             \
    {                              \
        if(out + 1 >= dst_cap)     \
        {                          \
            ok = 0;                \
            goto done;             \
        }                          \
        dst[out++] = (char)(ch);   \
    } while(0)

    /* Find exponent start: first e/E that comes after at least one digit. */
    size_t i = 0, first_digit_pos = npos, exp_pos = npos;
    for(; src[i] != '\0'; ++i)
    {
        char c = src[i];
        if(first_digit_pos == npos && isdigit((unsigned char)c))
        {
            first_digit_pos = i;
        }
        if((c == 'e' || c == 'E') && first_digit_pos != npos)
        {
            exp_pos = i;
            break;
        }
    }
    size_t body_end = (exp_pos != npos) ? exp_pos : i;

    /* Positions of last '.' and ',' before exponent. */
    size_t last_dot = npos, last_comma = npos;
    for(size_t j = 0; j < body_end; ++j)
    {
        if(src[j] == '.')
        {
            last_dot = j;
        }
        else if(src[j] == ',')
        {
            last_comma = j;
        }
    }

    /* Decide which source char acts as decimal (0 => none) and, for the
       position-based modes, exactly which occurrence it is. decimal_pos ==
       npos means "first occurrence of decimal_src" (FORCE_* modes). */
    char decimal_src = 0;
    size_t decimal_pos = npos;
    if(mode == NF_DECIMAL_FORCE_DOT)
    {
        decimal_src = (last_dot != npos) ? '.' : 0;
    }
    else if(mode == NF_DECIMAL_FORCE_COMMA)
    {
        decimal_src = (last_comma != npos) ? ',' : 0;
    }
    else if(mode == NF_DECIMAL_RIGHTMOST)
    {
        if(last_dot != npos && (last_comma == npos || last_dot > last_comma))
        {
            decimal_src = '.';
            decimal_pos = last_dot;
        }
        else if(last_comma != npos)
        {
            decimal_src = ',';
            decimal_pos = last_comma;
        }
    }
    else /* NF_DECIMAL_AUTO */
    {
        if(last_dot != npos && last_comma != npos)
        {
            if(last_dot > last_comma)
            {
                decimal_src = '.';
                decimal_pos = last_dot;
            }
            else
            {
                decimal_src = ',';
                decimal_pos = last_comma;
            }
        }
        else if(last_dot != npos || last_comma != npos)
        {
            size_t sep_pos = (last_dot != npos) ? last_dot : last_comma;
            char sep_char = (last_dot != npos) ? '.' : ',';

            /* Digits immediately to the right of the last separator. */
            size_t k = sep_pos + 1, digits_right = 0;
            while(k < body_end && isdigit((unsigned char)src[k]))
            {
                ++digits_right;
                ++k;
            }

            /* Digits to the left, looking through separators and grouping. */
            size_t digits_left = 0;
            k = sep_pos;
            while(k > 0)
            {
                --k;
                if(isdigit((unsigned char)src[k]))
                {
                    ++digits_left;
                }
                else if(src[k] != '.' && src[k] != ',' && !nf_is_grouping((unsigned char)src[k]))
                {
                    break;
                }
            }

            if(!(digits_right == 3 && digits_left > 0))
            {
                decimal_src = sep_char;
                decimal_pos = sep_pos;
            }
            /* else: looks like thousands grouping -> no decimal */
        }
    }

    /* Skip leading spaces/tabs/NBSP */
    size_t pos = 0;
    while(nf_is_space_like((unsigned char)src[pos]))
    {
        ++pos;
    }

    /* Optional sign or accounting parentheses */
    if(src[pos] == '+')
    {
        ++pos;
    }
    else if(src[pos] == '-')
    {
        ++pos;
        NF_PUTC('-');
    }
    else if(src[pos] == '(')
    {
        ++pos;
        while(nf_is_space_like((unsigned char)src[pos]))
        {
            ++pos;
        }
        NF_PUTC('-');
    }

    /* Copy mantissa; stop at the first letter. */
    int wrote_digit = 0;
    int wrote_decimal = 0;
    for(size_t j = pos; j < body_end; ++j)
    {
        unsigned char c = (unsigned char)src[j];
        if(isalpha(c))
        {
            break;
        }
        if(isdigit(c))
        {
            NF_PUTC(c);
            wrote_digit = 1;
            continue;
        }
        if(c == '.' || c == ',')
        {
            int is_decimal = decimal_src != 0
                             && c == (unsigned char)decimal_src
                             && (decimal_pos == npos || j == decimal_pos);
            if(is_decimal && !wrote_decimal)
            {
                NF_PUTC('.');
                wrote_decimal = 1;
            }
            /* else: treat as grouping -> drop */
            continue;
        }
        /* Grouping characters, closing ')' and any other non-alpha junk
           are ignored. */
    }

    /* Remove trailing '.' if no fraction followed */
    if(wrote_decimal && out > 0 && dst[out - 1] == '.')
    {
        --out;
    }

    if(!wrote_digit)
    {
        /* No mantissa digit: emit the placeholder and drop any exponent, so
           the '0' can never land behind an "e..." suffix. */
        NF_PUTC('0');
    }
    else if(exp_pos != npos)
    {
        /* Exponent (normalize); rolled back entirely if it has no digit. */
        size_t j = exp_pos + 1;
        size_t out_before_exp = out;
        int exp_has_digit = 0;

        NF_PUTC('e');
        if(src[j] == '+' || src[j] == '-')
        {
            NF_PUTC(src[j]);
            ++j;
        }
        while(src[j] != '\0')
        {
            if(isdigit((unsigned char)src[j]))
            {
                NF_PUTC(src[j]);
                exp_has_digit = 1;
                ++j;
            }
            else if(nf_is_space_like((unsigned char)src[j]))
            {
                ++j; /* allow spaces inside exponent */
            }
            else
            {
                break;
            }
        }
        if(!exp_has_digit)
        {
            out = out_before_exp; /* drop 'e' and optional sign */
        }
    }

    if(out >= dst_cap)
    {
        ok = 0;
        goto done;
    }
    dst[out] = '\0';
    retLen = out;

done:
    if(!ok)
    {
        /* dst may be NULL here (argument check failed), so test it before
           clearing the buffer. */
        if(dst != NULL && dst_cap > 0)
        {
            dst[0] = '\0';
        }
        retLen = 0;
    }
    return retLen;
#undef NF_PUTC
}
size_t normalize_float_string(const char *src, char *dst, size_t dst_cap)
{
return normalize_float_string_mode(src, dst, dst_cap, NF_DECIMAL_AUTO);
}
/* One row of the self-test table in main(): an input string, the decimal
   policy to run it under, and the exact normalized text that is expected. */
typedef struct
{
const char *input; /* text handed to normalize_float_string_mode() */
nf_decimal_mode_t mode; /* decimal-separator policy for this case */
const char *expected; /* expected output, compared with strcmp() and by length */
} TestCase;
/* Map a decimal-mode enumerator to its short display name for test output.
   Any value outside the known enumerators yields "?". */
static const char* mode_name(nf_decimal_mode_t m)
{
    if(m == NF_DECIMAL_AUTO)
    {
        return "AUTO";
    }
    if(m == NF_DECIMAL_FORCE_DOT)
    {
        return "FORCE_DOT";
    }
    if(m == NF_DECIMAL_FORCE_COMMA)
    {
        return "FORCE_COMMA";
    }
    if(m == NF_DECIMAL_RIGHTMOST)
    {
        return "RIGHTMOST";
    }
    return "?";
}
/* Self-test driver: runs every table entry through
   normalize_float_string_mode(), prints PASS/FAIL per case plus a summary,
   and exits with 0 only when all cases pass. */
int main(void)
{
    const TestCase cases[] =
    {
        { "1,234.56", NF_DECIMAL_AUTO, "1234.56" },
        { "1.234,56", NF_DECIMAL_AUTO, "1234.56" },
        { "1 234,56", NF_DECIMAL_AUTO, "1234.56" },
        { "12'345", NF_DECIMAL_AUTO, "12345" },
        { "(1,234.56)", NF_DECIMAL_AUTO, "-1234.56" },
        { "1,234e+3", NF_DECIMAL_FORCE_DOT, "1234e+3" },
        { "1,234e+3", NF_DECIMAL_FORCE_COMMA, "1.234e+3" },
        { "1,234e+3", NF_DECIMAL_RIGHTMOST, "1.234e+3" },
        { "1,234e+3", NF_DECIMAL_AUTO, "1234e+3" },
        { "1234", NF_DECIMAL_AUTO, "1234" },
        { " +1,234 ", NF_DECIMAL_FORCE_DOT, "1234" },
        { " ( 1 234 ) ", NF_DECIMAL_AUTO, "-1234" },
        { "1.234", NF_DECIMAL_FORCE_DOT, "1.234" },
        { "1.234", NF_DECIMAL_AUTO, "1234" },
        { "1,234", NF_DECIMAL_AUTO, "1234" },
        { "1.234", NF_DECIMAL_RIGHTMOST, "1.234" },
        { "1,234,567.89", NF_DECIMAL_AUTO, "1234567.89" },
        { "1.234.567,89", NF_DECIMAL_AUTO, "1234567.89" },
        { "1_234_567,89", NF_DECIMAL_FORCE_COMMA, "1234567.89" },
        { "1\t234,56", NF_DECIMAL_FORCE_COMMA, "1234.56" },
        { "1\x00A0 234,56", NF_DECIMAL_FORCE_COMMA, "1234.56" },
        { "00123,4500", NF_DECIMAL_FORCE_COMMA, "00123.4500" },
        { ".5", NF_DECIMAL_FORCE_DOT, ".5" },
        { ",5", NF_DECIMAL_FORCE_COMMA, ".5" },
        { "5,", NF_DECIMAL_FORCE_COMMA, "5" },
        { "5.", NF_DECIMAL_FORCE_DOT, "5" },
        { "1,234E-02", NF_DECIMAL_FORCE_DOT, "1234e-02" },
        { "1.234E+02", NF_DECIMAL_FORCE_DOT, "1.234e+02" },
        { "1.234E", NF_DECIMAL_FORCE_DOT, "1.234" },
        { "1,234E+", NF_DECIMAL_FORCE_DOT, "1234" }, /* fixed: no trailing 'e' */
        { "(1,23)", NF_DECIMAL_FORCE_COMMA, "-1.23" },
        { " +\t1'234'567,0 ", NF_DECIMAL_FORCE_COMMA, "1234567.0" },
        { "1,2,3,4", NF_DECIMAL_FORCE_COMMA, "1.234" },
        { "1.2.3.4", NF_DECIMAL_FORCE_DOT, "1.234" },
        { "1,23,456", NF_DECIMAL_AUTO, "123456" },
        { "000", NF_DECIMAL_AUTO, "000" },
        { "", NF_DECIMAL_AUTO, "0" },
        { " ", NF_DECIMAL_AUTO, "0" },
        { "e10", NF_DECIMAL_AUTO, "0" }, /* fixed: abort on alpha before number */
        { ".e10", NF_DECIMAL_FORCE_DOT, "0" }, /* fixed: same */
        { "1,234e- 3", NF_DECIMAL_FORCE_DOT, "1234e-3" },
        { "1,234e -3", NF_DECIMAL_FORCE_DOT, "1234" },
        { "1.234,567", NF_DECIMAL_RIGHTMOST, "1234.567" },
        { "1,234.567", NF_DECIMAL_RIGHTMOST, "1234.567" },
        { "1 234 567", NF_DECIMAL_AUTO, "1234567" },
        { "1_234_567", NF_DECIMAL_AUTO, "1234567" },
        { "1'234'567", NF_DECIMAL_AUTO, "1234567" },
        { " ( 14,50 ) ", NF_DECIMAL_FORCE_COMMA, "-14.50" }, /* fixed expected */
        { "999,999,999,999,999.999999", NF_DECIMAL_AUTO, "999999999999999.999999" },
        { "( .5 )", NF_DECIMAL_FORCE_DOT, "-.5" }
    };
    const size_t total = sizeof(cases) / sizeof(cases[0]);
    char result[256];
    size_t passed = 0;
    size_t idx = 0;

    while(idx < total)
    {
        const TestCase *tc = &cases[idx];
        const size_t ordinal = idx + 1; /* cases are reported 1-based */
        const size_t len = normalize_float_string_mode(tc->input, result, sizeof(result), tc->mode);
        /* A case passes only when both the returned length and the text match. */
        const int matches = (len == strlen(tc->expected)) && (strcmp(result, tc->expected) == 0);

        if(!matches)
        {
            printf("%2zu. FAIL mode=%-10s in=\"%s\"\n"
                   " exp=\"%s\"\n"
                   " got=\"%s\" (len %zu)\n",
                   ordinal, mode_name(tc->mode), tc->input, tc->expected, result, len);
        }
        else
        {
            ++passed;
            printf("%2zu. PASS mode=%-10s in=\"%s\" out=\"%s\"\n",
                   ordinal, mode_name(tc->mode), tc->input, result);
        }
        ++idx;
    }

    printf("\nSummary: %zu / %zu tests passed\n", passed, total);
    if(passed != total)
    {
        return 1;
    }
    return 0;
}
https://godbolt.org/z/GWvneKoTq
strtod_l. '.' versus ',' (which could be handled with a string substitution), what other numeric locale concerns do you have? #define _GNU_SOURCE and then it became available! setlocale(LC_NUMERIC, "C"). That would preserve i18n in other aspects, although it would indeed affect numeric handling everywhere, not just in your config file parser.