0

RHEL6

I'm trying to implement a perl split funciton in a C subroutine which dynamically builds the array of strings. My attempt fails with a segfault. But it does not fail if I comment out the printf statement in the for loop (perhaps implying that the segfault is in where its getting built as opposed to how)

Here it is...

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *s, char **arr);


void main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc"; 
  char **arr;


  arrsz=split(str,arr);


  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  exit(0);
}

/***********************************/
int split(char *str, char **arr) {

  int arrsz=0;
  char delim[2] = ":";
  char *tok;

  arr = malloc(sizeof(char **));
  arr[0] = malloc(1);
  arr[0] = '\0';

  tok = strtok(str,delim);
  while(tok != NULL) {
    arrsz++;
    arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
    arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(arr[arrsz-1],tok);
    arr[arrsz]=malloc(1);
    arr[arrsz]='\0';

    tok = strtok(NULL,delim);
  }

  return(arrsz);
}

I think the problem is in how I'm passing "arr" to the split function or how it's being received and used in the function. I say this because if I move the body of the function to main, it works there.

I tried dealing with arr inside the functions as it it was a (char ***), but that didn't work.

Can a C expert out there set me straight ?

9
  • 2
    regarding: void main(int argc, char* argv[]) regardless of what visual studio will allow, all valid signatures for the main() function have a return type of int Commented Apr 6, 2018 at 16:34
  • 2
    arr[0] = malloc(1); arr[0] = '\0'; looks highly suspect. Commented Apr 6, 2018 at 16:35
  • when the parameters to main() are not going to be used, the correct signature is: int main( void ) Commented Apr 6, 2018 at 16:35
  • regarding: ` arr[0] = malloc(1); arr[0] = '\0';` this creates a pointer to a one byte allocation then immediately overlays that pointer with a NUL byte. Probably not what you want Commented Apr 6, 2018 at 16:40
  • when calling any of the heap allocation functions (malloc calloc realloc), 1) the return type is void* so can be assigned to any pointer. Casting just clutters the code, making it more difficult to understand, debug, etc 2) always check (!=NULL) the returned value to assure the operation was successful. When calling realloc() always assign to a 'temp pointer and check for NULL. Assigning directory to the target variable will result in a memory leak when realloc fails Commented Apr 6, 2018 at 16:44

5 Answers 5

2

The main error is that you should pass a pointer to the strings list to the split function, not the strings list itself, so you should use an ***arr:

int split(char *str, char ***arr);

And you should use & to pass the pointer in main:

...
arrsz=split(str,&arr);
...

In the function you could use a double pointer to avoid confusion and at the end assign that pointer to the parameter:

int split(char *str, char ***arrreturn) {
     char **arr;    //Use this strings list to add the strings

     ...

     *arreturn = arr;
     return(arrsz);
}

-You should not call realloc anytime you need to insert a string, but you could oversize it and increment its dimension if you need.

-I cannot see the need of assign '\0' at the end of the list if you have a variable with the length

-You can use strdup instead of malloc-strcpy funcs:

char *first = "ciao";
char *str = malloc(strlen(first) * sizeof(char));
strcpy(str, first);

Is equal to:

char *first = "ciao";
char *str = strdup(first);

I corrected your code:

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *str, char ***arrreturn);


void main(int argc, char *argv[]) {
    int x;
    int arrsz;
    char str[] = "aaa:bbb:ccc";
    char **arr;


    arrsz = split(str, &arr);


    for (x = 0; x < arrsz; x++) {
        printf("%s\n", arr[x]);
    }

    exit(0);
}

/***********************************/
int split(char *str, char ***arrreturn) {

    int arrsz = 1;
    int len = 0;
    char delim[2] = ":";
    char *tok;
    char **arr;

    arr = malloc(sizeof(char **));

    tok = strtok(str, delim);
    while (tok != NULL) {
        len++;
        if (len >= arrsz) {
            arrsz *= 2;
            arr = realloc(arr, arrsz * sizeof(char **));
        }
        arr[len - 1] = strdup(tok);
        tok = strtok(NULL, delim);
    }
    *arrreturn = arr;
    return (len);
}
Sign up to request clarification or add additional context in comments.

1 Comment

char **arr; arr = malloc(sizeof(char **));... realloc(arr, arrsz * sizeof(char **)) is not using the matching type to determine size (One too many *). Use char **arr = malloc(sizeof *arr);. It is easier to code correctly, review and maintain.
2

There are a few bugs. I've annotated and [partially] fixed bugs. It will still segfault. I added a refactored version that will work correctly.

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int split(char *s, char **arr);

void main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc";
  char **arr;

#if 1
#endif

  arrsz=split(str,arr);

  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  exit(0);
}

/***********************************/
int split(char *str, char **arr) {

  int arrsz=0;
  char delim[2] = ":";
  char *tok;

  // NOTE/BUG: this function only changes arr within the function and does
  // _not_ propagate it to the caller

  arr = malloc(sizeof(char **));

  // NOTE/BUG: this is replaced in the loop and leaks memory
#if 0
  arr[0] = malloc(1);
  arr[0] = '\0';
#endif

  tok = strtok(str,delim);
  while(tok != NULL) {
    arrsz++;

    // NOTE/BUG: this is incorrect -- it only adds a byte instead of another
    // pointer (i.e. it doesn't allocate enough)
#if 0
    arr = (char **)realloc(arr,(arrsz*sizeof(char *))+1);
#else
    arr = (char **)realloc(arr,sizeof(char *) * (arrsz + 1));
#endif

#if 0
    arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(arr[arrsz-1],tok);
#else
    arr[arrsz-1] = strdup(tok);
#endif

    // NOTE/BUG: this is wrong and leaks memory
#if 0
    arr[arrsz]=malloc(1);
    arr[arrsz]='\0';
#endif

    tok = strtok(NULL,delim);
  }

#if 1
  arr[arrsz] = NULL;
#endif

  return(arrsz);
}

But, as written, your function doesn't update caller's value of arr.

To fix your function, split would need arr to be defined as a "three star" pointer (e.g. char ***arr) which is considered cumbersome and very bad practice.

So, a better/simpler solution is to refactor the function and pass back arr as return (e.g. char **split(char *str,int *sizrtn):

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char **split(char *s, int *arsiz);

int main(int argc, char* argv[])
{
  int x;
  int arrsz;
  char str[]="aaa:bbb:ccc";
  char **arr;

  arrsz = 0;
  arr = split(str,&arrsz);

  for(x=0;x<arrsz;x++) {
    printf("%s\n",arr[x]);
  }

  return 0;
}

/***********************************/
char **split(char *str, int *sizrtn)
{
  int arrsz=0;
  const char *delim = ":";
  char *tok;
  char **arr = NULL;

  tok = strtok(str,delim);
  while (tok != NULL) {
    arrsz++;
    arr = realloc(arr,sizeof(char *) * (arrsz + 1));

    arr[arrsz - 1] = strdup(tok);

    tok = strtok(NULL,delim);
  }

  if (arr == NULL)
    arr = malloc(sizeof(*arr));

  arr[arrsz] = NULL;

  *sizrtn = arrsz;

  return arr;
}

Comments

0

To modify an object in the caller's scope you must pass a pointer to the object - so you need one more level of indirection. There is also at least one semantic error in the implementation - assigning '\0' to the pointer returned by malloc(), will both invalidate the pointer and cause a memory leak.

Change split() prototype to:

int split( char* s, char*** arr ) ;

Then call it thus:

arrsz = split( str, &arr ) ; 

And change the implementation:

int split( char* str, char*** arr ) 
{
  int arrsz = 0 ;
  char delim[2] = ":" ;
  char* tok ;

  *arr = malloc(sizeof(char**));
  *arr[0] = malloc(1);
  **arr[0] = '\0';         // <<< This is fixed too

  tok = strtok( str, delim ) ;
  while( tok != NULL ) 
  {
    arrsz++;
    *arr = (char **)realloc(*arr,(arrsz*sizeof(char *))+1);
    *arr[arrsz-1] = malloc((sizeof(char)*strlen(tok))+1);
    strcpy(*arr[arrsz-1],tok);
    *arr[arrsz]=malloc(1);
    *arr[arrsz]='\0';

    tok = strtok(NULL,delim);
  }

  return(arrsz);
}

There may be other errors I have not spotted, but that is fundamental. Best from hereon debugged using a debugger rather then Q&A.

3 Comments

I still get a segfault, only this time I get it even if I comment out the printf. IOW, seems like its in the function this time.
@daveg : As I said, the code has multiple issues, but how to return an array to a caller was the only issue you have asked about. To debug the code I'd have to build and run it, but rather then using SO as a debugging service, you should learn to debug. If I did it for you, I'd use a debugger - you can do that, and it would serve you well to learn how. Moreover, to see why the code now fails we'd need your modified code - which may differ from mine - best to post a new question - this one is answered.
Ok, I took a look at it in a debugger, mu conclusion is that it is so broken, I'd suggest starting again. Since that is not what the is question is about, I am nor suggesting anything (though other answers have done so). A solution using strdup() might help simplify things.
0

the following proposed code:

  1. cleanly compiles
  2. performs the desired functionality
  3. properly checks for errors from system functions
  4. eliminates any need to use a *** parameter -- google three star programer as to why that is bad
  5. does not include header files those contents are not used

and now, the proposed code:

//#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char ** split(char *str, size_t *arrsz);


int main( void )
{
    size_t x;
    size_t arrsz;
    char str[]="aaa:bbb:ccc";

    char **arr=split(str,&arrsz);


    for(x=0;x<arrsz;x++)
    {
        printf("%s\n",arr[x]);
    }

    exit(0);
}


/***********************************/
char ** split(char *str, size_t *arrsz)
{
    char **arr = NULL;
    size_t count = 0;

    char delim[2] = ":";
    char *tok;

    tok = strtok(str,delim);
    while(tok != NULL)
    {
        count++;
        char **temp = realloc(arr,(count*sizeof(char *)));
        if( !temp )
        {
            perror( "malloc failed" );
            // perform cleanup and
            free( arr );
            exit( EXIT_FAILURE );
        }

        arr = temp;

        arr[count-1] = strdup( tok );
        if( !arr[count-1] )
        {
            perror( "strdup failed" );
            // perform cleanup and
            free( arr );
            exit( EXIT_FAILURE );
        }

        tok = strtok(NULL,delim);
    }

    *arrsz = count;
    return( arr );
}

Comments

0

OP's code does not return the allocated memory assigned to arr

int split(char *str, char **arr) {
  ...
  // Memory allocated and assigned to local `arr`
  // Yet `arr` is not returned.
  // Calling code never sees the result of this assignment.
  arr = malloc(sizeof(char **));
  ...
  return(arrsz);
}

Instead, I took a whole new approach to mimic split /PATTERN/,EXPR.

I really wanted to avoid all the ** and *** programming.

IMO, a split() should not change the expression so directly using strtok() is out. A common implementation of strtok() effectively does a strspn() and strcspsn(), so coding those directly avoids the strtok().

The below returns a string list type. Various other function signatures could be used, this return type seemed natural for OP's goal. Another solution might return a NULL terminated array of char * pointers.

When memory allocations fails, it is detected and then code calls TBD_Code();. Unclear how OP wants to handle that. Code could print a message and exit or attempt some recovery.

#include <stdlib.h>
#include <string.h>

typedef struct {
  size_t n;
  char **strings;
} string_list;

string_list split(const char *pattern, const char *expr) {
  string_list list = { 0, NULL };
  size_t length;

  // Find length of initial matching characters
  while ((length = strspn(expr, pattern)), expr[length]) {
    // Skip leading characters from `expr` that match the pattern
    expr += length;
    // Find length of characters NOT from the pattern
    length = strcspn(expr, pattern);
    // Allocate for 1 more pointer
    void *tmp = realloc(list.strings, sizeof *(list.strings) * (list.n + 1));
    if (tmp == NULL) TBD_Code();
    list.strings = tmp;
    //Allocate for the token and save it
    list.strings[list.n] = malloc(length + 1u);
    if (list.strings[list.n] == 0) TBD_Code();
    memcpy(list.strings[list.n], expr, length);
    list.strings[list.n][length] = '\0';
    // Advance
    list.n++;
    expr += length;
  }
  return list;
}

void string_list_free(string_list list) {
  if (list.strings) {
    for (size_t i = 0; i < list.n; i++) {
      free(list.strings[i]);
    }
    free(list.strings);
  }
}

Test code

#include <stdio.h>

void print_string_list(string_list list) {
  for (size_t i = 0; i < list.n; i++) {
    printf("%zu: <%s>\n", i, list.strings[i]);
  }
  string_list_free(list);
}

int main(void) {
  print_string_list(split(":", "aaa:bbb:ccc"));
  print_string_list(split(":b", "aaa:bbb:ccc"));
  print_string_list(split("a:", "aaa:bbb:ccc"));
  print_string_list(split(":c", "aaa:bbb:ccc"));
}

Output

0: <aaa>
1: <bbb>
2: <ccc>
0: <aaa>
1: <ccc>
0: <bbb>
1: <ccc>
0: <aaa>
1: <bbb>

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.