C: implementing str_replace to replace all occurrences of substring (zz)

C: implementing str_replace to replace all occurrences of substring

Last time, I showed how to replace PHP's str_replace in C.

//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]
The previous code was only replacing one occurrence of substr which might be sufficient in most cases... but will not do the job when the pattern appears more than once within the original string.

This new piece of code will replace ALL occurrences of substring by the replacement pattern.

The following code leaves some optimizations on the table: for instance, we could first count how many times the pattern occurs, do a single big allocation, and then run a second loop to replace every occurrence. For now, though, this is what I came up with.

//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]
Maybe at a later stage I will come up with a somewhat more optimized version.

  1. /**
  2.  * vim: tabstop=2:shiftwidth=2:softtabstop=2:expandtab
  3.  *
  4.  * str_replace.c implements a str_replace PHP like function
  5.  * Copyright (C) 2010  chantra <chantra__A__debuntu__D__org>
  6.  *
  7.  * This program is free software; you can redistribute it and/or
  8.  * modify it under the terms of the GNU General Public License
  9.  * as published by the Free Software Foundation; either version 2
  10.  * of the License, or (at your option) any later version.
  11.  *
  12.  * This program is distributed in the hope that it will be useful,
  13.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15.  * GNU General Public License for more details.
  16.  *
  17.  * You should have received a copy of the GNU General Public License
  18.  * along with this program; if not, write to the Free Software
  19.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  20.  *
  21.  * gcc -o str_replace_all str_replace_all.c
  22.  */
  23.  
  24. #include <stdio.h>
  25. #include <string.h>
  26. #include <stdlib.h>
  27.  
  /**
   * Print a one-line usage summary to stderr.
   *
   * @param p  program name to show in the message (main passes argv[0]).
   */
  void usage(const char *p){
    fprintf(stderr, "USAGE: %s string tok replacement\n", p );
  }
  31.  
  /**
   * Replace occurrences of substr in string with replacement.
   *
   * The result is always a freshly malloc'ed string that the caller must
   * free(); the input string is never modified.
   *
   * After each replacement the search restarts from the beginning of the
   * rebuilt string, so text that was just inserted is scanned again.
   * Known limitation: if replacement itself contains substr (for example
   * "bar" -> "bar2") the loop never terminates.
   *
   * @param string       NUL-terminated input; NULL yields NULL.
   * @param substr       text to look for; NULL or "" yields a plain copy.
   * @param replacement  text to insert; NULL yields a plain copy.
   * @return newly allocated result, or NULL if memory could not be allocated.
   */
  char *
  str_replace ( const char *string, const char *substr, const char *replacement ){
    char *tok = NULL;
    char *newstr = NULL;
    char *oldstr = NULL;
    size_t old_len, sub_len, rep_len, prefix;

    if ( string == NULL ) return NULL;

    /* private working copy; malloc + memcpy because strdup is not ISO C before C23 */
    old_len = strlen ( string );
    newstr = malloc ( old_len + 1 );
    if ( newstr == NULL ) return NULL;
    memcpy ( newstr, string, old_len + 1 );

    /* if either substr or replacement is NULL, hand back the duplicate and let the caller handle it */
    if ( substr == NULL || replacement == NULL ) return newstr;

    sub_len = strlen ( substr );
    rep_len = strlen ( replacement );
    /* strstr() matches an empty needle at every position, which would loop forever */
    if ( sub_len == 0 ) return newstr;

    while ( (tok = strstr ( newstr, substr )) != NULL ){
      oldstr = newstr;
      old_len = strlen ( oldstr );
      prefix = (size_t)( tok - oldstr );
      newstr = malloc ( old_len - sub_len + rep_len + 1 );
      /* failed to alloc mem, free old string and return NULL */
      if ( newstr == NULL ){
        free (oldstr);
        return NULL;
      }
      /* text before the match, then the replacement, then the text after the match */
      memcpy ( newstr, oldstr, prefix );
      memcpy ( newstr + prefix, replacement, rep_len );
      memcpy ( newstr + prefix + rep_len, tok + sub_len, old_len - sub_len - prefix );
      newstr[old_len - sub_len + rep_len] = '\0';
      free (oldstr);
    }
    return newstr;
  }
  56.  
  /**
   * Command-line driver: str_replace_all string tok replacement
   *
   * Calls str_replace() on the three arguments and prints the inputs and the
   * result to stdout.
   *
   * @return 0 on success, 1 on a wrong argument count or when str_replace()
   *         returns NULL.
   */
  int main( int argc, char **argv ){
    char *ns = NULL;
    if( argc != 4 ) {
      usage(argv[0]);
      return 1;
    }
    ns = str_replace( argv[1], argv[2], argv[3] );
    /* passing a NULL pointer to %s is undefined behaviour, so stop here */
    if( ns == NULL ) {
      fprintf( stderr, "%s: could not allocate memory for the result\n", argv[0] );
      return 1;
    }
    fprintf( stdout, "Old string: %s\nTok: %s\nReplacement: %s\nNew string: %s\n", argv[1], argv[2], argv[3], ns );
    free(ns);
    return 0;
  }

Will output:

$ gcc -o str_replace_all str_replace_all.c
$ ./str_replace_all "(uid=%u/%u)" "%u" chantra
Old string: (uid=%u/%u)
Tok: %u
Replacement: chantra
New string: (uid=chantra/chantra)

update on your code

FYI, your code works well as long as the replacement string doesn't include the needle value; otherwise the code gets trapped in an infinite loop.

ie:

./str_replace_all foobar bar bar2

*should* have returned foobar2 but instead it gets stuck in an infinite loop building foobar2222222222222222222222222...... because the strstr() check in the while() condition starts looking at the beginning of the haystack string (or new haystack) every time, so 'bar' would get replaced with 'bar2' over and over again.

A few simple changes fix the problem:

  1.  32 char *
  2.  33 str_replace ( const char *string, const char *substr, const char *replacement ){
  3.  34   char *tok = NULL;
  4.  35   char *newstr = NULL;
  5.  36   char *oldstr = NULL;
  6. *37   char *strhead = NULL;
  7.  38   /* if either substr or replacement is NULL, duplicate string a let caller handle it */
  8.  39   if ( substr == NULL || replacement == NULL ) return strdup (string);
  9.  40   newstr = strdup (string);
  10.  41  
  11. *42   strhead = newstr ;
  12. *43   while ( (tok = strstr ( strhead, substr )) ) {
  13. *44     strhead = tok ;

Line 37 adds a new pointer
Line 42 points strhead to the beginning of your new haystack string
Line 43 changes your strstr() call to use strhead
Line 44 points strhead to the token found from strstr()

Thanks for the code!

yet another version

Tks Landouglas,

Yeah, I forgot this case :s which is really bad.

Your solution is also missing something and will only replace 1 occurrence within the string.

Here is a revised solution:

  /**
   * Replace every non-overlapping occurrence of substr in string with
   * replacement, scanning left to right.  Inserted text is never scanned
   * again, so a replacement that contains substr (e.g. "bar" -> "bar2") is
   * handled correctly.
   *
   * The function first counts the matches, then allocates the result once
   * and fills it in a second pass.
   *
   * @param string       NUL-terminated input; never modified.  NULL yields NULL.
   * @param substr       text to look for; NULL or "" yields a plain copy.
   * @param replacement  text to insert; NULL yields a plain copy.
   * @return newly malloc'ed string the caller must free(), or NULL if the
   *         result size overflows size_t or memory could not be allocated.
   */
  char *
  str_replace ( const char *string, const char *substr, const char *replacement ){
    const char *src = NULL;
    const char *tok = NULL;
    char *newstr = NULL;
    char *out = NULL;
    size_t str_len, sub_len, rep_len, new_len, chunk;
    size_t count = 0;

    if ( string == NULL ) return NULL;

    str_len = strlen ( string );
    sub_len = ( substr != NULL ) ? strlen ( substr ) : 0;
    rep_len = ( replacement != NULL ) ? strlen ( replacement ) : 0;

    /* first pass: count matches; an empty needle would match forever, so it counts as "no match" */
    if ( sub_len != 0 && replacement != NULL ){
      for ( src = string; (tok = strstr ( src, substr )) != NULL; src = tok + sub_len ){
        count++;
      }
    }

    /* final length, refusing sizes that would wrap around size_t */
    new_len = str_len;
    if ( rep_len >= sub_len ){
      size_t grow = rep_len - sub_len;
      if ( grow != 0 && count > ( (size_t)-1 - 1 - str_len ) / grow ) return NULL;
      new_len += count * grow;
    } else {
      new_len -= count * ( sub_len - rep_len );
    }

    newstr = malloc ( new_len + 1 );
    if ( newstr == NULL ) return NULL;

    /* second pass: copy the text between matches and drop in the replacement */
    out = newstr;
    src = string;
    for ( ; count > 0; count-- ){
      tok = strstr ( src, substr );   /* cannot fail: same scan as the counting pass */
      chunk = (size_t)( tok - src );
      memcpy ( out, src, chunk );
      out += chunk;
      memcpy ( out, replacement, rep_len );
      out += rep_len;
      src = tok + sub_len;
    }

    /* whatever follows the last match, including the terminating NUL */
    chunk = str_len - (size_t)( src - string );
    memcpy ( out, src, chunk + 1 );
    return newstr;
  }

// Here is the code for unicode strings!


/*
 * Locate txt2 inside txt1.
 *
 * Returns the offset, counted in wide characters from the start of txt1,
 * of the first occurrence reported by wcsstr(), or -1 when there is none.
 */
int mystrstr(wchar_t *txt1,wchar_t *txt2)
{
    const wchar_t *hit = wcsstr(txt1, txt2);

    if (hit == NULL)
        return -1;

    return (int)(hit - txt1);
}

// Replace, in place, every non-overlapping occurrence of txt1 in buff with
// txt2, scanning left to right; inserted text is never scanned again.
//
// assume: supplied buff is enough to hold generated text
//
// buff is left untouched when any argument is NULL, when txt1 is empty
// (an empty needle matches everywhere and would never terminate), when txt1
// does not occur, or when the temporary copy cannot be allocated.
void StringReplace(wchar_t *buff,wchar_t *txt1,wchar_t *txt2)
{
    wchar_t *first;
    wchar_t *tmp;
    wchar_t *out;
    const wchar_t *src;
    const wchar_t *hit;
    size_t needleLen;
    size_t replLen;
    size_t tailLen;
    size_t gap;

    if (buff == NULL || txt1 == NULL || txt2 == NULL)
        return;

    needleLen = wcslen(txt1);
    if (needleLen == 0)
        return;

    first = wcsstr(buff, txt1);
    if (first == NULL)
        return;

    replLen = wcslen(txt2);

    // Everything before the first match stays where it is; only the part
    // from the first match onwards is rewritten, so snapshot just that part.
    tailLen = wcslen(first);
    tmp = malloc((tailLen + 1) * sizeof *tmp);
    if (tmp == NULL)
        return;
    wmemcpy(tmp, first, tailLen + 1);

    out = first;   // write cursor inside buff
    src = tmp;     // read cursor inside the snapshot
    while ((hit = wcsstr(src, txt1)) != NULL)
    {
        gap = (size_t)(hit - src);
        wmemcpy(out, src, gap);
        out += gap;
        wmemcpy(out, txt2, replLen);
        out += replLen;
        src = hit + needleLen;
    }

    // remainder after the last match, including the terminating L'\0'
    wmemcpy(out, src, wcslen(src) + 1);

    free(tmp);
}
//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]




// IS2120@CSDN.BG57IV3 String replace program in C
/**
 ****************************************************|
 * String replace Program                            |
 ****************************************************|
 * Takes three string input from the user
 * Replaces all the occurrences of the second string
 * with the third string from the first string
 * @author Swashata
 */

/** Include Libraries */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/** Define the max char length */
#define MAX_L 4096

/** Prototypes */
void replace(char *, char *, char *);

/**
 * Read one line from stdin into dst (capacity cap bytes) and strip the
 * trailing newline.  On end-of-file or read error dst becomes "".
 * Replaces gets(), which cannot bound its write and was removed in C11.
 */
static void read_line(char *dst, size_t cap)
{
    if (fgets(dst, (int)cap, stdin) == NULL) {
        dst[0] = '\0';
        return;
    }
    dst[strcspn(dst, "\n")] = '\0';
}

int main(void)
{
    char o_string[MAX_L], s_string[MAX_L], r_string[MAX_L]; //String storing variables

    printf("Please enter the original string (max length %d characters): ", MAX_L);
    fflush(stdout); //make sure the prompt is visible before blocking on input
    read_line(o_string, sizeof o_string);

    printf("\nPlease enter the string to search (max length %d characters): ", MAX_L);
    fflush(stdout);
    read_line(s_string, sizeof s_string);

    printf("\nPlease enter the replace string (max length %d characters): ", MAX_L);
    fflush(stdout);
    read_line(r_string, sizeof r_string);

    printf("\n\nThe Original string\n*************************************\n");
    puts(o_string);

    replace(o_string, s_string, r_string);

    printf("\n\nThe replaced string\n*************************************\n");
    puts(o_string);

    return 0;
}

/**
 * The replace function
 *
 * Replaces every non-overlapping occurrence of s_string in o_string with
 * r_string, in a single left-to-right pass.  Replaced text is not searched
 * again, so a replace string that contains the search string terminates.
 *
 * The result is assembled in a MAX_L-byte scratch buffer and copied back,
 * so o_string must have room for MAX_L bytes (main's buffers do).
 * o_string is left unchanged when an argument is NULL, when s_string is
 * empty, or when the result would not fit in MAX_L bytes.
 *
 * @param char * o_string The original string
 * @param char * s_string The string to search for
 * @param char * r_string The replace string
 * @return void The o_string passed is modified
 */
void replace(char *o_string, char *s_string, char *r_string)
{
    //a buffer variable to do all replace things
    char buffer[MAX_L];
    //to store the pointer returned from strstr
    const char *ch;
    const char *src;
    size_t s_len, r_len, gap, tail;
    size_t used = 0;

    if (o_string == NULL || s_string == NULL || r_string == NULL)
        return;

    s_len = strlen(s_string);
    //an empty search string matches everywhere; nothing sensible to do
    if (s_len == 0)
        return;
    r_len = strlen(r_string);

    src = o_string;
    while ((ch = strstr(src, s_string)) != NULL) {
        gap = (size_t)(ch - src);
        //keep at least one byte free for the terminating null
        if (gap + r_len >= sizeof buffer - used)
            return;
        //copy the content before this occurrence, then the replace string
        memcpy(buffer + used, src, gap);
        used += gap;
        memcpy(buffer + used, r_string, r_len);
        used += r_len;
        src = ch + s_len;
    }

    //append whatever follows the last occurrence, including its null
    tail = strlen(src);
    if (tail >= sizeof buffer - used)
        return;
    memcpy(buffer + used, src, tail + 1);
    used += tail;

    memcpy(o_string, buffer, used + 1);
}
// IS2120@CSDN.BG57IV3

//z 2013-10-19 18:04:32 IS2120@BG57IV3 T2080877905.K.F2560461818[T2,L61,R2,V7]

posted @ 2013-10-19 18:02  BiG5  阅读(238)  评论(0)  编辑  收藏  举报