2 messages in com.mysql.lists.bugsRe: UDF with regex bug
FromSent OnAttachments
Vladislav Shulgin07 Mar 2001 02:02 
Sinisa Milivojevic07 Mar 2001 05:39 
Subject:Re: UDF with regex bug
From:Sinisa Milivojevic (sin@mysql.com)
Date:03/07/2001 05:39:51 AM
List:com.mysql.lists.bugs

Vladislav Shulgin writes:

Description:

For acceleration of spell check I needed in UDF function which be able to replace substrings based on regular expressions.

Such PHP functions are ereg_replace and preg_replace.

Trying to write UDF for MySQL I has confronted with a strange problem.

At compilation in a separate executed file my function works correctly and gives out correct results.

But when the same function I compile as UDF - I recive interesting effects.

MySQL at performance UDF through times breaks off connection. And when the connection does not break off - than MySQL gives out wrong results.

How-To-Repeat:

comple the UDF:

this is source:

#ifdef STANDARD #include <stdio.h> #include <string.h> #else #include <global.h> #include <my_sys.h> #endif #include <mysql.h> #include <m_ctype.h> #include <m_string.h> #include <regex.h>

#define NS 10

#ifdef HAVE_DLOPEN

char* reg_replace(char* string, char* pattern, char* replace, int icase, int extended);

extern "C" { my_bool ereg_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message); char* ereg_replace(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); my_bool eregi_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message); char* eregi_replace(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); }

my_bool ereg_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message) { if (args->arg_count != 3 || args->arg_type[0] != STRING_RESULT || args->arg_type[1] != STRING_RESULT || args->arg_type[2] != STRING_RESULT ) { strcpy(message,"ereg_replace require 3 string arguments"); return 1; } return 0; }

char *ereg_replace(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) { if (result = reg_replace(args->args[0], args->args[1], args->args[2], 0, 1)) { *length = strlen(result); return result; } else { *is_null = 1; return result; } }

my_bool eregi_replace_init(UDF_INIT *initid, UDF_ARGS *args, char *message) { if (args->arg_count != 3 || args->arg_type[0] != STRING_RESULT || args->arg_type[1] != STRING_RESULT || args->arg_type[2] != STRING_RESULT ) { strcpy(message,"eregi_replace require 3 string arguments"); return 1; } return 0; }

char *eregi_replace(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) { if (result = reg_replace(args->args[0], args->args[1], args->args[2], 1, 1)) { *length = strlen(result); return result; } else { *is_null = 1; return result; } }

char* reg_replace(char* string, char* pattern, char* replace, int icase, int extended) { char *result; regex_t* re; regmatch_t* subs; char *buf, *nbuf, *walkbuf; const char *walk; int buf_len; int pos, tmp, string_len, new_l; int err, copts = 0;

string_len = strlen(string);

if (icase) copts = REG_ICASE; if (extended) copts |= REG_EXTENDED;

re = (regex_t*) malloc(sizeof(regex_t)); subs = (regmatch_t*) malloc(sizeof(regmatch_t)*NS);

err = regcomp(re, pattern, copts);

if (err) { result = (char*) malloc(strlen(string) + 1); memcpy(result, string, strlen(string)+1); free(re); free(subs); return result; }

buf_len = 2 * string_len + 1; buf = (char*) malloc(buf_len * sizeof(char)); if (!buf) { regfree(re); free(re); free(subs); return 0; }

err = pos = 0; buf[0] = '\0';

while (!err) { err = regexec(re, &string[pos], (size_t) NS, subs, (pos ? REG_NOTBOL : 0)); if (!err) { new_l = strlen(buf) + subs[0].rm_so; walk = replace; while (*walk) if ('\\' == *walk && '0' <= walk[1] && '9' >= walk[1] && subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) { new_l += subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so; walk += 2; } else { new_l++; walk++; }

if (new_l + 1 > buf_len) { buf_len = 1 + buf_len + 2 * new_l; nbuf = (char*) malloc(buf_len); memcpy (nbuf, buf, strlen(buf)+1); free(buf); buf = nbuf; } tmp = strlen(buf);

buf[strlen(buf) + subs[0].rm_so + 1] = 0; memcpy(buf + strlen(buf), &string[pos], subs[0].rm_so);

walkbuf = &buf[tmp + subs[0].rm_so]; walk = replace; while (*walk) if ('\\' == *walk && '0' <= walk[1] && '9' >= walk[1] && subs[walk[1] - '0'].rm_so > -1 && subs[walk[1] - '0'].rm_eo > -1) { tmp = subs[walk[1] - '0'].rm_eo - subs[walk[1] - '0'].rm_so; memcpy (walkbuf, &string[pos + subs[walk[1] - '0'].rm_so], tmp); walkbuf += tmp; walk += 2; } else *walkbuf++ = *walk++; *walkbuf = '\0';

if (subs[0].rm_so == subs[0].rm_eo) { if (subs[0].rm_so + pos >= string_len) break; new_l = strlen (buf) + 1; if (new_l + 1 > buf_len) { buf_len = 1 + buf_len + 2 * new_l; nbuf = (char*) malloc(buf_len * sizeof(char)); memcpy (nbuf, buf, strlen(buf)+1); free(buf); buf = nbuf; } pos += subs[0].rm_eo + 1; buf [new_l-1] = string [pos-1]; buf [new_l] = '\0'; } else { pos += subs[0].rm_eo; } } else { new_l = strlen(buf) + strlen(&string[pos]); if (new_l + 1 > buf_len) { buf_len = new_l + 1; nbuf = (char*) malloc(buf_len * sizeof(char)); memcpy (nbuf, buf, strlen(buf)+1); free(buf); buf = nbuf; } memcpy(buf + strlen(buf), &string[pos], strlen(&string[pos]) + 1); } } regfree(re); free(re); free(subs);

if (strlen(buf) > 0) { result = (char*) malloc(strlen(buf) + 1); memcpy(result, buf, strlen(buf) + 1); } return result; }

#endif /* HAVE_DLOPEN */

Fix:

<how to correct or work around the problem, if known (multiple lines)>

Submitter-Id: <submitter ID>

Originator:

Organization:

<organization of PR author (multiple lines)>

MySQL support: [none | licence | email support | extended email support ]

Synopsis: <synopsis of the problem (one line)>

Severity: <[ non-critical | serious | critical ] (one line)>

Priority: <[ low | medium | high ] (one line)>

Category: mysql

Class: <[ sw-bug | doc-bug | change-request | support ] (one line)>

Release: mysql-3.23.33 (Source distribution)

Server: /usr/local/mysql/bin/mysqladmin Ver 8.15 Distrib 3.23.33, for

pc-linux-gnu on i686

Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB

This software comes with ABSOLUTELY NO WARRANTY. This is free software,

and you are welcome to modify and redistribute it under the GPL license

Server version 3.23.33-log

Protocol version 10

Connection Localhost via UNIX socket

UNIX socket /tmp/mysql.sock

Uptime: 1 day 1 hour 1 min 19 sec

Threads: 10 Questions: 961 Slow queries: 0 Opens: 57 Flush tables: 1 Open tables: 8 Queries per second avg: 0.011

Environment:

<machine, os, target, libraries (multiple lines)>

System: Linux ukrbiz 2.4.1 #2 Thu Feb 15 16:16:51 EET 2001 i686 unknown

Architecture: i686

Some paths: /usr/bin/perl /usr/bin/make /usr/bin/gmake /usr/local/bin/gcc /usr/bin/cc

GCC: Reading specs from /usr/local/lib/gcc-lib/i686-pc-linux-gnu/2.95.2/specs

gcc version 2.95.2 19991024 (release)

Compilation info: CC='gcc' CFLAGS='' CXX='c++' CXXFLAGS='' LDFLAGS=''

LIBC:

lrwxrwxrwx 1 root root 13 Feb 13 15:16 /lib/libc.so.6 -> libc-2.2.1.so

-rwxr-xr-x 1 root root 1013224 Mar 22 2000 /lib/libc-2.1.3.so

-rwxr-xr-x 1 root root 4806279 Feb 13 15:16 /lib/libc-2.2.1.so

-rw-r--r-- 1 root root 24150394 Feb 13 15:14 /usr/lib/libc.a

-rw-r--r-- 1 root root 178 Feb 13 15:14 /usr/lib/libc.so

Configure command: ./configure --prefix=/usr/local/mysql --with-charset=win1251ukr --with-mysql d-ldflags=-rdynamic

It is very likely that the error is somewhere in your code.

Sorry, we do not have time to debug your code.

You can use DBUG... macros and run mysqld with --debug option and you can get a trace with info that might help you in pinpointing a problem. Or you can use gdb debugger to accomplish the same.

Regards,

Sinisa