mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
# fallback to unoptimized version for 4.0.4 release
This commit is contained in:
parent
be895bcb96
commit
63e6b0b5bf
2 changed files with 94 additions and 152 deletions
|
@ -23,162 +23,47 @@
|
|||
#include <ctype.h>
|
||||
#include "php_string.h"
|
||||
|
||||
/* faster, but obfuscated, all operations have a cost of 1 */
|
||||
static int fastest_levdist(const char *s1, const char *s2)
|
||||
/* reference implementation, only optimized for memory usage, not speed */
|
||||
static int reference_levdist(const char *s1, int l1,
|
||||
const char *s2, int l2,
|
||||
int cost_ins, int cost_rep, int cost_del )
|
||||
{
|
||||
register char *p1,*p2;
|
||||
register int i,j,n;
|
||||
int l1=0,l2=0;
|
||||
char r[512];
|
||||
const char *tmp;
|
||||
|
||||
/* skip equal start sequence, if any */
|
||||
while(*s1==*s2) {
|
||||
if(!*s1) break;
|
||||
s1++; s2++;
|
||||
}
|
||||
int *p1,*p2,*tmp;
|
||||
int i1,i2,c0,c1,c2;
|
||||
|
||||
/* if we already used up one string, then
|
||||
the result is the length of the other */
|
||||
if(*s1=='\0') return strlen(s2);
|
||||
if(*s2=='\0') return strlen(s1);
|
||||
if(l1==0) return l2*cost_ins;
|
||||
if(l2==0) return l1*cost_del;
|
||||
|
||||
/* length count */
|
||||
while(*s1++) l1++;
|
||||
while(*s2++) l2++;
|
||||
|
||||
/* cut of equal tail sequence, if any */
|
||||
while(*--s1 == *--s2) {
|
||||
l1--; l2--;
|
||||
}
|
||||
|
||||
p1=malloc(l2*sizeof(int));
|
||||
p2=malloc(l2*sizeof(int));
|
||||
|
||||
/* reset pointers, adjust length */
|
||||
s1-=l1++;
|
||||
s2-=l2++;
|
||||
|
||||
p1[0]=(s1[0]==s2[0])?0:cost_rep;
|
||||
|
||||
/* possible dist to great? */
|
||||
if(abs(l1-l2)>=255) return -1;
|
||||
for(i2=1;i2<l2;i2++)
|
||||
p1[i2]=i2*cost_ins;
|
||||
|
||||
/* swap if l2 longer than l1 */
|
||||
if(l1<l2) {
|
||||
tmp=s1; s1=s2; s2=tmp;
|
||||
l1 ^= l2; l2 ^= l1; l1 ^= l2;
|
||||
}
|
||||
for(i1=1;i1<l1;i1++)
|
||||
{
|
||||
p2[0]=i1*cost_del;
|
||||
for(i2=1;i2<l2;i2++)
|
||||
{
|
||||
c0=p1[i2-1]+((s1[i1]==s2[i2])?0:cost_rep);
|
||||
c1=p1[i2]+cost_del; if(c1<c0) c0=c1;
|
||||
c2=p2[i2-1]+cost_ins; if(c2<c0) c0=c2;
|
||||
p2[i2]=c0;
|
||||
}
|
||||
tmp=p1; p1=p2; p2=tmp;
|
||||
}
|
||||
|
||||
|
||||
/* fill initial row */
|
||||
n=1;
|
||||
for(i=0,p1=r;i<l1;i++,*p1++=n++,p1++) {/*empty*/}
|
||||
|
||||
/* calc. rowwise */
|
||||
for(j=1;j<l2;j++) {
|
||||
/* init pointers and col#0 */
|
||||
p1 = r + !(j&1);
|
||||
p2 = r + (j&1);
|
||||
n=*p1+1;
|
||||
*p2++=n;p2++;
|
||||
s2++;
|
||||
|
||||
/* foreach column */
|
||||
for(i=1;i<l1;i++) {
|
||||
if(*p1<n) n=*p1+(*(s1+i)!=*(s2)); /* replace cheaper than delete? */
|
||||
p1++;
|
||||
if(*++p1<n) n=*p1+1; /* insert cheaper then replace ? */
|
||||
*p2++=n++; /* update field and cost for next col's delete */
|
||||
p2++;
|
||||
}
|
||||
}
|
||||
c0=p1[l2-1];
|
||||
|
||||
/* return result */
|
||||
return n-1;
|
||||
free(p1);
|
||||
free(p2);
|
||||
|
||||
return c0;
|
||||
}
|
||||
|
||||
|
||||
static int weighted_levdist( const char *s1
|
||||
, const char *s2
|
||||
, const int cost_ins
|
||||
, const int cost_rep
|
||||
, const int cost_del
|
||||
)
|
||||
{
|
||||
register int *p1,*p2;
|
||||
register int i,j,n,c;
|
||||
int l1=0,l2=0;
|
||||
int r[512];
|
||||
const char *tmp;
|
||||
|
||||
/* skip equal start sequence, if any */
|
||||
while(*s1==*s2) {
|
||||
if(!*s1) break;
|
||||
s1++; s2++;
|
||||
}
|
||||
|
||||
/* if we already used up one string, then
|
||||
the result is the length of the other */
|
||||
if(*s1=='\0') return strlen(s2);
|
||||
if(*s2=='\0') return strlen(s1);
|
||||
|
||||
/* length count */
|
||||
while(*s1++) l1++;
|
||||
while(*s2++) l2++;
|
||||
|
||||
/* cut of equal tail sequence, if any */
|
||||
while(*--s1 == *--s2) {
|
||||
l1--; l2--;
|
||||
}
|
||||
|
||||
|
||||
/* reset pointers, adjust length */
|
||||
s1-=l1++;
|
||||
s2-=l2++;
|
||||
|
||||
|
||||
/* possible dist to great? */
|
||||
if(abs(l1-l2)>=255) return -1;
|
||||
|
||||
/* swap if l2 longer than l1 */
|
||||
if(l1<l2) {
|
||||
tmp=s1; s1=s2; s2=tmp;
|
||||
l1 ^= l2; l2 ^= l1; l1 ^= l2;
|
||||
}
|
||||
|
||||
if((l1==1)&&(l2==1)) {
|
||||
n= cost_del+cost_ins;
|
||||
return n<cost_rep?n:cost_rep;
|
||||
}
|
||||
|
||||
/* fill initial row */
|
||||
n=cost_ins;
|
||||
for(i=0,p1=r;i<l1;i++,*p1++=n,p1++) {n+=cost_ins;}
|
||||
|
||||
/* calc. rowwise */
|
||||
for(j=1;j<l2;j++) {
|
||||
/* init pointers and col#0 */
|
||||
p1 = r + !(j&1);
|
||||
p2 = r + (j&1);
|
||||
n=*p1+cost_del;
|
||||
*p2++=n;p2++;
|
||||
s2++;
|
||||
|
||||
/* foreach column */
|
||||
for(i=1;i<l1;i++) {
|
||||
c = *p1; if(*(s1+i)!=*(s2)) c+=cost_rep;
|
||||
if(c<n) n=c; /* replace cheaper than delete? */
|
||||
p1++;
|
||||
c= *++p1+cost_ins;
|
||||
if(c<n) n=c; /* insert cheaper then replace ? */
|
||||
*p2++=n; /* update field and cost for next col's delete */
|
||||
n+=cost_del; /* update field and cost for next col's delete */
|
||||
p2++;
|
||||
}
|
||||
}
|
||||
|
||||
/* return result */
|
||||
return n-=cost_del;
|
||||
}
|
||||
|
||||
static int custom_levdist(char *str1,char *str2,char *callback_name)
|
||||
{
|
||||
php_error(E_WARNING,"the general Levenshtein support is not there yet");
|
||||
|
@ -202,8 +87,11 @@ PHP_FUNCTION(levenshtein)
|
|||
}
|
||||
convert_to_string_ex(str1);
|
||||
convert_to_string_ex(str2);
|
||||
|
||||
distance = fastest_levdist((*str1)->value.str.val, (*str2)->value.str.val);
|
||||
|
||||
distance = reference_levdist((*str1)->value.str.val,(*str1)->value.str.len,
|
||||
(*str2)->value.str.val,(*str2)->value.str.len,
|
||||
1,1,1);
|
||||
|
||||
break;
|
||||
|
||||
case 5: /* more general version: calc cost by ins/rep/del weights */
|
||||
|
@ -216,11 +104,11 @@ PHP_FUNCTION(levenshtein)
|
|||
convert_to_long_ex(cost_rep);
|
||||
convert_to_long_ex(cost_del);
|
||||
|
||||
distance = weighted_levdist((*str1)->value.str.val
|
||||
, (*str2)->value.str.val
|
||||
, (*cost_ins)->value.lval
|
||||
, (*cost_rep)->value.lval
|
||||
, (*cost_del)->value.lval
|
||||
distance = reference_levdist((*str1)->value.str.val,(*str1)->value.str.len,
|
||||
(*str2)->value.str.val,(*str2)->value.str.len,
|
||||
(*cost_ins)->value.lval,
|
||||
(*cost_rep)->value.lval,
|
||||
(*cost_del)->value.lval
|
||||
);
|
||||
|
||||
break;
|
||||
|
|
54
ext/standard/tests/general_functions/003.phpt
Normal file
54
ext/standard/tests/general_functions/003.phpt
Normal file
|
@ -0,0 +1,54 @@
|
|||
--TEST--
|
||||
levenshtein() function test
|
||||
--POST--
|
||||
--GET--
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
function test_me($title,$expect,$text1,$text2,$cost1=0,$cost2=0,$cost3=0) {
|
||||
|
||||
if($cost1==0)
|
||||
$result=levenshtein($text1,$text2);
|
||||
else
|
||||
$result=levenshtein($text1,$text2,$cost1,$cost2,$cost3);
|
||||
|
||||
if($result==$expect) return 0;
|
||||
|
||||
echo "$title: result is $result instead of $expect ";
|
||||
echo "for '$text1'/'$text2' ";
|
||||
if($cost1) echo "($cost1:$cost2:$cost3)";
|
||||
echo "\n";
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
$n=0;
|
||||
|
||||
$n += test_me("equal" , 0, "12345", "12345");
|
||||
$n += test_me("1st empty" , 3, "", "xzy");
|
||||
$n += test_me("2nd empty" , 3, "xzy", "");
|
||||
$n += test_me("both empty" , 0, "", "");
|
||||
$n += test_me("1 char" , 1, "1", "2");
|
||||
$n += test_me("2 char swap", 2, "12", "21");
|
||||
|
||||
$n += test_me("inexpensive delete", 2, "2121", "11", 2, 1, 1);
|
||||
$n += test_me("expensive delete" , 10, "2121", "11", 2, 1, 5);
|
||||
$n += test_me("inexpensive insert", 2, "11", "2121", 1, 1, 1);
|
||||
$n += test_me("expensive insert" , 10, "11", "2121", 5, 1, 1);
|
||||
|
||||
$n += test_me("expensive replace" , 3, "111", "121", 2, 3, 2);
|
||||
$n += test_me("very expensive replace", 4, "111", "121", 2, 9, 2);
|
||||
|
||||
$n += test_me("bug #7368", 2, "13458", "12345");
|
||||
$n += test_me("bug #7368", 2, "1345", "1234");
|
||||
|
||||
$n += test_me("bug #6562", 1, "debugg", "debug");
|
||||
$n += test_me("bug #6562", 1, "ddebug", "debug");
|
||||
$n += test_me("bug #6562", 2, "debbbug", "debug");
|
||||
$n += test_me("bug #6562", 1, "debugging", "debuging");
|
||||
|
||||
echo ($n==0)?"all passed\n":"$n failed\n";
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
all passed
|
Loading…
Add table
Add a link
Reference in a new issue