源代码是由纯汇编写的,如下:
;改进型的bm匹配算法 支持中英文混合(即单字节与双字节词混合
;用nasm编译
;函数原型
;extern "C" bool sfstrstr(unsigned int td[256], const char* str1, const char* str2, char case_table[256]);
;其中:
; td内的所有元素需在最初被初始化为0
; casetable是一个大小写转换用的转换表,加快大小写转换的速度,需用如下的方法生成
; for(int i = 0; i < 256; ++i)
; casetable[i] = tolower(i);
;
;如果匹配成功返回true(1), 否则返回false(0)
;
;xieyubo@gmail.com
;
;Last Modified: 2005.10.15
[BITS 32]
[global sfstrstr]
countstrlen:
xor eax, eax
_countstrlen_loop:
cmp [esi + eax], byte 0
je _countstrlen_exit
inc eax
jmp _countstrlen_loop
_countstrlen_exit:
ret
lower:
mov al, [esp + 4]
and eax, 0xff
add eax, [ebp + 20]
mov al, [eax]
mov [esp + 4], al
ret
getnumcnword:
xor eax, eax
xor ebx, ebx
_getnumcnword_loop:
cmp ebx, [ebp - 8]
je _getnumcnword_exit
mov edx, [ebp + 12]
add edx, ebx
test [edx], byte 0x80
jz _getnumcnword_loop_goto
inc eax
_getnumcnword_loop_goto:
inc ebx
jmp _getnumcnword_loop
_getnumcnword_exit:
ret
inittd:
mov esi, [ebp + 16]
mov edi, [ebp + 8]
xor ecx, ecx
_inittd_loop:
mov eax, [ebp - 12]
cmp ecx, eax
jnl _inittd_exit
mov edx, eax
sub edx, ecx
push dword [esi + ecx]
call lower
pop eax
and eax, 0xff
mov [edi + eax * 4], edx
inc ecx
jmp _inittd_loop
_inittd_exit:
ret
resettd:
mov esi, [ebp + 16]
mov edi, [ebp + 8]
xor ecx, ecx
_resettd_loop:
cmp ecx, [ebp - 12]
jnl _resettd_exit
push dword [esi + ecx]
call lower
pop eax
and eax, 0xff
mov [edi + eax * 4], dword 0
inc ecx
jmp _resettd_loop
_resettd_exit:
ret
sfstrstr:
push ebp
mov ebp, esp
sub esp, 20
push esi
push edi
;;strlen(str1)
mov esi, [ebp + 12]
call countstrlen
mov [ebp - 16], eax
;;strlen(str2)
mov esi, [ebp + 16]
call countstrlen
mov [ebp - 12], eax
call inittd
mov esi, [ebp + 16]
mov edi, [ebp + 12]
mov [ebp - 4], dword 0
mov [ebp - 8], dword 0
_sfstrstr_loop2:
xor ecx, ecx
mov eax, [ebp - 8]
add eax, [ebp - 12]
cmp eax, [ebp - 16]
jg _sfstrstr_loop2_exit
_sfstrstr_loop3:
cmp ecx, [ebp - 12]
jnl _sfstrstr_loop3_exit
mov eax, [ebp - 8]
add eax, ecx
push dword [edi + eax]
call lower
pop ebx
push dword [esi + ecx]
call lower
pop edx
cmp bl, dl
jne _sfstrstr_loop3_exit
inc ecx
jmp _sfstrstr_loop3
_sfstrstr_loop3_exit:
cmp ecx, [ebp - 12]
jne _sfstrstr_goto1
call getnumcnword
test eax, 0x00000001
jz _sfstrstr_goto3
mov eax, [ebp - 12]
inc eax
add [ebp - 8], eax
jmp _sfstrstr_loop2
_sfstrstr_goto3:
mov [ebp - 4], dword 1
jmp _sfstrstr_loop2_exit
_sfstrstr_goto1:
mov eax, [ebp - 8]
add eax, [ebp - 12]
push dword [edi + eax]
call lower
pop eax
and eax, 0xff
shl eax, 2
add eax, [ebp + 8]
mov eax, [eax]
cmp eax, 0
je _sfstrstr_goto2
add [ebp - 8], eax
jmp _sfstrstr_loop2
_sfstrstr_goto2:
mov eax, [ebp - 12]
inc eax
add [ebp - 8], eax
jmp _sfstrstr_loop2
_sfstrstr_loop2_exit:
call resettd
mov eax, [ebp - 4]
return:
pop edi
pop esi
mov esp, ebp
pop ebp
ret