Delphi：提高FastReplace速度 (fStrRep.pas)

热点排行

提高FastReplace速度 (fStrRep.pas)

类别：Delphi 点击：0 评论：0 推荐：

{其实还可以在FastReplace中先对FindStr进行判断，如果
FindStr完全是中文的话，直接可以在FastReplace中用
FastPos，这样可以提高速度。}

unit fStrRep;

interface

Type
TFastPosProc = function(
    const aSourceString, aFindString : String;
    const aSourceLen, aFindLen, StartPos : integer
    ) : integer;

function FastReplace(
var aSourceString : String;
const aFindString, aReplaceString : String;
CaseSensitive : Boolean = False) : String;

function FastPos(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;

function FastPosNoCase(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;

implementation

// This TYPE declaration will become apparent later.
//The first thing to note here is that I’m passing the SourceLength and FindL
//ength. As neither Source nor Find will alter at any point during FastReplace
//, there’s no need to call the LENGTH subroutine each time!
function FastPos(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;
var
SourceLen : integer;
begin
// Next, we determine how many bytes we need to
// scan to find the "start" of aFindString.
SourceLen := aSourceLen;
SourceLen := SourceLen - aFindLen;
if (StartPos-1) > SourceLen then begin
    Result := 0;
    Exit;
end;
SourceLen := SourceLen - StartPos;
SourceLen := SourceLen +2;
// The ASM starts here.
asm
    // Delphi uses ESI, EDI, and EBX a lot,
    // so we must preserve them.
    push ESI
    push EDI
    push EBX
    // Get the address of sourceString[1]
    // and Add (StartPos-1).
    // We do this for the purpose of finding
    // the NEXT occurrence, rather than
    // always the first!
    mov EDI, aSourceString
    add EDI, StartPos
    Dec EDI
    // Get the address of aFindString.
    mov ESI, aFindString
    // Note how many bytes we need to
    // look through in aSourceString
    // to find aFindString.
    mov ECX, SourceLen
    // Get the first char of aFindString;
    // note how it is done outside of the
    // main loop, as it never changes!
    Mov Al, [ESI]
    // Now the FindFirstCharacter loop!
    @ScaSB:
    // Get the value of the current
    // character in aSourceString.
    // This is equal to ah := EDI^, that
    // is what the [] are around [EDI].
    Mov Ah, [EDI]
    // Compare this character with aDestString[1].
    cmp Ah,Al
    // If they're not equal we don't
    // compare the strings.
    jne @NextChar
    // If they're equal, obviously we do!
    @CompareStrings:
    // Put the length of aFindLen in EBX.
    mov EBX, aFindLen
    // We DEC EBX to point to the end of
    // the string; that is, we don't want to
    // add 1 if aFindString is 1 in length!
    dec EBX

    // add by ShengQuanhu
    // If EBX is zero, then we've successfully
    // compared each character; i.e. it's A MATCH!
    // It will be happened when aFindLen=1
    Jz @EndOfMatch
    //add end

//Here’s another optimization tip. People at this point usually PUSH ESI and
//so on and then POP ESI and so forth at the end–instead, I opted not to chan
//ge ESI and so on at all. This saves lots of pushing and popping!
    @CompareNext:
    // Get aFindString character +
    // aFindStringLength (the last char).
    mov Al, [ESI+EBX]
    // Get aSourceString character (current
    // position + aFindStringLength).
    mov Ah, [EDI+EBX]
    // Compare them.
    cmp Al, Ah
    Jz   @Matches
    // If they don't match, we put the first char
    // of aFindString into Al again to continue
    // looking for the first character.
    Mov Al, [ESI]
    Jmp @NextChar
    @Matches:
    // If they match, we DEC EBX (point to
    // previous character to compare).
    Dec EBX
    // If EBX <> 0 ("J"ump "N"ot "Z"ero), we
    // continue comparing strings.
    Jnz @CompareNext

    //add by Shengquanhu
    @EndOfMatch:
    //add end

    // If EBX is zero, then we've successfully
    // compared each character; i.e. it's A MATCH!
    // Move the address of the *current*
    // character in EDI.
    // Note, we haven't altered EDI since
    // the first char was found.
    mov EAX, EDI
    // This is an address, so subtract the
    // address of aSourceString[1] to get
    // an actual character position.
    sub EAX, aSourceString
    // Inc EAX to make it 1-based,
    // rather than 0-based.
    inc EAX
    // Put it into result.
    mov Result, EAX
    // Finish this routine!
    jmp @TheEnd
    @NextChar:
//This is where I jump to when I want to continue searching for the first char
//acter of aFindString in aSearchString:
    // Point EDI (aFindString[X]) to
    // the next character.
    Inc EDI
    // Dec ECX tells us that we've checked
    // another character, and that we're
    // fast running out of string to check!
    dec ECX
    // If EBX <> 0, then continue scanning
    // for the first character.

    //add by shengquanhu
    //if ah is chinese char,jump again
    jz   @Result0
    cmp ah, $80
    jb   @ScaSB
    Inc EDI
    Dec ECX
    //add by shengquanhu end

jnz @ScaSB

    //add by shengquanhu
    @Result0:
    //add by shengquanhu end

    // If EBX = 0, then move 0 into RESULT.
    mov Result,0
    // Restore EBX, EDI, ESI for Delphi
    // to work correctly.
    // Note that they're POPped in the
    // opposite order they were PUSHed.
    @TheEnd:
    pop EBX
    pop EDI
    pop ESI
end;
end;

//This routine is an identical copy of FastPOS except where commented! The ide
//a is that when grabbing bytes, it ANDs them with $df, effectively making the
//m lowercase before comparing. Maybe this would be quicker if aFindString was
// made lowercase in one fell swoop at the beginning of the function, saving a
//n AND instruction each time.
function FastPosNoCase(
const aSourceString, aFindString : String;
const aSourceLen, aFindLen, StartPos : integer
) : integer;
var
SourceLen : integer;
begin
SourceLen := aSourceLen;
SourceLen := SourceLen - aFindLen;
if (StartPos-1) > SourceLen then begin
    Result := 0;
    Exit;
end;
SourceLen := SourceLen - StartPos;
SourceLen := SourceLen +2;
asm
    push ESI
    push EDI
    push EBX

    mov EDI, aSourceString
    add EDI, StartPos
    Dec EDI
    mov ESI, aFindString
    mov ECX, SourceLen
    Mov Al, [ESI]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Al, $7A
    ja @ScaSB

    cmp Al, $61
    jb @ScaSB
    //end------------------------------------------

// Make Al uppercase.
and Al, $df

@ScaSB:
Mov Ah, [EDI]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Ah, $7A
    ja @CompareChar

    cmp Ah, $61
    jb @CompareChar
    //end------------------------------------------

// Make Ah uppercase.
and Ah, $df

    @CompareChar:
    cmp Ah,Al
    jne @NextChar
    @CompareStrings:
    mov EBX, aFindLen
    dec EBX

    //add by ShengQuanhu
    Jz   @EndOfMatch
    //add end

    @CompareNext:
    mov Al, [ESI+EBX]
    mov Ah, [EDI+EBX]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Ah, $7A
    ja @LowerAh

    cmp Al, $61
    jb @LowerAh
    //end------------------------------------------

// Make Al and Ah uppercase.
and Al, $df

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    @LowerAh:
    cmp Ah, $7A
    ja @CompareChar2

    cmp Ah, $61
    jb @CompareChar2
    //end------------------------------------------

and Ah, $df

    @CompareChar2:
    cmp Al, Ah
    Jz   @Matches
    Mov Al, [ESI]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Al, $7A
    ja @NextChar

    cmp Al, $61
    jb @NextChar
    //end------------------------------------------

    // Make Al uppercase.
    and Al, $df
    Jmp @NextChar
    @Matches:
    Dec EBX
    Jnz @CompareNext

    //add by Shengquanhu
    @EndOfMatch:
    //add end

    mov EAX, EDI
    sub EAX, aSourceString
    inc EAX
    mov Result, EAX
    jmp @TheEnd
    @NextChar:
    Inc EDI
    dec ECX
    //add by shengquanhu
    //if ah is chinese char,jump again
    jz   @Result0
    cmp ah, $80
    jb   @ScaSB
    Inc EDI
    Dec ECX
    //add by shengquanhu end
    jnz @ScaSB
    @Result0:
    mov Result,0
    @TheEnd:
    pop EBX
    pop EDI
    pop ESI
end;
end;

//My move isn’t as fast as MOVE when source and destination are both DWord al
//igned, but it’s certainly faster when they’re not. As we’re moving charac
//ters in a string, it isn’t very likely at all that both source and destinat
//ion are DWord aligned, so moving bytes avoids the cycle penalty of reading/w
//riting DWords across physical boundaries.
procedure MyMove(
const Source; var Dest; Count : Integer);
asm
// Note: When this function is called,
// Delphi passes the parameters as follows:
// ECX = Count
// EAX = Const Source
// EDX = Var Dest
// If there are no bytes to copy, just quit
// altogether; there's no point pushing registers.
cmp   ECX,0
Je    @JustQuit
// Preserve the critical Delphi registers.
push ESI
push EDI
// Move Source into ESI (generally the
// SOURCE register).
// Move Dest into EDI (generally the DEST
// register for string commands).
// This might not actually be necessary,
// as I'm not using MOVsb etc.
// I might be able to just use EAX and EDX;
// there could be a penalty for not using
// ESI, EDI, but I doubt it.
// This is another thing worth trying!
mov   ESI, EAX
mov   EDI, EDX
// The following loop is the same as repNZ
// MovSB, but oddly quicker!
    @Loop:
// Get the source byte.
Mov   AL, [ESI]
// Point to next byte.
Inc   ESI
// Put it into the Dest.
mov   [EDI], AL
// Point dest to next position.
Inc   EDI
// Dec ECX to note how many we have left to copy.
Dec   ECX
// If ECX <> 0, then loop.
Jnz   @Loop
// Another optimization note.
// Many people like to do this.
// Mov AL, [ESI]
// Mov [EDI], Al
// Inc ESI
// Inc ESI
//There’s a hidden problem here. I won’t go into too much detail, but the Pe
//ntium can continue processing instructions while it’s still working out the
// result of INC ESI or INC EDI. If, however, you use them while they’re stil
//l being calculated, the processor will stop until they’re calculated (a pen
//alty). Therefore, I alter ESI and EDI as far in advance as possible of using
// them.
// Pop the critical Delphi registers
// that we've altered.
pop   EDI
pop   ESI
    @JustQuit:
end;

//Point 1: I pass VAR aSourceString rather than just aSourceString. This is be
//cause I’ll just be passed a pointer to the data rather than a 10M copy of t
//he data itself, which is much quicker!
function FastReplace(
var aSourceString : String;
const aFindString, aReplaceString : String;
CaseSensitive : Boolean = False) : String;
var
// Size already passed to SetLength,
// the REAL size of RESULT.
ActualResultLen,
// Position of aFindString is aSourceString.
CurrentPos,
// Last position the aFindString was found at.
LastPos,
// Bytes to copy (that is, lastpos to this pos).
BytesToCopy,
// The "running" result length, not the actual one.
ResultLen,
// Length of aFindString, to save
// calling LENGTH repetitively.
FindLen,
// Length of aReplaceString, for the same reason.
ReplaceLen,
SourceLen         : Integer;
// This is where I explain the
// TYPE TFastPosProc from earlier!
FastPosProc       : TFastPosProc;
begin
//As this function has the option of being case-insensitive, I’d need to call
// either FastPOS or FastPOSNoCase. The problem is that you’d have to do this
// within a loop. This is a bad idea, since the result never changes throughou
//t the whole operation–in which case we can determine it in advance, like so
//:
if CaseSensitive then
    FastPosProc := FastPOS
else
    FastPOSProc := FastPOSNoCase;
// I don't think I actually need
// this, but I don't really mind!
Result := '';
// Get the lengths of the strings.
FindLen := Length(aFindString);
ReplaceLen := Length(aReplaceString);
SourceLen := Length(aSourceString);
// If we already have room for the replacements,
// then set the length of the result to
// the length of the SourceString.
if ReplaceLen <= FindLen then
    ActualResultLen := SourceLen
else
    // If not, we need to calculate the
    // worst-case scenario.
    // That is, the Source consists ONLY of
    // aFindString, and we're going to replace
    // every one of them!
    ActualResultLen :=
      SourceLen +
      (SourceLen * ReplaceLen div FindLen) +
      ReplaceLen;
// Set the length of Result; this
// will assign the memory, etc.
SetLength(Result,ActualResultLen);
CurrentPos := 1;
ResultLen := 0;
LastPos := 1;
//Again, I’m eliminating an IF statement in a loop by repeating code–this ap
//proach results in very slightly larger code, but if ever you can trade some
//memory in exchange for speed, go for it!
if ReplaceLen > 0 then begin
    repeat
      // Get the position of the first (or next)
      // aFindString in aSourceString.
      // Note that there's no If CaseSensitive,
      // I just call FastPOSProc, which is pointing
      // to the correct pre-determined routine.
      CurrentPos :=
        FastPosProc(aSourceString, aFindString,
          SourceLen, FindLen, CurrentPos);
      // If 0, then we're finished.
      if CurrentPos = 0 then break;
      // Number of bytes to copy from the
      // source string is CurrentPos - lastPos,
      // i.e. " cat " in "the cat the".
      BytesToCopy := CurrentPos-LastPos;
      // Copy chars from aSourceString
      // to the end of Result.
      MyMove(aSourceString[LastPos],
        Result[ResultLen+1], BytesToCopy);
      // Copy chars from aReplaceString to
      // the end of Result.
      MyMove(aReplaceString[1],
        Result[ResultLen+1+BytesToCopy], ReplaceLen);
      // Remember, using COPY would copy all of
      // the data over and over again.
      // Never fall into this trap (like a certain
      // software company did).
      // Set the running length to
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;
      // Set the position in aSourceString to where
      // we want to continue searching from.
      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until false;
end else begin
    // You might have noticed If ReplaceLen > 0.
    // Well, if ReplaceLen = 0, then we're deleting the
    // substrings, rather than replacing them, so we
    // don't need the extra MyMove from aReplaceString.
    repeat
      CurrentPos := FastPos(aSourceString,
        aFindString, SourceLen, FindLen, CurrentPos);
      if CurrentPos = 0 then break;
      BytesToCopy := CurrentPos-LastPos;
      MyMove(aSourceString[LastPos],
        Result[ResultLen+1], BytesToCopy);
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;
      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until false;
end;
//Now that we’ve finished doing all of the replaces, I just need to adjust th
//e length of the final result:
Dec(LastPOS);
//Now I set the length to the Length plus the bit of string left. That is, " m
//at" when replacing "the" in "sat on the mat".
SetLength(Result, ResultLen + (SourceLen-LastPos));
// If there's a bit of string dangling, then
// add it to the end of our string.
if LastPOS+1 <= SourceLen then
    MyMove(aSourceString[LastPos+1],
      Result[ResultLen+1],SourceLen-LastPos);
end;

end.

本文地址：http://com.8s8s.com/it/it4808.htm