提高FastReplace速度 (fStrRep.pas)

类别:Delphi 点击:0 评论:0 推荐:

{其实还可以在FastReplace中先对FindStr进行判断,如果
FindStr完全是中文的话,直接可以在FastReplace中用
FastPos,这样可以提高速度。}

unit fStrRep;

interface

Type
  TFastPosProc = function(
    const aSourceString, aFindString : String;
    const aSourceLen, aFindLen, StartPos : integer
    ) : integer;

function FastReplace(
  var aSourceString : String;
  const aFindString, aReplaceString : String;
  CaseSensitive : Boolean = False) : String;

function FastPos(
  const aSourceString, aFindString : String;
  const aSourceLen, aFindLen, StartPos : integer
  ) : integer;

function FastPosNoCase(
  const aSourceString, aFindString : String;
  const aSourceLen, aFindLen, StartPos : integer
  ) : integer;

implementation

// This TYPE declaration will become apparent later.
//The first thing to note here is that I’m passing the SourceLength and FindL
//ength. As neither Source nor Find will alter at any point during FastReplace
//, there’s no need to call the LENGTH subroutine each time!
function FastPos(
  const aSourceString, aFindString : String;
  const aSourceLen, aFindLen, StartPos : integer
  ) : integer;
var
  SourceLen : integer;
begin
  // Next, we determine how many bytes we need to
  // scan to find the "start" of aFindString.
  SourceLen := aSourceLen;
  SourceLen := SourceLen - aFindLen;
  if (StartPos-1) > SourceLen then begin
    Result := 0;
    Exit;
  end;
  SourceLen := SourceLen - StartPos;
  SourceLen := SourceLen +2;
  // The ASM starts here.
  asm
    // Delphi uses ESI, EDI, and EBX a lot,
    // so we must preserve them.
    push ESI
    push EDI
    push EBX
    // Get the address of sourceString[1]
    // and Add (StartPos-1).
    // We do this for the purpose of finding
    // the NEXT occurrence, rather than
    // always the first!
    mov EDI, aSourceString
    add EDI, StartPos
    Dec EDI
    // Get the address of aFindString.
    mov ESI, aFindString
    // Note how many bytes we need to
    // look through in aSourceString
    // to find aFindString.
    mov ECX, SourceLen
    // Get the first char of aFindString;
    // note how it is done outside of the
    // main loop, as it never changes!
    Mov  Al, [ESI]
    // Now the FindFirstCharacter loop!
    @ScaSB:
    // Get the value of the current
    // character in aSourceString.
    // This is equal to ah := EDI^, that
    // is what the [] are around [EDI].
    Mov  Ah, [EDI]
    // Compare this character with aDestString[1].
    cmp  Ah,Al
    // If they're not equal we don't
    // compare the strings.
    jne  @NextChar
    // If they're equal, obviously we do!
    @CompareStrings:
    // Put the length of aFindLen in EBX.
    mov  EBX, aFindLen
    // We DEC EBX to point to the end of
    // the string; that is, we don't want to
    // add 1 if aFindString is 1 in length!
    dec  EBX

    // add by ShengQuanhu
    // If EBX is zero, then we've successfully
    // compared each character; i.e. it's A MATCH!
    // It will be happened when aFindLen=1
    Jz @EndOfMatch
    //add end

//Here’s another optimization tip. People at this point usually PUSH ESI and
//so on and then POP ESI and so forth at the end–instead, I opted not to chan
//ge ESI and so on at all. This saves lots of pushing and popping!
    @CompareNext:
    // Get aFindString character +
    // aFindStringLength (the last char).
    mov  Al, [ESI+EBX]
    // Get aSourceString character (current
    // position + aFindStringLength).
    mov  Ah, [EDI+EBX]
    // Compare them.
    cmp  Al, Ah
    Jz   @Matches
    // If they don't match, we put the first char
    // of aFindString into Al again to continue
    // looking for the first character.
    Mov  Al, [ESI]
    Jmp  @NextChar
    @Matches:
    // If they match, we DEC EBX (point to
    // previous character to compare).
    Dec  EBX
    // If EBX <> 0 ("J"ump "N"ot "Z"ero), we
    // continue comparing strings.
    Jnz  @CompareNext

    //add by Shengquanhu
    @EndOfMatch:
    //add end

    // If EBX is zero, then we've successfully
    // compared each character; i.e. it's A MATCH!
    // Move the address of the *current*
    // character in EDI.
    // Note, we haven't altered EDI since
    // the first char was found.
    mov  EAX, EDI
    // This is an address, so subtract the
    // address of aSourceString[1] to get
    // an actual character position.
    sub  EAX, aSourceString
    // Inc EAX to make it 1-based,
    // rather than 0-based.
    inc  EAX
    // Put it into result.
    mov  Result, EAX
    // Finish this routine!
    jmp  @TheEnd
    @NextChar:
//This is where I jump to when I want to continue searching for the first char
//acter of aFindString in aSearchString:
    // Point EDI (aFindString[X]) to
    // the next character.
    Inc  EDI
    // Dec ECX tells us that we've checked
    // another character, and that we're
    // fast running out of string to check!
    dec  ECX
    // If EBX <> 0, then continue scanning
    // for the first character.

    //add by shengquanhu
    //if ah is chinese char,jump again
    jz   @Result0
    cmp  ah, $80
    jb   @ScaSB
    Inc  EDI
    Dec  ECX
    //add by shengquanhu end

    jnz  @ScaSB

    //add by shengquanhu
    @Result0:
    //add by shengquanhu end

    // If EBX = 0, then move 0 into RESULT.
    mov  Result,0
    // Restore EBX, EDI, ESI for Delphi
    // to work correctly.
    // Note that they're POPped in the
    // opposite order they were PUSHed.
    @TheEnd:
    pop  EBX
    pop  EDI
    pop  ESI
  end;
end;

//This routine is an identical copy of FastPOS except where commented! The ide
//a is that when grabbing bytes, it ANDs them with $df, effectively making the
//m lowercase before comparing. Maybe this would be quicker if aFindString was
// made lowercase in one fell swoop at the beginning of the function, saving a
//n AND instruction each time.
function FastPosNoCase(
  const aSourceString, aFindString : String;
  const aSourceLen, aFindLen, StartPos : integer
  ) : integer;
var
  SourceLen : integer;
begin
  SourceLen := aSourceLen;
  SourceLen := SourceLen - aFindLen;
  if (StartPos-1) > SourceLen then begin
    Result := 0;
    Exit;
  end;
  SourceLen := SourceLen - StartPos;
  SourceLen := SourceLen +2;
  asm
    push ESI
    push EDI
    push EBX

    mov EDI, aSourceString
    add EDI, StartPos
    Dec EDI
    mov ESI, aFindString
    mov ECX, SourceLen
    Mov  Al, [ESI]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Al, $7A
    ja @ScaSB

    cmp Al, $61
    jb @ScaSB
    //end------------------------------------------

    // Make Al uppercase.
    and  Al, $df

    @ScaSB:
    Mov  Ah, [EDI]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Ah, $7A
    ja @CompareChar

    cmp Ah, $61
    jb @CompareChar
    //end------------------------------------------

    // Make Ah uppercase.
    and  Ah, $df

    @CompareChar:
    cmp  Ah,Al
    jne  @NextChar
    @CompareStrings:
    mov  EBX, aFindLen
    dec  EBX

    //add by ShengQuanhu
    Jz   @EndOfMatch
    //add end

    @CompareNext:
    mov  Al, [ESI+EBX]
    mov  Ah, [EDI+EBX]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Ah, $7A
    ja @LowerAh

    cmp Al, $61
    jb @LowerAh
    //end------------------------------------------

    // Make Al and Ah uppercase.
    and  Al, $df

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    @LowerAh:
    cmp Ah, $7A
    ja @CompareChar2

    cmp Ah, $61
    jb @CompareChar2
    //end------------------------------------------

    and  Ah, $df

    @CompareChar2:
    cmp  Al, Ah
    Jz   @Matches
    Mov  Al, [ESI]

    //add by shengquanhu:just modified the lowercase 'a'..'z'
    cmp Al, $7A
    ja @NextChar

    cmp Al, $61
    jb @NextChar
    //end------------------------------------------

    // Make Al uppercase.
    and  Al, $df
    Jmp  @NextChar
    @Matches:
    Dec  EBX
    Jnz  @CompareNext

    //add by Shengquanhu
    @EndOfMatch:
    //add end

    mov  EAX, EDI
    sub  EAX, aSourceString
    inc  EAX
    mov  Result, EAX
    jmp  @TheEnd
    @NextChar:
    Inc  EDI
    dec  ECX
    //add by shengquanhu
    //if ah is chinese char,jump again
    jz   @Result0
    cmp  ah, $80
    jb   @ScaSB
    Inc  EDI
    Dec  ECX
    //add by shengquanhu end
    jnz  @ScaSB
    @Result0:
    mov  Result,0
    @TheEnd:
    pop  EBX
    pop  EDI
    pop  ESI
  end;
end;

//My move isn’t as fast as MOVE when source and destination are both DWord al
//igned, but it’s certainly faster when they’re not. As we’re moving charac
//ters in a string, it isn’t very likely at all that both source and destinat
//ion are DWord aligned, so moving bytes avoids the cycle penalty of reading/w
//riting DWords across physical boundaries.
procedure MyMove(
  const Source; var Dest; Count : Integer);
asm
// Note: When this function is called,
// Delphi passes the parameters as follows:
// ECX = Count
// EAX = Const Source
// EDX = Var Dest
  // If there are no bytes to copy, just quit
  // altogether; there's no point pushing registers.
  cmp   ECX,0
  Je    @JustQuit
  // Preserve the critical Delphi registers.
  push  ESI
  push  EDI
  // Move Source into ESI (generally the
  // SOURCE register).
  // Move Dest into EDI (generally the DEST
  // register for string commands).
  // This might not actually be necessary,
  // as I'm not using MOVsb etc.
  // I might be able to just use EAX and EDX;
  // there could be a penalty for not using
  // ESI, EDI, but I doubt it.
  // This is another thing worth trying!
  mov   ESI, EAX
  mov   EDI, EDX
  // The following loop is the same as repNZ
  // MovSB, but oddly quicker!
    @Loop:
  // Get the source byte.
  Mov   AL, [ESI]
  // Point to next byte.
  Inc   ESI
  // Put it into the Dest.
  mov   [EDI], AL
  // Point dest to next position.
  Inc   EDI
  // Dec ECX to note how many we have left to copy.
  Dec   ECX
  // If ECX <> 0, then loop.
  Jnz   @Loop
  // Another optimization note.
  // Many people like to do this.
  // Mov AL, [ESI]
  // Mov [EDI], Al
  // Inc ESI
  // Inc ESI
//There’s a hidden problem here. I won’t go into too much detail, but the Pe
//ntium can continue processing instructions while it’s still working out the
// result of INC ESI or INC EDI. If, however, you use them while they’re stil
//l being calculated, the processor will stop until they’re calculated (a pen
//alty). Therefore, I alter ESI and EDI as far in advance as possible of using
// them.
  // Pop the critical Delphi registers
  // that we've altered.
  pop   EDI
  pop   ESI
    @JustQuit:
end;

//Point 1: I pass VAR aSourceString rather than just aSourceString. This is be
//cause I’ll just be passed a pointer to the data rather than a 10M copy of t
//he data itself, which is much quicker!
function FastReplace(
  var aSourceString : String;
  const aFindString, aReplaceString : String;
  CaseSensitive : Boolean = False) : String;
var
  // Size already passed to SetLength,
  // the REAL size of RESULT.
  ActualResultLen,
  // Position of aFindString is aSourceString.
  CurrentPos,
  // Last position the aFindString was found at.
  LastPos,
  // Bytes to copy (that is, lastpos to this pos).
  BytesToCopy,
  // The "running" result length, not the actual one.
  ResultLen,
  // Length of aFindString, to save
  // calling LENGTH repetitively.
  FindLen,
  // Length of aReplaceString, for the same reason.
  ReplaceLen,
  SourceLen         : Integer;
  // This is where I explain the
  // TYPE TFastPosProc from earlier!
  FastPosProc       : TFastPosProc;
begin
//As this function has the option of being case-insensitive, I’d need to call
// either FastPOS or FastPOSNoCase. The problem is that you’d have to do this
// within a loop. This is a bad idea, since the result never changes throughou
//t the whole operation–in which case we can determine it in advance, like so
//:
  if CaseSensitive then
    FastPosProc := FastPOS
  else
    FastPOSProc := FastPOSNoCase;
  // I don't think I actually need
  // this, but I don't really mind!
  Result := '';
  // Get the lengths of the strings.
  FindLen := Length(aFindString);
  ReplaceLen := Length(aReplaceString);
  SourceLen := Length(aSourceString);
  // If we already have room for the replacements,
  // then set the length of the result to
  // the length of the SourceString.
  if ReplaceLen <= FindLen then
    ActualResultLen := SourceLen
  else
    // If not, we need to calculate the
    // worst-case scenario.
    // That is, the Source consists ONLY of
    // aFindString, and we're going to replace
    // every one of them!
    ActualResultLen :=
      SourceLen +
      (SourceLen * ReplaceLen div FindLen) +
      ReplaceLen;
  // Set the length of Result; this
  // will assign the memory, etc.
  SetLength(Result,ActualResultLen);
  CurrentPos := 1;
  ResultLen := 0;
  LastPos := 1;
//Again, I’m eliminating an IF statement in a loop by repeating code–this ap
//proach results in very slightly larger code, but if ever you can trade some
//memory in exchange for speed, go for it!
  if ReplaceLen > 0 then begin
    repeat
      // Get the position of the first (or next)
      // aFindString in aSourceString.
      // Note that there's no If CaseSensitive,
      // I just call FastPOSProc, which is pointing
      // to the correct pre-determined routine.
      CurrentPos :=
        FastPosProc(aSourceString, aFindString,
          SourceLen, FindLen, CurrentPos);
      // If 0, then we're finished.
      if CurrentPos = 0 then break;
      // Number of bytes to copy from the
      // source string is CurrentPos - lastPos,
      // i.e. " cat " in "the cat the".
      BytesToCopy := CurrentPos-LastPos;
      // Copy chars from aSourceString
      // to the end of Result.
      MyMove(aSourceString[LastPos],
        Result[ResultLen+1], BytesToCopy);
      // Copy chars from aReplaceString to
      // the end of Result.
      MyMove(aReplaceString[1],
        Result[ResultLen+1+BytesToCopy], ReplaceLen);
      // Remember, using COPY would copy all of
      // the data over and over again.
      // Never fall into this trap (like a certain
      // software company did).
      // Set the running length to
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;
      // Set the position in aSourceString to where
      // we want to continue searching from.
      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until false;
  end else begin
    // You might have noticed If ReplaceLen > 0.
    // Well, if ReplaceLen = 0, then we're deleting the
    // substrings, rather than replacing them, so we
    // don't need the extra MyMove from aReplaceString.
    repeat
      CurrentPos := FastPos(aSourceString,
        aFindString, SourceLen, FindLen, CurrentPos);
      if CurrentPos = 0 then break;
      BytesToCopy := CurrentPos-LastPos;
      MyMove(aSourceString[LastPos],
        Result[ResultLen+1], BytesToCopy);
      ResultLen := ResultLen +
        BytesToCopy + ReplaceLen;
      CurrentPos := CurrentPos + FindLen;
      LastPos := CurrentPos;
    until false;
  end;
//Now that we’ve finished doing all of the replaces, I just need to adjust th
//e length of the final result:
  Dec(LastPOS);
//Now I set the length to the Length plus the bit of string left. That is, " m
//at" when replacing "the" in "sat on the mat".
  SetLength(Result, ResultLen + (SourceLen-LastPos));
  // If there's a bit of string dangling, then
  // add it to the end of our string.
  if LastPOS+1 <= SourceLen then
    MyMove(aSourceString[LastPos+1],
      Result[ResultLen+1],SourceLen-LastPos);
end;

end.

本文地址:http://com.8s8s.com/it/it4808.htm