{+------------------------------------------------------------
 | Unit FastMem
 |
 | Version: 1.1  Last modified: 04/28/96
 |   Changes 1.0 -> 1.1
 |     Modified the code to compile and run under Delphi 2.0
 | Author : P. Below
 | Project: Common utilities
 | Description:
 |   This Unit contains a number of routines to do memory moves,
 |   fills and swaps as fast as possible. The routines are all
 |   implemented in assembler; the Win32-version will generate
 |   different code from the Win16 version.
 |
 |   The BASM code for the Win32 version preserves all registers
 |   and uses stack calling convention to get around problems
 |   with register usage when optimization is enabled. It could
 |   be sped up by using register calling convention and saving
 |   only the required registers. I've left that for a later date
 |   since it requires some research into Delphi internals first.
 |   The Win32 version is still not fully tested! 
 |
 |   This version will compile as is in BP 7
 +------------------------------------------------------------}
Unit FastMem;
Interface
{$IFDEF VER70}
Type
  Cardinal = Word;
{$ENDIF}

  Procedure MemFill(pTarget: Pointer; numBytes: Cardinal; value: Byte );
  {$IFDEF WIN32} STDCALL; {$ENDIF}
  Procedure MemWordFill(pTarget: Pointer; numWords: Cardinal; value: Word );
  {$IFDEF WIN32} STDCALL; {$ENDIF}
  Procedure MemDWordFill(pTarget: Pointer;
                         numDWords: Cardinal; value: LongInt );
  {$IFDEF WIN32} STDCALL; {$ENDIF}
  Procedure MemMove(pSource, pTarget: Pointer; numBytes: Cardinal);
  {$IFDEF WIN32} STDCALL; {$ENDIF}
  Procedure MemSwap(pSource, pTarget: Pointer; numBytes: Cardinal );
  {$IFDEF WIN32} STDCALL; {$ENDIF}

Implementation

{************************************************************
 * Procedure MemFill
 *
 * Parameters:
 *  pTarget : pointer to memory to fill
 *  numBytes: number of bytes to fill
 *  value   : byte value to fill with
 * Description:
 *  Like System.FillChar, only faster, since it fills in the largest
 *  possible unit (word or dword) as far as possible.
 * Error Conditions:
 *  May generate a GPF if the memory area addressed by pTarget cannot
 *  take numBytes bytes!
 *
 *Created: 05/11/95 16:53:42 by P. Below
 ************************************************************}
Procedure MemFill(pTarget: Pointer; numBytes: Cardinal; value: Byte ); Assembler;
{$IFNDEF WIN32}
    Asm
      mov cx, numBytes
      jcxz @done
      mov al, value
      mov ah, al
      les di, pTarget
      push cx
      cld
      shr cx, 1       (* fill words first *)
      jz @moveone
      rep stosw
    @moveone:         (* fill remaining byte *)
      pop cx
      and cx, 1
      jz  @done
      stosb
    @done:
    End;
{$ELSE}
   { we assume flat memory model here with 32bit near pointers
     and ds=es!
     This code is UNTESTED!
   }
   asm
     pushad
     mov ecx, numBytes
     or ecx, ecx
     jz @done
     mov al, value
     mov ah, al
     mov bx,ax
     shl eax, 16
     mov ax, bx
     mov edi, pTarget
     cld
     push ecx
     shr ecx, 2   (* fill dwords first *)
     jz @fillrest
     rep stosd
   @fillrest:     (* fill rest bytewise *)
     pop ecx
     and ecx, 3
   @fill:
     jz @done
     mov [ edi ], al
     inc edi
     dec ecx
     jmp @fill
   @done:
     popad
   end;
{$ENDIF}

{************************************************************
 * Procedure MemWordFill
 *
 * Parameters:
 *  pTarget : pointer to memory to fill
 *  numWords: number of words to fill
 *  value   : word value to fill with
 * Description:
 *  Fills the target memory area with numWords copies of value.
 * Error Conditions:
 *  May generate a GPF if the memory area addressed by pTarget cannot
 *  take numWord words!
 *
 *Created: 05/11/95 16:53:42 by P. Below
 ************************************************************}
Procedure MemWordFill(pTarget: Pointer; numWords: Cardinal; value: Word );
  Assembler;
{$IFNDEF WIN32}
    Asm
      mov cx, numWords
      jcxz @done
      mov ax, value
      les di, pTarget
      cld
      rep stosw
    @done:
    End;
{$ELSE}
   { we assume flat memory model here with 32bit near pointers
     and ds=es!
     This code is UNTESTED!
   }
   asm
     pushad
     mov ecx, numWords
     or ecx, ecx
     jz @done
     mov ax, value
     mov bx,ax
     shl eax, 16
     mov ax, bx
     mov edi, pTarget
     cld
     push ecx
     shr ecx, 1   (* fill dwords first *)
     jz @fillrest
     rep stosd
   @fillrest:     (* fill rest *)
     pop ecx
     and ecx, 1
     jz @done
     mov [ edi ], ax
   @done:
     popad
   end;
{$ENDIF}

{************************************************************
 * Procedure MemDWordFill
 *
 * Parameters:
 *  pTarget  : pointer to memory to fill
 *  numDWords: number of dwords to fill
 *  value    : dword value to fill with
 * Description:
 *  Fills the target memory area with numDWords copies of value.
 * Error Conditions:
 *  May generate a GPF if the memory area addressed by pTarget cannot
 *  take numDWord dwords!
 *
 *Created: 05/11/95 16:53:42 by P. Below
 ************************************************************}
Procedure MemDWordFill(pTarget: Pointer; numDWords: Cardinal; value: LongInt );
  Assembler;
{$IFNDEF WIN32}
    Asm
      mov cx, numDWords
      jcxz @done
      push ds
      mov ax, word ptr value
      mov dx, word ptr value+2
      lds bx, pTarget
    @loop:
      mov [ bx ], ax
      inc bx
      inc bx
      mov [ bx ], dx
      inc bx
      inc bx
      dec cx
      jnz @loop
      pop ds
    @done:
    End;
{$ELSE}
   { we assume flat memory model here with 32bit near pointers
     and ds=es!
     This code is UNTESTED!
   }
   asm
     pushad
     mov ecx, numDWords
     or ecx, ecx
     jz @done
     mov eax, value
     mov edi, pTarget
     cld
     rep stosd
   @done:
     popad
   end;
{$ENDIF}

Function CheckOverlap: Boolean; Assembler;
  Asm
{$IFNDEF WIN32}
    (* assumes: ds:si source pointer, es:di target pointer
                cx number of bytes to copy.
       returns: al = 0 if no overlap,
                al = 1 if overlap, in which case the pointers
                are returned corrected! *)
    mov ax, ds     (* compare the selectors *)
    mov bx, es
    cmp ax, bx
    jne @no_overlap
    mov ax, si     (* selectors equal, check offset parts *)
    cmp ax, di     (* target <= source is never a problem *)
    jnb @no_overlap
    add ax, cx     (* check for source+count > target, which signifies *)
    cmp ax, di     (* problematic overlap *)
    jna @no_overlap
    mov ax, 1      (* we have overlap, correct pointers to point at *)
    add si, cx     (* last byte in source and target range *)
    add di, cx
    dec si
    dec di
    jmp @done
  @no_overlap:
    sub ax, ax
  @done:
{$ELSE}
    (* assumes: esi source pointer, edi target pointer
                ecx number of bytes to copy.
       returns: al = 0 if no overlap,
                al = 1 if overlap, in which case the pointers
                are returned corrected! *)
    mov eax, esi     (* selectors equal by default, check offset parts *)
    cmp eax, edi
    jnb @no_overlap
    add eax, ecx     (* for source+count > target, which signifies *)
    cmp eax, edi     (* overlap *)
    jna @no_overlap
    mov eax, 1
    add esi, ecx
    add edi, ecx
    dec esi
    dec edi
    jmp @done
  @no_overlap:
    sub eax, eax
  @done:
{$ENDIF}
  End;

{************************************************************
 * Procedure MemMove
 *
 * Parameters:
 *  pSource  : pointer to source memory
 *  pTarget  : pointer to target memory
 *  numBytes : number of bytes to copy
 * Description:
 *  Like System.Move, only faster for larger numbers of bytes,
 *  since it does the copy word or dword-wise, as far as possible.
 *  The procedure checks for overlap of source and target regions
 *  and performs the copy from highest address backwards, if the
 *  regions overlap in a problematic way. The logic is optimized
 *  for the source address beeing even (data word or dword-aligned).
 *
 * Error Conditions:
 *  May cause a GPF if the memory addressed by the pointers has
 *  a size of less than numBytes bytes.
 *
 *Created: 05/11/95 16:58:34 by P. Below
 ************************************************************}
Procedure MemMove(pSource, pTarget: Pointer; numBytes: Cardinal); Assembler;
{$IFNDEF WIN32}
  Asm
    push ds
    mov cx, numBytes
    jcxz @done
    lds si, pSource
    les di, pTarget
    call CheckOverlap
    or al,al
    jnz @overlap
    cld             (* address ascending *)
    push cx
    shr cx, 1       (* move words first *)
    jz @moveone
    rep movsw
  @moveone:         (* move remaining byte *)
    pop cx
    and cx, 1
    jz  @done
    movsb
    jmp @done

  @overlap:
    std
    test cx, 1      (* check for odd count of bytes *)
    je @moverest
    movsb           (* if odd, move one byte first *)
  @moverest:
    shr cx, 1       (* count is now in words *)
    jz @done
    dec si
    dec di
    rep movsw       (* move rest as words *)
    cld

  @done:
    pop ds
  End;
{$ELSE}
   { we assume flat memory model here with 32bit near pointers
     and ds=es!
     This code is UNTESTED!
   }
  asm
    pushad
    mov ecx, numBytes
    or ecx, ecx
    jz @done
    mov esi, pSource
    mov edi, pTarget
    call CheckOverlap
    or al,al
    jnz @overlap
    cld
    push ecx
    shr ecx, 2   (* move dwords first *)
    jz @moverest
    rep movsd
  @moverest:     (* move rest bytewise *)
    pop ecx
    and ecx, 3
    jz @done
    rep movsb
    jmp @done

  @overlap:
    std
    test ecx, 1     (* check for odd count of bytes *)
    jz @move2
    movsb           (* if odd, move one byte first *)
  @move2:
    shr ecx, 1
    dec esi
    dec edi
    jz @done
    test ecx, 1     (* check for odd count of words *)
    jz @move3
    movsw           (* if yes, move one word *)
  @move3:
    shr ecx, 1
    jz @done
    sub esi,2
    sub edi,2
    rep movsd
    cld

  @done:
    popad
  end;
{$ENDIF}

{************************************************************
 * Procedure MemSwap
 *
 * Parameters:
 *  pSource  : pointer to source memory
 *  pTarget  : pointer to target memory
 *  numBytes : number of bytes to swap
 * Description:
 *  exchanges the contents of the memory addressed by the two
 *  pointers. These areas should never overlap or the results
 *  will invariably be somewhat strange! 
 * Error Conditions:
 *  May cause a GPF if the memory addressed by the pointers has
 *  a size of less than numBytes bytes.
 *
 *Created: 05/11/95 16:58:34 by P. Below
 ************************************************************}
Procedure MemSwap(pSource, pTarget: Pointer; numBytes: Cardinal ); Assembler;
{$IFNDEF WIN32}
    Asm
      push ds
      mov cx, numBytes
      jcxz @done
      lds si, pSource
      les di, pTarget
      cmp cx, 1
      je @swapone
    @loop:
      mov ax, [ si ]
      xchg ax, es:[ di ]
      mov [ si ], ax
      inc si
      inc si
      inc di
      inc di
      dec cx
      dec cx
      jz @done
      cmp cx, 1
      ja @loop
    @swapone:
      mov al, [ si ]
      xchg al, es:[ di ]
      mov [ si ], al
    @done:
      pop ds
    End;
{$ELSE}
   { we assume flat memory model here with 32bit near pointers
     and ds=es!
     This code is UNTESTED!
   }
   asm
     pushad
     mov ecx, numBytes
     or ecx, ecx
     jz @done
     mov esi, pSource
     mov edi, pTarget
     cmp ecx, 3
     jna @swapremains
   @loop:
     mov eax, [ esi ]
     xchg eax, [ edi ]
     mov [ esi ], eax
     add esi, 4
     add edi, 4
     sub ecx, 4
     jz @done
     cmp ecx, 3
     ja @loop
   @swapremains:
     cmp ecx, 1
     je @swapone
     mov ax, [ esi ]
     xchg ax, [ edi ]
     mov [ esi ], ax
     add esi, 2
     add edi, 2
     sub ecx, 2
     jz @done
   @swapone:
     mov al, [ esi ]
     xchg al, [ edi ]
     mov [ esi ], al
   @done:
     popad
   end;
{$ENDIF}

End.
