Branch
Hash :
1a08f436
Author :
Date :
2010-08-25T09:23:17
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
\ moveslow.fth 1.5 93/10/21
\ Copyright 1985-1990 Bradley Forthware
\ Mike Saari's blazing `move' ...
\ This implements the MOVE instruction. It is optimized
\ for speed, particularly when longword stores may be used.
\ This version omits the doubleword optimization for machines where
\ doubleword operations do not work.
code (move) ( src dst cnt -- )
\ tos = Count
sp 1 /n* scr nget \ scr = Src address
sp 0 /n* sc1 nget \ sc1 = Dst address
\ sc2 = Temp. data being transferred
\ sc3 = src xor drc, low bits=0 indicates compatible
\ sc4 = Working src in loops
\ (also temp last+1 address)
\ sc5 = Working dst in loops
\ sc6 = Loop index
scr sc1 %g0 subcc \ Src > dst?
> if \ Then copy low-to-high
scr sc1 sc3 xor \ (delay) sc3 low bits=0 indicates compatible
tos h# 10 %g0 subcc \ Enough bytes to bother optimizing?
>= if \ Otherwise, just skip to byte move
sc3 1 %g0 andcc \ (delay) =0 if at least shortword aligned
0= if \ Otherwise, just skip to byte move
scr 1 %g0 andcc \ (delay) Not on halfword boundary?
0<> if \ Ensure halfword alignment (lower)
scr 0 sc2 ldub \ (delay) Load bottom byte
sc2 sc1 0 stb \ Store byte
scr 1 scr add \ Advance by one byte
sc1 1 sc1 add \ "
tos 1 tos sub \ Decrement count
then
sc3 2 %g0 andcc \ =0 if at least longword aligned
0= if \ Otherwise, skip to halfword case
scr 2 %g0 andcc \ (delay) Not on longword boundary?
0<> if \ Ensure longword alignment (lower)
scr 0 sc2 lduh \ (delay) Load bottom halfword
sc2 sc1 0 sth \ Store halfword
scr 2 scr add \ Advance by one halfword
sc1 2 sc1 add \ "
tos 2 tos sub \ Decrement count
then
\ Longword Copy Loop (low-to-high)
tos 3 sc6 andn \ Index w/ even multiples of 4
scr sc6 scr add \ src = src+index
scr 4 sc4 sub \ Working src = src+index-4
sc1 sc6 sc1 add \ dst = dst+index
sc1 4 sc5 sub \ Working dst = dst+index-4
%g0 sc6 sc6 subcc \ Negate index
begin
< while
sc6 4 sc6 addcc \ (delay) Increment index
sc4 sc6 sc2 ld \ Load longword
repeat
sc2 sc5 sc6 st \ (delay) Store longword
tos 3 tos and \ At end, adjust cnt for few remaining
else \ Halfword Copy Loop (low-to-high)
nop \ (delay)
tos 1 sc6 andn \ Index w/ even multiples of 2
scr sc6 scr add \ src = src+index
scr 2 sc4 sub \ Working src = src+index-2
sc1 sc6 sc1 add \ dst = dst+index
sc1 2 sc5 sub \ Working dst = dst+index-2
%g0 sc6 sc6 subcc \ Negate index
begin
< while
sc6 2 sc6 addcc \ (delay) Increment index
sc4 sc6 sc2 lduh \ Load halfword
repeat
sc2 sc5 sc6 sth \ (delay) Store halfword
tos 1 tos and \ At end, adjust cnt for few remaining
then
then
then \ Now do a normal byte move for all remaining bytes (at top)
\ Byte Copy Loop (low-to-high)
\ (tos = index)
scr tos scr add
scr 1 sc4 sub \ Working src = src+cnt-1
sc1 tos sc1 add
sc1 1 sc5 sub \ Working dst = dst+cnt-1
%g0 tos sc6 subcc \ Negate index
begin
< while
sc6 1 sc6 addcc \ (delay) Increment cnt
sc4 sc6 sc2 ldub \ Load byte
repeat
sc2 sc5 sc6 stb \ (delay) Store byte
else \ Copy high-to-low case
nop \ (delay)
tos h# 10 %g0 subcc \ Enough bytes to bother optimizing?
>= if \ Otherwise, just skip to byte move
sc3 1 %g0 andcc \ (delay) =0 if at least shortword aligned
0= if \ Otherwise, just skip to byte move
scr tos sc4 add \ (delay) Calculate last+1 address
sc4 1 %g0 andcc \ Not on halfword boundary? (at top)
0<> if \ Ensure halfword alignment (at top)
sc4 -1 sc2 ldub \ (delay) Load top byte
tos 1 tos sub \ Decrement count
sc2 sc1 tos stb \ Store byte
sc4 1 sc4 sub \ Recalculate last+1 address
then
sc3 2 %g0 andcc \ =0 if at least longword aligned
0= if \ Otherwise, skip to halfword case
sc4 2 %g0 andcc \ (delay) Not on longword boundary? (at top)
0<> if \ Ensure longword alignment (at top)
sc4 -2 sc2 lduh \ (delay) Load top halfword
tos 2 tos sub \ Decrement count
sc2 sc1 tos sth \ Store halfword
sc4 2 sc4 sub \ Recalculate last+1 address
then
\ Longword Copy Loop (high-to-low)
scr 4 sc4 add \ Working src = src+4
sc1 4 sc5 add \ Working dst = dst+4
tos 4 sc6 subcc \ Loop index = cnt-4
begin
>= while
sc6 4 sc6 subcc \ (delay) Decrement index
sc4 sc6 sc2 ld \ Load longword
repeat
sc2 sc5 sc6 st \ (delay) Store longword
tos 3 tos and \ At end, adjust cnt for few remaining
else \ Halfword Copy Loop (high-to-low)
nop \ (delay)
scr 2 sc4 add \ Working src = src+2
sc1 2 sc5 add \ Working dst = dst+2
tos 2 sc6 subcc \ Loop index = cnt-2
begin
>= while
sc6 2 sc6 subcc \ (delay) Decrement index
sc4 sc6 sc2 lduh \ Load halfword
repeat
sc2 sc5 sc6 sth \ (delay) Store halfword
tos 1 tos and \ At end, adjust cnt for few remaining
then
then
then \ Now do a normal byte move for all remaining bytes (at bottom)
\ Byte Copy Loop (high-to-low)
scr 1 sc4 add \ Working src = src+1
sc1 1 sc5 add \ Working dst = dst+1
tos 1 tos subcc \ Loop index = cnt-1
begin
>= while
tos 1 tos subcc \ (delay) Decrement index
sc4 tos sc2 ldub \ Load byte
repeat
sc2 sc5 tos stb \ (delay) Store byte
then
sp 2 /n* tos nget \ Delete 3 stack items
sp 3 /n* sp add \ "
c;
defer move
' (move) is move