;
;   Life in 68020!  Three passes.
;
          public  _pass1
          public  _pass2
          public  _pass3
          public  _pass3t
;
;   First pass:  Calculates t+(t<<1)+(t>>1) for a huge bit string.
;
;   Arguments:  long *srca, *dsta, *dstb, len
;
;   srca is the source.
;   dsta is where the low-order destination is written
;   dstb is where the high-order destination is written
;   len is the total number of longwords needed
;
;   The sum is formed independently for every bit position, so each
;   position yields a two-bit result: the low bit goes to dsta and the
;   high (carry) bit goes to dstb.  The bit string is treated as
;   continuous across longword boundaries: bit 0 of the previous longword
;   feeds bit 31 of the t>>1 term, and bit 31 of the following longword
;   feeds bit 0 of the t<<1 term.
;
;   Register use inside the loop:
;       a0 = source pointer          a1 = low-order output pointer
;       a2 = high-order output ptr   d7 = 32-bit longword counter
;       d5 = previous source longword (0 before the first one)
;       d0-d4 = scratch
;
;   All registers except d0 are preserved; d0 is returned as 0.
;
;   Note: to obtain the neighbouring bit of the following longword the
;   loop reads the byte at (a0) after the post-increment, so on the final
;   iteration one byte just past the len-th source longword is read.
;
_pass1:   movem.l d1/d2/d3/d4/d5/d7/a0/a1/a2,-(sp) ; 9 regs = 36 bytes; args now start at 40(a7)
          move.l  40(a7),a0               ; a0 = srca
          move.l  44(a7),a1               ; a1 = dsta
          move.l  48(a7),a2               ; a2 = dstb
          move.l  52(a7),d7               ; d7 = len
          move.l  #0,d5                   ; no previous longword yet
          bra     pass1in                 ; enter at the loop test so len = 0 does nothing
;
;   dbra only counts in the low word of d7.  When the low word runs out,
;   the code swaps halves and counts the high word down as well; in2
;   swaps back before resuming, giving a full 32-bit count.
;
in2:      swap    d7
inagain:  move.l  (a0)+,d0                ; d0 = t, the current source longword
          bne     inover                  ; non-zero: take the general path
;
;   Fast path for t == 0: only the two neighbour bits can contribute, and
;   they land in different bit positions, so the high-order result is 0.
;
          lsr.b   #1,d5                   ; X = bit 0 of the previous longword
          roxr.l  #1,d0                   ; d0 was 0, so this leaves X in bit 31
          move.l  #0,d5                   ; current longword (0) becomes "previous"
          move.b  (a0),d3                 ; peek at the first byte of the next longword
          add.b   d3,d3                   ; X = its bit 7, i.e. bit 31 of the next longword
          addx.w  d5,d0                   ; d5 = 0 and low word of d0 = 0: puts X in bit 0
          move.l  d0,(a1)+                ; low-order result
          move.l  d5,(a2)+                ; high-order result is 0
          dbra    d7,inagain              ; same loop tail as at pass1in
          swap    d7
          dbra    d7,in2
          bra     inov2
;
;   General path.
;
inover:   move.l  d0,d1                   ; d1 will become t>>1
          move.l  d0,d2                   ; d2 will become t<<1
          lsr.b   #1,d5                   ; X = bit 0 of the previous longword
          roxr.l  #1,d1                   ; d1 = t>>1 with X shifted in at bit 31
          move.l  d0,d5                   ; remember t for the next iteration (move leaves X alone)
          move.b  (a0),d3                 ; peek at the first byte of the next longword
          add.b   d3,d3                   ; X = bit 31 of the next longword
          addx.l  d2,d2                   ; d2 = t+t+X = t<<1 with X in bit 0
;
;   Bitwise full adder over t (d0), t>>1 (d1) and t<<1 (d2).
;
          move.l  d0,d3
          eor.l   d1,d0                   ; d0 = t ^ (t>>1)
          and.l   d1,d3                   ; d3 = t & (t>>1)            (first carry)
          move.l  d2,d4
          and.l   d0,d4                   ; d4 = (t<<1) & (t ^ (t>>1)) (second carry)
          eor.l   d2,d0                   ; d0 = sum bit of the three terms
          eor.l   d4,d3                   ; d3 = carry bit (the two carries never coincide)
          move.l  d0,(a1)+                ; low-order result
          move.l  d3,(a2)+                ; high-order result
pass1in:  dbra    d7,inagain
          swap    d7
          dbra    d7,in2
inov2:    movem.l (sp)+,d1/d2/d3/d4/d5/d7/a0/a1/a2
          move.l  #0,d0                   ; return 0
          rts
;
;   This second pass does the main work.  It's also the main time sink.
;
;   Arguments:  long *srca, *srcb, *srcc, *dsta, mod, len
;
;   NOTE(review): the code in _pass2 loads only five longword arguments
;   (srca, srcb, srcc, mod, len) and stores its result through srca;
;   *dsta looks stale -- confirm against the callers.
;
;   srca = main source.
;   srcb = low order source from above
;   srcc = high order source from above
;   mod = byte offset to next/previous row
;   len = number of longwords to do (again)
;
;   NOTE(review): five longword arguments are loaded below, from 60(a7)
;   through 76(a7): srca, srcb, srcc, mod, len.  The header comment for
;   this routine lists six (it includes *dsta).  The result is stored
;   back through the first pointer (a0), i.e. the update is done in
;   place -- confirm the intended argument list against the callers.
;
;   Register use inside the loop:
;       a0 = cell words (read, then overwritten with the new generation)
;       a2 = low-order sums,  this row      a3 = high-order sums, this row
;       a5 = low-order sums,  row - mod     a6 = high-order sums, row - mod
;       a4 = low-order sums,  row + mod     a1 = high-order sums, row + mod
;       d7 = 32-bit longword counter        d5 = current cell word
;       d0/d1/d2 = bits 0/1/2 of the per-position count being built
;       d3/d4 = scratch                     d6 = copy of mod
;
;   Assuming srcb/srcc hold the low/high results of _pass1 for srca, the
;   count formed for each bit position is the number of set cells in the
;   three-by-three block centred on it (centre cell included).  A result
;   bit is set when that count is 3, or when it is 4 and the cell itself
;   is set.
;
;   All registers except d0 are preserved; d0 is returned as 0.
;
_pass2:   movem.l d1/d2/d3/d4/d5/d6/d7/a0/a1/a2/a3/a4/a5/a6,-(sp) ; 14 regs = 56 bytes; args start at 60(a7)
          move.l  60(a7),a0               ; a0 = srca (cells, updated in place)
          move.l  64(a7),a2               ; a2 = srcb (low-order sums)
          move.l  68(a7),a3               ; a3 = srcc (high-order sums)
          move.l  72(a7),d0               ; d0 = mod
          move.l  76(a7),d7               ; d7 = len
          move.l  d0,d6
          lea     (a2,d6.l),a4            ; a4 = srcb + mod
          lea     (a3,d6.l),a1            ; a1 = srcc + mod
          neg.l   d0
          lea     (a2,d0.l),a5            ; a5 = srcb - mod
          lea     (a3,d0.l),a6            ; a6 = srcc - mod
          bra     pass2in                 ; enter at the loop test so len = 0 does nothing
;
;   in3 swaps the counter halves back after the high-word dbra; together
;   with the swap/dbra pair at the loop tail this gives a 32-bit count.
;
in3:      swap    d7
;
;   Add the three low-order words (weight 1 each):
;   d0 = sum bit, d1 = carry (weight 2).
;
in4:      move.l  (a2)+,d0
          move.l  d0,d1
          move.l  (a5)+,d2
          eor.l   d2,d0
          and.l   d2,d1
          move.l  (a4)+,d2
          move.l  d2,d3
          and.l   d0,d3
          eor.l   d2,d0                   ; d0 = bit 0 of the count
          or.l    d3,d1                   ; d1 = carry out of the low-order column
;
;   Add the high-order words of the two neighbouring rows to that carry
;   (weight 2 each): d1 = running sum bit, d2 = XOR of the carries out.
;
          move.l  (a1)+,d2
          move.l  d2,d3
          and.l   d1,d2
          eor.l   d3,d1
          move.l  (a6)+,d3
          move.l  d3,d4
          and.l   d1,d3
          eor.l   d4,d1
          eor.l   d3,d2
          move.l  (a0),d5                 ; d5 = current cell word
          beq     in4q                    ; all cells clear: use the shortcut below
;
;   Add this row's high-order word as well.
;
          move.l  (a3)+,d3
          move.l  d3,d4
          and.l   d1,d3
          eor.l   d4,d1                   ; d1 = bit 1 of the count
          eor.l   d3,d2                   ; d2 = bit 2 of the count
;
;   result = (~bit0 ^ bit1) & (bit1 | cell) & (bit2 ^ bit1)
;   which is 1 exactly for count == 3, or count == 4 with the cell set.
;
          not.l   d0
          eor.l   d1,d0
          eor.l   d1,d2
          or.l    d5,d1
          and.l   d1,d0
          and.l   d2,d0
          move.l  d0,(a0)+                ; store the new cell word in place
pass2in:  dbra    d7,in4
          swap    d7
          dbra    d7,in3
out4q:    movem.l (sp)+,d1/d2/d3/d4/d5/d6/d7/a0/a1/a2/a3/a4/a5/a6
          move.l  #0,d0                   ; return 0
          rts
;
;   A minor optimization.  If the current long-word is zero, we can make some
;   simplifications.
;
;   With every cell in the word clear, only the "count == 3" case can set
;   a bit: result = bit0 & bit1 & ~d2.  This row's high-order word is
;   skipped rather than added; that relies on it being zero whenever the
;   cell word is zero, which is what _pass1 writes on its zero path
;   (assuming srcc is _pass1's high-order output for srca).
;
in4q:     not.l   d2
          and.l   d2,d0
          and.l   d1,d0
          move.l  d0,(a0)+                ; store the new cell word in place
          add.w   #4,a3                   ; step past the unread high-order word
          dbra    d7,in4                  ; same loop tail as at pass2in
          swap    d7
          dbra    d7,in3
          bra     out4q
;
;   Third pass.  Clears the perimeter.
;
;   Arguments:  long *dst, width, height
;
;   dst = destination (and source) to `clear'
;   width = width of screen in bytes (must be a multiple of 4)
;   height = height of screen in pixels
;
;   Zeroes the first and last rows completely, and at every boundary
;   between two consecutive rows clears bit 0 of the last word of the
;   upper row and bit 15 of the first word of the lower row (the two edge
;   columns, if the most significant bit is the leftmost pixel).
;
;   The loop counts use dbra, so only the low 16 bits of width/4 and of
;   height-1 are counted.  All registers except d0 are preserved; d0 is
;   returned as 0.
;
_pass3:   movem.l d1/d2/d7/a0,-(sp)       ; 4 regs = 16 bytes; args start at 20(a7)
          move.l  20(a7),a0               ; a0 = dst
          move.l  #0,d0                   ; d0 = 0, the fill value
          move.l  24(a7),d7
          lsr.w   #2,d7                   ; d7 = width / 4 = longwords per row
          bra     loop5
in5:      move.l  d0,(a0)+                ; clear the first row
loop5:    dbra    d7,in5
          sub.w   #2,a0                   ; a0 = 2 bytes before the row 0 / row 1 boundary
          move.l  28(a7),d7
          sub.l   #1,d7                   ; height - 1 row boundaries
          move.l  24(a7),d2               ; d2 = width, the row stride
          move.l  #$fffe7fff,d1           ; clears bits 16 and 15 of the straddling longword
          bra     loop6
in6:      and.l   d1,(a0)                 ; high word = end of upper row, low word = start of lower row
          add.w   d2,a0                   ; on to the next row boundary
loop6:    dbra    d7,in6
          sub.w   d2,a0                   ; undo the last stride ...
          add.w   #2,a0                   ; ... and the -2: a0 = start of the last row
          move.l  24(a7),d7
          lsr.w   #2,d7                   ; longwords per row again
          bra     loop7
in7:      move.l  d0,(a0)+                ; clear the last row
loop7:    dbra    d7,in7
          movem.l (sp)+,d1/d2/d7/a0
          move.l  #0,d0                   ; return 0
          rts
;
;   Third pass.  Clears the perimeter, doing a torus wrap.
;
;   Arguments:  long *dst, width, height
;
;   dst = destination (and source) to `clear'
;   width = width of screen in bytes (must be a multiple of 4)
;   height = height of screen in pixels
;
;   Instead of zeroing the border, this routine fills it from the
;   opposite side of the bitmap:
;       row 0            <- copy of row height-2
;       row height-1     <- copy of row 1
;   and then, for every row (the two copied rows included):
;       bit 0 of the last byte   <- bit 6 of the first byte
;       bit 7 of the first byte  <- bit 1 of the last byte
;
;   The row offset is formed with mulu.w, so only the low 16 bits of
;   height-2 and of width take part in it; loop counts use dbra and are
;   likewise 16 bits.  All registers except d0 are preserved; d0 is
;   returned as 0.
;
_pass3t:  movem.l d1/d2/d7/a0/a1,-(sp)    ; 5 regs = 20 bytes; args start at 24(a7)
          move.l  24(a7),a0               ; a0 = dst (row 0)
          move.l  #0,d0                   ; not read before d0 is reloaded with width below
          move.l  32(a7),d2               ; d2 = height
          sub.l   #2,d2
          mulu.w  30(a7),d2               ; 30(a7) = low word of width: d2 = (height-2) * width
          lea     (a0,d2.l),a1            ; a1 = start of row height-2
          move.l  28(a7),d7
          lsr.w   #2,d7                   ; d7 = width / 4 = longwords per row
          bra     loop5t
in5t:     move.l  (a1)+,(a0)+             ; row height-2 -> row 0
loop5t:   dbra    d7,in5t
;
;   Here a0 points at row 1 and a1 at row height-1.
;
          move.l  28(a7),d7
          lsr.w   #2,d7                   ; longwords per row again
          bra     loop5u
in5u:     move.l  (a0)+,(a1)+             ; row 1 -> row height-1
loop5u:   dbra    d7,in5u
;
;   Column wrap, one row per iteration.
;
          move.l  24(a7),a0               ; a0 = first byte of the row
          move.l  28(a7),d0               ; d0 = width, the row stride
          lea     -1(a0,d0.l),a1          ; a1 = last byte of the row
          move.l  32(a7),d7               ; d7 = height = number of rows
          bra     int7t
int6t:    btst    #6,(a0)                 ; test bit 6 of the first byte
          beq     fb1
          or.b    #1,(a1)                 ; it was set: set bit 0 of the last byte
          btst    #1,(a1)                 ; test bit 1 of the last byte
          beq     fb2
fb4:      or.b    #$80,(a0)               ; it was set: set bit 7 of the first byte
          bra     fb3
fb1:      and.b   #$fe,(a1)               ; bit 6 was clear: clear bit 0 of the last byte
          btst    #1,(a1)                 ; test bit 1 of the last byte
          bne     fb4
fb2:      and.b   #$7f,(a0)               ; it was clear: clear bit 7 of the first byte
fb3:      add.w   d0,a0                   ; step both pointers to the next row
          add.w   d0,a1
int7t:    dbra    d7,int6t
          movem.l (sp)+,d1/d2/d7/a0/a1
          move.l  #0,d0                   ; return 0
          rts