macro CPU_START
	; Debug marker: writes &10 and then &55 to port &7Fxx.
	; NOTE(review): on the CPC this selects the border and changes its colour,
	; presumably to visualise raster time - the only uses in view are commented out.
	ld bc,&7f10
	out (c),c
	ld bc,&7f55
	out (c),c
mend

macro CPU_END
	; Counterpart of CPU_START: writes &10 and then &44 to port &7Fxx.
	; NOTE(review): presumably restores the border colour at the end of the
	; measured section - the only uses in view are commented out.
	ld bc,&7f10
	out (c),c
	ld bc,&7f44
	out (c),c
mend

; Amount by which the wave generator's per-step increment shrinks (see singen: ld bc,-AMP).
AMP equ 14
; Number of dots. POINTS_LIST/VELOCITIES_LIST hold 2*NUM_DOTS 16-bit entries
; (X and Y per dot) and each erase buffer holds 4*NUM_DOTS screen addresses.
NUM_DOTS equ 64

list
main:
nolist
	; Entry point: one-time setup, then execution falls through into `frame`.
	;
	; NOTE: initPoints reads the machine-code bytes starting at `main` as its
	; pseudo-random source, so the instruction sequence below doubles as data.
	; Changing any instruction here changes the dots' initial values.
	di			; interrupts stay off: erasePoints borrows SP as a data pointer
	ld sp,&0400

	; mode 0
	ld bc,&7f8c
	out (c),c

	; load palette
	ld de,pal0
	call palSet

	; Build the 256-byte wave table at SINEDATA. The loop ends when IXL wraps
	; to 0, so SINEDATA is assumed to be 256-byte aligned.
	; H is the current value, DE the per-step increment, and the increment
	; itself drops by AMP every step: the value rises and falls back to 0 over
	; 128 steps. Entries 0..127 receive H, entries 128..255 receive -H.
	ld hl,0
	ld de,889	; AMP*&3f + AMP/2 (typed by hand because WinAPE fails to evaluate the expression)
	ld bc,-AMP
	ld ix,SINEDATA + &80
singen:
	xor a
	sub h
	ld (ix+0),a		; second half of the table: -H
	ld (ix-&80),h		; first half of the table:  +H
	add hl,de		; value += increment
	ex de,hl
	add hl,bc		; increment -= AMP
	ex de,hl
	inc ixl
	jr nz,singen

	; Per-scanline address tables, one for each screen buffer.
	ld de,&C000
	ld hl,VRAMLINES
	call generate_vram64
	ld de,&8000
	ld hl,VRAMLINES+512
	call generate_vram64

	; Clear Screen: zero &8000..&FFFF (both buffers).
	; The first byte is written by LD (HL),0, so LDIR only has to propagate it
	; to the remaining 32767 bytes. The previous count of 32768 made the last
	; copy wrap DE past &FFFF and write a stray 0 to address &0000.
	ld hl,&8000
	ld (hl),0
	ld de,&8001
	ld bc,32767
	ldir

	;call change_width_64

	; CRTC setup: select a register through port &BCxx, write its value
	; through port &BDxx (INC B turns &BC into &BD).
	ld bc,&bc01		; R1 = &20
	out (c),c
	inc b
	ld a,&20
	out (c),a

	ld bc,&bc02		; R2 = &2a
	out (c),c
	inc b
	ld a,&2a
	out (c),a

	ld bc,&bc06		; R6 = &20
	out (c),c
	inc b
	ld a,&20
	out (c),a

	ld bc,&bc07		; R7 = &22
	out (c),c
	inc b
	ld a,&22
	out (c),a

	call initPoints

frame:
	; Main loop, one pass per displayed frame.
	; NOTE: the instruction sequence is intentionally unchanged - initPoints
	; samples the code bytes following `main` as pseudo-random data.

	; Show the buffer selected by buffNum (set by the previous drawPoints).
	ld a,(buffNum)
	or a
	jr z,select_page_C000

select_page_8000:
	ld bc,&bc0c		; select CRTC register 12
	out (c),c
	ld bc,&bd20		; value &20
	out (c),c
	jr afterSelectPage

select_page_C000:
	ld bc,&bc0c		; select CRTC register 12
	out (c),c
	ld bc,&bd30		; value &30
	out (c),c

afterSelectPage:
	call wait4vsync

;CPU_START
	call erasePoints	; wipe the dots recorded for the buffer about to be redrawn
	call movePoints		; positions += velocities
	call drawPoints		; advance frameNum/buffNum and plot every dot
	call script		; per-phase animation logic
;CPU_END
	jp frame

wait4vsync:
	; Polls bit 0 of port &F5xx: first until it reads 0, then until it reads 1,
	; i.e. returns at the start of the next pulse.
	; Out: B = &F5, A and flags clobbered. C is left as the caller had it.
	ld b,&f5
vsync0:
	in a,(c)
	rra			; bit 0 -> carry
	jr c,vsync0		; still set: wait for it to clear
vsync1:
	in a,(c)
	rra
	jr nc,vsync1		; clear: wait for it to become set
	ret


move2:
	; Script phase 2: repositions one dot per frame, using wave-table values
	; scaled by a factor derived from frameNum.
	; Each multiply result gets +127 and is stored in the high byte of X / Y.
	; Clobbers AF, BC, DE, HL, IXL.

	; IXL = ((frameNum >> 1) & 255) >> 1 : the scale factor for both axes
	ld hl,(frameNum)
	sra h
	rr l
	ld a,l
	srl a
	ld ixl,a

	; E = -(2*frameNum + 128) : index into SINEDATA
	; L = 2*E                : offset of the dot record inside POINTS_LIST
	ld a,(frameNum)
	add a
	add 128
	ld d,SINEDATA/256
	neg
	ld e,a
	ld h,POINTS_LIST/256
	add a
	ld l,a

	; B = table[E], C = table[E+64]
	ld a,(de)
	ld b,a
	ld a,e
	add 64
	ld e,a
	ld a,(de)
	ld c,a

	; X high byte = ((scale * B) >> 7) + 127
	push hl
	ld a,ixl
	call mul_signed8x8_shr7	; preserves C and IXL
	pop hl
	add 127
	inc l
	ld (hl),a

	; Y high byte = ((scale * C) >> 7) + 127
	push hl
	ld a,ixl
	ld b,c
	call mul_signed8x8_shr7
	pop hl
	add 127
	inc l
	inc l
	ld (hl),a
	ret

move1:
	; Script phase 1: repositions the dot record at POINTS_LIST + 4*frameNum
	; (8-bit offset) from two wave-table lookups.
	;   X high byte = (table[f] >> 1) + 127
	;   Y high byte = (table[2*(f+64)] >> 1) + 127      (f = low byte of frameNum)
	; Clobbers AF, BC, DE, HL.
	ld a,(frameNum)
	ld d,SINEDATA/256
	ld e,a
	ld h,POINTS_LIST/256
	add a
	add a
	ld l,a			; L = 4*f : start of the dot record

	ld a,(de)
	sra a
	add 127
	ld b,a			; B = new X high byte

	ld a,e
	add 64
	add a
	ld e,a			; E = 2*(f+64)
	ld a,(de)
	sra a
	add 127
	ld c,a			; C = new Y high byte

	inc l
	ld (hl),b		; record+1 : X high
	inc l
	inc l
	ld (hl),c		; record+3 : Y high
	ret

script:
	; Phase dispatcher, called once per frame.
	; phase = (high byte of frameNum) >> 1
	;   0     : nothing
	;   1     : move1
	;   2     : move2
	;   5     : resetStuff (which also zeroes the high byte of frameNum)
	;   other : move3
	; On a second run (secondRun <> 0) the colour selection in drawPoints is
	; re-patched every frame: mask = 1, offset = phase-1.
	ld a,(frameNum+1)
	srl a
	or a
	ret z

	ld b,a			; B = phase
	ld a,(secondRun)
	or a
	jr z,noColFlip
	ld a,1
	ld (autoM_andone+1),a
	ld a,b
	dec a
	ld (autoM_addzero+1),a
noColFlip:

	dec b
	jr z,move1		; phase 1
noMove1:
	dec b
	jr z,move2		; phase 2
noMove2:

	dec b
	dec b
	dec b
	jr nz,move3		; anything but phase 5

	jp resetStuff		; phase 5: tail call, returns to our caller

; 0 until resetStuff runs for the first time (it stores 3 here).
; script tests it for non-zero to enable its per-frame colour patching.
secondRun:
db 0

; move3: default script phase.
; Copies the emitter position (gpos) into the dot record at
; POINTS_LIST + 4*frameNum (8-bit offset), then advances the emitter:
; X bounces when it wraps around, Y bounces on overflow and gets a
; constant downward pull of 32 per frame.
; Clobbers AF, BC, DE, HL.
move3:
	ld a,(frameNum)
	ld d,POINTS_LIST/256
	add a:add a:ld e,a

	ld hl,gpos
	ld bc,4:ldir	; 4 bytes: X lo, X hi, Y lo, Y hi. Leaves BC = 0 (B is relied on below)

	; X: gpos += gvel, C keeps the old X high byte
	ld de,(gvel):ld hl,(gpos):ld c,h:add hl,de:ld (gpos),hl

	; Bounce only if bit 7 of X high changed AND old X / velocity had different
	; bit 7 (a wrap-around rather than a crossing of the middle)
	ld a,c:xor h:bit 7,a:jr z,didntHitSides
	ld a,c:xor d:bit 7,a:jr z,didntHitSides
		ld a,c:ld (gpos+1),a	; restore the old X high byte
		xor a:sub e:ld e,a	; DE = -DE (16-bit negate, low byte first)
		sbc a:sub d:ld d,a
		ld (gvel),de
	didntHitSides:


	; Y: gpos+2 += gvel+2, A keeps the old Y high byte
	ld de,(gvel+2):ld hl,(gpos+2):ld a,h:add hl,de:ld (gpos+2),hl

	ld c,32		; BC = 32 (B is still 0 from the LDIR); LD leaves the carry from ADD HL,DE intact
	; carry flag check for Y over 255 (EX DE,HL does not touch flags either)
	ex de,hl	; HL = Y velocity, DE = new Y position
	jr nc,didntHitGround
	bit 7,h:jr nz,didntHitGround	; negative velocity: the carry is not an overflow
	bit 7,d:jr nz,didntHitGround
		ld (gpos+3),a	; restore the old Y high byte
		xor a:sub l:ld l,a	; HL = -HL
		sbc a:sub h:ld h,a
		add hl,bc:add hl,bc:add hl,bc	; lose 3*32 of the rebound speed
	didntHitGround:

	add hl,bc	; constant pull of 32 per frame
	ld (gvel+2),hl
ret

; resetStuff: called by script at phase 5.
; Resets the high bytes of the emitter state, zeroes the high byte of
; frameNum, marks the second run, re-patches the self-modifying operands in
; drawPoints/drawPixel, copies plotBytes2 over plotBytes, re-initialises the
; dots and loads pal1.
; Only high bytes of gpos/gvel are written; low bytes keep their current values.
resetStuff:
	ld a,128:ld (gpos+1),a	; emitter X high = 128
	ld a,16:ld (gpos+3),a	; emitter Y high = 16
	ld a,3:ld (gvel+1),a:ld (secondRun),a	; X velocity high = 3; secondRun becomes non-zero
	ld (autoM_andone+1),a:ld (autoM_lasthack1+1),a:ld (autoM_lasthack2+1),a	; value 3 instead of 7
	xor a:ld (gvel+3),a	; Y velocity high = 0
	ld (autoM_addzero+1),a	; add 0 (color offset)
	ld (autoM_adda),a	; NOP
	ld (frameNum+1),a	; restart the script at phase 0

	
ld a,%11000000:ld (autoM_hack2+1),a	; new operand for the LD D,n in drawPixel
ld a,&1f:ld (autoM_hack3),a:ld (autoM_hack4),a	; &1f = RRA replaces the two NOPs in drawPixel


	ld hl,plotBytes2:ld de,plotBytes:ld bc,16:ldir	; leaves BC = 0

	; Overwrite the 4-byte "cp d / jr c,+1 / ld a,d" sequences in drawPixel with
	; NOP, NOP, NOP, AND D: first two bytes from BC = &0000, then bytes 2..3
	; from C = &00, B = &A2
	ld (autoM_aman1),bc:ld (autoM_aman2),bc
	ld b,&A2	; and d
	ld (autoM_aman1+2),bc:ld (autoM_aman2+2),bc
	

	call initPoints

	ld de,pal1:call palSet
ret


erasePoints:
erasePreviousDots:
	; Zeroes every screen byte whose address is stored in the current erase
	; buffer. SP is borrowed as the read pointer, so interrupts must be off.
	; drawPoints patches the high byte of the LD SP operand below
	; (autoM_eraseAddrPrev+2) to choose the buffer.
	; Clobbers AF, B, DE.
	ld (SP_SAVE),sp

autoM_eraseAddrPrev:
	ld sp,CLEAR_DOTS_BUFF1

	xor a			; value written to the screen
	; NOTE(review): 4*NUM_DOTS is 256, which does not fit in B; this presumably
	; relies on the assembler keeping the low byte (0), which makes the loop
	; run 256 times - confirm.
	ld b,4*NUM_DOTS
eraseDotsLoop:
	pop de			; next recorded address
	bit 7,d
	jr z,skipErase		; below &8000: the erase storage might not be filled yet (zero or junk)
	ld (de),a
skipErase:
	djnz eraseDotsLoop

	ld sp,(SP_SAVE)
	ret

movePoints:
	; Adds each 16-bit velocity to the matching 16-bit coordinate:
	; POINTS_LIST[i] += VELOCITIES_LIST[i] for i = 0 .. 2*NUM_DOTS-1
	; (little-endian words, X and Y of every dot).
	; Clobbers AF, BC, DE, HL, IX.
	ld ix,VELOCITIES_LIST
	ld hl,POINTS_LIST
	ld a,2*NUM_DOTS		; A counts entries; B is needed for the velocity
movePointsLoop:
	ld c,(ix+0)		; BC = velocity
	ld b,(ix+1)
	inc ix
	inc ix

	ld e,(hl)		; DE = coordinate
	inc hl
	ld d,(hl)
	dec hl

	ex de,hl
	add hl,bc		; coordinate += velocity
	ex de,hl

	ld (hl),e		; write it back and step to the next entry
	inc hl
	ld (hl),d
	inc hl

	dec a
	jr nz,movePointsLoop
	ret

; drawPoints: advances the frame counter, flips the buffers and plots all dots.
; Self-modifying: patches the VRAMLINES page used by drawPixel, the erase
; buffer read by erasePoints, and the colour operand autoM_col in drawPixel.
; Register roles during the loop: B = dot counter, D = plotBytes page,
; IX = write pointer into the erase buffer, HL' = read pointer into POINTS_LIST.
drawPoints:
	ld hl,(frameNum):inc hl:ld (frameNum),hl
	ld a,l
	and 1:ld (buffNum),a:add a:ld b,a	; buffNum = frameNum & 1, B = 2*buffNum
	add VRAMLINES/256:ld (autoM_vramLines+1),a	; line table: VRAMLINES or VRAMLINES+512
	ld a,b:add CLEAR_DOTS_BUFF1/256:ld (autoM_eraseAddrPrev+2),a	; buffer erasePoints reads next frame
	ld a,b:xor 2:add CLEAR_DOTS_BUFF1/256	; the other buffer ...
	ld ixh,a:ld ixl,0	; ... is where this frame's addresses get recorded

	exx:ld hl,POINTS_LIST:exx

	ld d,plotBytes/256

	ld b,NUM_DOTS
	drawPoints3DLoop:
		ld a,b

		; Colour base = ((counter AND mask) + offset) * 8, or * 4 once autoM_adda is a NOP.
		; The AND and ADD operands are patched by script and resetStuff.
		autoM_andone:
		and 1
		autoM_addzero:
		add 0
		autoM_adda:
		add a	; nop on second autoM
		add a:add a:ld (autoM_col+1),a
		call drawPixel
	dec b
	jr nz,drawPoints3DLoop

ret

; drawPixel: plots one dot as a 2x2 block of pixels and records the four
; screen addresses through IX for erasePoints.
; In:  HL' -> dot record (X lo, X hi, Y lo, Y hi); advanced by 4 on return
;      D   = plotBytes page, IX = erase-buffer write pointer
;      autoM_col operand = colour base (patched by drawPoints)
; Main-set B and D are not modified. Clobbers AF, C, E, HL, IY and the
; alternate BC, DE.
; Several instructions are patched at run time by resetStuff (autoM_hack2/3/4,
; autoM_lasthack1/2, autoM_aman1/2) and by drawPoints (autoM_vramLines, autoM_col).
drawPixel:

	exx
autoM_hack2:
ld d,%11100000	; NOTE(review): D' is overwritten by "ld d,a" (Y frac) before any visible read
	ld c,(hl):inc hl:ld a,(hl):dec hl
	sra a:rr c:ld a,c	; A = low byte of (X >> 1)

	;ld a,(hl):and d
autoM_hack3:
	nop	; becomes RRA after resetStuff
	rra:rra:rra:rra:rra:ld e,a	; X frac
	inc hl:ld c,(hl):inc hl ; c = X

	ld b,(hl):inc hl:ld a,(hl):dec hl
	sra a:rr b:ld a,b	; A = low byte of (Y >> 1)

	;ld a,(hl):and d
autoM_hack4:
	nop	; becomes RRA after resetStuff
	rra:rra:rra:rra:rra:ld d,a	; Y frac
	inc hl:ld b,(hl):inc hl ; b = Y
	push bc:push bc	; one copy per row; each is popped at the end of a doubleY pass
	exx

	ld iyh,2	; two rows
doubleY:
	exx
autoM_lasthack1:
	ld a,7
	sub d:ld d,a	; D' = 7 - D' (flips back on the second row)
autoM_lasthack2:
	ld a,7:sub e	; A = 7 - X frac
autoM_aman1:
	cp d:jr c,oof1	; A = unsigned min(A, D'); patched to NOP,NOP,NOP,AND D by resetStuff
		ld a,d
	oof1:

	exx:ld e,a	; E = table index before the colour base is added
	ld iyl,2	; two columns
doubleX:
	autoM_vramLines:
	ld h,VRAMLINES/256
	exx:ld a,b:exx
	ld l,a
	ld a,(hl):inc h:ld h,(hl):ld l,a	; HL = line address: low byte from this page, high byte from the next

	exx:ld a,c:inc c:inc c:exx	; A = X, then X' += 2 for the next column
	srl a:ld c,a	; C = X / 2
	srl a:add l:ld l,a	; L += X / 4 (8-bit add, no carry into H)

	ld a,e
	autoM_col:
	add 0
	ld e,a
	ld a,(de)	; pixel bits from the plotBytes page
	bit 0,c:jr nz,noAddLeftPixel
		add a	; X/2 even: shift the bits left by one
	noAddLeftPixel:
	or (hl)
	ld (hl),a

	ld (ix),l:inc ix:ld (ix),h:inc ix	; remember the address for erasePoints

	exx:ld a,e	; second column starts from the raw X frac
autoM_aman2:
	cp d:jr c,oof2	; same min / AND D patch as autoM_aman1
		ld a,d
	oof2:
	exx:ld e,a

	dec iyl
	jr nz,doubleX

	exx:pop bc:inc b:exx	; restore X, move to row Y+1
	dec iyh
	jr nz,doubleY

ret

initPoints:
	; Fills POINTS_LIST and VELOCITIES_LIST (2*NUM_DOTS 16-bit entries each).
	; The "random" source is the program's own machine code: bytes are read
	; through IX starting at `main`, and only IXL is incremented, so reads wrap
	; inside that 256-byte page.
	;   position = 0 low byte, (byte AND 63) + 96 high byte  (near the centre, fixed point)
	;   velocity = next byte sign-extended to 16 bits         (fixed point)
	; Clobbers AF, B, DE, HL, IX.
	ld ix,main
	ld hl,POINTS_LIST
	ld de,VELOCITIES_LIST
	ld b,2*NUM_DOTS
initPointsLoop:
	; position
	ld (hl),0
	inc hl
	ld a,(ix+0)
	inc ixl
	and 63
	add 96
	ld (hl),a
	inc hl

	; velocity: low byte as read, high byte = &FF if negative else &00
	ld a,(ix+0)
	inc ixl
	ld (de),a
	inc de
	rla			; sign bit -> carry
	sbc a			; A = 0 - carry
	ld (de),a
	inc de

	djnz initPointsLoop
	ret

generate_vram64:
	; Builds a 256-entry table of scanline start addresses.
	; In:  DE = start vram address
	;      HL = output address; low bytes go to page H, high bytes to page H+1,
	;           indexed by L (256 entries, so L ends where it started)
	; Layout: 32 character rows of 8 lines; lines within a row are &0800 apart,
	; rows are 64 bytes apart.
	; Clobbers AF, BC, DE, L.
	ld a,d
	ld b,32			; character rows
rows25:
	ld c,8			; lines per row
lines8:
	ld (hl),e		; low byte of the line address
	inc h
	ld (hl),d		; high byte, one page further
	dec h
	inc l
	add 8			; next line: address + &0800
	ld d,a
	dec c
	jr nz,lines8

	sub 64			; undo the 8 * &0800
	ld d,a
	ld a,e
	add 64			; next row: address + 64
	jr nc,no256
	inc d
no256:
	ld e,a
	ld a,d

	djnz rows25
	ret

; mul_signed8x8_shr7
; In:  A = multiplicand (signed 8-bit), B = multiplier (signed 8-bit)
; Out: A = (A * B) >> 7, low 8 bits (computed on magnitudes, sign applied last)
;      HL = |A| * |B| * 2
; Clobbers F, B, DE, HL. C and the index registers are not touched.
; Inside the loop: H = multiplier, E = multiplicand, L = 0, D = 0.

mul_signed8x8_shr7:
	ld d,0			; sign tally
	ld l,d			; L must be zero before the multiply loop

	; E = |A|
	bit 7,a
	jr z,no_neg_sign1
	neg
	inc d
no_neg_sign1:
	ld e,a

	; H = |B|
	ld a,b
	bit 7,a
	jr z,no_neg_sign2
	neg
	dec d
no_neg_sign2:
	ld h,a

	ld a,d			; 0 = positive result, 1 or -1 = negative result
	ld d,l			; D must be zero as well

	; Shift-and-add multiply. Eight passes leave H*E in HL; the ninth pass
	; doubles it, so that taking H afterwards amounts to >> 7.
	; Both magnitudes are at most 128, so the product is at most &4000 and the
	; ninth ADD HL,HL cannot carry (no stray ADD HL,DE).
	ld b,9
bits_mul_loop:
	add hl,hl
	jr nc,mul_bits_over
	add hl,de
mul_bits_over:
	djnz bits_mul_loop

	or a			; Z if the result is positive
	ld a,h			; LD does not change the flags
	ret z
	neg
	ret

palSet:
	; Sends a 17-entry palette to port &7Fxx.
	; In:  DE -> 17 bytes; DE is advanced past them
	; For each index 0..16: write the index to the port, then the table byte.
	; Out: C = 17, B = &7F. Clobbers AF.
	ld bc,&7f00
palloop:
	out (c),c		; select entry C
	ld a,(de)
	inc de
	out (c),a		; its value

	inc c
	ld a,c
	cp 17
	jr nz,palloop
	ret


; 16-bit frame counter, incremented by drawPoints. script derives the current
; phase from its high byte; resetStuff zeroes that high byte.
frameNum:
dw 0

; frameNum AND 1, written by drawPoints; frame uses it to pick the displayed page.
buffNum:
db 0

; Palette loaded by main: 17 bytes, consumed in order by palSet (indices 0..16).
pal0:
db &54,&44,&46,&57,&5f,&42,&59,&43
db &44,&58,&5d,&4c,&4e,&4a,&43,&4b
db &58

; Palette loaded by resetStuff: same 17-byte layout.
pal1:
db &54, &55, &57, &53
db &58, &5d, &5f, &5b
db &5c, &4c, &4e, &4a
db &44, &56, &52, &59
db &44


; Emitter position used by move3: X then Y, 16-bit words with the whole part in
; the high byte (X = 128, Y = 16). Same layout as a dot record in POINTS_LIST.
gpos:
dw 128*256,16*256

; Emitter velocity used by move3: X then Y, same format (X = 768 = 3 whole units per frame, Y = 0).
gvel:
dw 768,0
