il_ps_2_0
;corename=il4_nand_src_g
; Declarations for the key-search kernel.
; The literals l0.x and l1..l25 equal ROTL3(0xB7E15163) and
; 0xB7E15163 + i*0x9E3779B9 (mod 2^32) for i = 1..25, i.e. the RC5 P32/Q32
; key-schedule constants. Each constant is replicated into all four lanes
; because the shader tests four keys per pass (one per x/y/z/w lane).

; Window position of this work item; only .x and .y are read.
dcl_input_position_interp(linear_noperspective) vWinCoord0.xy__

dcl_output_generic o0.x			;single packed 32-bit result word, assembled at the end of the shader

dcl_cb cb0[3]				;cb0[0]: key_hi,key_mid,key_lo,granularity
					;cb0[1]: plain_lo,plain_hi,cypher_lo,cypher_hi
					;cb0[2]: iters,rest,width (width is consumed by a float mad)

;l0.x = initial S0 (0xB7E15163 rotated left by 3)
;l0.z = 32, used to form the complementary shift count of a rotate
;l0.y and l0.w are not referenced in this shader
dcl_literal	l0,	0xBF0A8B1D,0x1f,32,0

;l1..l25 = Sc1..Sc25, the additive constants for S1..S25 in the first key-schedule pass
dcl_literal l1, 0x5618cb1c, 0x5618cb1c, 0x5618cb1c, 0x5618cb1c
dcl_literal l2, 0xf45044d5, 0xf45044d5, 0xf45044d5, 0xf45044d5
dcl_literal l3, 0x9287be8e, 0x9287be8e, 0x9287be8e, 0x9287be8e
dcl_literal l4, 0x30bf3847, 0x30bf3847, 0x30bf3847, 0x30bf3847
dcl_literal l5, 0xcef6b200, 0xcef6b200, 0xcef6b200, 0xcef6b200
dcl_literal l6, 0x6d2e2bb9, 0x6d2e2bb9, 0x6d2e2bb9, 0x6d2e2bb9
dcl_literal l7, 0x0b65a572, 0x0b65a572, 0x0b65a572, 0x0b65a572
dcl_literal l8, 0xa99d1f2b, 0xa99d1f2b, 0xa99d1f2b, 0xa99d1f2b
dcl_literal l9, 0x47d498e4, 0x47d498e4, 0x47d498e4, 0x47d498e4
dcl_literal l10, 0xe60c129d, 0xe60c129d, 0xe60c129d, 0xe60c129d
dcl_literal l11, 0x84438c56, 0x84438c56, 0x84438c56, 0x84438c56
dcl_literal l12, 0x227b060f, 0x227b060f, 0x227b060f, 0x227b060f
dcl_literal l13, 0xc0b27fc8, 0xc0b27fc8, 0xc0b27fc8, 0xc0b27fc8
dcl_literal l14, 0x5ee9f981, 0x5ee9f981, 0x5ee9f981, 0x5ee9f981
dcl_literal l15, 0xfd21733a, 0xfd21733a, 0xfd21733a, 0xfd21733a
dcl_literal l16, 0x9b58ecf3, 0x9b58ecf3, 0x9b58ecf3, 0x9b58ecf3
dcl_literal l17, 0x399066ac, 0x399066ac, 0x399066ac, 0x399066ac
dcl_literal l18, 0xd7c7e065, 0xd7c7e065, 0xd7c7e065, 0xd7c7e065
dcl_literal l19, 0x75ff5a1e, 0x75ff5a1e, 0x75ff5a1e, 0x75ff5a1e
dcl_literal l20, 0x1436d3d7, 0x1436d3d7, 0x1436d3d7, 0x1436d3d7
dcl_literal l21, 0xb26e4d90, 0xb26e4d90, 0xb26e4d90, 0xb26e4d90
dcl_literal l22, 0x50a5c749, 0x50a5c749, 0x50a5c749, 0x50a5c749
dcl_literal l23, 0xeedd4102, 0xeedd4102, 0xeedd4102, 0xeedd4102
dcl_literal l24, 0x8d14babb, 0x8d14babb, 0x8d14babb, 0x8d14babb
dcl_literal l25, 0x2b4c3474, 0x2b4c3474, 0x2b4c3474, 0x2b4c3474
;l26 = per-lane key_hi offsets 0..3; .x/.y/.z/.w also serve as the integers 0,1,2,3
dcl_literal l26, 0x0,0x1,0x2,0x3
;l27.x = mask keeping key_hi to 8 bits; .y/.z = middle-byte masks for the byte swap; .w = 3 (ROTL3 left shift)
dcl_literal l27, 0xff,0x00ff0000,0x0000ff00,3
;l28.x/.y = 8 and 24 (byte-swap shifts; 8 also extracts the carry above key_hi's low byte)
;l28.z = 2 (work-item index * 4); l28.w = 29 (32-3, the right-shift half of ROTL3)
dcl_literal l28, 8,24,2,29
;l29.y = 18 (shift of the CMC count in the result word); .z = 4 (keys tested per pass)
;l29.w = 0x80000000 (solution-found flag); l29.x (8.0) is not referenced in this shader
dcl_literal l29, 8.0,0x12,0x4,0x80000000
;l30.x = 23 and l30.y = 0xfc: shift and mask applied to the pass counter when packing the result; .z/.w unused
dcl_literal l30, 23,0xfc,0,0

; ---- Per-work-item setup ------------------------------------------------
; State handed to the search loop:
;   r100.x = key_hi + 4*idx     r100.y = key_mid     r100.z = key_lo
;   r100.w = passes to run (iters, or iters-1 when idx >= rest)
;   r101.x = CMC count          r101.y = key counter  r101.z = last CMC index
; r50 is scratch only.

	; idx = floor(y)*width + floor(x), converted to an unsigned integer
	flr	r50.w,vWinCoord0.y
	flr	r50.z,vWinCoord0.x
	mad	r50.z,r50.w,cb0[2].z,r50.z
	ftou	r50.z,r50.z

	; starting key: every work item begins 4*idx above the base key_hi
	ishl	r50.x,r50.z,l28.z
	iadd	r100.x,r50.x,cb0[0].x
	mov	r100.y,cb0[0].y
	mov	r100.z,cb0[0].z

	; pass budget: work items with idx >= rest run one pass fewer
	mov	r100.w,cb0[2].x
	ilt	r50.y,r50.z,cb0[2].y
	cmov_logical r50.x,r50.y,l26.x,l26.y
	iadd	r100.w,r100.w,r50.xxxx_neg(xyzw)

	; counters start at zero
	mov	r101.x,l26.x
	mov	r101.y,l26.x
	mov	r101.z,l26.x
	
	;  
	; Main search loop. r100.w is the number of passes left for this work item.
	; Each pass first normalises the key (carry from key_hi into key_mid and, if
	; needed, into key_lo), then loads four candidate keys into the vector lanes.
	whileloop
		break_logicalz	r100.w				;leave the loop when no passes remain

		; NOTE(review): the key counter is advanced before the keys are tested, so a
		; match in the first pass is recorded as 4+lane - confirm the host expects this.
		iadd	r101.y,r101.y,l29.z			;current iter+4

		; --- carry from key_hi into key_mid ---
		; key_mid is byte-swapped, (hi>>8) is added to it, and it is swapped back below.
		ishl	r50.x,r100.y,l28.y			;t=(mid<<24)|(mid>>24);
		ushr	r50.y,r100.y,l28.y

		iand	r50.z,r100.y,l27.y					;t=t|((mid&(unsigned)0x00ff0000)>>8);
		iand	r50.w,r100.y,l27.z					;t=t|((mid&(unsigned)0x0000ff00)<<8);

		ushr	r50.z,r50.z,l28.x
		ishl	r50.w,r50.w,l28.x

		ior	r50.x,r50.x,r50.y
		ior	r50.w,r50.z,r50.w
		ior	r50.x,r50.x,r50.w			;bswap mid

		mov	r50.y,r50.x						;t3=t (value before the add, for carry detection)

		ushr	r51.x,r100.x,l28.x				;(hi>>8): whatever overflowed above the low 8 bits of key_hi
		iadd	r50.x,r50.x,r51.x				;t+=(hi>>8)

		ult		r51.y,r50.x,r50.y				;if(t<t3): unsigned wrap-around means mid overflowed
		if_logicalnz	r51.y
			; carry out of key_mid: byte-swap key_lo, add 1, swap it back
			ishl	r52.x,r100.z,l28.y		;t=(lo<<24)|(lo>>24);
			ushr	r52.y,r100.z,l28.y

			iand	r52.z,r100.z,l27.y		;t=t|((lo&(unsigned)0x00ff0000)>>8);
			iand	r52.w,r100.z,l27.z		;t=t|((lo&(unsigned)0x0000ff00)<<8);

			ushr	r52.z,r52.z,l28.x
			ishl	r52.w,r52.w,l28.x

			ior	r52.x,r52.x,r52.y
			ior	r52.w,r52.z,r52.w
			ior	r52.x,r52.x,r52.w		;bswap lo

			iadd	r52.x,r52.x,l26.y		;t2=t2+1
			; swap the incremented value back and store it as the new key_lo
			ishl	r53.x,r52.x,l28.y		;t2=(lo<<24)|(lo>>24);
			ushr	r53.y,r52.x,l28.y

			iand	r53.z,r52.x,l27.y		;t2=t2|((lo&0x00ff0000)>>8);
			iand	r53.w,r52.x,l27.z		;t2=t2|((lo&0x0000ff00)<<8);

			ushr	r53.z,r53.z,l28.x
			ishl	r53.w,r53.w,l28.x

			ior	r53.x,r53.x,r53.y
			ior	r53.z,r53.z,r53.w
			ior	r100.z,r53.x,r53.z		;bswap lo
		endif
		; swap the updated t back and store it as the new key_mid
		ishl	r50.y,r50.x,l28.y
		ushr	r50.z,r50.x,l28.y

		iand	r51.x,r50.x,l27.y
		iand	r51.y,r50.x,l27.z

		ushr	r50.w,r51.x,l28.x
		ishl	r50.x,r51.y,l28.x

		ior	r50.y,r50.z,r50.y
		ior	r50.w,r50.x,r50.w
		ior	r100.y,r50.y,r50.w				;bswap mid

		iand	r100.x,r100.x,l27.x				;key_hi keeps only its low 8 bits now that the carry is applied

		; Load the key words into the working registers, replicated to all lanes:
		    ;L2 = key_hi;
		    ;L1 = key_mid;
		    ;L0 = key_lo;

		; r0 = L0 (lo), r1 = L1 (mid), r2 = L2 (hi)
		mov	r0,r100.zzzz
		mov	r1,r100.yyyy
		mov	r2,r100.xxxx

		; lanes x,y,z,w test key_hi+0, +1, +2, +3
		; NOTE(review): no carry is applied per lane here - presumably the host keeps
		; key_hi aligned so that key_hi+3 stays within 8 bits; verify against caller.
		iadd	r2,r2,l26

		; ---- Key schedule, pass 1 of 3 ----
		; Register roles: r0/r1/r2 = L0/L1/L2; r11..r35 receive S1..S25 in this pass
		; (S0 is still the constant l0.x here; r10 is first written in pass 2).
		; r55..r59 are scratch.
		; ROTL3(v)  is built as (v<<3)|(v>>29) using l27.w and l28.w.
		; ROTL(v,n) is built as (v<<n)|(v>>(32-n)) with 32 taken from l0.z; this relies on
		; ishl/ushr honouring only the low five bits of the count (per the IL spec - confirm).
		; In this pass each Sc<i> is the literal l<i>.

		;S0 is the constant ROTL3(Sc0) held in l0.x, so only L0 needs computing:
		;L0=ROTL(L0+S0,S0)=ROTL(L0+S0,0x1d)   (0x1d = low five bits of l0.x)
		iadd	r50,r0,l0.xxxx
		ushr	r51,r50,l27.w
		ishl	r52,r50,l28.w
		ior	r0,r51,r52

		;S1=ROTL3(Sc1+S0+L0)
		iadd	r56,l0.xxxx,l1.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r11,r56,r57

		;L1=ROTL(L1+S1+L0,S1+L0)
		iadd	r55,r11,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S2=ROTL3(Sc2+S1+L1)
		iadd	r56,r11,l2.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r12,r56,r57

		;L2=ROTL(L2+S2+L1,S2+L1)
		iadd	r55,r12,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S3=ROTL3(Sc3+S2+L2)
		iadd	r56,r12,l3.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r13,r56,r57

		;L0=ROTL(L0+S3+L2,S3+L2)
		iadd	r55,r13,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S4=ROTL3(Sc4+S3+L0)
		iadd	r56,r13,l4.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior		r14,r56,r57

		;L1=ROTL(L1+S4+L0,S4+L0)
		iadd	r55,r14,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S5=ROTL3(Sc5+S4+L1)
		iadd	r56,r14,l5.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior		r15,r56,r57

		;L2=ROTL(L2+S5+L1,S5+L1)
		iadd	r55,r15,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S6=ROTL3(Sc6+S5+L2)
		iadd	r56,r15,l6.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r16,r56,r57

		;L0=ROTL(L0+S6+L2,S6+L2)
		iadd	r55,r16,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S7=ROTL3(Sc7+S6+L0)
		iadd	r56,r16,l7.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r17,r56,r57

		;L1=ROTL(L1+S7+L0,S7+L0)
		iadd	r55,r17,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior		r1,r55,r59

		;S8=ROTL3(Sc8+S7+L1)
		iadd	r56,r17,l8.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r18,r56,r57

		;L2=ROTL(L2+S8+L1,S8+L1)
		iadd	r55,r18,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S9=ROTL3(Sc9+S8+L2)
		iadd	r56,r18,l9.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r19,r56,r57

		;L0=ROTL(L0+S9+L2,S9+L2)
		iadd	r55,r19,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S10=ROTL3(Sc10+S9+L0)
		iadd	r56,r19,l10.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r20,r56,r57

		;L1=ROTL(L1+S10+L0,S10+L0)
		iadd	r55,r20,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior		r1,r55,r59

		;S11=ROTL3(Sc11+S10+L1)
		iadd	r56,r20,l11.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r21,r56,r57

		;L2=ROTL(L2+S11+L1,S11+L1)
		iadd	r55,r21,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S12=ROTL3(Sc12+S11+L2)
		iadd	r56,r21,l12.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r22,r56,r57

		;L0=ROTL(L0+S12+L2,S12+L2)
		iadd	r55,r22,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S13=ROTL3(Sc13+S12+L0)
		iadd	r56,r22,l13.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r23,r56,r57

		;L1=ROTL(L1+S13+L0,S13+L0)
		iadd	r55,r23,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S14=ROTL3(Sc14+S13+L1)
		iadd	r56,r23,l14.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r24,r56,r57

		;L2=ROTL(L2+S14+L1,S14+L1)
		iadd	r55,r24,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S15=ROTL3(Sc15+S14+L2)
		iadd	r56,r24,l15.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r25,r56,r57

		;L0=ROTL(L0+S15+L2,S15+L2)
		iadd	r55,r25,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S16=ROTL3(Sc16+S15+L0)
		iadd	r56,r25,l16.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r26,r56,r57

		;L1=ROTL(L1+S16+L0,S16+L0)
		iadd	r55,r26,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S17=ROTL3(Sc17+S16+L1)
		iadd	r56,r26,l17.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r27,r56,r57

		;L2=ROTL(L2+S17+L1,S17+L1)
		iadd	r55,r27,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S18=ROTL3(Sc18+S17+L2)
		iadd	r56,r27,l18.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r28,r56,r57

		;L0=ROTL(L0+S18+L2,S18+L2)
		iadd	r55,r28,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S19=ROTL3(Sc19+S18+L0)
		iadd	r56,r28,l19.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r29,r56,r57

		;L1=ROTL(L1+S19+L0,S19+L0)
		iadd	r55,r29,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S20=ROTL3(Sc20+S19+L1)
		iadd	r56,r29,l20.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r30,r56,r57

		;L2=ROTL(L2+S20+L1,S20+L1)
		iadd	r55,r30,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S21=ROTL3(Sc21+S20+L2)
		iadd	r56,r30,l21.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r31,r56,r57

		;L0=ROTL(L0+S21+L2,S21+L2)
		iadd	r55,r31,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S22=ROTL3(Sc22+S21+L0)
		iadd	r56,r31,l22.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r32,r56,r57

		;L1=ROTL(L1+S22+L0,S22+L0)
		iadd	r55,r32,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S23=ROTL3(Sc23+S22+L1)
		iadd	r56,r32,l23.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r33,r56,r57

		;L2=ROTL(L2+S23+L1,S23+L1)
		iadd	r55,r33,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S24=ROTL3(Sc24+S23+L2)
		iadd	r56,r33,l24.xxxx
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r34,r56,r57

		;L0=ROTL(L0+S24+L2,S24+L2)
		iadd	r55,r34,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S25=ROTL3(Sc25+S24+L0)
		iadd	r56,r34,l25.xxxx
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r35,r56,r57

		;L1=ROTL(L1+S25+L0,S25+L0)
		iadd	r55,r35,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		; ---- Key schedule, pass 2 of 3 ----
		; Same S/L recurrence as before, but each S<i> is now updated in place from its
		; own previous value (r10..r35) instead of a literal. S0 is seeded from l0.x and
		; written to r10 for the first time here. r0/r1/r2 = L0/L1/L2, r55..r59 scratch.
		; ROTL3 uses shifts 3/29 (l27.w/l28.w); ROTL forms its right-shift count as 32-n (l0.z).

		;S0=ROTL3(S0+S25+L1)
		iadd	r56,r35,l0.xxxx
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r10,r56,r57

		;L2=ROTL(L2+S0+L1,S0+L1)
		iadd	r55,r10,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S1=ROTL3(S1+S0+L2)
		iadd	r56,r10,r11
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r11,r56,r57

		;L0=ROTL(L0+S1+L2,S1+L2)
		iadd	r55,r11,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S2=ROTL3(S2+S1+L0)
		iadd	r56,r11,r12
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r12,r56,r57

		;L1=ROTL(L1+S2+L0,S2+L0)
		iadd	r55,r12,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S3=ROTL3(S3+S2+L1)
		iadd	r56,r12,r13
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r13,r56,r57

		;L2=ROTL(L2+S3+L1,S3+L1)
		iadd	r55,r13,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S4=ROTL3(S4+S3+L2)
		iadd	r56,r13,r14
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r14,r56,r57

		;L0=ROTL(L0+S4+L2,S4+L2)
		iadd	r55,r14,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S5=ROTL3(S5+S4+L0)
		iadd	r56,r14,r15
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r15,r56,r57

		;L1=ROTL(L1+S5+L0,S5+L0)
		iadd	r55,r15,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S6=ROTL3(S6+S5+L1)
		iadd	r56,r15,r16
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r16,r56,r57

		;L2=ROTL(L2+S6+L1,S6+L1)
		iadd	r55,r16,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S7=ROTL3(S7+S6+L2)
		iadd	r56,r16,r17
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r17,r56,r57

		;L0=ROTL(L0+S7+L2,S7+L2)
		iadd	r55,r17,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S8=ROTL3(S8+S7+L0)
		iadd	r56,r17,r18
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r18,r56,r57

		;L1=ROTL(L1+S8+L0,S8+L0)
		iadd	r55,r18,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S9=ROTL3(S9+S8+L1)
		iadd	r56,r18,r19
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r19,r56,r57

		;L2=ROTL(L2+S9+L1,S9+L1)
		iadd	r55,r19,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S10=ROTL3(S10+S9+L2)
		iadd	r56,r19,r20
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r20,r56,r57

		;L0=ROTL(L0+S10+L2,S10+L2)
		iadd	r55,r20,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S11=ROTL3(S11+S10+L0)
		iadd	r56,r20,r21
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r21,r56,r57

		;L1=ROTL(L1+S11+L0,S11+L0)
		iadd	r55,r21,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S12=ROTL3(S12+S11+L1)
		iadd	r56,r21,r22
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r22,r56,r57

		;L2=ROTL(L2+S12+L1,S12+L1)
		iadd	r55,r22,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S13=ROTL3(S13+S12+L2)
		iadd	r56,r22,r23
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r23,r56,r57

		;L0=ROTL(L0+S13+L2,S13+L2)
		iadd	r55,r23,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S14=ROTL3(S14+S13+L0)
		iadd	r56,r23,r24
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r24,r56,r57

		;L1=ROTL(L1+S14+L0,S14+L0)
		iadd	r55,r24,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S15=ROTL3(S15+S14+L1)
		iadd	r56,r24,r25
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r25,r56,r57

		;L2=ROTL(L2+S15+L1,S15+L1)
		iadd	r55,r25,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S16=ROTL3(S16+S15+L2)
		iadd	r56,r25,r26
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r26,r56,r57

		;L0=ROTL(L0+S16+L2,S16+L2)
		iadd	r55,r26,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S17=ROTL3(S17+S16+L0)
		iadd	r56,r26,r27
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r27,r56,r57

		;L1=ROTL(L1+S17+L0,S17+L0)
		iadd	r55,r27,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S18=ROTL3(S18+S17+L1)
		iadd	r56,r27,r28
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r28,r56,r57

		;L2=ROTL(L2+S18+L1,S18+L1)
		iadd	r55,r28,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S19=ROTL3(S19+S18+L2)
		iadd	r56,r28,r29
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r29,r56,r57

		;L0=ROTL(L0+S19+L2,S19+L2)
		iadd	r55,r29,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S20=ROTL3(S20+S19+L0)
		iadd	r56,r29,r30
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r30,r56,r57

		;L1=ROTL(L1+S20+L0,S20+L0)
		iadd	r55,r30,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S21=ROTL3(S21+S20+L1)
		iadd	r56,r30,r31
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r31,r56,r57

		;L2=ROTL(L2+S21+L1,S21+L1)
		iadd	r55,r31,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S22=ROTL3(S22+S21+L2)
		iadd	r56,r31,r32
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r32,r56,r57

		;L0=ROTL(L0+S22+L2,S22+L2)
		iadd	r55,r32,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S23=ROTL3(S23+S22+L0)
		iadd	r56,r32,r33
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r33,r56,r57

		;L1=ROTL(L1+S23+L0,S23+L0)
		iadd	r55,r33,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S24=ROTL3(S24+S23+L1)
		iadd	r56,r33,r34
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r34,r56,r57

		;L2=ROTL(L2+S24+L1,S24+L1)
		iadd	r55,r34,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S25=ROTL3(S25+S24+L2)
		iadd	r56,r34,r35
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r35,r56,r57

		;L0=ROTL(L0+S25+L2,S25+L2)
		iadd	r55,r35,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		; ---- Key schedule, pass 3 of 3 ----
		; Final in-place update of S0..S24 (r10..r34); r0/r1/r2 = L0/L1/L2, r55..r59 scratch.
		; This pass deliberately stops after S24: the closing L1 and S25 updates are only
		; computed later, after the first cipher-text word has matched for some lane.
		; ROTL3 uses shifts 3/29 (l27.w/l28.w); ROTL forms its right-shift count as 32-n (l0.z).

		;S0=ROTL3(S0+S25+L0)
		iadd	r56,r35,r10
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r10,r56,r57

		;L1=ROTL(L1+S0+L0,S0+L0)
		iadd	r55,r10,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S1=ROTL3(S1+S0+L1)
		iadd	r56,r10,r11
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r11,r56,r57

		;L2=ROTL(L2+S1+L1,S1+L1)
		iadd	r55,r11,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S2=ROTL3(S2+S1+L2)
		iadd	r56,r11,r12
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r12,r56,r57

		;L0=ROTL(L0+S2+L2,S2+L2)
		iadd	r55,r12,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S3=ROTL3(S3+S2+L0)
		iadd	r56,r12,r13
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r13,r56,r57

		;L1=ROTL(L1+S3+L0,S3+L0)
		iadd	r55,r13,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S4=ROTL3(S4+S3+L1)
		iadd	r56,r13,r14
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r14,r56,r57

		;L2=ROTL(L2+S4+L1,S4+L1)
		iadd	r55,r14,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S5=ROTL3(S5+S4+L2)
		iadd	r56,r14,r15
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r15,r56,r57

		;L0=ROTL(L0+S5+L2,S5+L2)
		iadd	r55,r15,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S6=ROTL3(S6+S5+L0)
		iadd	r56,r15,r16
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r16,r56,r57

		;L1=ROTL(L1+S6+L0,S6+L0)
		iadd	r55,r16,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S7=ROTL3(S7+S6+L1)
		iadd	r56,r16,r17
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r17,r56,r57

		;L2=ROTL(L2+S7+L1,S7+L1)
		iadd	r55,r17,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S8=ROTL3(S8+S7+L2)
		iadd	r56,r17,r18
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r18,r56,r57

		;L0=ROTL(L0+S8+L2,S8+L2)
		iadd	r55,r18,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S9=ROTL3(S9+S8+L0)
		iadd	r56,r18,r19
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r19,r56,r57

		;L1=ROTL(L1+S9+L0,S9+L0)
		iadd	r55,r19,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S10=ROTL3(S10+S9+L1)
		iadd	r56,r19,r20
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r20,r56,r57

		;L2=ROTL(L2+S10+L1,S10+L1)
		iadd	r55,r20,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S11=ROTL3(S11+S10+L2)
		iadd	r56,r20,r21
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r21,r56,r57

		;L0=ROTL(L0+S11+L2,S11+L2)
		iadd	r55,r21,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S12=ROTL3(S12+S11+L0)
		iadd	r56,r21,r22
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r22,r56,r57

		;L1=ROTL(L1+S12+L0,S12+L0)
		iadd	r55,r22,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S13=ROTL3(S13+S12+L1)
		iadd	r56,r22,r23
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r23,r56,r57

		;L2=ROTL(L2+S13+L1,S13+L1)
		iadd	r55,r23,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S14=ROTL3(S14+S13+L2)
		iadd	r56,r23,r24
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r24,r56,r57

		;L0=ROTL(L0+S14+L2,S14+L2)
		iadd	r55,r24,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S15=ROTL3(S15+S14+L0)
		iadd	r56,r24,r25
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r25,r56,r57

		;L1=ROTL(L1+S15+L0,S15+L0)
		iadd	r55,r25,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S16=ROTL3(S16+S15+L1)
		iadd	r56,r25,r26
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r26,r56,r57

		;L2=ROTL(L2+S16+L1,S16+L1)
		iadd	r55,r26,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S17=ROTL3(S17+S16+L2)
		iadd	r56,r26,r27
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r27,r56,r57

		;L0=ROTL(L0+S17+L2,S17+L2)
		iadd	r55,r27,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S18=ROTL3(S18+S17+L0)
		iadd	r56,r27,r28
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r28,r56,r57

		;L1=ROTL(L1+S18+L0,S18+L0)
		iadd	r55,r28,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S19=ROTL3(S19+S18+L1)
		iadd	r56,r28,r29
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r29,r56,r57

		;L2=ROTL(L2+S19+L1,S19+L1)
		iadd	r55,r29,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S20=ROTL3(S20+S19+L2)
		iadd	r56,r29,r30
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r30,r56,r57

		;L0=ROTL(L0+S20+L2,S20+L2)
		iadd	r55,r30,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S21=ROTL3(S21+S20+L0)
		iadd	r56,r30,r31
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r31,r56,r57

		;L1=ROTL(L1+S21+L0,S21+L0)
		iadd	r55,r31,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S22=ROTL3(S22+S21+L1)
		iadd	r56,r31,r32
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r32,r56,r57

		;L2=ROTL(L2+S22+L1,S22+L1)
		iadd	r55,r32,r1
		iadd	r56,r55,r2
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r2,r55,r59

		;S23=ROTL3(S23+S22+L2)
		iadd	r56,r32,r33
		iadd	r55,r56,r2
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r33,r56,r57

		;L0=ROTL(L0+S23+L2,S23+L2)
		iadd	r55,r33,r2
		iadd	r56,r55,r0
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r0,r55,r59

		;S24=ROTL3(S24+S23+L0)
		iadd	r56,r33,r34
		iadd	r55,r56,r0
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r34,r56,r57

		; ---- Encryption of the known plaintext ----
		; eA lives in r10 and eB in r11; they overwrite S0 and S1, which are consumed by the
		; two setup additions below and not needed afterwards. r12..r34 still hold S2..S24.
		; Each half-round is v = ROTL(v ^ other, other) + S<k>, with the rotate built as
		; (v<<n)|(v>>(32-n)) (32 from l0.z; r55/r59 are scratch).
		; Only the half-rounds up to S24 are done here, which yields the final eA; the last
		; eB half-round (S25) is computed after the eA comparison further down.

		;eA=PLAIN_LO+S0
		iadd	r10,r10,cb0[1].xxxx			;eA
		;eB=PLAIN_HI+S1;
		iadd	r11,r11,cb0[1].yyyy			;eB

		;eA=ROTL(eA^eB,eB)+S2
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r12

		;eB=ROTL(eB^eA,eA)+S3
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r13

		;eA=ROTL(eA^eB,eB)+S4
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r14

		;eB=ROTL(eB^eA,eA)+S5
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r15

		;eA=ROTL(eA^eB,eB)+S6
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r16

		;eB=ROTL(eB^eA,eA)+S7
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r17

		;eA=ROTL(eA^eB,eB)+S8
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r18

		;eB=ROTL(eB^eA,eA)+S9
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r19

		;eA=ROTL(eA^eB,eB)+S10
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r20

		;eB=ROTL(eB^eA,eA)+S11
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r21

		;eA=ROTL(eA^eB,eB)+S12
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r22

		;eB=ROTL(eB^eA,eA)+S13
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r23

		;eA=ROTL(eA^eB,eB)+S14
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r24

		;eB=ROTL(eB^eA,eA)+S15
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r25

		;eA=ROTL(eA^eB,eB)+S16
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r26

		;eB=ROTL(eB^eA,eA)+S17
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r27

		;eA=ROTL(eA^eB,eB)+S18
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r28

		;eB=ROTL(eB^eA,eA)+S19
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r29

		;eA=ROTL(eA^eB,eB)+S20
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r30

		;eB=ROTL(eB^eA,eA)+S21
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r31

		;eA=ROTL(eA^eB,eB)+S22
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r32

		;eB=ROTL(eB^eA,eA)+S23
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r33

		;eA=ROTL(eA^eB,eB)+S24
		ixor	r10,r10,r11
		iadd	r59,l0.zzzz,r11_neg(xyzw)
		ishl	r55,r10,r11
		ushr	r59,r10,r59
		ior	r10,r55,r59
		iadd	r10,r10,r34

		; ---- Match test and bookkeeping ----
		; Compare eA of all four lanes with cypher_lo and fold the four compare masks
		; into r12.x, which is zero only when no lane matched.
		; NOTE(review): dp4 is a floating-point dot product applied to integer compare
		; masks (an all-ones mask is a NaN bit pattern); the code relies on the result
		; having non-zero bits whenever any lane matched - confirm on the target hardware.
		ieq	r12,r10,cb0[1].zzzz
		dp4	r12,r12,r12.1111

		; Advance to the next key batch and consume one pass. This happens before the
		; early 'continue', so it is executed on every pass.
		; NOTE(review): r50.x is not read again before it is overwritten at the top of
		; the loop, and not at all after a 'break' - this mov looks like a leftover.
		mov	r50.x,r100.x
		iadd	r100.x,r100.x,cb0[0].w			;key_hi += granularity (carry is resolved at the top of the next pass)
		iadd	r100.w,r100.w,l26.yyyy_neg(xyzw)	;passes left -= 1

		continue_logicalz r12.x				;no lane matched cypher_lo: go straight to the next pass

		; NOTE(review): the next line starts with '#'; it is kept byte-for-byte. It looks
		; like a disabled debug store - confirm that the IL assembler ignores it.
		#mov	#g[0].x,l26.y

		; At least one lane matched on eA: finish the two key-schedule steps that were
		; skipped at the end of pass 3, then the last eB half-round.
		;L1=ROTL(L1+S24+L0,S24+L0)
		iadd	r55,r34,r0
		iadd	r56,r55,r1
		iadd	r59,l0.zzzz,r55_neg(xyzw)
		ishl	r55,r56,r55
		ushr	r59,r56,r59
		ior	r1,r55,r59

		;S25=ROTL3(S25+S24+L1)
		iadd	r56,r34,r35
		iadd	r55,r56,r1
		ishl	r56,r55,l27.w
		ushr	r57,r55,l28.w
		ior	r35,r56,r57

		;eB=ROTL(eB^eA,eA)+S25
		ixor	r11,r11,r10
		iadd	r59,l0.zzzz,r10_neg(xyzw)
		ishl	r55,r11,r10
		ushr	r59,r11,r59
		ior	r11,r55,r59
		iadd	r11,r11,r35

		; Per-lane evaluation, lanes x,y,z,w in order. For each lane whose eA equals
		; cypher_lo: bump the CMC count (r101.x) and record r101.y + lane in r101.z.
		; If eB also equals cypher_hi, set the top bit of r101.z and leave the loop.
		ieq	r55.x,r10.x,cb0[1].z
		if_logicalnz	r55.x		;key #0
			;CMC increment
			iadd	r101.x,r101.x,l26.y
			mov	r101.z,r101.y

			ieq	r59.x,r11.x,cb0[1].w
			if_logicalnz r59.x
				;solution found
				ior	r101.z,r101.z,l29.w
				break
			endif
		endif
		ieq	r55.x,r10.y,cb0[1].z
		if_logicalnz	r55.x		;key #1
			;CMC increment
			iadd	r101.x,r101.x,l26.y
			iadd	r101.z,r101.y,l26.y

			ieq	r59.x,r11.y,cb0[1].w
			if_logicalnz r59.x
				;solution found
				ior	r101.z,r101.z,l29.w
				break
			endif
		endif
		ieq	r55.x,r10.z,cb0[1].z
		if_logicalnz	r55.x		;key #2
			;CMC increment
			iadd	r101.x,r101.x,l26.y
			iadd	r101.z,r101.y,l26.z

			ieq	r59.x,r11.z,cb0[1].w
			if_logicalnz r59.x
				;solution found
				ior	r101.z,r101.z,l29.w
				break
			endif
		endif
		ieq	r55.x,r10.w,cb0[1].z
		if_logicalnz	r55.x		;key #3
			;CMC increment
			iadd	r101.x,r101.x,l26.y
			iadd	r101.z,r101.y,l26.w

			ieq	r59.x,r11.w,cb0[1].w
			if_logicalnz r59.x
				;solution found
				ior	r101.z,r101.z,l29.w
				break
			endif
		endif
	endloop

; ---- Result packing ------------------------------------------------------
; o0.x = r101.z | (r101.x << 18) | ((r101.y & 0xfc) << 23)
;   bit 31      solution-found flag (already OR-ed into r101.z inside the loop)
;   bits 25..30 bits 2..7 of the key counter r101.y ("iters done")
;   bits 18..   CMC count r101.x
;   bits 0..17  last CMC index r101.z (pass counter, key lane in the low two bits)
; NOTE(review): the original layout note read "HIT:iters done:CMC count:iters:key offset
; = 1:6:5:16:2", which sums to 30 bits; with the shifts used here the CMC field has room
; for 7 bits (18..24) - confirm against the host-side decoder.
; r50 is scratch here.

iand	r50.x,r101.y,l30.y		;keep bits 2..7 of the key counter
ishl	r50.x,r50.x,l30.x		;move them up by 23

ishl	r50.y,r101.x,l29.y		;CMC count << 18
ior	r50.y,r50.y,r101.z		;merge the last-CMC index and hit flag

ior	r50.x,r50.x,r50.y
mov	o0.x,r50.x

end
