|
Server : Apache/2.4.58 (Win64) OpenSSL/3.1.3 PHP/8.2.12 System : Windows NT SERVER-PC 10.0 build 26200 (Windows 11) AMD64 User : ServerPC ( 0) PHP Version : 8.2.12 Disable Function : NONE Directory : C:/Users/ServerPC/AppData/Roaming/NVIDIA/ComputeCache/a/8/ |
Upload File : |
A �� (� ��߀�}}�Mar 14 202517:43:53HOST64sm_61//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Unknown Toolkit Version
// Based on NVVM 7.0.1
//
.version 8.7
.target sm_60, texmode_independent
.address_size 64
// .globl imageWritei1D
// imageWritei1D: each thread stores one four-component 32-bit texel to a 1D
// surface after permuting the components by the surface's channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2: added to the thread index to form the x coordinate
//   param_3: exclusive upper bound on the thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): 4273/4275/4278/4279 equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the
// OpenCL headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritei1D(
    .param .align 16 .b8 imageWritei1D_param_0[16],
    .param .surfref imageWritei1D_param_1,
    .param .u32 imageWritei1D_param_2,
    .param .u32 imageWritei1D_param_3
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0, %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWritei1D_param_0];
    ld.param.u32    %base0,  [imageWritei1D_param_2];
    ld.param.u32    %limit0, [imageWritei1D_param_3];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3 is presumably a driver-supplied global offset - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // Threads at or past the bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritei1D_exit;

    suq.channel_order.b32 %order, [imageWritei1D_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    add.s32 %coord0, %gid0, %base0;
    sust.p.1d.v4.b32.trap [imageWritei1D_param_1, {%coord0}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWritei1D_exit:
    ret;
}
// .globl imageWritei1D_arr
// imageWritei1D_arr: each thread stores one four-component 32-bit texel to a
// 1D-array surface after permuting the components by channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritei1D_arr(
    .param .align 16 .b8 imageWritei1D_arr_param_0[16],
    .param .surfref imageWritei1D_arr_param_1,
    .param .u32 imageWritei1D_arr_param_2,
    .param .u32 imageWritei1D_arr_param_3,
    .param .u32 imageWritei1D_arr_param_4,
    .param .u32 imageWritei1D_arr_param_5
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWritei1D_arr_param_0];
    ld.param.u32    %base0,  [imageWritei1D_arr_param_2];
    ld.param.u32    %limit0, [imageWritei1D_arr_param_3];
    ld.param.u32    %base1,  [imageWritei1D_arr_param_4];
    ld.param.u32    %limit1, [imageWritei1D_arr_param_5];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3/envreg4 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // Threads at or past either bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritei1D_arr_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWritei1D_arr_exit;

    suq.channel_order.b32 %order, [imageWritei1D_arr_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    // The y-axis value is passed first in the a1d coordinate vector.
    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.a1d.v4.b32.trap [imageWritei1D_arr_param_1, {%coord1, %coord0}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWritei1D_arr_exit:
    ret;
}
// .globl imageWritei1D_buf
// imageWritei1D_buf: each thread stores one four-component 32-bit texel to a
// 1D-buffer surface after permuting the components by channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface, passed as a 64-bit value
//   param_2: added to the thread index to form the x coordinate
//   param_3: exclusive upper bound on the thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritei1D_buf(
    .param .align 16 .b8 imageWritei1D_buf_param_0[16],
    .param .u64 .ptr .surfref imageWritei1D_buf_param_1,
    .param .u32 imageWritei1D_buf_param_2,
    .param .u32 imageWritei1D_buf_param_3
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0, %order;
    .reg .b64  %img;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWritei1D_buf_param_0];
    ld.param.u64    %img,    [imageWritei1D_buf_param_1];
    ld.param.u32    %base0,  [imageWritei1D_buf_param_2];
    ld.param.u32    %limit0, [imageWritei1D_buf_param_3];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3 is presumably a driver-supplied global offset - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // Threads at or past the bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritei1D_buf_exit;

    suq.channel_order.b32 %order, [%img];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    add.s32 %coord0, %gid0, %base0;
    sust.p.1d_buffer.v4.b32.trap [%img, {%coord0}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWritei1D_buf_exit:
    ret;
}
// .globl imageWritef1D
// imageWritef1D: each thread stores one four-component float texel to a 1D
// surface after permuting the components by the surface's channel order.
//   param_0: the four float components (x, y, z, w)
//   param_1: destination surface
//   param_2: added to the thread index to form the x coordinate
//   param_3: exclusive upper bound on the thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, 0, 0, 0)    4275 -> (x, w, 0, 0)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritef1D(
    .param .align 16 .b8 imageWritef1D_param_0[16],
    .param .surfref imageWritef1D_param_1,
    .param .u32 imageWritef1D_param_2,
    .param .u32 imageWritef1D_param_3
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .f32  %src_x, %src_y, %src_z, %src_w;
    .reg .f32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %bits_x, %bits_y, %bits_z, %bits_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0, %order;

    ld.param.v4.f32 {%src_x, %src_y, %src_z, %src_w}, [imageWritef1D_param_0];
    ld.param.u32    %base0,  [imageWritef1D_param_2];
    ld.param.u32    %limit0, [imageWritef1D_param_3];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3 is presumably a driver-supplied global offset - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // Threads at or past the bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritef1D_exit;

    suq.channel_order.b32 %order, [imageWritef1D_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.f32 %dst_x, %src_x;
    mov.f32 %dst_y, %src_y;
    mov.f32 %dst_z, %src_z;
    mov.f32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.f32 %dst_x, %src_w;
    @%ord4273 mov.f32 %dst_y, 0f00000000;
    @%ord4273 mov.f32 %dst_z, 0f00000000;
    @%ord4273 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.f32 %dst_y, %src_w;
    @%ord4275 mov.f32 %dst_z, 0f00000000;
    @%ord4275 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.f32 %dst_x, %src_z;
    @%ord4278 mov.f32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.f32 %dst_x, %src_w;
    @%ord4279 mov.f32 %dst_y, %src_x;
    @%ord4279 mov.f32 %dst_z, %src_y;
    @%ord4279 mov.f32 %dst_w, %src_z;

    // The store takes untyped 32-bit operands, so hand over the raw bits.
    mov.b32 %bits_x, %dst_x;
    mov.b32 %bits_y, %dst_y;
    mov.b32 %bits_z, %dst_z;
    mov.b32 %bits_w, %dst_w;

    add.s32 %coord0, %gid0, %base0;
    sust.p.1d.v4.b32.trap [imageWritef1D_param_1, {%coord0}], {%bits_x, %bits_y, %bits_z, %bits_w};

$L__imageWritef1D_exit:
    ret;
}
// .globl imageWritef1D_arr
// imageWritef1D_arr: each thread stores one four-component float texel to a
// 1D-array surface after permuting the components by channel order.
//   param_0: the four float components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, 0, 0, 0)    4275 -> (x, w, 0, 0)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritef1D_arr(
    .param .align 16 .b8 imageWritef1D_arr_param_0[16],
    .param .surfref imageWritef1D_arr_param_1,
    .param .u32 imageWritef1D_arr_param_2,
    .param .u32 imageWritef1D_arr_param_3,
    .param .u32 imageWritef1D_arr_param_4,
    .param .u32 imageWritef1D_arr_param_5
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .f32  %src_x, %src_y, %src_z, %src_w;
    .reg .f32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %bits_x, %bits_y, %bits_z, %bits_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %order;

    ld.param.v4.f32 {%src_x, %src_y, %src_z, %src_w}, [imageWritef1D_arr_param_0];
    ld.param.u32    %base0,  [imageWritef1D_arr_param_2];
    ld.param.u32    %limit0, [imageWritef1D_arr_param_3];
    ld.param.u32    %base1,  [imageWritef1D_arr_param_4];
    ld.param.u32    %limit1, [imageWritef1D_arr_param_5];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3/envreg4 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // Threads at or past either bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritef1D_arr_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWritef1D_arr_exit;

    suq.channel_order.b32 %order, [imageWritef1D_arr_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.f32 %dst_x, %src_x;
    mov.f32 %dst_y, %src_y;
    mov.f32 %dst_z, %src_z;
    mov.f32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.f32 %dst_x, %src_w;
    @%ord4273 mov.f32 %dst_y, 0f00000000;
    @%ord4273 mov.f32 %dst_z, 0f00000000;
    @%ord4273 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.f32 %dst_y, %src_w;
    @%ord4275 mov.f32 %dst_z, 0f00000000;
    @%ord4275 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.f32 %dst_x, %src_z;
    @%ord4278 mov.f32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.f32 %dst_x, %src_w;
    @%ord4279 mov.f32 %dst_y, %src_x;
    @%ord4279 mov.f32 %dst_z, %src_y;
    @%ord4279 mov.f32 %dst_w, %src_z;

    // The store takes untyped 32-bit operands, so hand over the raw bits.
    mov.b32 %bits_x, %dst_x;
    mov.b32 %bits_y, %dst_y;
    mov.b32 %bits_z, %dst_z;
    mov.b32 %bits_w, %dst_w;

    // The y-axis value is passed first in the a1d coordinate vector.
    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.a1d.v4.b32.trap [imageWritef1D_arr_param_1, {%coord1, %coord0}], {%bits_x, %bits_y, %bits_z, %bits_w};

$L__imageWritef1D_arr_exit:
    ret;
}
// .globl imageWritef1D_buf
// imageWritef1D_buf: each thread stores one four-component float texel to a
// 1D-buffer surface after permuting the components by channel order.
//   param_0: the four float components (x, y, z, w)
//   param_1: destination surface, passed as a 64-bit value
//   param_2: added to the thread index to form the x coordinate
//   param_3: exclusive upper bound on the thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, 0, 0, 0)    4275 -> (x, w, 0, 0)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritef1D_buf(
    .param .align 16 .b8 imageWritef1D_buf_param_0[16],
    .param .u64 .ptr .surfref imageWritef1D_buf_param_1,
    .param .u32 imageWritef1D_buf_param_2,
    .param .u32 imageWritef1D_buf_param_3
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .f32  %src_x, %src_y, %src_z, %src_w;
    .reg .f32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %bits_x, %bits_y, %bits_z, %bits_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0, %order;
    .reg .b64  %img;

    ld.param.v4.f32 {%src_x, %src_y, %src_z, %src_w}, [imageWritef1D_buf_param_0];
    ld.param.u64    %img,    [imageWritef1D_buf_param_1];
    ld.param.u32    %base0,  [imageWritef1D_buf_param_2];
    ld.param.u32    %limit0, [imageWritef1D_buf_param_3];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3 is presumably a driver-supplied global offset - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // Threads at or past the bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritef1D_buf_exit;

    suq.channel_order.b32 %order, [%img];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.f32 %dst_x, %src_x;
    mov.f32 %dst_y, %src_y;
    mov.f32 %dst_z, %src_z;
    mov.f32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.f32 %dst_x, %src_w;
    @%ord4273 mov.f32 %dst_y, 0f00000000;
    @%ord4273 mov.f32 %dst_z, 0f00000000;
    @%ord4273 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.f32 %dst_y, %src_w;
    @%ord4275 mov.f32 %dst_z, 0f00000000;
    @%ord4275 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.f32 %dst_x, %src_z;
    @%ord4278 mov.f32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.f32 %dst_x, %src_w;
    @%ord4279 mov.f32 %dst_y, %src_x;
    @%ord4279 mov.f32 %dst_z, %src_y;
    @%ord4279 mov.f32 %dst_w, %src_z;

    // The store takes untyped 32-bit operands, so hand over the raw bits.
    mov.b32 %bits_x, %dst_x;
    mov.b32 %bits_y, %dst_y;
    mov.b32 %bits_z, %dst_z;
    mov.b32 %bits_w, %dst_w;

    add.s32 %coord0, %gid0, %base0;
    sust.p.1d_buffer.v4.b32.trap [%img, {%coord0}], {%bits_x, %bits_y, %bits_z, %bits_w};

$L__imageWritef1D_buf_exit:
    ret;
}
// .globl imageWriteui1D
// imageWriteui1D: each thread stores one four-component 32-bit texel to a 1D
// surface after permuting the components by the surface's channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2: added to the thread index to form the x coordinate
//   param_3: exclusive upper bound on the thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWriteui1D(
    .param .align 16 .b8 imageWriteui1D_param_0[16],
    .param .surfref imageWriteui1D_param_1,
    .param .u32 imageWriteui1D_param_2,
    .param .u32 imageWriteui1D_param_3
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0, %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWriteui1D_param_0];
    ld.param.u32    %base0,  [imageWriteui1D_param_2];
    ld.param.u32    %limit0, [imageWriteui1D_param_3];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3 is presumably a driver-supplied global offset - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // Threads at or past the bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWriteui1D_exit;

    suq.channel_order.b32 %order, [imageWriteui1D_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    add.s32 %coord0, %gid0, %base0;
    sust.p.1d.v4.b32.trap [imageWriteui1D_param_1, {%coord0}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWriteui1D_exit:
    ret;
}
// .globl imageWriteui1D_arr
// imageWriteui1D_arr: each thread stores one four-component 32-bit texel to a
// 1D-array surface after permuting the components by channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWriteui1D_arr(
    .param .align 16 .b8 imageWriteui1D_arr_param_0[16],
    .param .surfref imageWriteui1D_arr_param_1,
    .param .u32 imageWriteui1D_arr_param_2,
    .param .u32 imageWriteui1D_arr_param_3,
    .param .u32 imageWriteui1D_arr_param_4,
    .param .u32 imageWriteui1D_arr_param_5
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWriteui1D_arr_param_0];
    ld.param.u32    %base0,  [imageWriteui1D_arr_param_2];
    ld.param.u32    %limit0, [imageWriteui1D_arr_param_3];
    ld.param.u32    %base1,  [imageWriteui1D_arr_param_4];
    ld.param.u32    %limit1, [imageWriteui1D_arr_param_5];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3/envreg4 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // Threads at or past either bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWriteui1D_arr_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWriteui1D_arr_exit;

    suq.channel_order.b32 %order, [imageWriteui1D_arr_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    // The y-axis value is passed first in the a1d coordinate vector.
    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.a1d.v4.b32.trap [imageWriteui1D_arr_param_1, {%coord1, %coord0}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWriteui1D_arr_exit:
    ret;
}
// .globl imageWriteui1D_buf
// imageWriteui1D_buf: each thread stores one four-component 32-bit texel to a
// 1D-buffer surface after permuting the components by channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface, passed as a 64-bit value
//   param_2: added to the thread index to form the x coordinate
//   param_3: exclusive upper bound on the thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWriteui1D_buf(
    .param .align 16 .b8 imageWriteui1D_buf_param_0[16],
    .param .u64 .ptr .surfref imageWriteui1D_buf_param_1,
    .param .u32 imageWriteui1D_buf_param_2,
    .param .u32 imageWriteui1D_buf_param_3
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0, %order;
    .reg .b64  %img;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWriteui1D_buf_param_0];
    ld.param.u64    %img,    [imageWriteui1D_buf_param_1];
    ld.param.u32    %base0,  [imageWriteui1D_buf_param_2];
    ld.param.u32    %limit0, [imageWriteui1D_buf_param_3];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3 is presumably a driver-supplied global offset - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // Threads at or past the bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWriteui1D_buf_exit;

    suq.channel_order.b32 %order, [%img];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    add.s32 %coord0, %gid0, %base0;
    sust.p.1d_buffer.v4.b32.trap [%img, {%coord0}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWriteui1D_buf_exit:
    ret;
}
// .globl imageWritei2D
// imageWritei2D: each thread stores one four-component 32-bit texel to a 2D
// surface after permuting the components by the surface's channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritei2D(
    .param .align 16 .b8 imageWritei2D_param_0[16],
    .param .surfref imageWritei2D_param_1,
    .param .u32 imageWritei2D_param_2,
    .param .u32 imageWritei2D_param_3,
    .param .u32 imageWritei2D_param_4,
    .param .u32 imageWritei2D_param_5
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWritei2D_param_0];
    ld.param.u32    %base0,  [imageWritei2D_param_2];
    ld.param.u32    %limit0, [imageWritei2D_param_3];
    ld.param.u32    %base1,  [imageWritei2D_param_4];
    ld.param.u32    %limit1, [imageWritei2D_param_5];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3/envreg4 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // Threads at or past either bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritei2D_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWritei2D_exit;

    suq.channel_order.b32 %order, [imageWritei2D_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.2d.v4.b32.trap [imageWritei2D_param_1, {%coord0, %coord1}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWritei2D_exit:
    ret;
}
// .globl imageWritei2D_arr
// imageWritei2D_arr: each thread stores one four-component 32-bit texel to a
// 2D-array surface after permuting the components by channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
//   param_6 / param_7: offset added to, and exclusive bound on, the z-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritei2D_arr(
    .param .align 16 .b8 imageWritei2D_arr_param_0[16],
    .param .surfref imageWritei2D_arr_param_1,
    .param .u32 imageWritei2D_arr_param_2,
    .param .u32 imageWritei2D_arr_param_3,
    .param .u32 imageWritei2D_arr_param_4,
    .param .u32 imageWritei2D_arr_param_5,
    .param .u32 imageWritei2D_arr_param_6,
    .param .u32 imageWritei2D_arr_param_7
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %gid2, %base2, %limit2, %coord2;
    .reg .b32  %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWritei2D_arr_param_0];
    ld.param.u32    %base0,  [imageWritei2D_arr_param_2];
    ld.param.u32    %limit0, [imageWritei2D_arr_param_3];
    ld.param.u32    %base1,  [imageWritei2D_arr_param_4];
    ld.param.u32    %limit1, [imageWritei2D_arr_param_5];
    ld.param.u32    %base2,  [imageWritei2D_arr_param_6];
    ld.param.u32    %limit2, [imageWritei2D_arr_param_7];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3..envreg5 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // gid2 = ntid.z * ctaid.z + (tid.z + envreg5)
    mov.u32     %local, %tid.z;
    mov.b32     %goff, %envreg5;
    add.s32     %gid2, %local, %goff;
    mov.u32     %cta_size, %ntid.z;
    mov.u32     %cta, %ctaid.z;
    mad.lo.s32  %gid2, %cta_size, %cta, %gid2;

    // Threads at or past any of the three bounds do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritei2D_arr_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWritei2D_arr_exit;
    setp.ge.s32 %outside, %gid2, %limit2;
    @%outside bra $L__imageWritei2D_arr_exit;

    suq.channel_order.b32 %order, [imageWritei2D_arr_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    // a2d coordinate vector: z-axis value first, then x, then y; the fourth
    // slot repeats the y value.
    add.s32 %coord2, %gid2, %base2;
    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.a2d.v4.b32.trap [imageWritei2D_arr_param_1, {%coord2, %coord0, %coord1, %coord1}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWritei2D_arr_exit:
    ret;
}
// .globl imageWritef2D
// imageWritef2D: each thread stores one four-component float texel to a 2D
// surface after permuting the components by the surface's channel order.
//   param_0: the four float components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, 0, 0, 0)    4275 -> (x, w, 0, 0)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritef2D(
    .param .align 16 .b8 imageWritef2D_param_0[16],
    .param .surfref imageWritef2D_param_1,
    .param .u32 imageWritef2D_param_2,
    .param .u32 imageWritef2D_param_3,
    .param .u32 imageWritef2D_param_4,
    .param .u32 imageWritef2D_param_5
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .f32  %src_x, %src_y, %src_z, %src_w;
    .reg .f32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %bits_x, %bits_y, %bits_z, %bits_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %order;

    ld.param.v4.f32 {%src_x, %src_y, %src_z, %src_w}, [imageWritef2D_param_0];
    ld.param.u32    %base0,  [imageWritef2D_param_2];
    ld.param.u32    %limit0, [imageWritef2D_param_3];
    ld.param.u32    %base1,  [imageWritef2D_param_4];
    ld.param.u32    %limit1, [imageWritef2D_param_5];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3/envreg4 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // Threads at or past either bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritef2D_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWritef2D_exit;

    suq.channel_order.b32 %order, [imageWritef2D_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.f32 %dst_x, %src_x;
    mov.f32 %dst_y, %src_y;
    mov.f32 %dst_z, %src_z;
    mov.f32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.f32 %dst_x, %src_w;
    @%ord4273 mov.f32 %dst_y, 0f00000000;
    @%ord4273 mov.f32 %dst_z, 0f00000000;
    @%ord4273 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.f32 %dst_y, %src_w;
    @%ord4275 mov.f32 %dst_z, 0f00000000;
    @%ord4275 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.f32 %dst_x, %src_z;
    @%ord4278 mov.f32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.f32 %dst_x, %src_w;
    @%ord4279 mov.f32 %dst_y, %src_x;
    @%ord4279 mov.f32 %dst_z, %src_y;
    @%ord4279 mov.f32 %dst_w, %src_z;

    // The store takes untyped 32-bit operands, so hand over the raw bits.
    mov.b32 %bits_x, %dst_x;
    mov.b32 %bits_y, %dst_y;
    mov.b32 %bits_z, %dst_z;
    mov.b32 %bits_w, %dst_w;

    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.2d.v4.b32.trap [imageWritef2D_param_1, {%coord0, %coord1}], {%bits_x, %bits_y, %bits_z, %bits_w};

$L__imageWritef2D_exit:
    ret;
}
// .globl imageWritef2D_arr
// imageWritef2D_arr: each thread stores one four-component float texel to a
// 2D-array surface after permuting the components by channel order.
//   param_0: the four float components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
//   param_6 / param_7: offset added to, and exclusive bound on, the z-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, 0, 0, 0)    4275 -> (x, w, 0, 0)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWritef2D_arr(
    .param .align 16 .b8 imageWritef2D_arr_param_0[16],
    .param .surfref imageWritef2D_arr_param_1,
    .param .u32 imageWritef2D_arr_param_2,
    .param .u32 imageWritef2D_arr_param_3,
    .param .u32 imageWritef2D_arr_param_4,
    .param .u32 imageWritef2D_arr_param_5,
    .param .u32 imageWritef2D_arr_param_6,
    .param .u32 imageWritef2D_arr_param_7
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .f32  %src_x, %src_y, %src_z, %src_w;
    .reg .f32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %bits_x, %bits_y, %bits_z, %bits_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %gid2, %base2, %limit2, %coord2;
    .reg .b32  %order;

    ld.param.v4.f32 {%src_x, %src_y, %src_z, %src_w}, [imageWritef2D_arr_param_0];
    ld.param.u32    %base0,  [imageWritef2D_arr_param_2];
    ld.param.u32    %limit0, [imageWritef2D_arr_param_3];
    ld.param.u32    %base1,  [imageWritef2D_arr_param_4];
    ld.param.u32    %limit1, [imageWritef2D_arr_param_5];
    ld.param.u32    %base2,  [imageWritef2D_arr_param_6];
    ld.param.u32    %limit2, [imageWritef2D_arr_param_7];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3..envreg5 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // gid2 = ntid.z * ctaid.z + (tid.z + envreg5)
    mov.u32     %local, %tid.z;
    mov.b32     %goff, %envreg5;
    add.s32     %gid2, %local, %goff;
    mov.u32     %cta_size, %ntid.z;
    mov.u32     %cta, %ctaid.z;
    mad.lo.s32  %gid2, %cta_size, %cta, %gid2;

    // Threads at or past any of the three bounds do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWritef2D_arr_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWritef2D_arr_exit;
    setp.ge.s32 %outside, %gid2, %limit2;
    @%outside bra $L__imageWritef2D_arr_exit;

    suq.channel_order.b32 %order, [imageWritef2D_arr_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.f32 %dst_x, %src_x;
    mov.f32 %dst_y, %src_y;
    mov.f32 %dst_z, %src_z;
    mov.f32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.f32 %dst_x, %src_w;
    @%ord4273 mov.f32 %dst_y, 0f00000000;
    @%ord4273 mov.f32 %dst_z, 0f00000000;
    @%ord4273 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.f32 %dst_y, %src_w;
    @%ord4275 mov.f32 %dst_z, 0f00000000;
    @%ord4275 mov.f32 %dst_w, 0f00000000;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.f32 %dst_x, %src_z;
    @%ord4278 mov.f32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.f32 %dst_x, %src_w;
    @%ord4279 mov.f32 %dst_y, %src_x;
    @%ord4279 mov.f32 %dst_z, %src_y;
    @%ord4279 mov.f32 %dst_w, %src_z;

    // The store takes untyped 32-bit operands, so hand over the raw bits.
    mov.b32 %bits_x, %dst_x;
    mov.b32 %bits_y, %dst_y;
    mov.b32 %bits_z, %dst_z;
    mov.b32 %bits_w, %dst_w;

    // a2d coordinate vector: z-axis value first, then x, then y; the fourth
    // slot repeats the y value.
    add.s32 %coord2, %gid2, %base2;
    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.a2d.v4.b32.trap [imageWritef2D_arr_param_1, {%coord2, %coord0, %coord1, %coord1}], {%bits_x, %bits_y, %bits_z, %bits_w};

$L__imageWritef2D_arr_exit:
    ret;
}
// .globl imageWriteui2D
// imageWriteui2D: each thread stores one four-component 32-bit texel to a 2D
// surface after permuting the components by the surface's channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWriteui2D(
    .param .align 16 .b8 imageWriteui2D_param_0[16],
    .param .surfref imageWriteui2D_param_1,
    .param .u32 imageWriteui2D_param_2,
    .param .u32 imageWriteui2D_param_3,
    .param .u32 imageWriteui2D_param_4,
    .param .u32 imageWriteui2D_param_5
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWriteui2D_param_0];
    ld.param.u32    %base0,  [imageWriteui2D_param_2];
    ld.param.u32    %limit0, [imageWriteui2D_param_3];
    ld.param.u32    %base1,  [imageWriteui2D_param_4];
    ld.param.u32    %limit1, [imageWriteui2D_param_5];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3/envreg4 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // Threads at or past either bound do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWriteui2D_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWriteui2D_exit;

    suq.channel_order.b32 %order, [imageWriteui2D_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.2d.v4.b32.trap [imageWriteui2D_param_1, {%coord0, %coord1}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWriteui2D_exit:
    ret;
}
// .globl imageWriteui2D_arr
// imageWriteui2D_arr: each thread stores one four-component 32-bit texel to a
// 2D-array surface after permuting the components by channel order.
//   param_0: the four components (x, y, z, w)
//   param_1: destination surface
//   param_2 / param_3: offset added to, and exclusive bound on, the x-axis thread index
//   param_4 / param_5: offset added to, and exclusive bound on, the y-axis thread index
//   param_6 / param_7: offset added to, and exclusive bound on, the z-axis thread index
// Channel-order mapping (value -> stored components):
//   4273 -> (w, y, z, w)    4275 -> (x, w, z, w)
//   4278 -> (z, y, x, w)    4279 -> (w, x, y, z)    other -> (x, y, z, w)
// NOTE(review): these values equal CL_A/CL_RA/CL_BGRA/CL_ARGB in the OpenCL
// headers; presumably that is the enumeration suq reports - confirm.
.entry imageWriteui2D_arr(
    .param .align 16 .b8 imageWriteui2D_arr_param_0[16],
    .param .surfref imageWriteui2D_arr_param_1,
    .param .u32 imageWriteui2D_arr_param_2,
    .param .u32 imageWriteui2D_arr_param_3,
    .param .u32 imageWriteui2D_arr_param_4,
    .param .u32 imageWriteui2D_arr_param_5,
    .param .u32 imageWriteui2D_arr_param_6,
    .param .u32 imageWriteui2D_arr_param_7
)
{
    .reg .pred %outside, %ord4273, %ord4275, %ord4278, %ord4279;
    .reg .b32  %src_x, %src_y, %src_z, %src_w;
    .reg .b32  %dst_x, %dst_y, %dst_z, %dst_w;
    .reg .b32  %local, %goff, %cta, %cta_size;
    .reg .b32  %gid0, %base0, %limit0, %coord0;
    .reg .b32  %gid1, %base1, %limit1, %coord1;
    .reg .b32  %gid2, %base2, %limit2, %coord2;
    .reg .b32  %order;

    ld.param.v4.u32 {%src_x, %src_y, %src_z, %src_w}, [imageWriteui2D_arr_param_0];
    ld.param.u32    %base0,  [imageWriteui2D_arr_param_2];
    ld.param.u32    %limit0, [imageWriteui2D_arr_param_3];
    ld.param.u32    %base1,  [imageWriteui2D_arr_param_4];
    ld.param.u32    %limit1, [imageWriteui2D_arr_param_5];
    ld.param.u32    %base2,  [imageWriteui2D_arr_param_6];
    ld.param.u32    %limit2, [imageWriteui2D_arr_param_7];

    // gid0 = ntid.x * ctaid.x + (tid.x + envreg3)
    // (envreg3..envreg5 are presumably driver-supplied global offsets - confirm)
    mov.u32     %local, %tid.x;
    mov.b32     %goff, %envreg3;
    add.s32     %gid0, %local, %goff;
    mov.u32     %cta_size, %ntid.x;
    mov.u32     %cta, %ctaid.x;
    mad.lo.s32  %gid0, %cta_size, %cta, %gid0;

    // gid1 = ntid.y * ctaid.y + (tid.y + envreg4)
    mov.u32     %local, %tid.y;
    mov.b32     %goff, %envreg4;
    add.s32     %gid1, %local, %goff;
    mov.u32     %cta_size, %ntid.y;
    mov.u32     %cta, %ctaid.y;
    mad.lo.s32  %gid1, %cta_size, %cta, %gid1;

    // gid2 = ntid.z * ctaid.z + (tid.z + envreg5)
    mov.u32     %local, %tid.z;
    mov.b32     %goff, %envreg5;
    add.s32     %gid2, %local, %goff;
    mov.u32     %cta_size, %ntid.z;
    mov.u32     %cta, %ctaid.z;
    mad.lo.s32  %gid2, %cta_size, %cta, %gid2;

    // Threads at or past any of the three bounds do nothing.
    setp.ge.s32 %outside, %gid0, %limit0;
    @%outside bra $L__imageWriteui2D_arr_exit;
    setp.ge.s32 %outside, %gid1, %limit1;
    @%outside bra $L__imageWriteui2D_arr_exit;
    setp.ge.s32 %outside, %gid2, %limit2;
    @%outside bra $L__imageWriteui2D_arr_exit;

    suq.channel_order.b32 %order, [imageWriteui2D_arr_param_1];

    // Identity mapping by default; the four tests below are mutually exclusive
    // and every override reads only the untouched %src_* registers.
    mov.u32 %dst_x, %src_x;
    mov.u32 %dst_y, %src_y;
    mov.u32 %dst_z, %src_z;
    mov.u32 %dst_w, %src_w;

    setp.eq.s32 %ord4273, %order, 4273;
    @%ord4273 mov.u32 %dst_x, %src_w;

    setp.eq.s32 %ord4275, %order, 4275;
    @%ord4275 mov.u32 %dst_y, %src_w;

    setp.eq.s32 %ord4278, %order, 4278;
    @%ord4278 mov.u32 %dst_x, %src_z;
    @%ord4278 mov.u32 %dst_z, %src_x;

    setp.eq.s32 %ord4279, %order, 4279;
    @%ord4279 mov.u32 %dst_x, %src_w;
    @%ord4279 mov.u32 %dst_y, %src_x;
    @%ord4279 mov.u32 %dst_z, %src_y;
    @%ord4279 mov.u32 %dst_w, %src_z;

    // a2d coordinate vector: z-axis value first, then x, then y; the fourth
    // slot repeats the y value.
    add.s32 %coord2, %gid2, %base2;
    add.s32 %coord1, %gid1, %base1;
    add.s32 %coord0, %gid0, %base0;
    sust.p.a2d.v4.b32.trap [imageWriteui2D_arr_param_1, {%coord2, %coord0, %coord1, %coord1}], {%dst_x, %dst_y, %dst_z, %dst_w};

$L__imageWriteui2D_arr_exit:
    ret;
}
// .globl bufferFill
// bufferFill: each thread copies the leading bytes of a 128-byte parameter
// blob into one slot of a global buffer.
//   param_0: destination pointer in global memory
//   param_1: 128-byte blob holding the bytes to copy
//   param_2: byte count per slot; only 1, 2, 4, 8, 16, 32, 64 and 128 are
//            handled, any other value makes the thread return without storing
//   param_3: exclusive upper bound on the thread index
//   param_4: added to the thread index to pick the destination slot
// Destination address = param_0 + (index + param_4) * param_2.
// NOTE(review): param_0 is declared `.align 1`, yet every path wider than two
// bytes stores with st.global.u32; presumably callers guarantee 4-byte
// alignment of the destination - confirm.
.entry bufferFill(
.param .u64 .ptr .global .align 1 bufferFill_param_0,
.param .align 4 .b8 bufferFill_param_1[128],
.param .u32 bufferFill_param_2,
.param .u32 bufferFill_param_3,
.param .u32 bufferFill_param_4
)
{
.reg .pred %p<14>;
.reg .b16 %rs<3>;
.reg .b32 %r<54>;
.reg .b64 %rd<27>;
// %rd8 = destination base, %rd9 = address of the parameter blob (read back
// below with ld.param through the register), %r4 = byte count,
// %r6 = index bound, %r5 = slot offset.
ld.param.u64 %rd8, [bufferFill_param_0];
mov.b64 %rd9, bufferFill_param_1;
ld.param.u32 %r4, [bufferFill_param_2];
ld.param.u32 %r6, [bufferFill_param_3];
ld.param.u32 %r5, [bufferFill_param_4];
// %r1 = ntid.x * ctaid.x + (tid.x + envreg3)
// (envreg3 is presumably a driver-supplied global offset - confirm)
mov.u32 %r7, %ctaid.x;
mov.u32 %r8, %ntid.x;
mov.u32 %r9, %tid.x;
mov.b32 %r10, %envreg3;
add.s32 %r11, %r9, %r10;
mad.lo.s32 %r1, %r8, %r7, %r11;
// Threads at or past the bound do nothing.
setp.ge.s32 %p1, %r1, %r6;
@%p1 bra $L__BB15_21;
// Dispatch on the byte count: first split at >15, then at >3 or >63, then
// test the exact values.
setp.gt.s32 %p2, %r4, 15;
@%p2 bra $L__BB15_9;
setp.gt.s32 %p8, %r4, 3;
@%p8 bra $L__BB15_6;
setp.eq.s32 %p11, %r4, 1;
@%p11 bra $L__BB15_20;
setp.eq.s32 %p12, %r4, 2;
@%p12 bra $L__BB15_5;
bra.uni $L__BB15_21;
$L__BB15_5:
// 2-byte slot: one 16-bit load and store.
ld.param.u16 %rs1, [%rd9];
add.s32 %r51, %r1, %r5;
mul.wide.s32 %rd21, %r51, 2;
add.s64 %rd22, %rd8, %rd21;
st.global.u16 [%rd22], %rs1;
bra.uni $L__BB15_21;
$L__BB15_9:
// Byte count is above 15: pick between 16, 32 and the >63 group.
setp.gt.s32 %p3, %r4, 63;
@%p3 bra $L__BB15_13;
setp.eq.s32 %p6, %r4, 16;
@%p6 bra $L__BB15_18;
setp.eq.s32 %p7, %r4, 32;
@%p7 bra $L__BB15_12;
bra.uni $L__BB15_21;
$L__BB15_12:
// 32-byte slot: eight 32-bit words, all loaded before any is stored.
add.s32 %r32, %r1, %r5;
mul.wide.s32 %rd13, %r32, 32;
add.s64 %rd14, %rd8, %rd13;
ld.param.u32 %r33, [%rd9];
ld.param.u32 %r34, [%rd9+4];
ld.param.u32 %r35, [%rd9+8];
ld.param.u32 %r36, [%rd9+12];
ld.param.u32 %r37, [%rd9+16];
ld.param.u32 %r38, [%rd9+20];
ld.param.u32 %r39, [%rd9+24];
ld.param.u32 %r40, [%rd9+28];
st.global.u32 [%rd14], %r33;
st.global.u32 [%rd14+4], %r34;
st.global.u32 [%rd14+8], %r35;
st.global.u32 [%rd14+12], %r36;
st.global.u32 [%rd14+16], %r37;
st.global.u32 [%rd14+20], %r38;
st.global.u32 [%rd14+24], %r39;
st.global.u32 [%rd14+28], %r40;
bra.uni $L__BB15_21;
$L__BB15_6:
// Byte count is in 4..15: only 4 and 8 are handled.
setp.eq.s32 %p9, %r4, 4;
@%p9 bra $L__BB15_19;
setp.eq.s32 %p10, %r4, 8;
@%p10 bra $L__BB15_8;
bra.uni $L__BB15_21;
$L__BB15_8:
// 8-byte slot: two 32-bit words.
add.s32 %r46, %r1, %r5;
mul.wide.s32 %rd17, %r46, 8;
add.s64 %rd18, %rd8, %rd17;
ld.param.u32 %r47, [%rd9];
ld.param.u32 %r48, [%rd9+4];
st.global.u32 [%rd18], %r47;
st.global.u32 [%rd18+4], %r48;
bra.uni $L__BB15_21;
$L__BB15_13:
// Byte count is above 63: only 64 and 128 are handled.
setp.eq.s32 %p4, %r4, 64;
@%p4 bra $L__BB15_17;
setp.ne.s32 %p5, %r4, 128;
@%p5 bra $L__BB15_21;
// 128-byte slot: copy 32 words in a loop, advancing both the destination
// pointer (%rd26) and the blob address (%rd9) by 4 bytes per iteration.
add.s32 %r13, %r1, %r5;
mul.wide.s32 %rd10, %r13, 128;
add.s64 %rd26, %rd8, %rd10;
mov.u32 %r53, 0;
$L__BB15_16:
ld.param.u32 %r14, [%rd9];
st.global.u32 [%rd26], %r14;
add.s64 %rd26, %rd26, 4;
add.s64 %rd9, %rd9, 4;
add.s32 %r53, %r53, 1;
setp.lt.u32 %p13, %r53, 32;
@%p13 bra $L__BB15_16;
bra.uni $L__BB15_21;
$L__BB15_20:
// 1-byte slot: the slot index is sign-extended and used directly as the
// byte offset.
ld.param.u8 %rs2, [%rd9];
add.s32 %r52, %r1, %r5;
cvt.s64.s32 %rd23, %r52;
add.s64 %rd24, %rd8, %rd23;
st.global.u8 [%rd24], %rs2;
bra.uni $L__BB15_21;
$L__BB15_18:
// 16-byte slot: four 32-bit words.
add.s32 %r41, %r1, %r5;
mul.wide.s32 %rd15, %r41, 16;
add.s64 %rd16, %rd8, %rd15;
ld.param.u32 %r42, [%rd9];
ld.param.u32 %r43, [%rd9+4];
ld.param.u32 %r44, [%rd9+8];
ld.param.u32 %r45, [%rd9+12];
st.global.u32 [%rd16], %r42;
st.global.u32 [%rd16+4], %r43;
st.global.u32 [%rd16+8], %r44;
st.global.u32 [%rd16+12], %r45;
bra.uni $L__BB15_21;
$L__BB15_19:
// 4-byte slot: one 32-bit word.
ld.param.u32 %r49, [%rd9];
add.s32 %r50, %r1, %r5;
mul.wide.s32 %rd19, %r50, 4;
add.s64 %rd20, %rd8, %rd19;
st.global.u32 [%rd20], %r49;
bra.uni $L__BB15_21;
$L__BB15_17:
// 64-byte slot: sixteen 32-bit words, all loaded before any is stored.
add.s32 %r15, %r1, %r5;
mul.wide.s32 %rd11, %r15, 64;
add.s64 %rd12, %rd8, %rd11;
ld.param.u32 %r16, [%rd9];
ld.param.u32 %r17, [%rd9+4];
ld.param.u32 %r18, [%rd9+8];
ld.param.u32 %r19, [%rd9+12];
ld.param.u32 %r20, [%rd9+16];
ld.param.u32 %r21, [%rd9+20];
ld.param.u32 %r22, [%rd9+24];
ld.param.u32 %r23, [%rd9+28];
ld.param.u32 %r24, [%rd9+32];
ld.param.u32 %r25, [%rd9+36];
ld.param.u32 %r26, [%rd9+40];
ld.param.u32 %r27, [%rd9+44];
ld.param.u32 %r28, [%rd9+48];
ld.param.u32 %r29, [%rd9+52];
ld.param.u32 %r30, [%rd9+56];
ld.param.u32 %r31, [%rd9+60];
st.global.u32 [%rd12], %r16;
st.global.u32 [%rd12+4], %r17;
st.global.u32 [%rd12+8], %r18;
st.global.u32 [%rd12+12], %r19;
st.global.u32 [%rd12+16], %r20;
st.global.u32 [%rd12+20], %r21;
st.global.u32 [%rd12+24], %r22;
st.global.u32 [%rd12+28], %r23;
st.global.u32 [%rd12+32], %r24;
st.global.u32 [%rd12+36], %r25;
st.global.u32 [%rd12+40], %r26;
st.global.u32 [%rd12+44], %r27;
st.global.u32 [%rd12+48], %r28;
st.global.u32 [%rd12+52], %r29;
st.global.u32 [%rd12+56], %r30;
st.global.u32 [%rd12+60], %r31;
$L__BB15_21:
// Common exit for every path.
ret;
}
ELF3 � � �� @v =<