// [Binary ComputeCache entry header stripped; legible fragments: Jun 9 2023 00:08:31, HOST64, sm_61]
//
// Generated by LLVM NVPTX Back-End
//
.version 3.2
.target sm_30
.address_size 64
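//
// The entries below are bilinear subsampling (downscale) kernels, one per
// plane format; the names encode input/output bit depth and channel count
// (8_8, 16_16, 8_16), with "_2"/"_4" multi-channel, "_u"/"_v" single-channel
// extraction, and "_c"/"_p2" variants that differ in rounding bias and
// output indexing (see the per-entry notes). This looks like a JIT cache
// entry from a video scaler (e.g. FFmpeg's scale_cuda filter); that origin
// is an inference from the kernel names, as is the parameter layout below:
//
//   param_0           source texture object
//   param_1           destination pointer
//   param_2, param_3  destination width, height
//   param_4           destination pitch in bytes
//   param_5, param_6  source width, height
//   param_7           declared but never loaded in the entries shown
//
// Common body, as a CUDA sketch of the baseline 8_8 entry (illustrative
// names, not recovered source):
//
//   int x = blockIdx.x * blockDim.x + threadIdx.x;
//   int y = blockIdx.y * blockDim.y + threadIdx.y;
//   if (x >= dst_width || y >= dst_height) return;          // setp/bra guard
//   float hscale = (float)src_width  / (float)dst_width;
//   float vscale = (float)src_height / (float)dst_height;
//   // clamp 0.5*(scale-1) to [0,1]; w/(w+0.5) is the tap offset in texels
//   float wh = fminf(fmaxf(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
//   float wv = fminf(fmaxf(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
//   float dx = wh / (wh + 0.5f);
//   float dy = wv / (wv + 0.5f);
//   float xi = (x + 0.5f) * hscale;   // fused into the fma.rn.f32 taps below
//   float yi = (y + 0.5f) * vscale;
//   uint4 c0 = tex2D<uint4>(tex, xi - dx, yi - dy);
//   uint4 c1 = tex2D<uint4>(tex, xi + dx, yi - dy);
//   uint4 c2 = tex2D<uint4>(tex, xi - dx, yi + dy);
//   uint4 c3 = tex2D<uint4>(tex, xi + dx, yi + dy);
//   dst[y * dst_pitch + x] =
//       ((c0.x & 255) + (c1.x & 255) + (c2.x & 255) + (c3.x & 255)) >> 2;
//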
// .globl Subsample_Bilinear_8_8
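// Baseline 8-bit -> 8-bit, single channel: truncating 4-tap average of the
// first component (no rounding bias), one byte stored at y*pitch + x.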
.visible .entry Subsample_Bilinear_8_8(
.param .u64 Subsample_Bilinear_8_8_param_0,
.param .u64 Subsample_Bilinear_8_8_param_1,
.param .u32 Subsample_Bilinear_8_8_param_2,
.param .u32 Subsample_Bilinear_8_8_param_3,
.param .u32 Subsample_Bilinear_8_8_param_4,
.param .u32 Subsample_Bilinear_8_8_param_5,
.param .u32 Subsample_Bilinear_8_8_param_6,
.param .u64 Subsample_Bilinear_8_8_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<41>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB0_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 255;
and.b32 %r34, %r21, 255;
add.s32 %r35, %r34, %r33;
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
shr.u32 %r40, %r39, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r40;
LBB0_2:
ret;
}
// .globl Subsample_Bilinear_8_8_c
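// Same taps as the baseline, but the sum is carried in 16-bit registers and
// a +2 rounding bias is added before the >>2 average.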
.visible .entry Subsample_Bilinear_8_8_c(
.param .u64 Subsample_Bilinear_8_8_c_param_0,
.param .u64 Subsample_Bilinear_8_8_c_param_1,
.param .u32 Subsample_Bilinear_8_8_c_param_2,
.param .u32 Subsample_Bilinear_8_8_c_param_3,
.param .u32 Subsample_Bilinear_8_8_c_param_4,
.param .u32 Subsample_Bilinear_8_8_c_param_5,
.param .u32 Subsample_Bilinear_8_8_c_param_6,
.param .u64 Subsample_Bilinear_8_8_c_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_c_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_c_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB1_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_c_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_c_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_c_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_c_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_c_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r29;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
and.b16 %rs7, %rs3, 255;
and.b16 %rs8, %rs4, 255;
add.s16 %rs9, %rs5, %rs6;
add.s16 %rs10, %rs9, %rs7;
add.s16 %rs11, %rs10, %rs8;
add.s16 %rs12, %rs11, 2;
shr.u16 %rs13, %rs12, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %rs13;
LBB1_2:
ret;
}
// .globl Subsample_Bilinear_8_8_p2
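// Rounded like _c, but x is doubled before indexing, so the byte lands at
// y*pitch + 2*x: one channel of an interleaved (semi-planar) byte pair.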
.visible .entry Subsample_Bilinear_8_8_p2(
.param .u64 Subsample_Bilinear_8_8_p2_param_0,
.param .u64 Subsample_Bilinear_8_8_p2_param_1,
.param .u32 Subsample_Bilinear_8_8_p2_param_2,
.param .u32 Subsample_Bilinear_8_8_p2_param_3,
.param .u32 Subsample_Bilinear_8_8_p2_param_4,
.param .u32 Subsample_Bilinear_8_8_p2_param_5,
.param .u32 Subsample_Bilinear_8_8_p2_param_6,
.param .u64 Subsample_Bilinear_8_8_p2_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<34>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_p2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_p2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB2_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_p2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_p2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_p2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_p2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_p2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r29;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
and.b16 %rs7, %rs3, 255;
and.b16 %rs8, %rs4, 255;
add.s16 %rs9, %rs5, %rs6;
add.s16 %rs10, %rs9, %rs7;
add.s16 %rs11, %rs10, %rs8;
add.s16 %rs12, %rs11, 2;
shr.u16 %rs13, %rs12, 2;
mul.wide.s32 %rd8, %r2, %r5;
shl.b32 %r33, %r1, 1;
cvt.s64.s32 %rd9, %r33;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %rs13;
LBB2_2:
ret;
}
// .globl Subsample_Bilinear_8_8_2
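// Two-channel: rounded averages of the first two components of each tap,
// stored together as a v2.u8 (destination indexed in 2-byte elements).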
.visible .entry Subsample_Bilinear_8_8_2(
.param .u64 Subsample_Bilinear_8_8_2_param_0,
.param .u64 Subsample_Bilinear_8_8_2_param_1,
.param .u32 Subsample_Bilinear_8_8_2_param_2,
.param .u32 Subsample_Bilinear_8_8_2_param_3,
.param .u32 Subsample_Bilinear_8_8_2_param_4,
.param .u32 Subsample_Bilinear_8_8_2_param_5,
.param .u32 Subsample_Bilinear_8_8_2_param_6,
.param .u64 Subsample_Bilinear_8_8_2_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<27>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB3_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs3, %r21;
cvt.u16.u32 %rs4, %r22;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs5, %r25;
cvt.u16.u32 %rs6, %r26;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs7, %r29;
cvt.u16.u32 %rs8, %r30;
and.b16 %rs9, %rs1, 255;
and.b16 %rs10, %rs3, 255;
and.b16 %rs11, %rs5, 255;
and.b16 %rs12, %rs7, 255;
add.s16 %rs13, %rs9, %rs10;
add.s16 %rs14, %rs13, %rs11;
add.s16 %rs15, %rs14, %rs12;
add.s16 %rs16, %rs15, 2;
and.b16 %rs17, %rs2, 255;
and.b16 %rs18, %rs4, 255;
and.b16 %rs19, %rs6, 255;
and.b16 %rs20, %rs8, 255;
add.s16 %rs21, %rs17, %rs18;
add.s16 %rs22, %rs21, %rs19;
add.s16 %rs23, %rs22, %rs20;
add.s16 %rs24, %rs23, 2;
shr.u16 %rs25, %rs16, 2;
shr.u16 %rs26, %rs24, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u8 [%rd15], {%rs25, %rs26};
LBB3_2:
ret;
}
// .globl Subsample_Bilinear_8_8_2_u
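// Keeps only the first component of each tap (the _u suffix suggests the U
// channel of an interleaved pair); rounded average, single-byte store.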
.visible .entry Subsample_Bilinear_8_8_2_u(
.param .u64 Subsample_Bilinear_8_8_2_u_param_0,
.param .u64 Subsample_Bilinear_8_8_2_u_param_1,
.param .u32 Subsample_Bilinear_8_8_2_u_param_2,
.param .u32 Subsample_Bilinear_8_8_2_u_param_3,
.param .u32 Subsample_Bilinear_8_8_2_u_param_4,
.param .u32 Subsample_Bilinear_8_8_2_u_param_5,
.param .u32 Subsample_Bilinear_8_8_2_u_param_6,
.param .u64 Subsample_Bilinear_8_8_2_u_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_2_u_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_2_u_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB4_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_2_u_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_2_u_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_2_u_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_2_u_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_2_u_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r29;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
and.b16 %rs7, %rs3, 255;
and.b16 %rs8, %rs4, 255;
add.s16 %rs9, %rs5, %rs6;
add.s16 %rs10, %rs9, %rs7;
add.s16 %rs11, %rs10, %rs8;
add.s16 %rs12, %rs11, 2;
shr.u16 %rs13, %rs12, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %rs13;
LBB4_2:
ret;
}
// .globl Subsample_Bilinear_8_8_2_v
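// As _2_u, but keeps the second component of each tap (%r18/%r22/%r26/%r30).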
.visible .entry Subsample_Bilinear_8_8_2_v(
.param .u64 Subsample_Bilinear_8_8_2_v_param_0,
.param .u64 Subsample_Bilinear_8_8_2_v_param_1,
.param .u32 Subsample_Bilinear_8_8_2_v_param_2,
.param .u32 Subsample_Bilinear_8_8_2_v_param_3,
.param .u32 Subsample_Bilinear_8_8_2_v_param_4,
.param .u32 Subsample_Bilinear_8_8_2_v_param_5,
.param .u32 Subsample_Bilinear_8_8_2_v_param_6,
.param .u64 Subsample_Bilinear_8_8_2_v_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_2_v_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_2_v_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB5_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_2_v_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_2_v_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_2_v_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_2_v_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_2_v_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r18;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r22;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r26;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r30;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
and.b16 %rs7, %rs3, 255;
and.b16 %rs8, %rs4, 255;
add.s16 %rs9, %rs5, %rs6;
add.s16 %rs10, %rs9, %rs7;
add.s16 %rs11, %rs10, %rs8;
add.s16 %rs12, %rs11, 2;
shr.u16 %rs13, %rs12, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %rs13;
LBB5_2:
ret;
}
// .globl Subsample_Bilinear_8_8_4
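// Four-channel: rounded averages of all four tap components, stored as a
// v4.u8 (destination indexed in 4-byte elements).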
.visible .entry Subsample_Bilinear_8_8_4(
.param .u64 Subsample_Bilinear_8_8_4_param_0,
.param .u64 Subsample_Bilinear_8_8_4_param_1,
.param .u32 Subsample_Bilinear_8_8_4_param_2,
.param .u32 Subsample_Bilinear_8_8_4_param_3,
.param .u32 Subsample_Bilinear_8_8_4_param_4,
.param .u32 Subsample_Bilinear_8_8_4_param_5,
.param .u32 Subsample_Bilinear_8_8_4_param_6,
.param .u64 Subsample_Bilinear_8_8_4_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<53>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_8_4_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_8_4_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB6_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_8_4_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_8_4_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_8_4_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_8_4_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_8_4_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.u16.u32 %rs3, %r19;
cvt.u16.u32 %rs4, %r20;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs5, %r21;
cvt.u16.u32 %rs6, %r22;
cvt.u16.u32 %rs7, %r23;
cvt.u16.u32 %rs8, %r24;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs9, %r25;
cvt.u16.u32 %rs10, %r26;
cvt.u16.u32 %rs11, %r27;
cvt.u16.u32 %rs12, %r28;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs13, %r29;
cvt.u16.u32 %rs14, %r30;
cvt.u16.u32 %rs15, %r31;
cvt.u16.u32 %rs16, %r32;
and.b16 %rs17, %rs1, 255;
and.b16 %rs18, %rs5, 255;
and.b16 %rs19, %rs9, 255;
and.b16 %rs20, %rs13, 255;
add.s16 %rs21, %rs17, %rs18;
add.s16 %rs22, %rs21, %rs19;
add.s16 %rs23, %rs22, %rs20;
add.s16 %rs24, %rs23, 2;
and.b16 %rs25, %rs2, 255;
and.b16 %rs26, %rs6, 255;
and.b16 %rs27, %rs10, 255;
and.b16 %rs28, %rs14, 255;
add.s16 %rs29, %rs25, %rs26;
add.s16 %rs30, %rs29, %rs27;
add.s16 %rs31, %rs30, %rs28;
add.s16 %rs32, %rs31, 2;
and.b16 %rs33, %rs3, 255;
and.b16 %rs34, %rs7, 255;
and.b16 %rs35, %rs11, 255;
and.b16 %rs36, %rs15, 255;
add.s16 %rs37, %rs33, %rs34;
add.s16 %rs38, %rs37, %rs35;
add.s16 %rs39, %rs38, %rs36;
add.s16 %rs40, %rs39, 2;
and.b16 %rs41, %rs4, 255;
and.b16 %rs42, %rs8, 255;
and.b16 %rs43, %rs12, 255;
and.b16 %rs44, %rs16, 255;
add.s16 %rs45, %rs41, %rs42;
add.s16 %rs46, %rs45, %rs43;
add.s16 %rs47, %rs46, %rs44;
add.s16 %rs48, %rs47, 2;
shr.u16 %rs49, %rs24, 2;
shr.u16 %rs50, %rs32, 2;
shr.u16 %rs51, %rs40, 2;
shr.u16 %rs52, %rs48, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u8 [%rd15], {%rs49, %rs50, %rs51, %rs52};
LBB6_2:
ret;
}
// .globl Subsample_Bilinear_16_16
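// 16-bit -> 16-bit, single channel: truncating average of the low 16 bits of
// each tap (no rounding bias), one u16 stored per thread.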
.visible .entry Subsample_Bilinear_16_16(
.param .u64 Subsample_Bilinear_16_16_param_0,
.param .u64 Subsample_Bilinear_16_16_param_1,
.param .u32 Subsample_Bilinear_16_16_param_2,
.param .u32 Subsample_Bilinear_16_16_param_3,
.param .u32 Subsample_Bilinear_16_16_param_4,
.param .u32 Subsample_Bilinear_16_16_param_5,
.param .u32 Subsample_Bilinear_16_16_param_6,
.param .u64 Subsample_Bilinear_16_16_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<41>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB7_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r34, %r33;
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
shr.u32 %r40, %r39, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r40;
LBB7_2:
ret;
}
// .globl Subsample_Bilinear_16_16_c
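// As above, with a +2 rounding bias added before the >>2 average.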
.visible .entry Subsample_Bilinear_16_16_c(
.param .u64 Subsample_Bilinear_16_16_c_param_0,
.param .u64 Subsample_Bilinear_16_16_c_param_1,
.param .u32 Subsample_Bilinear_16_16_c_param_2,
.param .u32 Subsample_Bilinear_16_16_c_param_3,
.param .u32 Subsample_Bilinear_16_16_c_param_4,
.param .u32 Subsample_Bilinear_16_16_c_param_5,
.param .u32 Subsample_Bilinear_16_16_c_param_6,
.param .u64 Subsample_Bilinear_16_16_c_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<42>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_c_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_c_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB8_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_c_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_c_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_c_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_c_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_c_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
LBB8_2:
ret;
}
// .globl Subsample_Bilinear_16_16_p2
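// Rounded; x is doubled before the 16-bit element indexing, writing every
// other u16: one channel of an interleaved 16-bit pair.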
.visible .entry Subsample_Bilinear_16_16_p2(
.param .u64 Subsample_Bilinear_16_16_p2_param_0,
.param .u64 Subsample_Bilinear_16_16_p2_param_1,
.param .u32 Subsample_Bilinear_16_16_p2_param_2,
.param .u32 Subsample_Bilinear_16_16_p2_param_3,
.param .u32 Subsample_Bilinear_16_16_p2_param_4,
.param .u32 Subsample_Bilinear_16_16_p2_param_5,
.param .u32 Subsample_Bilinear_16_16_p2_param_6,
.param .u64 Subsample_Bilinear_16_16_p2_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<43>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_p2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_p2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB9_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_p2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_p2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_p2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_p2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_p2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
shl.b32 %r42, %r1, 1;
cvt.s64.s32 %rd12, %r42;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
LBB9_2:
ret;
}
// .globl Subsample_Bilinear_16_16_2
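// Two-channel 16-bit: rounded averages of the first two components, stored
// as a v2.u16 (destination indexed in 4-byte elements).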
.visible .entry Subsample_Bilinear_16_16_2(
.param .u64 Subsample_Bilinear_16_16_2_param_0,
.param .u64 Subsample_Bilinear_16_16_2_param_1,
.param .u32 Subsample_Bilinear_16_16_2_param_2,
.param .u32 Subsample_Bilinear_16_16_2_param_3,
.param .u32 Subsample_Bilinear_16_16_2_param_4,
.param .u32 Subsample_Bilinear_16_16_2_param_5,
.param .u32 Subsample_Bilinear_16_16_2_param_6,
.param .u64 Subsample_Bilinear_16_16_2_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .f32 %f<33>;
.reg .b32 %r<51>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB10_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
and.b32 %r41, %r18, 65535;
and.b32 %r42, %r22, 65535;
and.b32 %r43, %r26, 65535;
and.b32 %r44, %r30, 65535;
add.s32 %r45, %r41, %r42;
add.s32 %r46, %r45, %r43;
add.s32 %r47, %r46, %r44;
add.s32 %r48, %r47, 2;
shr.u32 %r49, %r40, 2;
cvt.u16.u32 %rs1, %r49;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs1, %rs2};
LBB10_2:
ret;
}
// .globl Subsample_Bilinear_16_16_2_u
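// First component of each tap only, rounded, single u16 store.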
.visible .entry Subsample_Bilinear_16_16_2_u(
.param .u64 Subsample_Bilinear_16_16_2_u_param_0,
.param .u64 Subsample_Bilinear_16_16_2_u_param_1,
.param .u32 Subsample_Bilinear_16_16_2_u_param_2,
.param .u32 Subsample_Bilinear_16_16_2_u_param_3,
.param .u32 Subsample_Bilinear_16_16_2_u_param_4,
.param .u32 Subsample_Bilinear_16_16_2_u_param_5,
.param .u32 Subsample_Bilinear_16_16_2_u_param_6,
.param .u64 Subsample_Bilinear_16_16_2_u_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<42>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_2_u_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_2_u_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB11_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_2_u_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_2_u_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_2_u_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_2_u_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_2_u_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
LBB11_2:
ret;
}
// .globl Subsample_Bilinear_16_16_2_v
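// Second component of each tap only (%r18/%r22/%r26/%r30), rounded, single
// u16 store.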
.visible .entry Subsample_Bilinear_16_16_2_v(
.param .u64 Subsample_Bilinear_16_16_2_v_param_0,
.param .u64 Subsample_Bilinear_16_16_2_v_param_1,
.param .u32 Subsample_Bilinear_16_16_2_v_param_2,
.param .u32 Subsample_Bilinear_16_16_2_v_param_3,
.param .u32 Subsample_Bilinear_16_16_2_v_param_4,
.param .u32 Subsample_Bilinear_16_16_2_v_param_5,
.param .u32 Subsample_Bilinear_16_16_2_v_param_6,
.param .u64 Subsample_Bilinear_16_16_2_v_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<42>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_2_v_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_2_v_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB12_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_2_v_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_2_v_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_2_v_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_2_v_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_2_v_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r18, 65535;
and.b32 %r34, %r22, 65535;
and.b32 %r35, %r26, 65535;
and.b32 %r36, %r30, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
LBB12_2:
ret;
}
// .globl Subsample_Bilinear_16_16_4
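// Four-channel 16-bit: rounded averages of all four components, stored as a
// v4.u16 (destination indexed in 8-byte elements).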
.visible .entry Subsample_Bilinear_16_16_4(
.param .u64 Subsample_Bilinear_16_16_4_param_0,
.param .u64 Subsample_Bilinear_16_16_4_param_1,
.param .u32 Subsample_Bilinear_16_16_4_param_2,
.param .u32 Subsample_Bilinear_16_16_4_param_3,
.param .u32 Subsample_Bilinear_16_16_4_param_4,
.param .u32 Subsample_Bilinear_16_16_4_param_5,
.param .u32 Subsample_Bilinear_16_16_4_param_6,
.param .u64 Subsample_Bilinear_16_16_4_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .f32 %f<33>;
.reg .b32 %r<69>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_16_4_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_16_4_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB13_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_16_4_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_16_4_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_16_4_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_16_4_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_16_4_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
and.b32 %r41, %r18, 65535;
and.b32 %r42, %r22, 65535;
and.b32 %r43, %r26, 65535;
and.b32 %r44, %r30, 65535;
add.s32 %r45, %r41, %r42;
add.s32 %r46, %r45, %r43;
add.s32 %r47, %r46, %r44;
add.s32 %r48, %r47, 2;
and.b32 %r49, %r19, 65535;
and.b32 %r50, %r23, 65535;
and.b32 %r51, %r27, 65535;
and.b32 %r52, %r31, 65535;
add.s32 %r53, %r49, %r50;
add.s32 %r54, %r53, %r51;
add.s32 %r55, %r54, %r52;
add.s32 %r56, %r55, 2;
and.b32 %r57, %r20, 65535;
and.b32 %r58, %r24, 65535;
and.b32 %r59, %r28, 65535;
and.b32 %r60, %r32, 65535;
add.s32 %r61, %r57, %r58;
add.s32 %r62, %r61, %r59;
add.s32 %r63, %r62, %r60;
add.s32 %r64, %r63, 2;
shr.u32 %r65, %r40, 2;
cvt.u16.u32 %rs1, %r65;
shr.u32 %r66, %r48, 2;
cvt.u16.u32 %rs2, %r66;
shr.u32 %r67, %r56, 2;
cvt.u16.u32 %rs3, %r67;
shr.u32 %r68, %r64, 2;
cvt.u16.u32 %rs4, %r68;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 3;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 3;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u16 [%rd15], {%rs1, %rs2, %rs3, %rs4};
LBB13_2:
ret;
}
// .globl Subsample_Bilinear_8_16
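// 8-bit in, 16-bit out: the 4-sample sum is shifted left by 6, i.e. the
// average scaled by 256 to widen the range; no rounding bias.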
.visible .entry Subsample_Bilinear_8_16(
.param .u64 Subsample_Bilinear_8_16_param_0,
.param .u64 Subsample_Bilinear_8_16_param_1,
.param .u32 Subsample_Bilinear_8_16_param_2,
.param .u32 Subsample_Bilinear_8_16_param_3,
.param .u32 Subsample_Bilinear_8_16_param_4,
.param .u32 Subsample_Bilinear_8_16_param_5,
.param .u32 Subsample_Bilinear_8_16_param_6,
.param .u64 Subsample_Bilinear_8_16_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .f32 %f<33>;
.reg .b32 %r<40>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB14_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 255;
and.b32 %r34, %r21, 255;
add.s32 %r35, %r34, %r33;
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
shl.b16 %rs2, %rs1, 6;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs2;
LBB14_2:
ret;
}
// .globl Subsample_Bilinear_8_16_c
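// As above, plus a +128 bias after the shift (half an 8-bit step in the
// widened range).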
.visible .entry Subsample_Bilinear_8_16_c(
.param .u64 Subsample_Bilinear_8_16_c_param_0,
.param .u64 Subsample_Bilinear_8_16_c_param_1,
.param .u32 Subsample_Bilinear_8_16_c_param_2,
.param .u32 Subsample_Bilinear_8_16_c_param_3,
.param .u32 Subsample_Bilinear_8_16_c_param_4,
.param .u32 Subsample_Bilinear_8_16_c_param_5,
.param .u32 Subsample_Bilinear_8_16_c_param_6,
.param .u64 Subsample_Bilinear_8_16_c_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_c_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_c_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB15_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_c_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_c_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_c_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_c_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_c_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r29;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
add.s16 %rs7, %rs6, %rs5;
and.b16 %rs8, %rs3, 255;
add.s16 %rs9, %rs7, %rs8;
and.b16 %rs10, %rs4, 255;
add.s16 %rs11, %rs9, %rs10;
shl.b16 %rs12, %rs11, 6;
add.s16 %rs13, %rs12, 128;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs13;
LBB15_2:
ret;
}
// .globl Subsample_Bilinear_8_16_p2
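// Widening as in _c; x is doubled before indexing, writing every other u16
// element of an interleaved pair.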
.visible .entry Subsample_Bilinear_8_16_p2(
.param .u64 Subsample_Bilinear_8_16_p2_param_0,
.param .u64 Subsample_Bilinear_8_16_p2_param_1,
.param .u32 Subsample_Bilinear_8_16_p2_param_2,
.param .u32 Subsample_Bilinear_8_16_p2_param_3,
.param .u32 Subsample_Bilinear_8_16_p2_param_4,
.param .u32 Subsample_Bilinear_8_16_p2_param_5,
.param .u32 Subsample_Bilinear_8_16_p2_param_6,
.param .u64 Subsample_Bilinear_8_16_p2_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<34>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_p2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_p2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB16_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_p2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_p2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_p2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_p2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_p2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r29;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
add.s16 %rs7, %rs6, %rs5;
and.b16 %rs8, %rs3, 255;
add.s16 %rs9, %rs7, %rs8;
and.b16 %rs10, %rs4, 255;
add.s16 %rs11, %rs9, %rs10;
shl.b16 %rs12, %rs11, 6;
add.s16 %rs13, %rs12, 128;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
shl.b32 %r33, %r1, 1;
cvt.s64.s32 %rd12, %r33;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs13;
LBB16_2:
ret;
}
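// The "_p2" variants differ from the plain form only in output indexing: the
// x coordinate is doubled before addressing (shl.b32 %r33, %r1, 1 above), so
// successive threads write every other element of the row. This is
// consistent with writing one component of an interleaved, NV12-style
// semi-planar chroma plane, though that layout is an inference from the
// address arithmetic, not from the original source.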
// .globl Subsample_Bilinear_8_16_2
.visible .entry Subsample_Bilinear_8_16_2(
.param .u64 Subsample_Bilinear_8_16_2_param_0,
.param .u64 Subsample_Bilinear_8_16_2_param_1,
.param .u32 Subsample_Bilinear_8_16_2_param_2,
.param .u32 Subsample_Bilinear_8_16_2_param_3,
.param .u32 Subsample_Bilinear_8_16_2_param_4,
.param .u32 Subsample_Bilinear_8_16_2_param_5,
.param .u32 Subsample_Bilinear_8_16_2_param_6,
.param .u64 Subsample_Bilinear_8_16_2_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<27>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB17_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs3, %r21;
cvt.u16.u32 %rs4, %r22;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs5, %r25;
cvt.u16.u32 %rs6, %r26;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs7, %r29;
cvt.u16.u32 %rs8, %r30;
and.b16 %rs9, %rs1, 255;
and.b16 %rs10, %rs3, 255;
add.s16 %rs11, %rs10, %rs9;
and.b16 %rs12, %rs5, 255;
add.s16 %rs13, %rs11, %rs12;
and.b16 %rs14, %rs7, 255;
add.s16 %rs15, %rs13, %rs14;
and.b16 %rs16, %rs2, 255;
and.b16 %rs17, %rs4, 255;
add.s16 %rs18, %rs17, %rs16;
and.b16 %rs19, %rs6, 255;
add.s16 %rs20, %rs18, %rs19;
and.b16 %rs21, %rs8, 255;
add.s16 %rs22, %rs20, %rs21;
shl.b16 %rs23, %rs15, 6;
add.s16 %rs24, %rs23, 128;
shl.b16 %rs25, %rs22, 6;
add.s16 %rs26, %rs25, 128;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs24, %rs26};
LBB17_2:
ret;
}
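// Subsample_Bilinear_8_16_2 extends the template to two interleaved
// components: both .x (%r17/%r21/%r25/%r29) and .y (%r18/%r22/%r26/%r30) of
// each fetch are averaged independently, promoted from 8-bit to 16-bit range
// as (sum << 6) + 128 -- roughly avg*256 + 128, i.e. the average placed in
// the high byte with a half-step bias -- and written back with a single
// vectorized st.global.v2.u16. The pitch is pre-divided by 4
// (shr.u64 ..., 2) because rows are addressed in ushort2 elements.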
// .globl Subsample_Bilinear_8_16_2_u
.visible .entry Subsample_Bilinear_8_16_2_u(
.param .u64 Subsample_Bilinear_8_16_2_u_param_0,
.param .u64 Subsample_Bilinear_8_16_2_u_param_1,
.param .u32 Subsample_Bilinear_8_16_2_u_param_2,
.param .u32 Subsample_Bilinear_8_16_2_u_param_3,
.param .u32 Subsample_Bilinear_8_16_2_u_param_4,
.param .u32 Subsample_Bilinear_8_16_2_u_param_5,
.param .u32 Subsample_Bilinear_8_16_2_u_param_6,
.param .u64 Subsample_Bilinear_8_16_2_u_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_2_u_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_2_u_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB18_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_2_u_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_2_u_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_2_u_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_2_u_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_2_u_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r29;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
and.b16 %rs7, %rs3, 255;
and.b16 %rs8, %rs4, 255;
add.s16 %rs9, %rs6, %rs5;
add.s16 %rs10, %rs9, %rs7;
add.s16 %rs11, %rs10, %rs8;
shl.b16 %rs12, %rs11, 6;
add.s16 %rs13, %rs12, 128;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs13;
LBB18_2:
ret;
}
// .globl Subsample_Bilinear_8_16_2_v
.visible .entry Subsample_Bilinear_8_16_2_v(
.param .u64 Subsample_Bilinear_8_16_2_v_param_0,
.param .u64 Subsample_Bilinear_8_16_2_v_param_1,
.param .u32 Subsample_Bilinear_8_16_2_v_param_2,
.param .u32 Subsample_Bilinear_8_16_2_v_param_3,
.param .u32 Subsample_Bilinear_8_16_2_v_param_4,
.param .u32 Subsample_Bilinear_8_16_2_v_param_5,
.param .u32 Subsample_Bilinear_8_16_2_v_param_6,
.param .u64 Subsample_Bilinear_8_16_2_v_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<14>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_2_v_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_2_v_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB19_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_2_v_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_2_v_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_2_v_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_2_v_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_2_v_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r18;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs2, %r22;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs3, %r26;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs4, %r30;
and.b16 %rs5, %rs1, 255;
and.b16 %rs6, %rs2, 255;
and.b16 %rs7, %rs3, 255;
and.b16 %rs8, %rs4, 255;
add.s16 %rs9, %rs6, %rs5;
add.s16 %rs10, %rs9, %rs7;
add.s16 %rs11, %rs10, %rs8;
shl.b16 %rs12, %rs11, 6;
add.s16 %rs13, %rs12, 128;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs13;
LBB19_2:
ret;
}
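// The "_2_u" and "_2_v" variants reuse the same four fetches but keep only
// one component each: _u sums the .x results (%r17/%r21/%r25/%r29), _v the
// .y results (%r18/%r22/%r26/%r30), and each stores a single u16. Together
// they appear to split an interleaved two-channel source into separate U and
// V planes, as the suffixes suggest.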
// .globl Subsample_Bilinear_8_16_4
.visible .entry Subsample_Bilinear_8_16_4(
.param .u64 Subsample_Bilinear_8_16_4_param_0,
.param .u64 Subsample_Bilinear_8_16_4_param_1,
.param .u32 Subsample_Bilinear_8_16_4_param_2,
.param .u32 Subsample_Bilinear_8_16_4_param_3,
.param .u32 Subsample_Bilinear_8_16_4_param_4,
.param .u32 Subsample_Bilinear_8_16_4_param_5,
.param .u32 Subsample_Bilinear_8_16_4_param_6,
.param .u64 Subsample_Bilinear_8_16_4_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<53>;
.reg .f32 %f<33>;
.reg .b32 %r<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_8_16_4_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_8_16_4_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB20_2;
ld.param.u32 %r7, [Subsample_Bilinear_8_16_4_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_8_16_4_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_8_16_4_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_8_16_4_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_8_16_4_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.u16.u32 %rs3, %r19;
cvt.u16.u32 %rs4, %r20;
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
cvt.u16.u32 %rs5, %r21;
cvt.u16.u32 %rs6, %r22;
cvt.u16.u32 %rs7, %r23;
cvt.u16.u32 %rs8, %r24;
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
cvt.u16.u32 %rs9, %r25;
cvt.u16.u32 %rs10, %r26;
cvt.u16.u32 %rs11, %r27;
cvt.u16.u32 %rs12, %r28;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
cvt.u16.u32 %rs13, %r29;
cvt.u16.u32 %rs14, %r30;
cvt.u16.u32 %rs15, %r31;
cvt.u16.u32 %rs16, %r32;
and.b16 %rs17, %rs1, 255;
and.b16 %rs18, %rs5, 255;
add.s16 %rs19, %rs18, %rs17;
and.b16 %rs20, %rs9, 255;
add.s16 %rs21, %rs19, %rs20;
and.b16 %rs22, %rs13, 255;
add.s16 %rs23, %rs21, %rs22;
and.b16 %rs24, %rs2, 255;
and.b16 %rs25, %rs6, 255;
add.s16 %rs26, %rs25, %rs24;
and.b16 %rs27, %rs10, 255;
add.s16 %rs28, %rs26, %rs27;
and.b16 %rs29, %rs14, 255;
add.s16 %rs30, %rs28, %rs29;
and.b16 %rs31, %rs3, 255;
and.b16 %rs32, %rs7, 255;
add.s16 %rs33, %rs32, %rs31;
and.b16 %rs34, %rs11, 255;
add.s16 %rs35, %rs33, %rs34;
and.b16 %rs36, %rs15, 255;
add.s16 %rs37, %rs35, %rs36;
and.b16 %rs38, %rs4, 255;
and.b16 %rs39, %rs8, 255;
add.s16 %rs40, %rs39, %rs38;
and.b16 %rs41, %rs12, 255;
add.s16 %rs42, %rs40, %rs41;
and.b16 %rs43, %rs16, 255;
add.s16 %rs44, %rs42, %rs43;
shl.b16 %rs45, %rs23, 6;
add.s16 %rs46, %rs45, 128;
shl.b16 %rs47, %rs30, 6;
add.s16 %rs48, %rs47, 128;
shl.b16 %rs49, %rs37, 6;
add.s16 %rs50, %rs49, 128;
shl.b16 %rs51, %rs44, 6;
add.s16 %rs52, %rs51, 128;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 3;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 3;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u16 [%rd15], {%rs46, %rs48, %rs50, %rs52};
LBB20_2:
ret;
}
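// Subsample_Bilinear_8_16_4 is the four-component form of the same pattern:
// all four texture components are averaged and promoted, and the result goes
// out as one st.global.v4.u16 (pitch pre-divided by 8, index scaled by 8
// bytes). A quick host-side check of the promotion arithmetic shared by the
// 8_16 variants -- a sketch for illustration, not from the original source:
//
// #include <cstdio>
//
// int main(void)
// {
//     unsigned taps[4] = {10, 20, 30, 41};              // four 8-bit samples
//     unsigned sum = taps[0] + taps[1] + taps[2] + taps[3];
//     unsigned out16 = ((sum << 6) + 128) & 0xFFFF;     // shl.b16 6; add 128
//     // sum*64 == (sum/4)*256 + (sum%4)*64, so out16 is the truncating
//     // 4-tap average scaled into the high byte, plus a +128 half-step bias
//     // and the sub-average remainder.
//     printf("sum=%u out16=%u avg*256+128=%u\n",
//            sum, out16, (sum / 4) * 256 + 128);
//     return 0;
// }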
// .globl Subsample_Bilinear_16_8
.visible .entry Subsample_Bilinear_16_8(
.param .u64 Subsample_Bilinear_16_8_param_0,
.param .u64 Subsample_Bilinear_16_8_param_1,
.param .u32 Subsample_Bilinear_16_8_param_2,
.param .u32 Subsample_Bilinear_16_8_param_3,
.param .u32 Subsample_Bilinear_16_8_param_4,
.param .u32 Subsample_Bilinear_16_8_param_5,
.param .u32 Subsample_Bilinear_16_8_param_6,
.param .u64 Subsample_Bilinear_16_8_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<35>;
.reg .b32 %r<45>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB21_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_param_4];
ld.param.u64 %rd5, [Subsample_Bilinear_16_8_param_0];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f11, %r6;
cvt.rn.f32.s32 %f12, %r3;
div.rn.f32 %f13, %f11, %f12;
cvt.rn.f32.s32 %f14, %r7;
cvt.rn.f32.s32 %f15, %r4;
div.rn.f32 %f16, %f14, %f15;
cvt.rn.f32.s32 %f17, %r1;
add.f32 %f18, %f17, 0f3F000000;
cvt.rn.f32.s32 %f19, %r2;
add.f32 %f20, %f19, 0f3F000000;
add.f32 %f21, %f13, 0fBF800000;
mul.f32 %f22, %f21, 0f3F000000;
max.f32 %f23, %f22, 0f00000000;
min.f32 %f24, %f23, 0f3F800000;
add.f32 %f25, %f16, 0fBF800000;
mul.f32 %f26, %f25, 0f3F000000;
max.f32 %f27, %f26, 0f00000000;
min.f32 %f28, %f27, 0f3F800000;
add.f32 %f29, %f24, 0f3F000000;
div.rn.f32 %f30, %f24, %f29;
add.f32 %f31, %f28, 0f3F000000;
div.rn.f32 %f32, %f28, %f31;
neg.f32 %f33, %f30;
fma.rn.f32 %f5, %f13, %f18, %f33;
neg.f32 %f34, %f32;
fma.rn.f32 %f4, %f16, %f20, %f34;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f13, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f16, %f20, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r37, %r17, 65535;
and.b32 %r38, %r21, 65535;
add.s32 %r39, %r38, %r37;
and.b32 %r40, %r25, 65535;
add.s32 %r41, %r39, %r40;
and.b32 %r42, %r29, 65535;
add.s32 %r43, %r41, %r42;
shr.u32 %r44, %r43, 10;
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd1, %rd12;
st.global.u8 [%rd13], %r44;
LBB21_2:
ret;
}
// .globl Subsample_Bilinear_16_8_c
.visible .entry Subsample_Bilinear_16_8_c(
.param .u64 Subsample_Bilinear_16_8_c_param_0,
.param .u64 Subsample_Bilinear_16_8_c_param_1,
.param .u32 Subsample_Bilinear_16_8_c_param_2,
.param .u32 Subsample_Bilinear_16_8_c_param_3,
.param .u32 Subsample_Bilinear_16_8_c_param_4,
.param .u32 Subsample_Bilinear_16_8_c_param_5,
.param .u32 Subsample_Bilinear_16_8_c_param_6,
.param .u64 Subsample_Bilinear_16_8_c_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<42>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_c_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_c_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB22_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_c_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_c_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_c_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_c_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_8_c_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
LBB22_2:
ret;
}
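// The 16_8 kernels reverse the conversion: four 16-bit taps are summed and
// shifted right by 10 (divide by 4 for the average, then by 256 to drop to
// the high byte). The plain form truncates; the "_c" form adds +2 before the
// shift, which rounds the 4-tap average ((sum + 2) >> 2) before the byte
// narrowing.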
// .globl Subsample_Bilinear_16_8_p2
.visible .entry Subsample_Bilinear_16_8_p2(
.param .u64 Subsample_Bilinear_16_8_p2_param_0,
.param .u64 Subsample_Bilinear_16_8_p2_param_1,
.param .u32 Subsample_Bilinear_16_8_p2_param_2,
.param .u32 Subsample_Bilinear_16_8_p2_param_3,
.param .u32 Subsample_Bilinear_16_8_p2_param_4,
.param .u32 Subsample_Bilinear_16_8_p2_param_5,
.param .u32 Subsample_Bilinear_16_8_p2_param_6,
.param .u64 Subsample_Bilinear_16_8_p2_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<43>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_p2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_p2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB23_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_p2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_p2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_p2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_p2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_8_p2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
shl.b32 %r42, %r1, 1;
cvt.s64.s32 %rd9, %r42;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
LBB23_2:
ret;
}
// .globl Subsample_Bilinear_16_8_2
.visible .entry Subsample_Bilinear_16_8_2(
.param .u64 Subsample_Bilinear_16_8_2_param_0,
.param .u64 Subsample_Bilinear_16_8_2_param_1,
.param .u32 Subsample_Bilinear_16_8_2_param_2,
.param .u32 Subsample_Bilinear_16_8_2_param_3,
.param .u32 Subsample_Bilinear_16_8_2_param_4,
.param .u32 Subsample_Bilinear_16_8_2_param_5,
.param .u32 Subsample_Bilinear_16_8_2_param_6,
.param .u64 Subsample_Bilinear_16_8_2_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .f32 %f<33>;
.reg .b32 %r<51>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_2_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_2_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB24_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_2_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_2_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_2_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_2_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_8_2_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
and.b32 %r41, %r18, 65535;
and.b32 %r42, %r22, 65535;
and.b32 %r43, %r26, 65535;
and.b32 %r44, %r30, 65535;
add.s32 %r45, %r41, %r42;
add.s32 %r46, %r45, %r43;
add.s32 %r47, %r46, %r44;
add.s32 %r48, %r47, 2;
shr.u32 %r49, %r40, 10;
cvt.u16.u32 %rs1, %r49;
shr.u32 %r50, %r48, 10;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u8 [%rd15], {%rs1, %rs2};
LBB24_2:
ret;
}
// .globl Subsample_Bilinear_16_8_2_u
.visible .entry Subsample_Bilinear_16_8_2_u(
.param .u64 Subsample_Bilinear_16_8_2_u_param_0,
.param .u64 Subsample_Bilinear_16_8_2_u_param_1,
.param .u32 Subsample_Bilinear_16_8_2_u_param_2,
.param .u32 Subsample_Bilinear_16_8_2_u_param_3,
.param .u32 Subsample_Bilinear_16_8_2_u_param_4,
.param .u32 Subsample_Bilinear_16_8_2_u_param_5,
.param .u32 Subsample_Bilinear_16_8_2_u_param_6,
.param .u64 Subsample_Bilinear_16_8_2_u_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<42>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_2_u_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_2_u_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB25_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_2_u_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_2_u_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_2_u_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_2_u_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_8_2_u_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
LBB25_2:
ret;
}
// .globl Subsample_Bilinear_16_8_2_v
.visible .entry Subsample_Bilinear_16_8_2_v(
.param .u64 Subsample_Bilinear_16_8_2_v_param_0,
.param .u64 Subsample_Bilinear_16_8_2_v_param_1,
.param .u32 Subsample_Bilinear_16_8_2_v_param_2,
.param .u32 Subsample_Bilinear_16_8_2_v_param_3,
.param .u32 Subsample_Bilinear_16_8_2_v_param_4,
.param .u32 Subsample_Bilinear_16_8_2_v_param_5,
.param .u32 Subsample_Bilinear_16_8_2_v_param_6,
.param .u64 Subsample_Bilinear_16_8_2_v_param_7
)
{
.reg .pred %p<4>;
.reg .f32 %f<33>;
.reg .b32 %r<42>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_2_v_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_2_v_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB26_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_2_v_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_2_v_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_2_v_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_2_v_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_8_2_v_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r18, 65535;
and.b32 %r34, %r22, 65535;
and.b32 %r35, %r26, 65535;
and.b32 %r36, %r30, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
LBB26_2:
ret;
}
// .globl Subsample_Bilinear_16_8_4
.visible .entry Subsample_Bilinear_16_8_4(
.param .u64 Subsample_Bilinear_16_8_4_param_0,
.param .u64 Subsample_Bilinear_16_8_4_param_1,
.param .u32 Subsample_Bilinear_16_8_4_param_2,
.param .u32 Subsample_Bilinear_16_8_4_param_3,
.param .u32 Subsample_Bilinear_16_8_4_param_4,
.param .u32 Subsample_Bilinear_16_8_4_param_5,
.param .u32 Subsample_Bilinear_16_8_4_param_6,
.param .u64 Subsample_Bilinear_16_8_4_param_7
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .f32 %f<33>;
.reg .b32 %r<69>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_16_8_4_param_3];
ld.param.u32 %r3, [Subsample_Bilinear_16_8_4_param_2];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.ge.s32 %p1, %r2, %r4;
setp.ge.s32 %p2, %r1, %r3;
or.pred %p3, %p2, %p1;
@%p3 bra LBB27_2;
ld.param.u32 %r7, [Subsample_Bilinear_16_8_4_param_6];
ld.param.u32 %r6, [Subsample_Bilinear_16_8_4_param_5];
ld.param.u32 %r5, [Subsample_Bilinear_16_8_4_param_4];
ld.param.u64 %rd4, [Subsample_Bilinear_16_8_4_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_16_8_4_param_1];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
cvt.rn.f32.s32 %f15, %r1;
add.f32 %f16, %f15, 0f3F000000;
cvt.rn.f32.s32 %f17, %r2;
add.f32 %f18, %f17, 0f3F000000;
add.f32 %f19, %f11, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
add.f32 %f23, %f14, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0f3F000000;
div.rn.f32 %f28, %f22, %f27;
add.f32 %f29, %f26, 0f3F000000;
div.rn.f32 %f30, %f26, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f16, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f18, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
fma.rn.f32 %f7, %f11, %f16, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
fma.rn.f32 %f8, %f14, %f18, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r33, %r17, 65535;
and.b32 %r34, %r21, 65535;
and.b32 %r35, %r25, 65535;
and.b32 %r36, %r29, 65535;
add.s32 %r37, %r33, %r34;
add.s32 %r38, %r37, %r35;
add.s32 %r39, %r38, %r36;
add.s32 %r40, %r39, 2;
and.b32 %r41, %r18, 65535;
and.b32 %r42, %r22, 65535;
and.b32 %r43, %r26, 65535;
and.b32 %r44, %r30, 65535;
add.s32 %r45, %r41, %r42;
add.s32 %r46, %r45, %r43;
add.s32 %r47, %r46, %r44;
add.s32 %r48, %r47, 2;
and.b32 %r49, %r19, 65535;
and.b32 %r50, %r23, 65535;
and.b32 %r51, %r27, 65535;
and.b32 %r52, %r31, 65535;
add.s32 %r53, %r49, %r50;
add.s32 %r54, %r53, %r51;
add.s32 %r55, %r54, %r52;
add.s32 %r56, %r55, 2;
and.b32 %r57, %r20, 65535;
and.b32 %r58, %r24, 65535;
and.b32 %r59, %r28, 65535;
and.b32 %r60, %r32, 65535;
add.s32 %r61, %r57, %r58;
add.s32 %r62, %r61, %r59;
add.s32 %r63, %r62, %r60;
add.s32 %r64, %r63, 2;
shr.u32 %r65, %r40, 10;
cvt.u16.u32 %rs1, %r65;
shr.u32 %r66, %r48, 10;
cvt.u16.u32 %rs2, %r66;
shr.u32 %r67, %r56, 10;
cvt.u16.u32 %rs3, %r67;
shr.u32 %r68, %r64, 10;
cvt.u16.u32 %rs4, %r68;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u8 [%rd15], {%rs1, %rs2, %rs3, %rs4};
LBB27_2:
ret;
}
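// Across every kernel in this module the parameter block decodes the same
// way (an inference from the ld.param usage, not from source): param_0 is
// the source cudaTextureObject_t, param_1 the destination pointer (converted
// with cvta.to.global), param_2/param_3 the destination width/height used in
// the bounds check, param_4 the destination pitch in bytes, and
// param_5/param_6 the source width/height that set the scale ratios.
// param_7 is declared but never loaded in the bodies shown.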
// [Binary payload: the remainder of this cache entry is an ELF cubin image
//  whose section and symbol tables (.text.*, .nv.info.*, .nv.shared.*,
//  .nv.constant0/2.*) cover each Subsample_Bilinear_* kernel above, plus the
//  per-kernel $...$__cuda_sm3x_div_rn_noftz_f32_slowpath helper symbols and
//  .nv.rel.action. Raw bytes omitted.]