//
// Generated by LLVM NVPTX Back-End
//
.version 4.2
.target sm_30
.address_size 64
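//
// NOTE: the kernel names below (Subsample_Nearest_<srcfmt>_<dstfmt>[_uv],
// plus the Subsample_Bicubic / lanczos_coeffs references) match FFmpeg's
// CUDA scale filter (libavfilter/vf_scale_cuda), so this appears to be its
// compiled PTX. Each entry point scales one plane of the named source
// pixel format into the named destination format with nearest-neighbor
// sampling; the _uv variants handle the chroma plane(s).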
// .globl Subsample_Nearest_yuv420p_yuv420p
.func (.param .align 16 .b8 func_retval0[4]) _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif
(
.param .b64 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_0,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_1,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_2,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_3,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_4,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_5,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_6
)
;
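// The .func above is only a forward declaration of the mangled bicubic
// (lanczos-coefficient) template; no definition or call appears in the
// portion shown here. Judging from the loads in the kernel bodies below,
// the shared parameter layout is:
//   param_0..3  source texture objects (one per plane)
//   param_4..7  destination plane pointers
//   param_8/9   dst_width / dst_height (used for the bounds check)
//   param_10    destination pitch in bytes
//   param_11/12 src_width / src_height
//   param_13    float parameter, never loaded by these nearest kernels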
.visible .entry Subsample_Nearest_yuv420p_yuv420p(
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_0,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_1,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_2,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_3,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_4,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_5,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_6,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_param_7,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_param_8,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_param_9,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_param_10,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_param_11,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_param_12,
.param .f32 Subsample_Nearest_yuv420p_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
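// dst pixel coordinates: x = ntid.x * ctaid.x + tid.x,
//                        y = ntid.y * ctaid.y + tid.y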
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
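// skip the fetch/store when (x, y) lies outside dst_width x dst_height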
@!%p3 bra $L__BB0_2;
bra.uni $L__BB0_1;
$L__BB0_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv420p_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
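// source sampling coordinates: (x + 0.5) * src_w / dst_w and
// (y + 0.5) * src_h / dst_h; 0f3F000000 is 0.5f in IEEE-754 hex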
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
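// write the sampled value to dst[y * dst_pitch + x] (low byte of %r17);
// every kernel below repeats this same compute/check/fetch/store shape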
st.global.u8 [%rd8], %r17;
$L__BB0_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_yuv420p_uv
.visible .entry Subsample_Nearest_yuv420p_yuv420p_uv(
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_0,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_1,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_2,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_3,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_4,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_5,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_6,
.param .u64 Subsample_Nearest_yuv420p_yuv420p_uv_param_7,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_uv_param_8,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_uv_param_9,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_uv_param_10,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_uv_param_11,
.param .u32 Subsample_Nearest_yuv420p_yuv420p_uv_param_12,
.param .f32 Subsample_Nearest_yuv420p_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB1_2;
bra.uni $L__BB1_1;
$L__BB1_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_yuv420p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv420p_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv420p_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv420p_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv420p_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
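// planar chroma: %rd7/%rd8 are the U and V source textures (param_1/param_2),
// written to the two destination planes %rd2/%rd1 (param_5/param_6)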
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd9, %r2, %r5;
cvt.s64.s32 %rd10, %r1;
add.s64 %rd11, %rd9, %rd10;
add.s64 %rd12, %rd2, %rd11;
st.global.u8 [%rd12], %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
add.s64 %rd13, %rd1, %rd11;
st.global.u8 [%rd13], %r21;
$L__BB1_2:
ret;
}
// .globl Subsample_Nearest_nv12_yuv420p
.visible .entry Subsample_Nearest_nv12_yuv420p(
.param .u64 Subsample_Nearest_nv12_yuv420p_param_0,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_1,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_2,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_3,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_4,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_5,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_6,
.param .u64 Subsample_Nearest_nv12_yuv420p_param_7,
.param .u32 Subsample_Nearest_nv12_yuv420p_param_8,
.param .u32 Subsample_Nearest_nv12_yuv420p_param_9,
.param .u32 Subsample_Nearest_nv12_yuv420p_param_10,
.param .u32 Subsample_Nearest_nv12_yuv420p_param_11,
.param .u32 Subsample_Nearest_nv12_yuv420p_param_12,
.param .f32 Subsample_Nearest_nv12_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB2_2;
bra.uni $L__BB2_1;
$L__BB2_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB2_2:
ret;
}
// .globl Subsample_Nearest_nv12_yuv420p_uv
.visible .entry Subsample_Nearest_nv12_yuv420p_uv(
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_0,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_1,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_2,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_3,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_4,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_5,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_6,
.param .u64 Subsample_Nearest_nv12_yuv420p_uv_param_7,
.param .u32 Subsample_Nearest_nv12_yuv420p_uv_param_8,
.param .u32 Subsample_Nearest_nv12_yuv420p_uv_param_9,
.param .u32 Subsample_Nearest_nv12_yuv420p_uv_param_10,
.param .u32 Subsample_Nearest_nv12_yuv420p_uv_param_11,
.param .u32 Subsample_Nearest_nv12_yuv420p_uv_param_12,
.param .f32 Subsample_Nearest_nv12_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB3_2;
bra.uni $L__BB3_1;
$L__BB3_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_nv12_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_nv12_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd7, %r2, %r5;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
add.s64 %rd10, %rd2, %rd9;
st.global.u8 [%rd10], %r17;
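// the NV12 source packs U and V in one two-channel texture, so a single
// fetch yields both: %r17 (U) was stored above, %r18 (V) is stored below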
add.s64 %rd11, %rd1, %rd9;
st.global.u8 [%rd11], %r18;
$L__BB3_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_yuv420p
.visible .entry Subsample_Nearest_yuv444p_yuv420p(
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_0,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_1,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_2,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_3,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_4,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_5,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_6,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_param_7,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_param_8,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_param_9,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_param_10,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_param_11,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_param_12,
.param .f32 Subsample_Nearest_yuv444p_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB4_2;
bra.uni $L__BB4_1;
$L__BB4_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB4_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_yuv420p_uv
.visible .entry Subsample_Nearest_yuv444p_yuv420p_uv(
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p_yuv420p_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p_yuv420p_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB5_2;
bra.uni $L__BB5_1;
$L__BB5_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_yuv420p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv444p_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv444p_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd9, %r2, %r5;
cvt.s64.s32 %rd10, %r1;
add.s64 %rd11, %rd9, %rd10;
add.s64 %rd12, %rd2, %rd11;
st.global.u8 [%rd12], %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
add.s64 %rd13, %rd1, %rd11;
st.global.u8 [%rd13], %r21;
$L__BB5_2:
ret;
}
// .globl Subsample_Nearest_p010le_yuv420p
.visible .entry Subsample_Nearest_p010le_yuv420p(
.param .u64 Subsample_Nearest_p010le_yuv420p_param_0,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_1,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_2,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_3,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_4,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_5,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_6,
.param .u64 Subsample_Nearest_p010le_yuv420p_param_7,
.param .u32 Subsample_Nearest_p010le_yuv420p_param_8,
.param .u32 Subsample_Nearest_p010le_yuv420p_param_9,
.param .u32 Subsample_Nearest_p010le_yuv420p_param_10,
.param .u32 Subsample_Nearest_p010le_yuv420p_param_11,
.param .u32 Subsample_Nearest_p010le_yuv420p_param_12,
.param .f32 Subsample_Nearest_p010le_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB6_2;
bra.uni $L__BB6_1;
$L__BB6_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
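// p010le texels are 16-bit with the significant bits at the top; >> 8 keeps
// the high byte for the 8-bit destination (the same narrowing recurs in the
// p016le and yuv444p16le kernels below)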
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB6_2:
ret;
}
// .globl Subsample_Nearest_p010le_yuv420p_uv
.visible .entry Subsample_Nearest_p010le_yuv420p_uv(
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_0,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_1,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_2,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_3,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_4,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_5,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_6,
.param .u64 Subsample_Nearest_p010le_yuv420p_uv_param_7,
.param .u32 Subsample_Nearest_p010le_yuv420p_uv_param_8,
.param .u32 Subsample_Nearest_p010le_yuv420p_uv_param_9,
.param .u32 Subsample_Nearest_p010le_yuv420p_uv_param_10,
.param .u32 Subsample_Nearest_p010le_yuv420p_uv_param_11,
.param .u32 Subsample_Nearest_p010le_yuv420p_uv_param_12,
.param .f32 Subsample_Nearest_p010le_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<23>;
.reg .f32 %f<13>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB7_2;
bra.uni $L__BB7_1;
$L__BB7_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_p010le_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_p010le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd7, %r2, %r5;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
add.s64 %rd10, %rd2, %rd9;
st.global.u8 [%rd10], %r21;
shr.u32 %r22, %r18, 8;
add.s64 %rd11, %rd1, %rd9;
st.global.u8 [%rd11], %r22;
$L__BB7_2:
ret;
}
// .globl Subsample_Nearest_p016le_yuv420p
.visible .entry Subsample_Nearest_p016le_yuv420p(
.param .u64 Subsample_Nearest_p016le_yuv420p_param_0,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_1,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_2,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_3,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_4,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_5,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_6,
.param .u64 Subsample_Nearest_p016le_yuv420p_param_7,
.param .u32 Subsample_Nearest_p016le_yuv420p_param_8,
.param .u32 Subsample_Nearest_p016le_yuv420p_param_9,
.param .u32 Subsample_Nearest_p016le_yuv420p_param_10,
.param .u32 Subsample_Nearest_p016le_yuv420p_param_11,
.param .u32 Subsample_Nearest_p016le_yuv420p_param_12,
.param .f32 Subsample_Nearest_p016le_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB8_2;
bra.uni $L__BB8_1;
$L__BB8_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB8_2:
ret;
}
// .globl Subsample_Nearest_p016le_yuv420p_uv
.visible .entry Subsample_Nearest_p016le_yuv420p_uv(
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_0,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_1,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_2,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_3,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_4,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_5,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_6,
.param .u64 Subsample_Nearest_p016le_yuv420p_uv_param_7,
.param .u32 Subsample_Nearest_p016le_yuv420p_uv_param_8,
.param .u32 Subsample_Nearest_p016le_yuv420p_uv_param_9,
.param .u32 Subsample_Nearest_p016le_yuv420p_uv_param_10,
.param .u32 Subsample_Nearest_p016le_yuv420p_uv_param_11,
.param .u32 Subsample_Nearest_p016le_yuv420p_uv_param_12,
.param .f32 Subsample_Nearest_p016le_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<23>;
.reg .f32 %f<13>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB9_2;
bra.uni $L__BB9_1;
$L__BB9_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_p016le_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_p016le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd7, %r2, %r5;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
add.s64 %rd10, %rd2, %rd9;
st.global.u8 [%rd10], %r21;
shr.u32 %r22, %r18, 8;
add.s64 %rd11, %rd1, %rd9;
st.global.u8 [%rd11], %r22;
$L__BB9_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_yuv420p
.visible .entry Subsample_Nearest_yuv444p16le_yuv420p(
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB10_2;
bra.uni $L__BB10_1;
$L__BB10_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p16le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB10_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_yuv420p_uv
.visible .entry Subsample_Nearest_yuv444p16le_yuv420p_uv(
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<27>;
.reg .f32 %f<15>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB11_2;
bra.uni $L__BB11_1;
$L__BB11_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p16le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
shr.u32 %r25, %r17, 8;
mul.wide.s32 %rd9, %r2, %r5;
cvt.s64.s32 %rd10, %r1;
add.s64 %rd11, %rd9, %rd10;
add.s64 %rd12, %rd2, %rd11;
st.global.u8 [%rd12], %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
shr.u32 %r26, %r21, 8;
add.s64 %rd13, %rd1, %rd11;
st.global.u8 [%rd13], %r26;
$L__BB11_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_nv12
.visible .entry Subsample_Nearest_yuv420p_nv12(
.param .u64 Subsample_Nearest_yuv420p_nv12_param_0,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_1,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_2,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_3,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_4,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_5,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_6,
.param .u64 Subsample_Nearest_yuv420p_nv12_param_7,
.param .u32 Subsample_Nearest_yuv420p_nv12_param_8,
.param .u32 Subsample_Nearest_yuv420p_nv12_param_9,
.param .u32 Subsample_Nearest_yuv420p_nv12_param_10,
.param .u32 Subsample_Nearest_yuv420p_nv12_param_11,
.param .u32 Subsample_Nearest_yuv420p_nv12_param_12,
.param .f32 Subsample_Nearest_yuv420p_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB12_2;
bra.uni $L__BB12_1;
$L__BB12_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_nv12_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv420p_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB12_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_nv12_uv
.visible .entry Subsample_Nearest_yuv420p_nv12_uv(
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_0,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_1,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_2,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_3,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_4,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_5,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_6,
.param .u64 Subsample_Nearest_yuv420p_nv12_uv_param_7,
.param .u32 Subsample_Nearest_yuv420p_nv12_uv_param_8,
.param .u32 Subsample_Nearest_yuv420p_nv12_uv_param_9,
.param .u32 Subsample_Nearest_yuv420p_nv12_uv_param_10,
.param .u32 Subsample_Nearest_yuv420p_nv12_uv_param_11,
.param .u32 Subsample_Nearest_yuv420p_nv12_uv_param_12,
.param .f32 Subsample_Nearest_yuv420p_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB13_2;
bra.uni $L__BB13_1;
$L__BB13_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_nv12_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv420p_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv420p_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
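// planar U and V fetched separately and truncated to bytes for the
// interleaved NV12 store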
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 1;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 1;
add.s64 %rd14, %rd1, %rd13;
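// NV12 chroma offset: (y * (dst_pitch / 2) + x) * 2 == y * dst_pitch + 2 * x;
// the {U, V} pair is written as two adjacent bytes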
st.global.v2.u8 [%rd14], {%rs1, %rs2};
$L__BB13_2:
ret;
}
// .globl Subsample_Nearest_nv12_nv12
.visible .entry Subsample_Nearest_nv12_nv12(
.param .u64 Subsample_Nearest_nv12_nv12_param_0,
.param .u64 Subsample_Nearest_nv12_nv12_param_1,
.param .u64 Subsample_Nearest_nv12_nv12_param_2,
.param .u64 Subsample_Nearest_nv12_nv12_param_3,
.param .u64 Subsample_Nearest_nv12_nv12_param_4,
.param .u64 Subsample_Nearest_nv12_nv12_param_5,
.param .u64 Subsample_Nearest_nv12_nv12_param_6,
.param .u64 Subsample_Nearest_nv12_nv12_param_7,
.param .u32 Subsample_Nearest_nv12_nv12_param_8,
.param .u32 Subsample_Nearest_nv12_nv12_param_9,
.param .u32 Subsample_Nearest_nv12_nv12_param_10,
.param .u32 Subsample_Nearest_nv12_nv12_param_11,
.param .u32 Subsample_Nearest_nv12_nv12_param_12,
.param .f32 Subsample_Nearest_nv12_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_nv12_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB14_2;
bra.uni $L__BB14_1;
$L__BB14_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_nv12_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_nv12_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB14_2:
ret;
}
// .globl Subsample_Nearest_nv12_nv12_uv
.visible .entry Subsample_Nearest_nv12_nv12_uv(
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_0,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_1,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_2,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_3,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_4,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_5,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_6,
.param .u64 Subsample_Nearest_nv12_nv12_uv_param_7,
.param .u32 Subsample_Nearest_nv12_nv12_uv_param_8,
.param .u32 Subsample_Nearest_nv12_nv12_uv_param_9,
.param .u32 Subsample_Nearest_nv12_nv12_uv_param_10,
.param .u32 Subsample_Nearest_nv12_nv12_uv_param_11,
.param .u32 Subsample_Nearest_nv12_nv12_uv_param_12,
.param .f32 Subsample_Nearest_nv12_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB15_2;
bra.uni $L__BB15_1;
$L__BB15_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u8 [%rd12], {%rs1, %rs2};
$L__BB15_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_nv12
.visible .entry Subsample_Nearest_yuv444p_nv12(
.param .u64 Subsample_Nearest_yuv444p_nv12_param_0,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_1,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_2,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_3,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_4,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_5,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_6,
.param .u64 Subsample_Nearest_yuv444p_nv12_param_7,
.param .u32 Subsample_Nearest_yuv444p_nv12_param_8,
.param .u32 Subsample_Nearest_yuv444p_nv12_param_9,
.param .u32 Subsample_Nearest_yuv444p_nv12_param_10,
.param .u32 Subsample_Nearest_yuv444p_nv12_param_11,
.param .u32 Subsample_Nearest_yuv444p_nv12_param_12,
.param .f32 Subsample_Nearest_yuv444p_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB16_2;
bra.uni $L__BB16_1;
$L__BB16_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_nv12_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB16_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_nv12_uv
.visible .entry Subsample_Nearest_yuv444p_nv12_uv(
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p_nv12_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p_nv12_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p_nv12_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p_nv12_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p_nv12_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p_nv12_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB17_2;
bra.uni $L__BB17_1;
$L__BB17_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_nv12_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 1;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 1;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u8 [%rd14], {%rs1, %rs2};
$L__BB17_2:
ret;
}
// .globl Subsample_Nearest_p010le_nv12
.visible .entry Subsample_Nearest_p010le_nv12(
.param .u64 Subsample_Nearest_p010le_nv12_param_0,
.param .u64 Subsample_Nearest_p010le_nv12_param_1,
.param .u64 Subsample_Nearest_p010le_nv12_param_2,
.param .u64 Subsample_Nearest_p010le_nv12_param_3,
.param .u64 Subsample_Nearest_p010le_nv12_param_4,
.param .u64 Subsample_Nearest_p010le_nv12_param_5,
.param .u64 Subsample_Nearest_p010le_nv12_param_6,
.param .u64 Subsample_Nearest_p010le_nv12_param_7,
.param .u32 Subsample_Nearest_p010le_nv12_param_8,
.param .u32 Subsample_Nearest_p010le_nv12_param_9,
.param .u32 Subsample_Nearest_p010le_nv12_param_10,
.param .u32 Subsample_Nearest_p010le_nv12_param_11,
.param .u32 Subsample_Nearest_p010le_nv12_param_12,
.param .f32 Subsample_Nearest_p010le_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB18_2;
bra.uni $L__BB18_1;
$L__BB18_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB18_2:
ret;
}
// .globl Subsample_Nearest_p010le_nv12_uv
.visible .entry Subsample_Nearest_p010le_nv12_uv(
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_0,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_1,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_2,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_3,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_4,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_5,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_6,
.param .u64 Subsample_Nearest_p010le_nv12_uv_param_7,
.param .u32 Subsample_Nearest_p010le_nv12_uv_param_8,
.param .u32 Subsample_Nearest_p010le_nv12_uv_param_9,
.param .u32 Subsample_Nearest_p010le_nv12_uv_param_10,
.param .u32 Subsample_Nearest_p010le_nv12_uv_param_11,
.param .u32 Subsample_Nearest_p010le_nv12_uv_param_12,
.param .f32 Subsample_Nearest_p010le_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<23>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB19_2;
bra.uni $L__BB19_1;
$L__BB19_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
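// 16-bit source to NV12: take the high byte of each chroma sample, then
// store the pair interleaved as in the other *_nv12_uv kernels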
shr.u32 %r21, %r17, 8;
cvt.u16.u32 %rs1, %r21;
shr.u32 %r22, %r18, 8;
cvt.u16.u32 %rs2, %r22;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u8 [%rd12], {%rs1, %rs2};
$L__BB19_2:
ret;
}
// .globl Subsample_Nearest_p016le_nv12
.visible .entry Subsample_Nearest_p016le_nv12(
.param .u64 Subsample_Nearest_p016le_nv12_param_0,
.param .u64 Subsample_Nearest_p016le_nv12_param_1,
.param .u64 Subsample_Nearest_p016le_nv12_param_2,
.param .u64 Subsample_Nearest_p016le_nv12_param_3,
.param .u64 Subsample_Nearest_p016le_nv12_param_4,
.param .u64 Subsample_Nearest_p016le_nv12_param_5,
.param .u64 Subsample_Nearest_p016le_nv12_param_6,
.param .u64 Subsample_Nearest_p016le_nv12_param_7,
.param .u32 Subsample_Nearest_p016le_nv12_param_8,
.param .u32 Subsample_Nearest_p016le_nv12_param_9,
.param .u32 Subsample_Nearest_p016le_nv12_param_10,
.param .u32 Subsample_Nearest_p016le_nv12_param_11,
.param .u32 Subsample_Nearest_p016le_nv12_param_12,
.param .f32 Subsample_Nearest_p016le_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB20_2;
bra.uni $L__BB20_1;
$L__BB20_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB20_2:
ret;
}
// .globl Subsample_Nearest_p016le_nv12_uv
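// p016le -> nv12, interleaved UV plane: one tex fetch returns both 16-bit
// chroma samples (%r17, %r18); each is narrowed to its high byte and the
// pair is written with a single st.global.v2.u8. The offset math,
// ((pitch >> 1) * y + x) << 1, is consistent with param 10 being a byte
// pitch and each UV pair occupying 2 bytes.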
.visible .entry Subsample_Nearest_p016le_nv12_uv(
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_0,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_1,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_2,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_3,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_4,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_5,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_6,
.param .u64 Subsample_Nearest_p016le_nv12_uv_param_7,
.param .u32 Subsample_Nearest_p016le_nv12_uv_param_8,
.param .u32 Subsample_Nearest_p016le_nv12_uv_param_9,
.param .u32 Subsample_Nearest_p016le_nv12_uv_param_10,
.param .u32 Subsample_Nearest_p016le_nv12_uv_param_11,
.param .u32 Subsample_Nearest_p016le_nv12_uv_param_12,
.param .f32 Subsample_Nearest_p016le_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<23>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB21_2;
bra.uni $L__BB21_1;
$L__BB21_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
cvt.u16.u32 %rs1, %r21;
shr.u32 %r22, %r18, 8;
cvt.u16.u32 %rs2, %r22;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u8 [%rd12], {%rs1, %rs2};
$L__BB21_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_nv12
.visible .entry Subsample_Nearest_yuv444p16le_nv12(
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB22_2;
bra.uni $L__BB22_1;
$L__BB22_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p16le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB22_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_nv12_uv
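// yuv444p16le -> nv12, UV: the source chroma is planar here, so there are
// two tex fetches (params 1 and 2) at the same coordinates; the high byte of
// each 16-bit sample is packed into one interleaved v2.u8 store.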
.visible .entry Subsample_Nearest_yuv444p16le_nv12_uv(
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_nv12_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_nv12_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<27>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB23_2;
bra.uni $L__BB23_1;
$L__BB23_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_nv12_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p16le_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p16le_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
shr.u32 %r25, %r17, 8;
cvt.u16.u32 %rs1, %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
shr.u32 %r26, %r21, 8;
cvt.u16.u32 %rs2, %r26;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 1;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 1;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u8 [%rd14], {%rs1, %rs2};
$L__BB23_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_yuv444p
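// yuv420p -> yuv444p, luma plane: both formats are 8-bit, so the fetched
// sample (%r17) is stored directly as u8 with no shift -- a plain
// nearest-neighbour resample.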
.visible .entry Subsample_Nearest_yuv420p_yuv444p(
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_0,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_1,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_2,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_3,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_4,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_5,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_6,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_param_7,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_param_8,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_param_9,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_param_10,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_param_11,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_param_12,
.param .f32 Subsample_Nearest_yuv420p_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB24_2;
bra.uni $L__BB24_1;
$L__BB24_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv420p_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB24_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_yuv444p_uv
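// yuv420p -> yuv444p, UV: planar source to planar destination. Two tex
// fetches (params 1 and 2) and two separate u8 stores to the U and V planes
// (params 5 and 6); the byte offset y * pitch + x is computed once and
// reused for both planes.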
.visible .entry Subsample_Nearest_yuv420p_yuv444p_uv(
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_0,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_1,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_2,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_3,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_4,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_5,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_6,
.param .u64 Subsample_Nearest_yuv420p_yuv444p_uv_param_7,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_uv_param_8,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_uv_param_9,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_uv_param_10,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_uv_param_11,
.param .u32 Subsample_Nearest_yuv420p_yuv444p_uv_param_12,
.param .f32 Subsample_Nearest_yuv420p_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB25_2;
bra.uni $L__BB25_1;
$L__BB25_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_yuv444p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv420p_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv420p_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv420p_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv420p_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd9, %r2, %r5;
cvt.s64.s32 %rd10, %r1;
add.s64 %rd11, %rd9, %rd10;
add.s64 %rd12, %rd2, %rd11;
st.global.u8 [%rd12], %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
add.s64 %rd13, %rd1, %rd11;
st.global.u8 [%rd13], %r21;
$L__BB25_2:
ret;
}
// .globl Subsample_Nearest_nv12_yuv444p
.visible .entry Subsample_Nearest_nv12_yuv444p(
.param .u64 Subsample_Nearest_nv12_yuv444p_param_0,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_1,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_2,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_3,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_4,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_5,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_6,
.param .u64 Subsample_Nearest_nv12_yuv444p_param_7,
.param .u32 Subsample_Nearest_nv12_yuv444p_param_8,
.param .u32 Subsample_Nearest_nv12_yuv444p_param_9,
.param .u32 Subsample_Nearest_nv12_yuv444p_param_10,
.param .u32 Subsample_Nearest_nv12_yuv444p_param_11,
.param .u32 Subsample_Nearest_nv12_yuv444p_param_12,
.param .f32 Subsample_Nearest_nv12_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB26_2;
bra.uni $L__BB26_1;
$L__BB26_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB26_2:
ret;
}
// .globl Subsample_Nearest_nv12_yuv444p_uv
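// nv12 -> yuv444p, UV: the source chroma plane is interleaved, so a single
// tex fetch yields both components (%r17 and %r18, presumably U then V);
// each is de-interleaved into its own planar destination with a separate
// u8 store.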
.visible .entry Subsample_Nearest_nv12_yuv444p_uv(
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_0,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_1,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_2,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_3,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_4,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_5,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_6,
.param .u64 Subsample_Nearest_nv12_yuv444p_uv_param_7,
.param .u32 Subsample_Nearest_nv12_yuv444p_uv_param_8,
.param .u32 Subsample_Nearest_nv12_yuv444p_uv_param_9,
.param .u32 Subsample_Nearest_nv12_yuv444p_uv_param_10,
.param .u32 Subsample_Nearest_nv12_yuv444p_uv_param_11,
.param .u32 Subsample_Nearest_nv12_yuv444p_uv_param_12,
.param .f32 Subsample_Nearest_nv12_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB27_2;
bra.uni $L__BB27_1;
$L__BB27_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_nv12_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_nv12_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd7, %r2, %r5;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
add.s64 %rd10, %rd2, %rd9;
st.global.u8 [%rd10], %r17;
add.s64 %rd11, %rd1, %rd9;
st.global.u8 [%rd11], %r18;
$L__BB27_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_yuv444p
.visible .entry Subsample_Nearest_yuv444p_yuv444p(
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_0,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_1,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_2,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_3,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_4,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_5,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_6,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_param_7,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_param_8,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_param_9,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_param_10,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_param_11,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_param_12,
.param .f32 Subsample_Nearest_yuv444p_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB28_2;
bra.uni $L__BB28_1;
$L__BB28_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r17;
$L__BB28_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_yuv444p_uv
.visible .entry Subsample_Nearest_yuv444p_yuv444p_uv(
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p_yuv444p_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p_yuv444p_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB29_2;
bra.uni $L__BB29_1;
$L__BB29_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_yuv444p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv444p_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv444p_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
mul.wide.s32 %rd9, %r2, %r5;
cvt.s64.s32 %rd10, %r1;
add.s64 %rd11, %rd9, %rd10;
add.s64 %rd12, %rd2, %rd11;
st.global.u8 [%rd12], %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
add.s64 %rd13, %rd1, %rd11;
st.global.u8 [%rd13], %r21;
$L__BB29_2:
ret;
}
// .globl Subsample_Nearest_p010le_yuv444p
.visible .entry Subsample_Nearest_p010le_yuv444p(
.param .u64 Subsample_Nearest_p010le_yuv444p_param_0,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_1,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_2,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_3,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_4,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_5,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_6,
.param .u64 Subsample_Nearest_p010le_yuv444p_param_7,
.param .u32 Subsample_Nearest_p010le_yuv444p_param_8,
.param .u32 Subsample_Nearest_p010le_yuv444p_param_9,
.param .u32 Subsample_Nearest_p010le_yuv444p_param_10,
.param .u32 Subsample_Nearest_p010le_yuv444p_param_11,
.param .u32 Subsample_Nearest_p010le_yuv444p_param_12,
.param .f32 Subsample_Nearest_p010le_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB30_2;
bra.uni $L__BB30_1;
$L__BB30_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB30_2:
ret;
}
// .globl Subsample_Nearest_p010le_yuv444p_uv
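// p010le -> yuv444p, UV: one fetch of the interleaved 16-bit pair, then
// shr 8 on each component before the two planar u8 stores -- the 16-to-8
// narrowing and the de-interleave combined.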
.visible .entry Subsample_Nearest_p010le_yuv444p_uv(
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_0,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_1,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_2,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_3,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_4,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_5,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_6,
.param .u64 Subsample_Nearest_p010le_yuv444p_uv_param_7,
.param .u32 Subsample_Nearest_p010le_yuv444p_uv_param_8,
.param .u32 Subsample_Nearest_p010le_yuv444p_uv_param_9,
.param .u32 Subsample_Nearest_p010le_yuv444p_uv_param_10,
.param .u32 Subsample_Nearest_p010le_yuv444p_uv_param_11,
.param .u32 Subsample_Nearest_p010le_yuv444p_uv_param_12,
.param .f32 Subsample_Nearest_p010le_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<23>;
.reg .f32 %f<13>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB31_2;
bra.uni $L__BB31_1;
$L__BB31_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_p010le_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_p010le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd7, %r2, %r5;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
add.s64 %rd10, %rd2, %rd9;
st.global.u8 [%rd10], %r21;
shr.u32 %r22, %r18, 8;
add.s64 %rd11, %rd1, %rd9;
st.global.u8 [%rd11], %r22;
$L__BB31_2:
ret;
}
// .globl Subsample_Nearest_p016le_yuv444p
.visible .entry Subsample_Nearest_p016le_yuv444p(
.param .u64 Subsample_Nearest_p016le_yuv444p_param_0,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_1,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_2,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_3,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_4,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_5,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_6,
.param .u64 Subsample_Nearest_p016le_yuv444p_param_7,
.param .u32 Subsample_Nearest_p016le_yuv444p_param_8,
.param .u32 Subsample_Nearest_p016le_yuv444p_param_9,
.param .u32 Subsample_Nearest_p016le_yuv444p_param_10,
.param .u32 Subsample_Nearest_p016le_yuv444p_param_11,
.param .u32 Subsample_Nearest_p016le_yuv444p_param_12,
.param .f32 Subsample_Nearest_p016le_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB32_2;
bra.uni $L__BB32_1;
$L__BB32_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB32_2:
ret;
}
// .globl Subsample_Nearest_p016le_yuv444p_uv
.visible .entry Subsample_Nearest_p016le_yuv444p_uv(
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_0,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_1,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_2,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_3,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_4,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_5,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_6,
.param .u64 Subsample_Nearest_p016le_yuv444p_uv_param_7,
.param .u32 Subsample_Nearest_p016le_yuv444p_uv_param_8,
.param .u32 Subsample_Nearest_p016le_yuv444p_uv_param_9,
.param .u32 Subsample_Nearest_p016le_yuv444p_uv_param_10,
.param .u32 Subsample_Nearest_p016le_yuv444p_uv_param_11,
.param .u32 Subsample_Nearest_p016le_yuv444p_uv_param_12,
.param .f32 Subsample_Nearest_p016le_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<23>;
.reg .f32 %f<13>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB33_2;
bra.uni $L__BB33_1;
$L__BB33_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_p016le_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_p016le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd7, %r2, %r5;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
add.s64 %rd10, %rd2, %rd9;
st.global.u8 [%rd10], %r21;
shr.u32 %r22, %r18, 8;
add.s64 %rd11, %rd1, %rd9;
st.global.u8 [%rd11], %r22;
$L__BB33_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_yuv444p
.visible .entry Subsample_Nearest_yuv444p16le_yuv444p(
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<22>;
.reg .f32 %f<13>;
.reg .b64 %rd<9>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB34_2;
bra.uni $L__BB34_1;
$L__BB34_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p16le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
shr.u32 %r21, %r17, 8;
mul.wide.s32 %rd5, %r2, %r5;
cvt.s64.s32 %rd6, %r1;
add.s64 %rd7, %rd5, %rd6;
add.s64 %rd8, %rd1, %rd7;
st.global.u8 [%rd8], %r21;
$L__BB34_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_yuv444p_uv
.visible .entry Subsample_Nearest_yuv444p16le_yuv444p_uv(
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<27>;
.reg .f32 %f<15>;
.reg .b64 %rd<14>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB35_2;
bra.uni $L__BB35_1;
$L__BB35_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p16le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
shr.u32 %r25, %r17, 8;
mul.wide.s32 %rd9, %r2, %r5;
cvt.s64.s32 %rd10, %r1;
add.s64 %rd11, %rd9, %rd10;
add.s64 %rd12, %rd2, %rd11;
st.global.u8 [%rd12], %r25;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
shr.u32 %r26, %r21, 8;
add.s64 %rd13, %rd1, %rd11;
st.global.u8 [%rd13], %r26;
$L__BB35_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_p010le
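// yuv420p -> p010le, luma plane: an 8-to-16-bit expansion. The sequence
// "and 255; mul.lo 257; and -64" maps a byte v to (v * 0x101) & 0xFFC0,
// replicating it into both bytes and clearing the low 6 bits, which matches
// p010le keeping its 10 significant bits at the high end of each 16-bit
// word; the result is stored as u16.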
.visible .entry Subsample_Nearest_yuv420p_p010le(
.param .u64 Subsample_Nearest_yuv420p_p010le_param_0,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_1,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_2,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_3,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_4,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_5,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_6,
.param .u64 Subsample_Nearest_yuv420p_p010le_param_7,
.param .u32 Subsample_Nearest_yuv420p_p010le_param_8,
.param .u32 Subsample_Nearest_yuv420p_p010le_param_9,
.param .u32 Subsample_Nearest_yuv420p_p010le_param_10,
.param .u32 Subsample_Nearest_yuv420p_p010le_param_11,
.param .u32 Subsample_Nearest_yuv420p_p010le_param_12,
.param .f32 Subsample_Nearest_yuv420p_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB36_2;
bra.uni $L__BB36_1;
$L__BB36_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_p010le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv420p_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs4;
$L__BB36_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_p010le_uv
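// yuv420p -> p010le, UV: two planar 8-bit fetches, each widened with the
// same (v * 257) & 0xFFC0 pattern, then packed into one interleaved
// st.global.v2.u16. The offset math ((pitch >> 2) * y + x) << 2 is
// consistent with a byte pitch and 4-byte UV pairs.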
.visible .entry Subsample_Nearest_yuv420p_p010le_uv(
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_0,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_1,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_2,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_3,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_4,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_5,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_6,
.param .u64 Subsample_Nearest_yuv420p_p010le_uv_param_7,
.param .u32 Subsample_Nearest_yuv420p_p010le_uv_param_8,
.param .u32 Subsample_Nearest_yuv420p_p010le_uv_param_9,
.param .u32 Subsample_Nearest_yuv420p_p010le_uv_param_10,
.param .u32 Subsample_Nearest_yuv420p_p010le_uv_param_11,
.param .u32 Subsample_Nearest_yuv420p_p010le_uv_param_12,
.param .f32 Subsample_Nearest_yuv420p_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB37_2;
bra.uni $L__BB37_1;
$L__BB37_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_p010le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv420p_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv420p_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
and.b16 %rs4, %rs3, -64;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs5, %r21;
and.b16 %rs6, %rs5, 255;
mul.lo.s16 %rs7, %rs6, 257;
and.b16 %rs8, %rs7, -64;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 2;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 2;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u16 [%rd14], {%rs4, %rs8};
$L__BB37_2:
ret;
}
// .globl Subsample_Nearest_nv12_p010le
.visible .entry Subsample_Nearest_nv12_p010le(
.param .u64 Subsample_Nearest_nv12_p010le_param_0,
.param .u64 Subsample_Nearest_nv12_p010le_param_1,
.param .u64 Subsample_Nearest_nv12_p010le_param_2,
.param .u64 Subsample_Nearest_nv12_p010le_param_3,
.param .u64 Subsample_Nearest_nv12_p010le_param_4,
.param .u64 Subsample_Nearest_nv12_p010le_param_5,
.param .u64 Subsample_Nearest_nv12_p010le_param_6,
.param .u64 Subsample_Nearest_nv12_p010le_param_7,
.param .u32 Subsample_Nearest_nv12_p010le_param_8,
.param .u32 Subsample_Nearest_nv12_p010le_param_9,
.param .u32 Subsample_Nearest_nv12_p010le_param_10,
.param .u32 Subsample_Nearest_nv12_p010le_param_11,
.param .u32 Subsample_Nearest_nv12_p010le_param_12,
.param .f32 Subsample_Nearest_nv12_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_p010le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB38_2;
bra.uni $L__BB38_1;
$L__BB38_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_p010le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_p010le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs4;
$L__BB38_2:
ret;
}
// .globl Subsample_Nearest_nv12_p010le_uv
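// nv12 -> p010le, UV: the interleaved 8-bit source gives both components in
// one fetch; each goes through the same (v * 257) & 0xFFC0 widening before
// the interleaved v2.u16 store.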
.visible .entry Subsample_Nearest_nv12_p010le_uv(
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_0,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_1,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_2,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_3,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_4,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_5,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_6,
.param .u64 Subsample_Nearest_nv12_p010le_uv_param_7,
.param .u32 Subsample_Nearest_nv12_p010le_uv_param_8,
.param .u32 Subsample_Nearest_nv12_p010le_uv_param_9,
.param .u32 Subsample_Nearest_nv12_p010le_uv_param_10,
.param .u32 Subsample_Nearest_nv12_p010le_uv_param_11,
.param .u32 Subsample_Nearest_nv12_p010le_uv_param_12,
.param .f32 Subsample_Nearest_nv12_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB39_2;
bra.uni $L__BB39_1;
$L__BB39_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
and.b16 %rs3, %rs1, 255;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs4, -64;
and.b16 %rs6, %rs2, 255;
mul.lo.s16 %rs7, %rs6, 257;
and.b16 %rs8, %rs7, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u16 [%rd12], {%rs5, %rs8};
$L__BB39_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_p010le
.visible .entry Subsample_Nearest_yuv444p_p010le(
.param .u64 Subsample_Nearest_yuv444p_p010le_param_0,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_1,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_2,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_3,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_4,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_5,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_6,
.param .u64 Subsample_Nearest_yuv444p_p010le_param_7,
.param .u32 Subsample_Nearest_yuv444p_p010le_param_8,
.param .u32 Subsample_Nearest_yuv444p_p010le_param_9,
.param .u32 Subsample_Nearest_yuv444p_p010le_param_10,
.param .u32 Subsample_Nearest_yuv444p_p010le_param_11,
.param .u32 Subsample_Nearest_yuv444p_p010le_param_12,
.param .f32 Subsample_Nearest_yuv444p_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB40_2;
bra.uni $L__BB40_1;
$L__BB40_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_p010le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs4;
$L__BB40_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_p010le_uv
.visible .entry Subsample_Nearest_yuv444p_p010le_uv(
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p_p010le_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p_p010le_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p_p010le_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p_p010le_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p_p010le_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p_p010le_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB41_2;
bra.uni $L__BB41_1;
$L__BB41_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_p010le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
and.b16 %rs4, %rs3, -64;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs5, %r21;
and.b16 %rs6, %rs5, 255;
mul.lo.s16 %rs7, %rs6, 257;
and.b16 %rs8, %rs7, -64;
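// Interleaved chroma store: the pitch (param_10) is shifted right by 2 to
// give the row stride in ushort2 elements, and the element index is shifted
// left by 2 to get the byte offset of the {U, V} pair written by the
// vectorized st.global.v2.u16 below.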
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 2;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 2;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u16 [%rd14], {%rs4, %rs8};
$L__BB41_2:
ret;
}
// .globl Subsample_Nearest_p010le_p010le
.visible .entry Subsample_Nearest_p010le_p010le(
.param .u64 Subsample_Nearest_p010le_p010le_param_0,
.param .u64 Subsample_Nearest_p010le_p010le_param_1,
.param .u64 Subsample_Nearest_p010le_p010le_param_2,
.param .u64 Subsample_Nearest_p010le_p010le_param_3,
.param .u64 Subsample_Nearest_p010le_p010le_param_4,
.param .u64 Subsample_Nearest_p010le_p010le_param_5,
.param .u64 Subsample_Nearest_p010le_p010le_param_6,
.param .u64 Subsample_Nearest_p010le_p010le_param_7,
.param .u32 Subsample_Nearest_p010le_p010le_param_8,
.param .u32 Subsample_Nearest_p010le_p010le_param_9,
.param .u32 Subsample_Nearest_p010le_p010le_param_10,
.param .u32 Subsample_Nearest_p010le_p010le_param_11,
.param .u32 Subsample_Nearest_p010le_p010le_param_12,
.param .f32 Subsample_Nearest_p010le_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB42_2;
bra.uni $L__BB42_1;
$L__BB42_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
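// p010le -> p010le is a straight copy: the low 16 bits of the texel are
// stored unchanged, with no bit-depth conversion needed.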
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %r17;
$L__BB42_2:
ret;
}
// .globl Subsample_Nearest_p010le_p010le_uv
.visible .entry Subsample_Nearest_p010le_p010le_uv(
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_0,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_1,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_2,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_3,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_4,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_5,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_6,
.param .u64 Subsample_Nearest_p010le_p010le_uv_param_7,
.param .u32 Subsample_Nearest_p010le_p010le_uv_param_8,
.param .u32 Subsample_Nearest_p010le_p010le_uv_param_9,
.param .u32 Subsample_Nearest_p010le_p010le_uv_param_10,
.param .u32 Subsample_Nearest_p010le_p010le_uv_param_11,
.param .u32 Subsample_Nearest_p010le_p010le_uv_param_12,
.param .f32 Subsample_Nearest_p010le_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB43_2;
bra.uni $L__BB43_1;
$L__BB43_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u16 [%rd12], {%rs1, %rs2};
$L__BB43_2:
ret;
}
// .globl Subsample_Nearest_p016le_p010le
.visible .entry Subsample_Nearest_p016le_p010le(
.param .u64 Subsample_Nearest_p016le_p010le_param_0,
.param .u64 Subsample_Nearest_p016le_p010le_param_1,
.param .u64 Subsample_Nearest_p016le_p010le_param_2,
.param .u64 Subsample_Nearest_p016le_p010le_param_3,
.param .u64 Subsample_Nearest_p016le_p010le_param_4,
.param .u64 Subsample_Nearest_p016le_p010le_param_5,
.param .u64 Subsample_Nearest_p016le_p010le_param_6,
.param .u64 Subsample_Nearest_p016le_p010le_param_7,
.param .u32 Subsample_Nearest_p016le_p010le_param_8,
.param .u32 Subsample_Nearest_p016le_p010le_param_9,
.param .u32 Subsample_Nearest_p016le_p010le_param_10,
.param .u32 Subsample_Nearest_p016le_p010le_param_11,
.param .u32 Subsample_Nearest_p016le_p010le_param_12,
.param .f32 Subsample_Nearest_p016le_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB44_2;
bra.uni $L__BB44_1;
$L__BB44_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
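// 16-bit -> 10-bit: masking with 0xFFC0 (-64) drops the low 6 bits, leaving
// an MSB-aligned 10-bit sample as p010le expects.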
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs2;
$L__BB44_2:
ret;
}
// .globl Subsample_Nearest_p016le_p010le_uv
.visible .entry Subsample_Nearest_p016le_p010le_uv(
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_0,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_1,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_2,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_3,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_4,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_5,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_6,
.param .u64 Subsample_Nearest_p016le_p010le_uv_param_7,
.param .u32 Subsample_Nearest_p016le_p010le_uv_param_8,
.param .u32 Subsample_Nearest_p016le_p010le_uv_param_9,
.param .u32 Subsample_Nearest_p016le_p010le_uv_param_10,
.param .u32 Subsample_Nearest_p016le_p010le_uv_param_11,
.param .u32 Subsample_Nearest_p016le_p010le_uv_param_12,
.param .f32 Subsample_Nearest_p016le_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB45_2;
bra.uni $L__BB45_1;
$L__BB45_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
and.b16 %rs3, %rs1, -64;
and.b16 %rs4, %rs2, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u16 [%rd12], {%rs3, %rs4};
$L__BB45_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_p010le
.visible .entry Subsample_Nearest_yuv444p16le_p010le(
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB46_2;
bra.uni $L__BB46_1;
$L__BB46_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p16le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, -64;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs2;
$L__BB46_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_p010le_uv
.visible .entry Subsample_Nearest_yuv444p16le_p010le_uv(
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_p010le_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_p010le_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB47_2;
bra.uni $L__BB47_1;
$L__BB47_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_p010le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p16le_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p16le_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, -64;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs3, %r21;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 2;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 2;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u16 [%rd14], {%rs2, %rs4};
$L__BB47_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_p016le
.visible .entry Subsample_Nearest_yuv420p_p016le(
.param .u64 Subsample_Nearest_yuv420p_p016le_param_0,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_1,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_2,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_3,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_4,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_5,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_6,
.param .u64 Subsample_Nearest_yuv420p_p016le_param_7,
.param .u32 Subsample_Nearest_yuv420p_p016le_param_8,
.param .u32 Subsample_Nearest_yuv420p_p016le_param_9,
.param .u32 Subsample_Nearest_yuv420p_p016le_param_10,
.param .u32 Subsample_Nearest_yuv420p_p016le_param_11,
.param .u32 Subsample_Nearest_yuv420p_p016le_param_12,
.param .f32 Subsample_Nearest_yuv420p_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB48_2;
bra.uni $L__BB48_1;
$L__BB48_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_p016le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv420p_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
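// 8-bit -> 16-bit full range: multiplying the low byte by 257 (0x0101)
// replicates it into both bytes, mapping 0..255 onto 0..65535 for the
// p016le destination.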
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB48_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_p016le_uv
.visible .entry Subsample_Nearest_yuv420p_p016le_uv(
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_0,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_1,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_2,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_3,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_4,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_5,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_6,
.param .u64 Subsample_Nearest_yuv420p_p016le_uv_param_7,
.param .u32 Subsample_Nearest_yuv420p_p016le_uv_param_8,
.param .u32 Subsample_Nearest_yuv420p_p016le_uv_param_9,
.param .u32 Subsample_Nearest_yuv420p_p016le_uv_param_10,
.param .u32 Subsample_Nearest_yuv420p_p016le_uv_param_11,
.param .u32 Subsample_Nearest_yuv420p_p016le_uv_param_12,
.param .f32 Subsample_Nearest_yuv420p_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB49_2;
bra.uni $L__BB49_1;
$L__BB49_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_p016le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv420p_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv420p_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs4, %r21;
and.b16 %rs5, %rs4, 255;
mul.lo.s16 %rs6, %rs5, 257;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 2;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 2;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u16 [%rd14], {%rs3, %rs6};
$L__BB49_2:
ret;
}
// .globl Subsample_Nearest_nv12_p016le
.visible .entry Subsample_Nearest_nv12_p016le(
.param .u64 Subsample_Nearest_nv12_p016le_param_0,
.param .u64 Subsample_Nearest_nv12_p016le_param_1,
.param .u64 Subsample_Nearest_nv12_p016le_param_2,
.param .u64 Subsample_Nearest_nv12_p016le_param_3,
.param .u64 Subsample_Nearest_nv12_p016le_param_4,
.param .u64 Subsample_Nearest_nv12_p016le_param_5,
.param .u64 Subsample_Nearest_nv12_p016le_param_6,
.param .u64 Subsample_Nearest_nv12_p016le_param_7,
.param .u32 Subsample_Nearest_nv12_p016le_param_8,
.param .u32 Subsample_Nearest_nv12_p016le_param_9,
.param .u32 Subsample_Nearest_nv12_p016le_param_10,
.param .u32 Subsample_Nearest_nv12_p016le_param_11,
.param .u32 Subsample_Nearest_nv12_p016le_param_12,
.param .f32 Subsample_Nearest_nv12_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_p016le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB50_2;
bra.uni $L__BB50_1;
$L__BB50_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_p016le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_p016le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB50_2:
ret;
}
// .globl Subsample_Nearest_nv12_p016le_uv
.visible .entry Subsample_Nearest_nv12_p016le_uv(
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_0,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_1,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_2,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_3,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_4,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_5,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_6,
.param .u64 Subsample_Nearest_nv12_p016le_uv_param_7,
.param .u32 Subsample_Nearest_nv12_p016le_uv_param_8,
.param .u32 Subsample_Nearest_nv12_p016le_uv_param_9,
.param .u32 Subsample_Nearest_nv12_p016le_uv_param_10,
.param .u32 Subsample_Nearest_nv12_p016le_uv_param_11,
.param .u32 Subsample_Nearest_nv12_p016le_uv_param_12,
.param .f32 Subsample_Nearest_nv12_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB51_2;
bra.uni $L__BB51_1;
$L__BB51_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
and.b16 %rs3, %rs1, 255;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs2, 255;
mul.lo.s16 %rs6, %rs5, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u16 [%rd12], {%rs4, %rs6};
$L__BB51_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_p016le
.visible .entry Subsample_Nearest_yuv444p_p016le(
.param .u64 Subsample_Nearest_yuv444p_p016le_param_0,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_1,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_2,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_3,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_4,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_5,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_6,
.param .u64 Subsample_Nearest_yuv444p_p016le_param_7,
.param .u32 Subsample_Nearest_yuv444p_p016le_param_8,
.param .u32 Subsample_Nearest_yuv444p_p016le_param_9,
.param .u32 Subsample_Nearest_yuv444p_p016le_param_10,
.param .u32 Subsample_Nearest_yuv444p_p016le_param_11,
.param .u32 Subsample_Nearest_yuv444p_p016le_param_12,
.param .f32 Subsample_Nearest_yuv444p_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB52_2;
bra.uni $L__BB52_1;
$L__BB52_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_p016le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB52_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_p016le_uv
.visible .entry Subsample_Nearest_yuv444p_p016le_uv(
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p_p016le_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p_p016le_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p_p016le_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p_p016le_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p_p016le_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p_p016le_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB53_2;
bra.uni $L__BB53_1;
$L__BB53_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_p016le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs4, %r21;
and.b16 %rs5, %rs4, 255;
mul.lo.s16 %rs6, %rs5, 257;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 2;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 2;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u16 [%rd14], {%rs3, %rs6};
$L__BB53_2:
ret;
}
// .globl Subsample_Nearest_p010le_p016le
.visible .entry Subsample_Nearest_p010le_p016le(
.param .u64 Subsample_Nearest_p010le_p016le_param_0,
.param .u64 Subsample_Nearest_p010le_p016le_param_1,
.param .u64 Subsample_Nearest_p010le_p016le_param_2,
.param .u64 Subsample_Nearest_p010le_p016le_param_3,
.param .u64 Subsample_Nearest_p010le_p016le_param_4,
.param .u64 Subsample_Nearest_p010le_p016le_param_5,
.param .u64 Subsample_Nearest_p010le_p016le_param_6,
.param .u64 Subsample_Nearest_p010le_p016le_param_7,
.param .u32 Subsample_Nearest_p010le_p016le_param_8,
.param .u32 Subsample_Nearest_p010le_p016le_param_9,
.param .u32 Subsample_Nearest_p010le_p016le_param_10,
.param .u32 Subsample_Nearest_p010le_p016le_param_11,
.param .u32 Subsample_Nearest_p010le_p016le_param_12,
.param .f32 Subsample_Nearest_p010le_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB54_2;
bra.uni $L__BB54_1;
$L__BB54_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
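// 10-bit (MSB-aligned) -> 16-bit: or-ing the sample with itself shifted
// right by 10 replicates the top bits into the low 6, expanding the p010le
// value to the full p016le range.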
cvt.u16.u32 %rs1, %r17;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB54_2:
ret;
}
// .globl Subsample_Nearest_p010le_p016le_uv
.visible .entry Subsample_Nearest_p010le_p016le_uv(
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_0,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_1,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_2,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_3,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_4,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_5,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_6,
.param .u64 Subsample_Nearest_p010le_p016le_uv_param_7,
.param .u32 Subsample_Nearest_p010le_p016le_uv_param_8,
.param .u32 Subsample_Nearest_p010le_p016le_uv_param_9,
.param .u32 Subsample_Nearest_p010le_p016le_uv_param_10,
.param .u32 Subsample_Nearest_p010le_p016le_uv_param_11,
.param .u32 Subsample_Nearest_p010le_p016le_uv_param_12,
.param .f32 Subsample_Nearest_p010le_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB55_2;
bra.uni $L__BB55_1;
$L__BB55_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u16 [%rd12], {%rs4, %rs6};
$L__BB55_2:
ret;
}
// .globl Subsample_Nearest_p016le_p016le
.visible .entry Subsample_Nearest_p016le_p016le(
.param .u64 Subsample_Nearest_p016le_p016le_param_0,
.param .u64 Subsample_Nearest_p016le_p016le_param_1,
.param .u64 Subsample_Nearest_p016le_p016le_param_2,
.param .u64 Subsample_Nearest_p016le_p016le_param_3,
.param .u64 Subsample_Nearest_p016le_p016le_param_4,
.param .u64 Subsample_Nearest_p016le_p016le_param_5,
.param .u64 Subsample_Nearest_p016le_p016le_param_6,
.param .u64 Subsample_Nearest_p016le_p016le_param_7,
.param .u32 Subsample_Nearest_p016le_p016le_param_8,
.param .u32 Subsample_Nearest_p016le_p016le_param_9,
.param .u32 Subsample_Nearest_p016le_p016le_param_10,
.param .u32 Subsample_Nearest_p016le_p016le_param_11,
.param .u32 Subsample_Nearest_p016le_p016le_param_12,
.param .f32 Subsample_Nearest_p016le_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB56_2;
bra.uni $L__BB56_1;
$L__BB56_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %r17;
$L__BB56_2:
ret;
}
// .globl Subsample_Nearest_p016le_p016le_uv
.visible .entry Subsample_Nearest_p016le_p016le_uv(
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_0,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_1,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_2,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_3,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_4,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_5,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_6,
.param .u64 Subsample_Nearest_p016le_p016le_uv_param_7,
.param .u32 Subsample_Nearest_p016le_p016le_uv_param_8,
.param .u32 Subsample_Nearest_p016le_p016le_uv_param_9,
.param .u32 Subsample_Nearest_p016le_p016le_uv_param_10,
.param .u32 Subsample_Nearest_p016le_p016le_uv_param_11,
.param .u32 Subsample_Nearest_p016le_p016le_uv_param_12,
.param .f32 Subsample_Nearest_p016le_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB57_2;
bra.uni $L__BB57_1;
$L__BB57_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v2.u16 [%rd12], {%rs1, %rs2};
$L__BB57_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_p016le
.visible .entry Subsample_Nearest_yuv444p16le_p016le(
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB58_2;
bra.uni $L__BB58_1;
$L__BB58_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p16le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %r17;
$L__BB58_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_p016le_uv
.visible .entry Subsample_Nearest_yuv444p16le_p016le_uv(
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_p016le_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_p016le_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB59_2;
bra.uni $L__BB59_1;
$L__BB59_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_p016le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p16le_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p16le_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs2, %r21;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 2;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 2;
add.s64 %rd14, %rd1, %rd13;
st.global.v2.u16 [%rd14], {%rs1, %rs2};
$L__BB59_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_yuv444p16le
.visible .entry Subsample_Nearest_yuv420p_yuv444p16le(
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_0,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_1,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_2,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_3,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_4,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_5,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_6,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_param_7,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_param_8,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_param_9,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_param_10,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_param_11,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_param_12,
.param .f32 Subsample_Nearest_yuv420p_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB60_2;
bra.uni $L__BB60_1;
$L__BB60_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv420p_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv420p_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB60_2:
ret;
}
// .globl Subsample_Nearest_yuv420p_yuv444p16le_uv
.visible .entry Subsample_Nearest_yuv420p_yuv444p16le_uv(
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Nearest_yuv420p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<18>;
ld.param.u32 %r4, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB61_2;
bra.uni $L__BB61_1;
$L__BB61_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv420p_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd9, %r2;
cvt.s64.s32 %rd10, %r5;
shr.u64 %rd11, %rd10, 1;
mul.lo.s64 %rd12, %rd11, %rd9;
cvt.s64.s32 %rd13, %r1;
add.s64 %rd14, %rd12, %rd13;
shl.b64 %rd15, %rd14, 1;
add.s64 %rd16, %rd2, %rd15;
st.global.u16 [%rd16], %rs3;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs4, %r21;
and.b16 %rs5, %rs4, 255;
mul.lo.s16 %rs6, %rs5, 257;
add.s64 %rd17, %rd1, %rd15;
st.global.u16 [%rd17], %rs6;
$L__BB61_2:
ret;
}
// .globl Subsample_Nearest_nv12_yuv444p16le
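// Luma path for nv12 input. Y planes are laid out identically in nv12
// and yuv420p, so the body matches the kernel above: nearest fetch from
// the param_0 texture, widen 8->16 bits (x257), store to the param_4 plane.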
.visible .entry Subsample_Nearest_nv12_yuv444p16le(
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_0,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_1,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_2,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_3,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_4,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_5,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_6,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_param_7,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_param_8,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_param_9,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_param_10,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_param_11,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_param_12,
.param .f32 Subsample_Nearest_nv12_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB62_2;
bra.uni $L__BB62_1;
$L__BB62_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_nv12_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB62_2:
ret;
}
// .globl Subsample_Nearest_nv12_yuv444p16le_uv
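// nv12 chroma is interleaved, so one fetch from the param_1 texture
// returns U in %r17 and V in %r18; each is widened to 16 bits and stored
// to its own destination plane (params 5 and 6).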
.visible .entry Subsample_Nearest_nv12_yuv444p16le_uv(
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_0,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_1,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_2,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_3,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_4,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_5,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_6,
.param .u64 Subsample_Nearest_nv12_yuv444p16le_uv_param_7,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_uv_param_8,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_uv_param_9,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_uv_param_10,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_uv_param_11,
.param .u32 Subsample_Nearest_nv12_yuv444p16le_uv_param_12,
.param .f32 Subsample_Nearest_nv12_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Nearest_nv12_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_nv12_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB63_2;
bra.uni $L__BB63_1;
$L__BB63_1:
ld.param.u32 %r7, [Subsample_Nearest_nv12_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_nv12_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_nv12_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_nv12_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_nv12_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_nv12_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
and.b16 %rs3, %rs1, 255;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 1;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 1;
add.s64 %rd14, %rd2, %rd13;
st.global.u16 [%rd14], %rs4;
and.b16 %rs5, %rs2, 255;
mul.lo.s16 %rs6, %rs5, 257;
add.s64 %rd15, %rd1, %rd13;
st.global.u16 [%rd15], %rs6;
$L__BB63_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_yuv444p16le
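// yuv444p -> yuv444p16le luma: same nearest fetch and 8->16 widening as
// above; only the source format differs.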
.visible .entry Subsample_Nearest_yuv444p_yuv444p16le(
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_0,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_1,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_2,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_3,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_4,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_5,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_6,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_param_7,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_param_8,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_param_9,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_param_10,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_param_11,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_param_12,
.param .f32 Subsample_Nearest_yuv444p_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB64_2;
bra.uni $L__BB64_1;
$L__BB64_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB64_2:
ret;
}
// .globl Subsample_Nearest_yuv444p_yuv444p16le_uv
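// Planar 444 chroma: U and V come from separate textures (params 1 and 2)
// and are each widened to 16 bits before the planar stores.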
.visible .entry Subsample_Nearest_yuv444p_yuv444p16le_uv(
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<18>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB65_2;
bra.uni $L__BB65_1;
$L__BB65_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
and.b16 %rs2, %rs1, 255;
mul.lo.s16 %rs3, %rs2, 257;
cvt.s64.s32 %rd9, %r2;
cvt.s64.s32 %rd10, %r5;
shr.u64 %rd11, %rd10, 1;
mul.lo.s64 %rd12, %rd11, %rd9;
cvt.s64.s32 %rd13, %r1;
add.s64 %rd14, %rd12, %rd13;
shl.b64 %rd15, %rd14, 1;
add.s64 %rd16, %rd2, %rd15;
st.global.u16 [%rd16], %rs3;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs4, %r21;
and.b16 %rs5, %rs4, 255;
mul.lo.s16 %rs6, %rs5, 257;
add.s64 %rd17, %rd1, %rd15;
st.global.u16 [%rd17], %rs6;
$L__BB65_2:
ret;
}
// .globl Subsample_Nearest_p010le_yuv444p16le
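// p010le keeps 10 significant bits in the top of each 16-bit word.
// Widening to the full 16-bit range is done below with
//   out = in | (in >> 10)
// which replicates the most significant bits into the low bits.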
.visible .entry Subsample_Nearest_p010le_yuv444p16le(
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_0,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_1,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_2,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_3,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_4,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_5,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_6,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_param_7,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_param_8,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_param_9,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_param_10,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_param_11,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_param_12,
.param .f32 Subsample_Nearest_p010le_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB66_2;
bra.uni $L__BB66_1;
$L__BB66_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p010le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %rs3;
$L__BB66_2:
ret;
}
// .globl Subsample_Nearest_p010le_yuv444p16le_uv
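// Interleaved p010le chroma: one fetch yields U (%r17) and V (%r18);
// both get the same (x | x >> 10) widening before the planar stores.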
.visible .entry Subsample_Nearest_p010le_yuv444p16le_uv(
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Nearest_p010le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Nearest_p010le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Nearest_p010le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Nearest_p010le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p010le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB67_2;
bra.uni $L__BB67_1;
$L__BB67_1:
ld.param.u32 %r7, [Subsample_Nearest_p010le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p010le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p010le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_p010le_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_p010le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_p010le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 1;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 1;
add.s64 %rd14, %rd2, %rd13;
st.global.u16 [%rd14], %rs4;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
add.s64 %rd15, %rd1, %rd13;
st.global.u16 [%rd15], %rs6;
$L__BB67_2:
ret;
}
// .globl Subsample_Nearest_p016le_yuv444p16le
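// p016le samples are already full 16-bit, so the texel is stored as-is;
// st.global.u16 keeps the low 16 bits of the 32-bit texel register.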
.visible .entry Subsample_Nearest_p016le_yuv444p16le(
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_0,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_1,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_2,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_3,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_4,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_5,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_6,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_param_7,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_param_8,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_param_9,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_param_10,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_param_11,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_param_12,
.param .f32 Subsample_Nearest_p016le_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB68_2;
bra.uni $L__BB68_1;
$L__BB68_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_p016le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %r17;
$L__BB68_2:
ret;
}
// .globl Subsample_Nearest_p016le_yuv444p16le_uv
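// Interleaved p016le chroma: 16-bit U (%r17) and V (%r18) are stored
// unchanged to the two destination planes.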
.visible .entry Subsample_Nearest_p016le_yuv444p16le_uv(
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Nearest_p016le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Nearest_p016le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Nearest_p016le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Nearest_p016le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_p016le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB69_2;
bra.uni $L__BB69_1;
$L__BB69_1:
ld.param.u32 %r7, [Subsample_Nearest_p016le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_p016le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_p016le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Nearest_p016le_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Nearest_p016le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Nearest_p016le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f1, %f2}];
// end inline asm
cvt.s64.s32 %rd7, %r2;
cvt.s64.s32 %rd8, %r5;
shr.u64 %rd9, %rd8, 1;
mul.lo.s64 %rd10, %rd9, %rd7;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
shl.b64 %rd13, %rd12, 1;
add.s64 %rd14, %rd2, %rd13;
st.global.u16 [%rd14], %r17;
add.s64 %rd15, %rd1, %rd13;
st.global.u16 [%rd15], %r18;
$L__BB69_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_yuv444p16le
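// 16-bit to 16-bit luma copy with nearest-neighbor resampling; no
// per-sample conversion is needed.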
.visible .entry Subsample_Nearest_yuv444p16le_yuv444p16le(
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB70_2;
bra.uni $L__BB70_1;
$L__BB70_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_yuv444p16le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 1;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 1;
add.s64 %rd12, %rd1, %rd11;
st.global.u16 [%rd12], %r17;
$L__BB70_2:
ret;
}
// .globl Subsample_Nearest_yuv444p16le_yuv444p16le_uv
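// Planar 16-bit chroma copy: one fetch per source texture (params 1
// and 2), stored unchanged.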
.visible .entry Subsample_Nearest_yuv444p16le_yuv444p16le_uv(
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<25>;
.reg .f32 %f<15>;
.reg .b64 %rd<18>;
ld.param.u32 %r4, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB71_2;
bra.uni $L__BB71_1;
$L__BB71_1:
ld.param.u32 %r7, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_10];
ld.param.u64 %rd8, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Nearest_yuv444p16le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f5, %r6;
cvt.rn.f32.s32 %f6, %r3;
div.rn.f32 %f7, %f5, %f6;
cvt.rn.f32.s32 %f8, %r7;
cvt.rn.f32.s32 %f9, %r4;
div.rn.f32 %f10, %f8, %f9;
cvt.rn.f32.s32 %f11, %r1;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f1, %f7, %f12;
cvt.rn.f32.s32 %f13, %r2;
add.f32 %f14, %f13, 0f3F000000;
mul.f32 %f2, %f10, %f14;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f1, %f2}];
// end inline asm
cvt.s64.s32 %rd9, %r2;
cvt.s64.s32 %rd10, %r5;
shr.u64 %rd11, %rd10, 1;
mul.lo.s64 %rd12, %rd11, %rd9;
cvt.s64.s32 %rd13, %r1;
add.s64 %rd14, %rd12, %rd13;
shl.b64 %rd15, %rd14, 1;
add.s64 %rd16, %rd2, %rd15;
st.global.u16 [%rd16], %r17;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f1, %f2}];
// end inline asm
add.s64 %rd17, %rd1, %rd15;
st.global.u16 [%rd17], %r21;
$L__BB71_2:
ret;
}
// .globl Subsample_Nearest_bgr0_bgr0
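// Packed 4-byte RGB: a single fetch returns all four channels, which are
// stored together with one st.global.v4.u8. The pitch (param 10) is
// divided by 4 because addressing is done in whole pixels.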
.visible .entry Subsample_Nearest_bgr0_bgr0(
.param .u64 Subsample_Nearest_bgr0_bgr0_param_0,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_1,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_2,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_3,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_4,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_5,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_6,
.param .u64 Subsample_Nearest_bgr0_bgr0_param_7,
.param .u32 Subsample_Nearest_bgr0_bgr0_param_8,
.param .u32 Subsample_Nearest_bgr0_bgr0_param_9,
.param .u32 Subsample_Nearest_bgr0_bgr0_param_10,
.param .u32 Subsample_Nearest_bgr0_bgr0_param_11,
.param .u32 Subsample_Nearest_bgr0_bgr0_param_12,
.param .f32 Subsample_Nearest_bgr0_bgr0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_bgr0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Nearest_bgr0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB72_2;
bra.uni $L__BB72_1;
$L__BB72_1:
ld.param.u32 %r7, [Subsample_Nearest_bgr0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Nearest_bgr0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Nearest_bgr0_bgr0_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_bgr0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_bgr0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.u16.u32 %rs3, %r19;
cvt.u16.u32 %rs4, %r20;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v4.u8 [%rd12], {%rs1, %rs2, %rs3, %rs4};
$L__BB72_2:
ret;
}
// .globl Subsample_Nearest_bgr0_bgr0_uv
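// Packed RGB has no separate chroma plane, so the _uv entry points for
// the rgb0/bgr0 kernels are empty stubs that return immediately.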
.visible .entry Subsample_Nearest_bgr0_bgr0_uv(
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_0,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_1,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_2,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_3,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_4,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_5,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_6,
.param .u64 Subsample_Nearest_bgr0_bgr0_uv_param_7,
.param .u32 Subsample_Nearest_bgr0_bgr0_uv_param_8,
.param .u32 Subsample_Nearest_bgr0_bgr0_uv_param_9,
.param .u32 Subsample_Nearest_bgr0_bgr0_uv_param_10,
.param .u32 Subsample_Nearest_bgr0_bgr0_uv_param_11,
.param .u32 Subsample_Nearest_bgr0_bgr0_uv_param_12,
.param .f32 Subsample_Nearest_bgr0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Nearest_rgb0_rgb0
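// Identical to Subsample_Nearest_bgr0_bgr0: a straight packed-pixel copy
// with nearest-neighbor resampling.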
.visible .entry Subsample_Nearest_rgb0_rgb0(
.param .u64 Subsample_Nearest_rgb0_rgb0_param_0,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_1,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_2,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_3,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_4,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_5,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_6,
.param .u64 Subsample_Nearest_rgb0_rgb0_param_7,
.param .u32 Subsample_Nearest_rgb0_rgb0_param_8,
.param .u32 Subsample_Nearest_rgb0_rgb0_param_9,
.param .u32 Subsample_Nearest_rgb0_rgb0_param_10,
.param .u32 Subsample_Nearest_rgb0_rgb0_param_11,
.param .u32 Subsample_Nearest_rgb0_rgb0_param_12,
.param .f32 Subsample_Nearest_rgb0_rgb0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_rgb0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Nearest_rgb0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB74_2;
bra.uni $L__BB74_1;
$L__BB74_1:
ld.param.u32 %r7, [Subsample_Nearest_rgb0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Nearest_rgb0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Nearest_rgb0_rgb0_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_rgb0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_rgb0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.u16.u32 %rs3, %r19;
cvt.u16.u32 %rs4, %r20;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v4.u8 [%rd12], {%rs1, %rs2, %rs3, %rs4};
$L__BB74_2:
ret;
}
// .globl Subsample_Nearest_rgb0_rgb0_uv
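// Empty stub; see the note at Subsample_Nearest_bgr0_bgr0_uv.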
.visible .entry Subsample_Nearest_rgb0_rgb0_uv(
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_0,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_1,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_2,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_3,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_4,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_5,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_6,
.param .u64 Subsample_Nearest_rgb0_rgb0_uv_param_7,
.param .u32 Subsample_Nearest_rgb0_rgb0_uv_param_8,
.param .u32 Subsample_Nearest_rgb0_rgb0_uv_param_9,
.param .u32 Subsample_Nearest_rgb0_rgb0_uv_param_10,
.param .u32 Subsample_Nearest_rgb0_rgb0_uv_param_11,
.param .u32 Subsample_Nearest_rgb0_rgb0_uv_param_12,
.param .f32 Subsample_Nearest_rgb0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Nearest_bgr0_rgb0
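// Same packed copy, but the store swaps channels 0 and 2
// ({%rs3, %rs2, %rs1, %rs4}) to convert BGR ordering to RGB.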
.visible .entry Subsample_Nearest_bgr0_rgb0(
.param .u64 Subsample_Nearest_bgr0_rgb0_param_0,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_1,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_2,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_3,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_4,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_5,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_6,
.param .u64 Subsample_Nearest_bgr0_rgb0_param_7,
.param .u32 Subsample_Nearest_bgr0_rgb0_param_8,
.param .u32 Subsample_Nearest_bgr0_rgb0_param_9,
.param .u32 Subsample_Nearest_bgr0_rgb0_param_10,
.param .u32 Subsample_Nearest_bgr0_rgb0_param_11,
.param .u32 Subsample_Nearest_bgr0_rgb0_param_12,
.param .f32 Subsample_Nearest_bgr0_rgb0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_bgr0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Nearest_bgr0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB76_2;
bra.uni $L__BB76_1;
$L__BB76_1:
ld.param.u32 %r7, [Subsample_Nearest_bgr0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Nearest_bgr0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Nearest_bgr0_rgb0_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_bgr0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_bgr0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.u16.u32 %rs3, %r19;
cvt.u16.u32 %rs4, %r20;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v4.u8 [%rd12], {%rs3, %rs2, %rs1, %rs4};
$L__BB76_2:
ret;
}
// .globl Subsample_Nearest_bgr0_rgb0_uv
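// Empty stub; see the note at Subsample_Nearest_bgr0_bgr0_uv.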
.visible .entry Subsample_Nearest_bgr0_rgb0_uv(
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_0,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_1,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_2,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_3,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_4,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_5,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_6,
.param .u64 Subsample_Nearest_bgr0_rgb0_uv_param_7,
.param .u32 Subsample_Nearest_bgr0_rgb0_uv_param_8,
.param .u32 Subsample_Nearest_bgr0_rgb0_uv_param_9,
.param .u32 Subsample_Nearest_bgr0_rgb0_uv_param_10,
.param .u32 Subsample_Nearest_bgr0_rgb0_uv_param_11,
.param .u32 Subsample_Nearest_bgr0_rgb0_uv_param_12,
.param .f32 Subsample_Nearest_bgr0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Nearest_rgb0_bgr0
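// Mirror image of the kernel above: the same 0<->2 channel swap converts
// RGB ordering to BGR.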
.visible .entry Subsample_Nearest_rgb0_bgr0(
.param .u64 Subsample_Nearest_rgb0_bgr0_param_0,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_1,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_2,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_3,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_4,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_5,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_6,
.param .u64 Subsample_Nearest_rgb0_bgr0_param_7,
.param .u32 Subsample_Nearest_rgb0_bgr0_param_8,
.param .u32 Subsample_Nearest_rgb0_bgr0_param_9,
.param .u32 Subsample_Nearest_rgb0_bgr0_param_10,
.param .u32 Subsample_Nearest_rgb0_bgr0_param_11,
.param .u32 Subsample_Nearest_rgb0_bgr0_param_12,
.param .f32 Subsample_Nearest_rgb0_bgr0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<21>;
.reg .f32 %f<13>;
.reg .b64 %rd<13>;
ld.param.u32 %r4, [Subsample_Nearest_rgb0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Nearest_rgb0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB78_2;
bra.uni $L__BB78_1;
$L__BB78_1:
ld.param.u32 %r7, [Subsample_Nearest_rgb0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Nearest_rgb0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Nearest_rgb0_bgr0_param_10];
ld.param.u64 %rd4, [Subsample_Nearest_rgb0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Nearest_rgb0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f3, %r6;
cvt.rn.f32.s32 %f4, %r3;
div.rn.f32 %f5, %f3, %f4;
cvt.rn.f32.s32 %f6, %r7;
cvt.rn.f32.s32 %f7, %r4;
div.rn.f32 %f8, %f6, %f7;
cvt.rn.f32.s32 %f9, %r1;
add.f32 %f10, %f9, 0f3F000000;
mul.f32 %f1, %f5, %f10;
cvt.rn.f32.s32 %f11, %r2;
add.f32 %f12, %f11, 0f3F000000;
mul.f32 %f2, %f8, %f12;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f1, %f2}];
// end inline asm
cvt.u16.u32 %rs1, %r17;
cvt.u16.u32 %rs2, %r18;
cvt.u16.u32 %rs3, %r19;
cvt.u16.u32 %rs4, %r20;
cvt.s64.s32 %rd5, %r2;
cvt.s64.s32 %rd6, %r5;
shr.u64 %rd7, %rd6, 2;
mul.lo.s64 %rd8, %rd7, %rd5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
shl.b64 %rd11, %rd10, 2;
add.s64 %rd12, %rd1, %rd11;
st.global.v4.u8 [%rd12], {%rs3, %rs2, %rs1, %rs4};
$L__BB78_2:
ret;
}
// .globl Subsample_Nearest_rgb0_bgr0_uv
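// Empty stub; see the note at Subsample_Nearest_bgr0_bgr0_uv.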
.visible .entry Subsample_Nearest_rgb0_bgr0_uv(
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_0,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_1,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_2,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_3,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_4,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_5,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_6,
.param .u64 Subsample_Nearest_rgb0_bgr0_uv_param_7,
.param .u32 Subsample_Nearest_rgb0_bgr0_uv_param_8,
.param .u32 Subsample_Nearest_rgb0_bgr0_uv_param_9,
.param .u32 Subsample_Nearest_rgb0_bgr0_uv_param_10,
.param .u32 Subsample_Nearest_rgb0_bgr0_uv_param_11,
.param .u32 Subsample_Nearest_rgb0_bgr0_uv_param_12,
.param .f32 Subsample_Nearest_rgb0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Bilinear_yuv420p_yuv420p
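// Bilinear path. Per axis it derives a tap offset from the scale ratio:
//   t = clamp((scale - 1) * 0.5, 0.0, 1.0);  offset = t / (t + 0.5)
// (0fBF800000 = -1.0, 0f3F000000 = 0.5, 0f3F800000 = 1.0), then fetches
// the source at the mapped pixel centre +/- offset in x and y, four taps
// in all, and averages their low bytes with rounding: (sum + 2) >> 2.
// At scale 1 the offset collapses to 0 and the four taps coincide.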
.visible .entry Subsample_Bilinear_yuv420p_yuv420p(
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_0,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_1,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_2,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_3,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_4,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_5,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_6,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_param_7,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_param_8,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_param_9,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_param_10,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_param_11,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_param_12,
.param .f32 Subsample_Bilinear_yuv420p_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB80_2;
bra.uni $L__BB80_1;
$L__BB80_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv420p_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
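// %f5/%f4 hold the first tap position (centre minus offset); %f7 and %f8
// below add the offset back, giving the other three tap positions.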
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB80_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_yuv420p_uv
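// Chroma variant: the same four-tap average is applied independently to
// the U texture (param 1, written to the param_5 plane) and the V
// texture (param 2, written to the param_6 plane).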
.visible .entry Subsample_Bilinear_yuv420p_yuv420p_uv(
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_0,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_1,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_2,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_3,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_4,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_5,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_6,
.param .u64 Subsample_Bilinear_yuv420p_yuv420p_uv_param_7,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_uv_param_8,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_uv_param_9,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_uv_param_10,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_uv_param_11,
.param .u32 Subsample_Bilinear_yuv420p_yuv420p_uv_param_12,
.param .f32 Subsample_Bilinear_yuv420p_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<20>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB81_2;
bra.uni $L__BB81_1;
$L__BB81_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv420p_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
mul.wide.s32 %rd15, %r2, %r5;
cvt.s64.s32 %rd16, %r1;
add.s64 %rd17, %rd15, %rd16;
add.s64 %rd18, %rd2, %rd17;
st.global.u8 [%rd18], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 255;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 255;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
add.s64 %rd19, %rd1, %rd17;
st.global.u8 [%rd19], %r66;
$L__BB81_2:
ret;
}
// .globl Subsample_Bilinear_nv12_yuv420p
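// nv12 luma, bilinear: Y planes match yuv420p, so this is the same
// four-tap kernel as Subsample_Bilinear_yuv420p_yuv420p.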
.visible .entry Subsample_Bilinear_nv12_yuv420p(
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_0,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_1,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_2,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_3,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_4,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_5,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_6,
.param .u64 Subsample_Bilinear_nv12_yuv420p_param_7,
.param .u32 Subsample_Bilinear_nv12_yuv420p_param_8,
.param .u32 Subsample_Bilinear_nv12_yuv420p_param_9,
.param .u32 Subsample_Bilinear_nv12_yuv420p_param_10,
.param .u32 Subsample_Bilinear_nv12_yuv420p_param_11,
.param .u32 Subsample_Bilinear_nv12_yuv420p_param_12,
.param .f32 Subsample_Bilinear_nv12_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB82_2;
bra.uni $L__BB82_1;
$L__BB82_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB82_2:
ret;
}
// .globl Subsample_Bilinear_nv12_yuv420p_uv
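// Interleaved nv12 chroma, bilinear: each of the four fetches returns a
// U/V pair, so the U components (%r17, %r21, ...) and V components
// (%r18, %r22, ...) are accumulated side by side and each rounded
// average is stored to its own destination plane.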
.visible .entry Subsample_Bilinear_nv12_yuv420p_uv(
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_0,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_1,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_2,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_3,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_4,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_5,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_6,
.param .u64 Subsample_Bilinear_nv12_yuv420p_uv_param_7,
.param .u32 Subsample_Bilinear_nv12_yuv420p_uv_param_8,
.param .u32 Subsample_Bilinear_nv12_yuv420p_uv_param_9,
.param .u32 Subsample_Bilinear_nv12_yuv420p_uv_param_10,
.param .u32 Subsample_Bilinear_nv12_yuv420p_uv_param_11,
.param .u32 Subsample_Bilinear_nv12_yuv420p_uv_param_12,
.param .f32 Subsample_Bilinear_nv12_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB83_2;
bra.uni $L__BB83_1;
$L__BB83_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_nv12_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_nv12_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 255;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 255;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 255;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 255;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd2, %rd12;
st.global.u8 [%rd13], %r49;
add.s64 %rd14, %rd1, %rd12;
st.global.u8 [%rd14], %r50;
$L__BB83_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_yuv420p
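// [annotation] YUV444P -> YUV420P luma: the body is identical to the NV12
// luma kernel above apart from names and label numbers; the 4:4:4 -> 4:2:0
// chroma decimation is carried entirely by the per-plane scale ratios the
// host passes in params 8/9 and 11/12, not by different code.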
.visible .entry Subsample_Bilinear_yuv444p_yuv420p(
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_0,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_1,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_2,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_3,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_4,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_5,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_6,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_param_7,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_param_8,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_param_9,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_param_10,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_param_11,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_param_12,
.param .f32 Subsample_Bilinear_yuv444p_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB84_2;
bra.uni $L__BB84_1;
$L__BB84_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB84_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_yuv420p_uv
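// [annotation] YUV444P -> YUV420P chroma: unlike the NV12 case, U and V come
// from two separate planar textures (param_1 and param_2, presumably U and
// V), giving eight fetches in total. Both rounded averages reuse one computed
// plane offset (%rd17) and are stored through param_5 and param_6.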
.visible .entry Subsample_Bilinear_yuv444p_yuv420p_uv(
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p_yuv420p_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p_yuv420p_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<20>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB85_2;
bra.uni $L__BB85_1;
$L__BB85_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv444p_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
mul.wide.s32 %rd15, %r2, %r5;
cvt.s64.s32 %rd16, %r1;
add.s64 %rd17, %rd15, %rd16;
add.s64 %rd18, %rd2, %rd17;
st.global.u8 [%rd18], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 255;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 255;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
add.s64 %rd19, %rd1, %rd17;
st.global.u8 [%rd19], %r66;
$L__BB85_2:
ret;
}
// .globl Subsample_Bilinear_p010le_yuv420p
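// [annotation] P010LE -> YUV420P luma: texels are 16-bit, so lanes are
// masked with 65535 instead of 255 and the final shift is 10 rather than 2:
// (s0+s1+s2+s3+2) >> 10 folds the rounded 4-tap average (>>2) together with
// the 16-bit -> 8-bit narrowing (>>8) before the single-byte store.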
.visible .entry Subsample_Bilinear_p010le_yuv420p(
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_0,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_1,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_2,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_3,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_4,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_5,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_6,
.param .u64 Subsample_Bilinear_p010le_yuv420p_param_7,
.param .u32 Subsample_Bilinear_p010le_yuv420p_param_8,
.param .u32 Subsample_Bilinear_p010le_yuv420p_param_9,
.param .u32 Subsample_Bilinear_p010le_yuv420p_param_10,
.param .u32 Subsample_Bilinear_p010le_yuv420p_param_11,
.param .u32 Subsample_Bilinear_p010le_yuv420p_param_12,
.param .f32 Subsample_Bilinear_p010le_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB86_2;
bra.uni $L__BB86_1;
$L__BB86_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB86_2:
ret;
}
// .globl Subsample_Bilinear_p010le_yuv420p_uv
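// [annotation] P010LE -> YUV420P chroma: interleaved 16-bit UV in (param_1),
// two planar 8-bit outputs; per channel the same (sum + 2) >> 10 narrowing
// as the luma kernel above.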
.visible .entry Subsample_Bilinear_p010le_yuv420p_uv(
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_0,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_1,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_2,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_3,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_4,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_5,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_6,
.param .u64 Subsample_Bilinear_p010le_yuv420p_uv_param_7,
.param .u32 Subsample_Bilinear_p010le_yuv420p_uv_param_8,
.param .u32 Subsample_Bilinear_p010le_yuv420p_uv_param_9,
.param .u32 Subsample_Bilinear_p010le_yuv420p_uv_param_10,
.param .u32 Subsample_Bilinear_p010le_yuv420p_uv_param_11,
.param .u32 Subsample_Bilinear_p010le_yuv420p_uv_param_12,
.param .f32 Subsample_Bilinear_p010le_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB87_2;
bra.uni $L__BB87_1;
$L__BB87_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_p010le_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_p010le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 10;
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd2, %rd12;
st.global.u8 [%rd13], %r49;
shr.u32 %r50, %r48, 10;
add.s64 %rd14, %rd1, %rd12;
st.global.u8 [%rd14], %r50;
$L__BB87_2:
ret;
}
// .globl Subsample_Bilinear_p016le_yuv420p
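// [annotation] P016LE -> YUV420P: this kernel and its _uv companion are the
// same generated code as the P010LE pair, names and labels aside; both
// formats are fetched as raw 16-bit words (P010 carries its 10 significant
// bits in the MSBs), so only the host-side format tag differs.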
.visible .entry Subsample_Bilinear_p016le_yuv420p(
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_0,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_1,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_2,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_3,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_4,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_5,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_6,
.param .u64 Subsample_Bilinear_p016le_yuv420p_param_7,
.param .u32 Subsample_Bilinear_p016le_yuv420p_param_8,
.param .u32 Subsample_Bilinear_p016le_yuv420p_param_9,
.param .u32 Subsample_Bilinear_p016le_yuv420p_param_10,
.param .u32 Subsample_Bilinear_p016le_yuv420p_param_11,
.param .u32 Subsample_Bilinear_p016le_yuv420p_param_12,
.param .f32 Subsample_Bilinear_p016le_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB88_2;
bra.uni $L__BB88_1;
$L__BB88_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB88_2:
ret;
}
// .globl Subsample_Bilinear_p016le_yuv420p_uv
.visible .entry Subsample_Bilinear_p016le_yuv420p_uv(
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_0,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_1,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_2,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_3,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_4,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_5,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_6,
.param .u64 Subsample_Bilinear_p016le_yuv420p_uv_param_7,
.param .u32 Subsample_Bilinear_p016le_yuv420p_uv_param_8,
.param .u32 Subsample_Bilinear_p016le_yuv420p_uv_param_9,
.param .u32 Subsample_Bilinear_p016le_yuv420p_uv_param_10,
.param .u32 Subsample_Bilinear_p016le_yuv420p_uv_param_11,
.param .u32 Subsample_Bilinear_p016le_yuv420p_uv_param_12,
.param .f32 Subsample_Bilinear_p016le_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB89_2;
bra.uni $L__BB89_1;
$L__BB89_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_p016le_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_p016le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 10;
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd2, %rd12;
st.global.u8 [%rd13], %r49;
shr.u32 %r50, %r48, 10;
add.s64 %rd14, %rd1, %rd12;
st.global.u8 [%rd14], %r50;
$L__BB89_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_yuv420p
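// [annotation] YUV444P16LE -> YUV420P: third 16-bit source family. Chroma is
// planar here, so the _uv kernel that follows mirrors the 8-bit yuv444p _uv
// variant but with 65535 masks and the combined (sum + 2) >> 10 narrowing.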
.visible .entry Subsample_Bilinear_yuv444p16le_yuv420p(
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_yuv420p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB90_2;
bra.uni $L__BB90_1;
$L__BB90_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p16le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB90_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_yuv420p_uv
.visible .entry Subsample_Bilinear_yuv444p16le_yuv420p_uv(
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<20>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB91_2;
bra.uni $L__BB91_1;
$L__BB91_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv444p16le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 65535;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 65535;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 65535;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 65535;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 10;
mul.wide.s32 %rd15, %r2, %r5;
cvt.s64.s32 %rd16, %r1;
add.s64 %rd17, %rd15, %rd16;
add.s64 %rd18, %rd2, %rd17;
st.global.u8 [%rd18], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 65535;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 65535;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 65535;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 65535;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 10;
add.s64 %rd19, %rd1, %rd17;
st.global.u8 [%rd19], %r66;
$L__BB91_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_nv12
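// [annotation] The destination format switches to NV12 from here on. The
// luma kernels appear unaffected by that change (luma is planar in both
// layouts, so the single-byte store is reused); the NV12 packing shows up
// only in the _uv kernels below.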
.visible .entry Subsample_Bilinear_yuv420p_nv12(
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_0,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_1,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_2,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_3,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_4,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_5,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_6,
.param .u64 Subsample_Bilinear_yuv420p_nv12_param_7,
.param .u32 Subsample_Bilinear_yuv420p_nv12_param_8,
.param .u32 Subsample_Bilinear_yuv420p_nv12_param_9,
.param .u32 Subsample_Bilinear_yuv420p_nv12_param_10,
.param .u32 Subsample_Bilinear_yuv420p_nv12_param_11,
.param .u32 Subsample_Bilinear_yuv420p_nv12_param_12,
.param .f32 Subsample_Bilinear_yuv420p_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB92_2;
bra.uni $L__BB92_1;
$L__BB92_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv420p_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB92_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_nv12_uv
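// [annotation] YUV420P -> NV12 chroma: planar U and V textures (param_1,
// param_2) feed one interleaved output. The two rounded averages are
// narrowed with cvt.u16.u32 and written as a pair by st.global.v2.u8; the
// destination byte offset works out to y*dst_pitch + 2*x, computed as
// ((dst_pitch >> 1) * y + x) << 1 with the pitch taken from param_10.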
.visible .entry Subsample_Bilinear_yuv420p_nv12_uv(
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_0,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_1,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_2,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_3,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_4,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_5,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_6,
.param .u64 Subsample_Bilinear_yuv420p_nv12_uv_param_7,
.param .u32 Subsample_Bilinear_yuv420p_nv12_uv_param_8,
.param .u32 Subsample_Bilinear_yuv420p_nv12_uv_param_9,
.param .u32 Subsample_Bilinear_yuv420p_nv12_uv_param_10,
.param .u32 Subsample_Bilinear_yuv420p_nv12_uv_param_11,
.param .u32 Subsample_Bilinear_yuv420p_nv12_uv_param_12,
.param .f32 Subsample_Bilinear_yuv420p_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB93_2;
bra.uni $L__BB93_1;
$L__BB93_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_nv12_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv420p_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv420p_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
cvt.u16.u32 %rs1, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 255;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 255;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
cvt.u16.u32 %rs2, %r66;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 1;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 1;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u8 [%rd20], {%rs1, %rs2};
$L__BB93_2:
ret;
}
// .globl Subsample_Bilinear_nv12_nv12
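// [annotation] NV12 -> NV12: a pure resize. The _uv companion below is the
// most direct chroma path in the module: one interleaved source texture,
// .x/.y lanes averaged separately, then the same v2.u8 interleaved store
// used by the other NV12-destination kernels.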
.visible .entry Subsample_Bilinear_nv12_nv12(
.param .u64 Subsample_Bilinear_nv12_nv12_param_0,
.param .u64 Subsample_Bilinear_nv12_nv12_param_1,
.param .u64 Subsample_Bilinear_nv12_nv12_param_2,
.param .u64 Subsample_Bilinear_nv12_nv12_param_3,
.param .u64 Subsample_Bilinear_nv12_nv12_param_4,
.param .u64 Subsample_Bilinear_nv12_nv12_param_5,
.param .u64 Subsample_Bilinear_nv12_nv12_param_6,
.param .u64 Subsample_Bilinear_nv12_nv12_param_7,
.param .u32 Subsample_Bilinear_nv12_nv12_param_8,
.param .u32 Subsample_Bilinear_nv12_nv12_param_9,
.param .u32 Subsample_Bilinear_nv12_nv12_param_10,
.param .u32 Subsample_Bilinear_nv12_nv12_param_11,
.param .u32 Subsample_Bilinear_nv12_nv12_param_12,
.param .f32 Subsample_Bilinear_nv12_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB94_2;
bra.uni $L__BB94_1;
$L__BB94_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB94_2:
ret;
}
// .globl Subsample_Bilinear_nv12_nv12_uv
.visible .entry Subsample_Bilinear_nv12_nv12_uv(
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_0,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_1,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_2,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_3,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_4,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_5,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_6,
.param .u64 Subsample_Bilinear_nv12_nv12_uv_param_7,
.param .u32 Subsample_Bilinear_nv12_nv12_uv_param_8,
.param .u32 Subsample_Bilinear_nv12_nv12_uv_param_9,
.param .u32 Subsample_Bilinear_nv12_nv12_uv_param_10,
.param .u32 Subsample_Bilinear_nv12_nv12_uv_param_11,
.param .u32 Subsample_Bilinear_nv12_nv12_uv_param_12,
.param .f32 Subsample_Bilinear_nv12_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB95_2;
bra.uni $L__BB95_1;
$L__BB95_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 255;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 255;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 255;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 255;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs1, %r49;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u8 [%rd15], {%rs1, %rs2};
$L__BB95_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_nv12
.visible .entry Subsample_Bilinear_yuv444p_nv12(
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_0,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_1,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_2,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_3,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_4,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_5,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_6,
.param .u64 Subsample_Bilinear_yuv444p_nv12_param_7,
.param .u32 Subsample_Bilinear_yuv444p_nv12_param_8,
.param .u32 Subsample_Bilinear_yuv444p_nv12_param_9,
.param .u32 Subsample_Bilinear_yuv444p_nv12_param_10,
.param .u32 Subsample_Bilinear_yuv444p_nv12_param_11,
.param .u32 Subsample_Bilinear_yuv444p_nv12_param_12,
.param .f32 Subsample_Bilinear_yuv444p_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB96_2;
bra.uni $L__BB96_1;
$L__BB96_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB96_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_nv12_uv
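// [annotation] YUV444P -> NV12 chroma: combines the read side of the
// yuv444p _uv kernels (two planar textures, eight fetches) with the
// interleaved v2.u8 write of the NV12-destination kernels.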
.visible .entry Subsample_Bilinear_yuv444p_nv12_uv(
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p_nv12_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p_nv12_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p_nv12_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p_nv12_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p_nv12_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p_nv12_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB97_2;
bra.uni $L__BB97_1;
$L__BB97_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_nv12_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv444p_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
cvt.u16.u32 %rs1, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 255;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 255;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
cvt.u16.u32 %rs2, %r66;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 1;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 1;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u8 [%rd20], {%rs1, %rs2};
$L__BB97_2:
ret;
}
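// 16-bit source variant: taps are masked with 65535 and the final shift is
// >> 10 instead of >> 2, i.e. average the four taps and drop the low 8 bits
// to narrow the samples (10-bit-in-MSBs for p010le) to the 8-bit output.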
// .globl Subsample_Bilinear_p010le_nv12
.visible .entry Subsample_Bilinear_p010le_nv12(
.param .u64 Subsample_Bilinear_p010le_nv12_param_0,
.param .u64 Subsample_Bilinear_p010le_nv12_param_1,
.param .u64 Subsample_Bilinear_p010le_nv12_param_2,
.param .u64 Subsample_Bilinear_p010le_nv12_param_3,
.param .u64 Subsample_Bilinear_p010le_nv12_param_4,
.param .u64 Subsample_Bilinear_p010le_nv12_param_5,
.param .u64 Subsample_Bilinear_p010le_nv12_param_6,
.param .u64 Subsample_Bilinear_p010le_nv12_param_7,
.param .u32 Subsample_Bilinear_p010le_nv12_param_8,
.param .u32 Subsample_Bilinear_p010le_nv12_param_9,
.param .u32 Subsample_Bilinear_p010le_nv12_param_10,
.param .u32 Subsample_Bilinear_p010le_nv12_param_11,
.param .u32 Subsample_Bilinear_p010le_nv12_param_12,
.param .f32 Subsample_Bilinear_p010le_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB98_2;
bra.uni $L__BB98_1;
$L__BB98_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB98_2:
ret;
}
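// Interleaved-chroma source: one texture (param 1) supplies U in the .x
// component (%r17, %r21, ...) and V in .y (%r18, %r22, ...); each channel is
// averaged independently and the pair is stored with st.global.v2.u8.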
// .globl Subsample_Bilinear_p010le_nv12_uv
.visible .entry Subsample_Bilinear_p010le_nv12_uv(
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_0,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_1,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_2,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_3,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_4,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_5,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_6,
.param .u64 Subsample_Bilinear_p010le_nv12_uv_param_7,
.param .u32 Subsample_Bilinear_p010le_nv12_uv_param_8,
.param .u32 Subsample_Bilinear_p010le_nv12_uv_param_9,
.param .u32 Subsample_Bilinear_p010le_nv12_uv_param_10,
.param .u32 Subsample_Bilinear_p010le_nv12_uv_param_11,
.param .u32 Subsample_Bilinear_p010le_nv12_uv_param_12,
.param .f32 Subsample_Bilinear_p010le_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB99_2;
bra.uni $L__BB99_1;
$L__BB99_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 10;
cvt.u16.u32 %rs1, %r49;
shr.u32 %r50, %r48, 10;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u8 [%rd15], {%rs1, %rs2};
$L__BB99_2:
ret;
}
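// Body identical to Subsample_Bilinear_p010le_nv12 above; only the source
// format in the name differs (full 16-bit p016le samples).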
// .globl Subsample_Bilinear_p016le_nv12
.visible .entry Subsample_Bilinear_p016le_nv12(
.param .u64 Subsample_Bilinear_p016le_nv12_param_0,
.param .u64 Subsample_Bilinear_p016le_nv12_param_1,
.param .u64 Subsample_Bilinear_p016le_nv12_param_2,
.param .u64 Subsample_Bilinear_p016le_nv12_param_3,
.param .u64 Subsample_Bilinear_p016le_nv12_param_4,
.param .u64 Subsample_Bilinear_p016le_nv12_param_5,
.param .u64 Subsample_Bilinear_p016le_nv12_param_6,
.param .u64 Subsample_Bilinear_p016le_nv12_param_7,
.param .u32 Subsample_Bilinear_p016le_nv12_param_8,
.param .u32 Subsample_Bilinear_p016le_nv12_param_9,
.param .u32 Subsample_Bilinear_p016le_nv12_param_10,
.param .u32 Subsample_Bilinear_p016le_nv12_param_11,
.param .u32 Subsample_Bilinear_p016le_nv12_param_12,
.param .f32 Subsample_Bilinear_p016le_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB100_2;
bra.uni $L__BB100_1;
$L__BB100_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB100_2:
ret;
}
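// Interleaved-chroma p016le variant; body matches the p010le UV kernel above.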
// .globl Subsample_Bilinear_p016le_nv12_uv
.visible .entry Subsample_Bilinear_p016le_nv12_uv(
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_0,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_1,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_2,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_3,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_4,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_5,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_6,
.param .u64 Subsample_Bilinear_p016le_nv12_uv_param_7,
.param .u32 Subsample_Bilinear_p016le_nv12_uv_param_8,
.param .u32 Subsample_Bilinear_p016le_nv12_uv_param_9,
.param .u32 Subsample_Bilinear_p016le_nv12_uv_param_10,
.param .u32 Subsample_Bilinear_p016le_nv12_uv_param_11,
.param .u32 Subsample_Bilinear_p016le_nv12_uv_param_12,
.param .f32 Subsample_Bilinear_p016le_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB101_2;
bra.uni $L__BB101_1;
$L__BB101_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 10;
cvt.u16.u32 %rs1, %r49;
shr.u32 %r50, %r48, 10;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u8 [%rd15], {%rs1, %rs2};
$L__BB101_2:
ret;
}
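// Planar 16-bit luma source; same 65535-mask, >> 10 narrowing body as the
// p010le/p016le luma kernels.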
// .globl Subsample_Bilinear_yuv444p16le_nv12
.visible .entry Subsample_Bilinear_yuv444p16le_nv12(
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_nv12_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB102_2;
bra.uni $L__BB102_1;
$L__BB102_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p16le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB102_2:
ret;
}
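// Two separate 16-bit chroma planes (params 1 and 2) feed one interleaved
// 8-bit CbCr pair: the yuv444p UV pattern with 65535 masks and >> 10.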
// .globl Subsample_Bilinear_yuv444p16le_nv12_uv
.visible .entry Subsample_Bilinear_yuv444p16le_nv12_uv(
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_nv12_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_nv12_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_nv12_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB103_2;
bra.uni $L__BB103_1;
$L__BB103_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 65535;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 65535;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 65535;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 65535;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 10;
cvt.u16.u32 %rs1, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 65535;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 65535;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 65535;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 65535;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 10;
cvt.u16.u32 %rs2, %r66;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 1;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 1;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u8 [%rd20], {%rs1, %rs2};
$L__BB103_2:
ret;
}
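// Back to 8-bit sources (255 masks, >> 2): luma scaling into a planar
// destination.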
// .globl Subsample_Bilinear_yuv420p_yuv444p
.visible .entry Subsample_Bilinear_yuv420p_yuv444p(
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_0,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_1,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_2,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_3,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_4,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_5,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_6,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_param_7,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_param_8,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_param_9,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_param_10,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_param_11,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_param_12,
.param .f32 Subsample_Bilinear_yuv420p_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB104_2;
bra.uni $L__BB104_1;
$L__BB104_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv420p_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB104_2:
ret;
}
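// Planar-to-planar chroma: two source textures (params 1 and 2) are sampled
// with the same four taps, and the two averages are stored one byte each
// into separate destination planes (params 5 and 6).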
// .globl Subsample_Bilinear_yuv420p_yuv444p_uv
.visible .entry Subsample_Bilinear_yuv420p_yuv444p_uv(
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_0,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_1,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_2,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_3,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_4,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_5,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_6,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p_uv_param_7,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_uv_param_8,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_uv_param_9,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_uv_param_10,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_uv_param_11,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p_uv_param_12,
.param .f32 Subsample_Bilinear_yuv420p_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<20>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB105_2;
bra.uni $L__BB105_1;
$L__BB105_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv420p_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
mul.wide.s32 %rd15, %r2, %r5;
cvt.s64.s32 %rd16, %r1;
add.s64 %rd17, %rd15, %rd16;
add.s64 %rd18, %rd2, %rd17;
st.global.u8 [%rd18], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 255;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 255;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
add.s64 %rd19, %rd1, %rd17;
st.global.u8 [%rd19], %r66;
$L__BB105_2:
ret;
}
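// 8-bit luma body as above; only the format pair in the name changes.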
// .globl Subsample_Bilinear_nv12_yuv444p
.visible .entry Subsample_Bilinear_nv12_yuv444p(
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_0,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_1,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_2,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_3,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_4,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_5,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_6,
.param .u64 Subsample_Bilinear_nv12_yuv444p_param_7,
.param .u32 Subsample_Bilinear_nv12_yuv444p_param_8,
.param .u32 Subsample_Bilinear_nv12_yuv444p_param_9,
.param .u32 Subsample_Bilinear_nv12_yuv444p_param_10,
.param .u32 Subsample_Bilinear_nv12_yuv444p_param_11,
.param .u32 Subsample_Bilinear_nv12_yuv444p_param_12,
.param .f32 Subsample_Bilinear_nv12_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB106_2;
bra.uni $L__BB106_1;
$L__BB106_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB106_2:
ret;
}
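// Interleaved nv12 chroma in, planar chroma out: U from the .x component and
// V from .y of a single texture (param 1), each written to its own
// destination plane (params 5 and 6) at the same row/column offset.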
// .globl Subsample_Bilinear_nv12_yuv444p_uv
.visible .entry Subsample_Bilinear_nv12_yuv444p_uv(
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_0,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_1,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_2,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_3,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_4,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_5,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_6,
.param .u64 Subsample_Bilinear_nv12_yuv444p_uv_param_7,
.param .u32 Subsample_Bilinear_nv12_yuv444p_uv_param_8,
.param .u32 Subsample_Bilinear_nv12_yuv444p_uv_param_9,
.param .u32 Subsample_Bilinear_nv12_yuv444p_uv_param_10,
.param .u32 Subsample_Bilinear_nv12_yuv444p_uv_param_11,
.param .u32 Subsample_Bilinear_nv12_yuv444p_uv_param_12,
.param .f32 Subsample_Bilinear_nv12_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB107_2;
bra.uni $L__BB107_1;
$L__BB107_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_nv12_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_nv12_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 255;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 255;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 255;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 255;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd2, %rd12;
st.global.u8 [%rd13], %r49;
add.s64 %rd14, %rd1, %rd12;
st.global.u8 [%rd14], %r50;
$L__BB107_2:
ret;
}
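// 8-bit planar luma scaling, same body as the kernels above.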
// .globl Subsample_Bilinear_yuv444p_yuv444p
.visible .entry Subsample_Bilinear_yuv444p_yuv444p(
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_0,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_1,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_2,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_3,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_4,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_5,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_6,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_param_7,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_param_8,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_param_9,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_param_10,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_param_11,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_param_12,
.param .f32 Subsample_Bilinear_yuv444p_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB108_2;
bra.uni $L__BB108_1;
$L__BB108_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB108_2:
ret;
}
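// Planar 444 chroma on both sides: two source textures, two destination
// planes, one byte stored to each.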
// .globl Subsample_Bilinear_yuv444p_yuv444p_uv
.visible .entry Subsample_Bilinear_yuv444p_yuv444p_uv(
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<20>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB109_2;
bra.uni $L__BB109_1;
$L__BB109_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv444p_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
mul.wide.s32 %rd15, %r2, %r5;
cvt.s64.s32 %rd16, %r1;
add.s64 %rd17, %rd15, %rd16;
add.s64 %rd18, %rd2, %rd17;
st.global.u8 [%rd18], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 255;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 255;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
add.s64 %rd19, %rd1, %rd17;
st.global.u8 [%rd19], %r66;
$L__BB109_2:
ret;
}
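// 16-bit luma into an 8-bit planar destination: 65535 masks and the
// (sum + 2) >> 10 narrowing average.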
// .globl Subsample_Bilinear_p010le_yuv444p
.visible .entry Subsample_Bilinear_p010le_yuv444p(
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_0,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_1,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_2,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_3,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_4,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_5,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_6,
.param .u64 Subsample_Bilinear_p010le_yuv444p_param_7,
.param .u32 Subsample_Bilinear_p010le_yuv444p_param_8,
.param .u32 Subsample_Bilinear_p010le_yuv444p_param_9,
.param .u32 Subsample_Bilinear_p010le_yuv444p_param_10,
.param .u32 Subsample_Bilinear_p010le_yuv444p_param_11,
.param .u32 Subsample_Bilinear_p010le_yuv444p_param_12,
.param .f32 Subsample_Bilinear_p010le_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB110_2;
bra.uni $L__BB110_1;
$L__BB110_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB110_2:
ret;
}
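// Interleaved 16-bit chroma into two 8-bit planes: the .x/.y components of
// one texture (param 1), each averaged, narrowed with >> 10, and stored to
// params 5 and 6.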
// .globl Subsample_Bilinear_p010le_yuv444p_uv
.visible .entry Subsample_Bilinear_p010le_yuv444p_uv(
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_0,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_1,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_2,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_3,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_4,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_5,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_6,
.param .u64 Subsample_Bilinear_p010le_yuv444p_uv_param_7,
.param .u32 Subsample_Bilinear_p010le_yuv444p_uv_param_8,
.param .u32 Subsample_Bilinear_p010le_yuv444p_uv_param_9,
.param .u32 Subsample_Bilinear_p010le_yuv444p_uv_param_10,
.param .u32 Subsample_Bilinear_p010le_yuv444p_uv_param_11,
.param .u32 Subsample_Bilinear_p010le_yuv444p_uv_param_12,
.param .f32 Subsample_Bilinear_p010le_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB111_2;
bra.uni $L__BB111_1;
$L__BB111_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_p010le_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_p010le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 10;
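// (sum + 2) >> 10: the +2 / >>2 part is a rounded average of the four
// taps; the remaining >>8 narrows 16-bit samples to the 8-bit output
// (repeated below for the second chroma component).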
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd2, %rd12;
st.global.u8 [%rd13], %r49;
shr.u32 %r50, %r48, 10;
add.s64 %rd14, %rd1, %rd12;
st.global.u8 [%rd14], %r50;
$L__BB111_2:
ret;
}
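//
// Every Subsample_Bilinear_* entry in this file instantiates the same
// bilinear template, varying only sample width, plane layout, and the
// final shift/mask. A minimal CUDA sketch of the 16-bit-to-8-bit variant
// follows, kept as comments so the module still assembles; this is a
// hedged reconstruction inferred from the PTX, not the original source,
// and the kernel name and parameter order are illustrative assumptions.
//
//   #include <cuda_runtime.h>
//   #include <stdint.h>
//
//   __global__ void bilinear_u16_to_u8(cudaTextureObject_t src,
//                                      uint8_t *dst, int dst_w, int dst_h,
//                                      int dst_pitch,   // bytes per row
//                                      int src_w, int src_h)
//   {
//       int x = blockIdx.x * blockDim.x + threadIdx.x;
//       int y = blockIdx.y * blockDim.y + threadIdx.y;
//       if (x >= dst_w || y >= dst_h)
//           return;
//
//       float hscale = (float)src_w / dst_w;               // %f11
//       float vscale = (float)src_h / dst_h;               // %f14
//       // Clamped half-width weights (the max/min pairs):
//       float wh = fminf(fmaxf(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
//       float wv = fminf(fmaxf(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
//       // Tap offsets (the div.rn.f32 pair):
//       float dx = wh / (wh + 0.5f);
//       float dy = wv / (wv + 0.5f);
//       float xi = (x + 0.5f) * hscale;
//       float yi = (y + 0.5f) * vscale;
//
//       // Four texture taps (the tex.2d.v4.u32.f32 quads), summed as ints:
//       unsigned s = tex2D<unsigned short>(src, xi - dx, yi - dy)
//                  + tex2D<unsigned short>(src, xi + dx, yi - dy)
//                  + tex2D<unsigned short>(src, xi - dx, yi + dy)
//                  + tex2D<unsigned short>(src, xi + dx, yi + dy);
//       // Rounded average of 4 (>>2) plus 16->8-bit narrowing (>>8):
//       dst[y * dst_pitch + x] = (uint8_t)((s + 2) >> 10);
//   }
//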
// .globl Subsample_Bilinear_p016le_yuv444p
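// Single plane: 16-bit p016le samples averaged and narrowed to an 8-bit
// yuv444p plane.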
.visible .entry Subsample_Bilinear_p016le_yuv444p(
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_0,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_1,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_2,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_3,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_4,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_5,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_6,
.param .u64 Subsample_Bilinear_p016le_yuv444p_param_7,
.param .u32 Subsample_Bilinear_p016le_yuv444p_param_8,
.param .u32 Subsample_Bilinear_p016le_yuv444p_param_9,
.param .u32 Subsample_Bilinear_p016le_yuv444p_param_10,
.param .u32 Subsample_Bilinear_p016le_yuv444p_param_11,
.param .u32 Subsample_Bilinear_p016le_yuv444p_param_12,
.param .f32 Subsample_Bilinear_p016le_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB112_2;
bra.uni $L__BB112_1;
$L__BB112_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB112_2:
ret;
}
// .globl Subsample_Bilinear_p016le_yuv444p_uv
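// UV plane: interleaved 16-bit p016le chroma averaged, narrowed to 8 bits,
// and written as separate U and V planes.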
.visible .entry Subsample_Bilinear_p016le_yuv444p_uv(
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_0,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_1,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_2,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_3,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_4,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_5,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_6,
.param .u64 Subsample_Bilinear_p016le_yuv444p_uv_param_7,
.param .u32 Subsample_Bilinear_p016le_yuv444p_uv_param_8,
.param .u32 Subsample_Bilinear_p016le_yuv444p_uv_param_9,
.param .u32 Subsample_Bilinear_p016le_yuv444p_uv_param_10,
.param .u32 Subsample_Bilinear_p016le_yuv444p_uv_param_11,
.param .u32 Subsample_Bilinear_p016le_yuv444p_uv_param_12,
.param .f32 Subsample_Bilinear_p016le_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<15>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB113_2;
bra.uni $L__BB113_1;
$L__BB113_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_p016le_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_p016le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 10;
mul.wide.s32 %rd10, %r2, %r5;
cvt.s64.s32 %rd11, %r1;
add.s64 %rd12, %rd10, %rd11;
add.s64 %rd13, %rd2, %rd12;
st.global.u8 [%rd13], %r49;
shr.u32 %r50, %r48, 10;
add.s64 %rd14, %rd1, %rd12;
st.global.u8 [%rd14], %r50;
$L__BB113_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_yuv444p
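// Single plane: planar 16-bit samples averaged and narrowed to 8 bits.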
.visible .entry Subsample_Bilinear_yuv444p16le_yuv444p(
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_yuv444p_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB114_2;
bra.uni $L__BB114_1;
$L__BB114_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p16le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 10;
mul.wide.s32 %rd8, %r2, %r5;
cvt.s64.s32 %rd9, %r1;
add.s64 %rd10, %rd8, %rd9;
add.s64 %rd11, %rd1, %rd10;
st.global.u8 [%rd11], %r41;
$L__BB114_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_yuv444p_uv
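// UV planes: yuv444p16le keeps U and V in separate planes, so this variant
// samples two textures (params 1 and 2) and writes two 8-bit planes.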
.visible .entry Subsample_Bilinear_yuv444p16le_yuv444p_uv(
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<20>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB115_2;
bra.uni $L__BB115_1;
$L__BB115_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv444p16le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 65535;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 65535;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 65535;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 65535;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 10;
mul.wide.s32 %rd15, %r2, %r5;
cvt.s64.s32 %rd16, %r1;
add.s64 %rd17, %rd15, %rd16;
add.s64 %rd18, %rd2, %rd17;
st.global.u8 [%rd18], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 65535;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 65535;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 65535;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 65535;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 10;
add.s64 %rd19, %rd1, %rd17;
st.global.u8 [%rd19], %r66;
$L__BB115_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_p010le
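// Single plane: 8-bit yuv420p samples averaged, then widened to p010le's
// MSB-aligned 10-bit-in-16-bit format (see the *257 / &-64 sequence below).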
.visible .entry Subsample_Bilinear_yuv420p_p010le(
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_0,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_1,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_2,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_3,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_4,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_5,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_6,
.param .u64 Subsample_Bilinear_yuv420p_p010le_param_7,
.param .u32 Subsample_Bilinear_yuv420p_p010le_param_8,
.param .u32 Subsample_Bilinear_yuv420p_p010le_param_9,
.param .u32 Subsample_Bilinear_yuv420p_p010le_param_10,
.param .u32 Subsample_Bilinear_yuv420p_p010le_param_11,
.param .u32 Subsample_Bilinear_yuv420p_p010le_param_12,
.param .f32 Subsample_Bilinear_yuv420p_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<6>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB116_2;
bra.uni $L__BB116_1;
$L__BB116_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv420p_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs4, -64;
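// 8-bit -> p010le widening: (sum+2)>>2 is the rounded 8-bit average; *257
// replicates it into both bytes (v<<8 | v), and & -64 (0xFFC0) keeps the
// top 10 bits, matching p010le's MSB-aligned samples.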
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
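// Destination offset: the pitch parameter is in bytes, so it is halved to
// count 16-bit elements and the final element index is shifted back to a
// byte offset (y*pitch + 2x).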
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs5;
$L__BB116_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_p010le_uv
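// UV plane: two planar 8-bit sources (U and V textures) averaged, widened
// to 10-bit, and stored as one interleaved ushort2 per output pixel.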
.visible .entry Subsample_Bilinear_yuv420p_p010le_uv(
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv420p_p010le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv420p_p010le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv420p_p010le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv420p_p010le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv420p_p010le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv420p_p010le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv420p_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<11>;
.reg .b32 %r<63>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB117_2;
bra.uni $L__BB117_1;
$L__BB117_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_p010le_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv420p_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv420p_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
cvt.u16.u32 %rs1, %r55;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs4, -64;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r56, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r57, %r37, 255;
add.s32 %r58, %r56, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r59, %r41, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r61, %r45, 255;
add.s32 %r62, %r60, %r61;
cvt.u16.u32 %rs6, %r62;
add.s16 %rs7, %rs6, 2;
shr.u16 %rs8, %rs7, 2;
mul.lo.s16 %rs9, %rs8, 257;
and.b16 %rs10, %rs9, -64;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 2;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 2;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u16 [%rd20], {%rs5, %rs10};
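// The U and V results are written as a single interleaved ushort2 store
// (p010le chroma is an interleaved UV plane); pitch>>2 counts ushort2
// elements per row.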
$L__BB117_2:
ret;
}
// .globl Subsample_Bilinear_nv12_p010le
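// Single plane: 8-bit nv12 luma averaged and widened to p010le.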
.visible .entry Subsample_Bilinear_nv12_p010le(
.param .u64 Subsample_Bilinear_nv12_p010le_param_0,
.param .u64 Subsample_Bilinear_nv12_p010le_param_1,
.param .u64 Subsample_Bilinear_nv12_p010le_param_2,
.param .u64 Subsample_Bilinear_nv12_p010le_param_3,
.param .u64 Subsample_Bilinear_nv12_p010le_param_4,
.param .u64 Subsample_Bilinear_nv12_p010le_param_5,
.param .u64 Subsample_Bilinear_nv12_p010le_param_6,
.param .u64 Subsample_Bilinear_nv12_p010le_param_7,
.param .u32 Subsample_Bilinear_nv12_p010le_param_8,
.param .u32 Subsample_Bilinear_nv12_p010le_param_9,
.param .u32 Subsample_Bilinear_nv12_p010le_param_10,
.param .u32 Subsample_Bilinear_nv12_p010le_param_11,
.param .u32 Subsample_Bilinear_nv12_p010le_param_12,
.param .f32 Subsample_Bilinear_nv12_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<6>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB118_2;
bra.uni $L__BB118_1;
$L__BB118_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs4, -64;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs5;
$L__BB118_2:
ret;
}
// .globl Subsample_Bilinear_nv12_p010le_uv
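// UV plane: nv12's interleaved 8-bit chroma sampled once per tap (the .x
// and .y components), widened to 10-bit, and stored interleaved.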
.visible .entry Subsample_Bilinear_nv12_p010le_uv(
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_0,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_1,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_2,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_3,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_4,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_5,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_6,
.param .u64 Subsample_Bilinear_nv12_p010le_uv_param_7,
.param .u32 Subsample_Bilinear_nv12_p010le_uv_param_8,
.param .u32 Subsample_Bilinear_nv12_p010le_uv_param_9,
.param .u32 Subsample_Bilinear_nv12_p010le_uv_param_10,
.param .u32 Subsample_Bilinear_nv12_p010le_uv_param_11,
.param .u32 Subsample_Bilinear_nv12_p010le_uv_param_12,
.param .f32 Subsample_Bilinear_nv12_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<11>;
.reg .b32 %r<47>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB119_2;
bra.uni $L__BB119_1;
$L__BB119_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 255;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 255;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 255;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 255;
add.s32 %r46, %r42, %r45;
cvt.u16.u32 %rs1, %r44;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
cvt.u16.u32 %rs4, %r46;
add.s16 %rs5, %rs4, 2;
shr.u16 %rs6, %rs5, 2;
mul.lo.s16 %rs7, %rs3, 257;
and.b16 %rs8, %rs7, -64;
mul.lo.s16 %rs9, %rs6, 257;
and.b16 %rs10, %rs9, -64;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs8, %rs10};
$L__BB119_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_p010le
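// Single plane: 8-bit yuv444p samples averaged and widened to p010le.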
.visible .entry Subsample_Bilinear_yuv444p_p010le(
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_0,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_1,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_2,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_3,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_4,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_5,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_6,
.param .u64 Subsample_Bilinear_yuv444p_p010le_param_7,
.param .u32 Subsample_Bilinear_yuv444p_p010le_param_8,
.param .u32 Subsample_Bilinear_yuv444p_p010le_param_9,
.param .u32 Subsample_Bilinear_yuv444p_p010le_param_10,
.param .u32 Subsample_Bilinear_yuv444p_p010le_param_11,
.param .u32 Subsample_Bilinear_yuv444p_p010le_param_12,
.param .f32 Subsample_Bilinear_yuv444p_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<6>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB120_2;
bra.uni $L__BB120_1;
$L__BB120_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs4, -64;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs5;
$L__BB120_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_p010le_uv
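// UV plane: separate 8-bit U and V source planes (two textures) averaged,
// widened, and interleaved into the p010le chroma plane.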
.visible .entry Subsample_Bilinear_yuv444p_p010le_uv(
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p_p010le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p_p010le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p_p010le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p_p010le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p_p010le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p_p010le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<11>;
.reg .b32 %r<63>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB121_2;
bra.uni $L__BB121_1;
$L__BB121_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_p010le_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv444p_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
cvt.u16.u32 %rs1, %r55;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
and.b16 %rs5, %rs4, -64;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r56, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r57, %r37, 255;
add.s32 %r58, %r56, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r59, %r41, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r61, %r45, 255;
add.s32 %r62, %r60, %r61;
cvt.u16.u32 %rs6, %r62;
add.s16 %rs7, %rs6, 2;
shr.u16 %rs8, %rs7, 2;
mul.lo.s16 %rs9, %rs8, 257;
and.b16 %rs10, %rs9, -64;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 2;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 2;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u16 [%rd20], {%rs5, %rs10};
$L__BB121_2:
ret;
}
// .globl Subsample_Bilinear_p010le_p010le
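// Single plane: 16-bit-to-16-bit rescale; only the rounded 4-tap average
// is needed, with no narrowing shift beyond the /4.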
.visible .entry Subsample_Bilinear_p010le_p010le(
.param .u64 Subsample_Bilinear_p010le_p010le_param_0,
.param .u64 Subsample_Bilinear_p010le_p010le_param_1,
.param .u64 Subsample_Bilinear_p010le_p010le_param_2,
.param .u64 Subsample_Bilinear_p010le_p010le_param_3,
.param .u64 Subsample_Bilinear_p010le_p010le_param_4,
.param .u64 Subsample_Bilinear_p010le_p010le_param_5,
.param .u64 Subsample_Bilinear_p010le_p010le_param_6,
.param .u64 Subsample_Bilinear_p010le_p010le_param_7,
.param .u32 Subsample_Bilinear_p010le_p010le_param_8,
.param .u32 Subsample_Bilinear_p010le_p010le_param_9,
.param .u32 Subsample_Bilinear_p010le_p010le_param_10,
.param .u32 Subsample_Bilinear_p010le_p010le_param_11,
.param .u32 Subsample_Bilinear_p010le_p010le_param_12,
.param .f32 Subsample_Bilinear_p010le_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB122_2;
bra.uni $L__BB122_1;
$L__BB122_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
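// Same bit depth on both sides: (sum + 2) >> 2 is simply the rounded
// average of the four taps, stored as a 16-bit sample.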
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
$L__BB122_2:
ret;
}
// .globl Subsample_Bilinear_p010le_p010le_uv
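// UV plane: interleaved 16-bit chroma averaged and stored interleaved at
// the same bit depth.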
.visible .entry Subsample_Bilinear_p010le_p010le_uv(
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_0,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_1,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_2,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_3,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_4,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_5,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_6,
.param .u64 Subsample_Bilinear_p010le_p010le_uv_param_7,
.param .u32 Subsample_Bilinear_p010le_p010le_uv_param_8,
.param .u32 Subsample_Bilinear_p010le_p010le_uv_param_9,
.param .u32 Subsample_Bilinear_p010le_p010le_uv_param_10,
.param .u32 Subsample_Bilinear_p010le_p010le_uv_param_11,
.param .u32 Subsample_Bilinear_p010le_p010le_uv_param_12,
.param .f32 Subsample_Bilinear_p010le_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB123_2;
bra.uni $L__BB123_1;
$L__BB123_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs1, %r49;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs1, %rs2};
$L__BB123_2:
ret;
}
// .globl Subsample_Bilinear_p016le_p010le
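// Single plane: 16-bit p016le average masked with & -64 (0xFFC0) down to
// p010le's 10 significant bits.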
.visible .entry Subsample_Bilinear_p016le_p010le(
.param .u64 Subsample_Bilinear_p016le_p010le_param_0,
.param .u64 Subsample_Bilinear_p016le_p010le_param_1,
.param .u64 Subsample_Bilinear_p016le_p010le_param_2,
.param .u64 Subsample_Bilinear_p016le_p010le_param_3,
.param .u64 Subsample_Bilinear_p016le_p010le_param_4,
.param .u64 Subsample_Bilinear_p016le_p010le_param_5,
.param .u64 Subsample_Bilinear_p016le_p010le_param_6,
.param .u64 Subsample_Bilinear_p016le_p010le_param_7,
.param .u32 Subsample_Bilinear_p016le_p010le_param_8,
.param .u32 Subsample_Bilinear_p016le_p010le_param_9,
.param .u32 Subsample_Bilinear_p016le_p010le_param_10,
.param .u32 Subsample_Bilinear_p016le_p010le_param_11,
.param .u32 Subsample_Bilinear_p016le_p010le_param_12,
.param .f32 Subsample_Bilinear_p016le_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB124_2;
bra.uni $L__BB124_1;
$L__BB124_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.u16.u32 %rs1, %r41;
and.b16 %rs2, %rs1, -64;
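// p016le -> p010le: after the rounded 16-bit average, & -64 (0xFFC0)
// zeroes the low 6 bits so only the 10 MSBs p010le uses remain.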
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs2;
$L__BB124_2:
ret;
}
// .globl Subsample_Bilinear_p016le_p010le_uv
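// UV plane: interleaved 16-bit chroma averaged, masked to 10 bits, and
// stored interleaved.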
.visible .entry Subsample_Bilinear_p016le_p010le_uv(
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_0,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_1,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_2,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_3,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_4,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_5,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_6,
.param .u64 Subsample_Bilinear_p016le_p010le_uv_param_7,
.param .u32 Subsample_Bilinear_p016le_p010le_uv_param_8,
.param .u32 Subsample_Bilinear_p016le_p010le_uv_param_9,
.param .u32 Subsample_Bilinear_p016le_p010le_uv_param_10,
.param .u32 Subsample_Bilinear_p016le_p010le_uv_param_11,
.param .u32 Subsample_Bilinear_p016le_p010le_uv_param_12,
.param .f32 Subsample_Bilinear_p016le_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB125_2;
bra.uni $L__BB125_1;
$L__BB125_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs1, %r49;
cvt.u16.u32 %rs2, %r50;
and.b16 %rs3, %rs1, -64;
and.b16 %rs4, %rs2, -64;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs3, %rs4};
$L__BB125_2:
ret;
}
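//
// Annotation: every Subsample_Bilinear_* entry in this module shares one
// code shape; only the channel masks, the widening step, and the final
// store differ per pixel format. As a reading aid, a CUDA-level sketch of
// the apparent computation follows. It is reconstructed from the PTX, not
// taken from the original source, and every name in it (the kernel name
// and all parameter names) is hypothetical.
//
// __global__ void subsample_bilinear_u16(cudaTextureObject_t src_tex,
//                                        unsigned short *dst,
//                                        int dst_w, int dst_h, // params 8/9
//                                        int dst_pitch,        // param 10, bytes
//                                        int src_w, int src_h) // params 11/12
// {
//     int xo = blockIdx.x * blockDim.x + threadIdx.x;
//     int yo = blockIdx.y * blockDim.y + threadIdx.y;
//     if (xo >= dst_w || yo >= dst_h)
//         return;                           // the "@!%p3 bra" early exit
//     float hscale = (float)src_w / (float)dst_w;
//     float vscale = (float)src_h / (float)dst_h;
//     // Per-axis weight, clamped to [0, 1]; it is 0 when upscaling.
//     float wh = fminf(fmaxf(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
//     float wv = fminf(fmaxf(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
//     // Tap offsets around the source-space sample point.
//     float dx = wh / (0.5f + wh);
//     float dy = wv / (0.5f + wv);
//     float xi = (xo + 0.5f) * hscale;
//     float yi = (yo + 0.5f) * vscale;
//     unsigned int sum = tex2D<unsigned short>(src_tex, xi - dx, yi - dy)
//                      + tex2D<unsigned short>(src_tex, xi + dx, yi - dy)
//                      + tex2D<unsigned short>(src_tex, xi - dx, yi + dy)
//                      + tex2D<unsigned short>(src_tex, xi + dx, yi + dy);
//     // Rounded 4-tap average: the "(sum + 2) >> 2" seen in each kernel.
//     dst[(dst_pitch / 2) * yo + xo] = (unsigned short)((sum + 2) >> 2);
// }
//
// Per-format tails seen below: an "and" with 0xffc0 for p010le output
// (10 bits kept in the MSBs), a multiply by 257 to widen full-range
// 8-bit samples to 16 bits, an "x | (x >> 10)" replication to widen
// p010le samples to p016le, and paired U/V sums stored with
// st.global.v2.u16 in the semi-planar *_uv variants.
//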
// .globl Subsample_Bilinear_yuv444p16le_p010le
.visible .entry Subsample_Bilinear_yuv444p16le_p010le(
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_p010le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB126_2;
bra.uni $L__BB126_1;
$L__BB126_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p16le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
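// Annotation: params 8/9 are the destination width/height (they bound the
// thread coordinates above); params 11/12 appear to be the source
// dimensions, making %f11 and %f14 hscale = src_w / dst_w and
// vscale = src_h / dst_h.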
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
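// Annotation: %f18/%f22 are the per-axis weights clamp(0.5 * (scale - 1),
// 0.0, 1.0) (the 0f constants above are -1.0, 0.5, 0.0, and 1.0), and
// %f28/%f30 are the tap offsets w / (w + 0.5) used below to displace the
// four texture fetches around the source-space sample point.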
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.u16.u32 %rs1, %r41;
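// Annotation: -64 is 0xffc0 as .b16, so the and below clears the six low
// bits, leaving the rounded 10-bit result in the most-significant bits as
// p010le expects.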
and.b16 %rs2, %rs1, -64;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs2;
$L__BB126_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_p010le_uv
.visible .entry Subsample_Bilinear_yuv444p16le_p010le_uv(
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_p010le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_p010le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_p010le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB127_2;
bra.uni $L__BB127_1;
$L__BB127_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 65535;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 65535;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 65535;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 65535;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
cvt.u16.u32 %rs1, %r57;
and.b16 %rs2, %rs1, -64;
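// Annotation: the UV variant repeats the identical 4-tap average against a
// second texture (%rd9, from param_2), which appears to be the other
// chroma plane; the two results are interleaved by the st.global.v2.u16
// at the end, matching p010le's semi-planar UV layout.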
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 65535;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 65535;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 65535;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 65535;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
cvt.u16.u32 %rs3, %r66;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 2;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 2;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u16 [%rd20], {%rs2, %rs4};
$L__BB127_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_p016le
.visible .entry Subsample_Bilinear_yuv420p_p016le(
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_0,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_1,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_2,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_3,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_4,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_5,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_6,
.param .u64 Subsample_Bilinear_yuv420p_p016le_param_7,
.param .u32 Subsample_Bilinear_yuv420p_p016le_param_8,
.param .u32 Subsample_Bilinear_yuv420p_p016le_param_9,
.param .u32 Subsample_Bilinear_yuv420p_p016le_param_10,
.param .u32 Subsample_Bilinear_yuv420p_p016le_param_11,
.param .u32 Subsample_Bilinear_yuv420p_p016le_param_12,
.param .f32 Subsample_Bilinear_yuv420p_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB128_2;
bra.uni $L__BB128_1;
$L__BB128_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv420p_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
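// Annotation: the 8-bit taps are averaged with rounding ((sum + 2) >> 2)
// and then multiplied by 257 (0x0101), which replicates the byte into both
// halves of the 16-bit result -- the usual way to widen full-range 8-bit
// samples to 16 bits for p016le.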
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs4;
$L__BB128_2:
ret;
}
// .globl Subsample_Bilinear_yuv420p_p016le_uv
.visible .entry Subsample_Bilinear_yuv420p_p016le_uv(
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv420p_p016le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv420p_p016le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv420p_p016le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv420p_p016le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv420p_p016le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv420p_p016le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv420p_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<63>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB129_2;
bra.uni $L__BB129_1;
$L__BB129_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_p016le_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv420p_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv420p_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
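// Annotation: as in the other *_uv kernels, param_1 and param_2 appear to
// be the two source chroma textures and param_5 the interleaved UV
// destination plane; param_10 (%r5) is the destination pitch in bytes.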
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
cvt.u16.u32 %rs1, %r55;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r56, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r57, %r37, 255;
add.s32 %r58, %r56, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r59, %r41, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r61, %r45, 255;
add.s32 %r62, %r60, %r61;
cvt.u16.u32 %rs5, %r62;
add.s16 %rs6, %rs5, 2;
shr.u16 %rs7, %rs6, 2;
mul.lo.s16 %rs8, %rs7, 257;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 2;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 2;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u16 [%rd20], {%rs4, %rs8};
$L__BB129_2:
ret;
}
// .globl Subsample_Bilinear_nv12_p016le
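// Annotation: the luma kernel below is instruction-for-instruction the
// same as Subsample_Bilinear_yuv420p_p016le above -- both read an 8-bit
// luma plane and write 16-bit p016le -- only the entry name differs.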
.visible .entry Subsample_Bilinear_nv12_p016le(
.param .u64 Subsample_Bilinear_nv12_p016le_param_0,
.param .u64 Subsample_Bilinear_nv12_p016le_param_1,
.param .u64 Subsample_Bilinear_nv12_p016le_param_2,
.param .u64 Subsample_Bilinear_nv12_p016le_param_3,
.param .u64 Subsample_Bilinear_nv12_p016le_param_4,
.param .u64 Subsample_Bilinear_nv12_p016le_param_5,
.param .u64 Subsample_Bilinear_nv12_p016le_param_6,
.param .u64 Subsample_Bilinear_nv12_p016le_param_7,
.param .u32 Subsample_Bilinear_nv12_p016le_param_8,
.param .u32 Subsample_Bilinear_nv12_p016le_param_9,
.param .u32 Subsample_Bilinear_nv12_p016le_param_10,
.param .u32 Subsample_Bilinear_nv12_p016le_param_11,
.param .u32 Subsample_Bilinear_nv12_p016le_param_12,
.param .f32 Subsample_Bilinear_nv12_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB130_2;
bra.uni $L__BB130_1;
$L__BB130_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs4;
$L__BB130_2:
ret;
}
// .globl Subsample_Bilinear_nv12_p016le_uv
.visible .entry Subsample_Bilinear_nv12_p016le_uv(
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_0,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_1,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_2,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_3,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_4,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_5,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_6,
.param .u64 Subsample_Bilinear_nv12_p016le_uv_param_7,
.param .u32 Subsample_Bilinear_nv12_p016le_uv_param_8,
.param .u32 Subsample_Bilinear_nv12_p016le_uv_param_9,
.param .u32 Subsample_Bilinear_nv12_p016le_uv_param_10,
.param .u32 Subsample_Bilinear_nv12_p016le_uv_param_11,
.param .u32 Subsample_Bilinear_nv12_p016le_uv_param_12,
.param .f32 Subsample_Bilinear_nv12_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<47>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB131_2;
bra.uni $L__BB131_1;
$L__BB131_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
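// Annotation: nv12 chroma is interleaved, so a single fetch returns both
// components -- %r17 (.x) carries U and %r18 (.y) carries V -- and the two
// sums are accumulated side by side below instead of re-running the taps
// on a second texture.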
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 255;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 255;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 255;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 255;
add.s32 %r46, %r42, %r45;
cvt.u16.u32 %rs1, %r44;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
cvt.u16.u32 %rs4, %r46;
add.s16 %rs5, %rs4, 2;
shr.u16 %rs6, %rs5, 2;
mul.lo.s16 %rs7, %rs3, 257;
mul.lo.s16 %rs8, %rs6, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs7, %rs8};
$L__BB131_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_p016le
.visible .entry Subsample_Bilinear_yuv444p_p016le(
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_0,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_1,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_2,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_3,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_4,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_5,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_6,
.param .u64 Subsample_Bilinear_yuv444p_p016le_param_7,
.param .u32 Subsample_Bilinear_yuv444p_p016le_param_8,
.param .u32 Subsample_Bilinear_yuv444p_p016le_param_9,
.param .u32 Subsample_Bilinear_yuv444p_p016le_param_10,
.param .u32 Subsample_Bilinear_yuv444p_p016le_param_11,
.param .u32 Subsample_Bilinear_yuv444p_p016le_param_12,
.param .f32 Subsample_Bilinear_yuv444p_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB132_2;
bra.uni $L__BB132_1;
$L__BB132_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs4;
$L__BB132_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p_p016le_uv
.visible .entry Subsample_Bilinear_yuv444p_p016le_uv(
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p_p016le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p_p016le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p_p016le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p_p016le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p_p016le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p_p016le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<63>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB133_2;
bra.uni $L__BB133_1;
$L__BB133_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_p016le_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv444p_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
cvt.u16.u32 %rs1, %r55;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r56, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r57, %r37, 255;
add.s32 %r58, %r56, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r59, %r41, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r61, %r45, 255;
add.s32 %r62, %r60, %r61;
cvt.u16.u32 %rs5, %r62;
add.s16 %rs6, %rs5, 2;
shr.u16 %rs7, %rs6, 2;
mul.lo.s16 %rs8, %rs7, 257;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 2;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 2;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u16 [%rd20], {%rs4, %rs8};
$L__BB133_2:
ret;
}
// .globl Subsample_Bilinear_p010le_p016le
.visible .entry Subsample_Bilinear_p010le_p016le(
.param .u64 Subsample_Bilinear_p010le_p016le_param_0,
.param .u64 Subsample_Bilinear_p010le_p016le_param_1,
.param .u64 Subsample_Bilinear_p010le_p016le_param_2,
.param .u64 Subsample_Bilinear_p010le_p016le_param_3,
.param .u64 Subsample_Bilinear_p010le_p016le_param_4,
.param .u64 Subsample_Bilinear_p010le_p016le_param_5,
.param .u64 Subsample_Bilinear_p010le_p016le_param_6,
.param .u64 Subsample_Bilinear_p010le_p016le_param_7,
.param .u32 Subsample_Bilinear_p010le_p016le_param_8,
.param .u32 Subsample_Bilinear_p010le_p016le_param_9,
.param .u32 Subsample_Bilinear_p010le_p016le_param_10,
.param .u32 Subsample_Bilinear_p010le_p016le_param_11,
.param .u32 Subsample_Bilinear_p010le_p016le_param_12,
.param .f32 Subsample_Bilinear_p010le_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB134_2;
bra.uni $L__BB134_1;
$L__BB134_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.u16.u32 %rs1, %r41;
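// Annotation: the shr/or pair below computes x | (x >> 10), replicating
// the top bits of the 10-bit-in-MSBs p010le average into the low bits to
// approximate a full-range 16-bit p016le value.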
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs3;
$L__BB134_2:
ret;
}
// .globl Subsample_Bilinear_p010le_p016le_uv
.visible .entry Subsample_Bilinear_p010le_p016le_uv(
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_0,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_1,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_2,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_3,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_4,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_5,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_6,
.param .u64 Subsample_Bilinear_p010le_p016le_uv_param_7,
.param .u32 Subsample_Bilinear_p010le_p016le_uv_param_8,
.param .u32 Subsample_Bilinear_p010le_p016le_uv_param_9,
.param .u32 Subsample_Bilinear_p010le_p016le_uv_param_10,
.param .u32 Subsample_Bilinear_p010le_p016le_uv_param_11,
.param .u32 Subsample_Bilinear_p010le_p016le_uv_param_12,
.param .f32 Subsample_Bilinear_p010le_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB135_2;
bra.uni $L__BB135_1;
$L__BB135_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs1, %r49;
cvt.u16.u32 %rs2, %r50;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs4, %rs6};
$L__BB135_2:
ret;
}
// .globl Subsample_Bilinear_p016le_p016le
.visible .entry Subsample_Bilinear_p016le_p016le(
.param .u64 Subsample_Bilinear_p016le_p016le_param_0,
.param .u64 Subsample_Bilinear_p016le_p016le_param_1,
.param .u64 Subsample_Bilinear_p016le_p016le_param_2,
.param .u64 Subsample_Bilinear_p016le_p016le_param_3,
.param .u64 Subsample_Bilinear_p016le_p016le_param_4,
.param .u64 Subsample_Bilinear_p016le_p016le_param_5,
.param .u64 Subsample_Bilinear_p016le_p016le_param_6,
.param .u64 Subsample_Bilinear_p016le_p016le_param_7,
.param .u32 Subsample_Bilinear_p016le_p016le_param_8,
.param .u32 Subsample_Bilinear_p016le_p016le_param_9,
.param .u32 Subsample_Bilinear_p016le_p016le_param_10,
.param .u32 Subsample_Bilinear_p016le_p016le_param_11,
.param .u32 Subsample_Bilinear_p016le_p016le_param_12,
.param .f32 Subsample_Bilinear_p016le_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB136_2;
bra.uni $L__BB136_1;
$L__BB136_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
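// Annotation: no 16-bit register is needed in this 16-bit-to-16-bit case;
// PTX's relaxed operand typing lets st.global.u16 store the low half of
// the 32-bit %r41 directly, and the samples pass through unchanged apart
// from the rounded 4-tap average.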
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
$L__BB136_2:
ret;
}
// .globl Subsample_Bilinear_p016le_p016le_uv
.visible .entry Subsample_Bilinear_p016le_p016le_uv(
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_0,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_1,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_2,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_3,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_4,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_5,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_6,
.param .u64 Subsample_Bilinear_p016le_p016le_uv_param_7,
.param .u32 Subsample_Bilinear_p016le_p016le_uv_param_8,
.param .u32 Subsample_Bilinear_p016le_p016le_uv_param_9,
.param .u32 Subsample_Bilinear_p016le_p016le_uv_param_10,
.param .u32 Subsample_Bilinear_p016le_p016le_uv_param_11,
.param .u32 Subsample_Bilinear_p016le_p016le_uv_param_12,
.param .f32 Subsample_Bilinear_p016le_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB137_2;
bra.uni $L__BB137_1;
$L__BB137_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs1, %r49;
cvt.u16.u32 %rs2, %r50;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v2.u16 [%rd15], {%rs1, %rs2};
$L__BB137_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_p016le
.visible .entry Subsample_Bilinear_yuv444p16le_p016le(
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_p016le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB138_2;
bra.uni $L__BB138_1;
$L__BB138_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p16le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
$L__BB138_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_p016le_uv
.visible .entry Subsample_Bilinear_yuv444p16le_p016le_uv(
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_p016le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_p016le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_p016le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<3>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<21>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB139_2;
bra.uni $L__BB139_1;
$L__BB139_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_10];
ld.param.u64 %rd9, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 65535;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 65535;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 65535;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 65535;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
cvt.u16.u32 %rs1, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd9, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 65535;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd9, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 65535;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd9, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 65535;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd9, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 65535;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
cvt.u16.u32 %rs2, %r66;
cvt.s64.s32 %rd13, %r2;
cvt.s64.s32 %rd14, %r5;
shr.u64 %rd15, %rd14, 2;
mul.lo.s64 %rd16, %rd15, %rd13;
cvt.s64.s32 %rd17, %r1;
add.s64 %rd18, %rd16, %rd17;
shl.b64 %rd19, %rd18, 2;
add.s64 %rd20, %rd1, %rd19;
st.global.v2.u16 [%rd20], {%rs1, %rs2};
$L__BB139_2:
ret;
}
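//
// NOTE: every Subsample_Bilinear_* entry in this module follows the pattern
// visible in the body above: build a 2D output coordinate from (%ctaid, %ntid,
// %tid), bounds-check it against dst_width/dst_height (param_8/param_9),
// derive per-axis scale factors from the source and destination sizes, clamp
// an extra filter width to [0,1] (the constants 0f3F000000 and 0fBF800000 are
// 0.5f and -1.0f), fetch four texture taps around the mapped source
// coordinate, and store the rounded average ((sum + 2) >> 2). The CUDA-level
// reconstruction below covers the single-plane 8-bit case; parameter names are
// inferred from the PTX, and the structure matches the bilinear subsampler in
// FFmpeg's vf_scale_cuda, from which these kernel names appear to originate.
// It is a sketch under those assumptions, not the verbatim source.
//
// __global__ void Subsample_Bilinear_u8(cudaTextureObject_t src_tex,
//                                       unsigned char *dst,
//                                       int dst_width, int dst_height,
//                                       int dst_pitch_bytes,
//                                       int src_width, int src_height)
// {
//     int xo = blockIdx.x * blockDim.x + threadIdx.x;   // mad.lo.s32 %r1
//     int yo = blockIdx.y * blockDim.y + threadIdx.y;   // mad.lo.s32 %r2
//     if (xo >= dst_width || yo >= dst_height)          // setp / and.pred / @!%p3 bra
//         return;
//     float hscale = (float)src_width  / (float)dst_width;   // param_11 / param_8
//     float vscale = (float)src_height / (float)dst_height;  // param_12 / param_9
//     // 3-tap weights {w, 1, w}, folded into two taps offset +/-d from center
//     float wh = fminf(fmaxf(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
//     float wv = fminf(fmaxf(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
//     float dx = wh / (0.5f + wh);                      // div.rn.f32 %f36
//     float dy = wv / (0.5f + wv);                      // div.rn.f32 %f38
//     float xi = hscale * (xo + 0.5f);                  // the fma.rn.f32 pairs
//     float yi = vscale * (yo + 0.5f);
//     unsigned int sum = tex2D<unsigned char>(src_tex, xi - dx, yi - dy)
//                      + tex2D<unsigned char>(src_tex, xi + dx, yi - dy)
//                      + tex2D<unsigned char>(src_tex, xi - dx, yi + dy)
//                      + tex2D<unsigned char>(src_tex, xi + dx, yi + dy);
//     // rounded average of the four taps; for wider elements the PTX first
//     // divides the byte pitch (param_10) by the element size (the shr.u64)
//     dst[yo * dst_pitch_bytes + xo] = (unsigned char)((sum + 2) >> 2);
// }
//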
// .globl Subsample_Bilinear_yuv420p_yuv444p16le
.visible .entry Subsample_Bilinear_yuv420p_yuv444p16le(
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_0,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_1,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_2,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_3,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_4,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_5,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_6,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_param_7,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_param_8,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_param_9,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_param_10,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_param_11,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_param_12,
.param .f32 Subsample_Bilinear_yuv420p_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB140_2;
bra.uni $L__BB140_1;
$L__BB140_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv420p_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv420p_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs4;
$L__BB140_2:
ret;
}
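//
// NOTE: the 8-bit-to-16-bit conversions in this kernel and the ones that
// follow widen the rounded average with "mul.lo.s16 %rs4, %rs3, 257".
// Multiplying an 8-bit value v by 257 computes (v << 8) | v, replicating the
// byte into both halves of the 16-bit word, so 0..255 maps exactly onto
// 0..65535 (0 -> 0, 255 -> 65535). Using the assumed names from the sketch
// above:
//
//     unsigned short out = (unsigned short)(((sum + 2) >> 2) * 257);
//     // equivalently: unsigned char v = (sum + 2) >> 2;  out = (v << 8) | v;
//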
// .globl Subsample_Bilinear_yuv420p_yuv444p16le_uv
.visible .entry Subsample_Bilinear_yuv420p_yuv444p16le_uv(
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<63>;
.reg .f32 %f<41>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB141_2;
bra.uni $L__BB141_1;
$L__BB141_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv420p_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
cvt.u16.u32 %rs1, %r55;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd15, %r2;
cvt.s64.s32 %rd16, %r5;
shr.u64 %rd17, %rd16, 1;
mul.lo.s64 %rd18, %rd17, %rd15;
cvt.s64.s32 %rd19, %r1;
add.s64 %rd20, %rd18, %rd19;
shl.b64 %rd21, %rd20, 1;
add.s64 %rd22, %rd2, %rd21;
st.global.u16 [%rd22], %rs4;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r56, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r57, %r37, 255;
add.s32 %r58, %r56, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r59, %r41, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r61, %r45, 255;
add.s32 %r62, %r60, %r61;
cvt.u16.u32 %rs5, %r62;
add.s16 %rs6, %rs5, 2;
shr.u16 %rs7, %rs6, 2;
mul.lo.s16 %rs8, %rs7, 257;
add.s64 %rd23, %rd1, %rd21;
st.global.u16 [%rd23], %rs8;
$L__BB141_2:
ret;
}
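//
// NOTE: the _uv variants for planar sources, like the one above, run the
// four-tap filter twice with the same coordinates: once on the U texture
// (param_1, held in %rd7) and once on the V texture (param_2, %rd11), storing
// to the two destination planes (param_5 and param_6) at an identical byte
// offset (%rd21 feeds both st.global.u16 instructions).
//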
// .globl Subsample_Bilinear_nv12_yuv444p16le
.visible .entry Subsample_Bilinear_nv12_yuv444p16le(
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_0,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_1,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_2,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_3,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_4,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_5,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_6,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_param_7,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_param_8,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_param_9,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_param_10,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_param_11,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_param_12,
.param .f32 Subsample_Bilinear_nv12_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB142_2;
bra.uni $L__BB142_1;
$L__BB142_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_nv12_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs4;
$L__BB142_2:
ret;
}
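//
// NOTE: this nv12 luma kernel is instruction-for-instruction identical to the
// yuv420p variant preceding it; NV12 and YUV420P share the same 8-bit luma
// plane, so only their chroma (_uv) kernels differ.
//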
// .globl Subsample_Bilinear_nv12_yuv444p16le_uv
.visible .entry Subsample_Bilinear_nv12_yuv444p16le_uv(
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bilinear_nv12_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bilinear_nv12_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bilinear_nv12_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<47>;
.reg .f32 %f<33>;
.reg .b64 %rd<19>;
ld.param.u32 %r4, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB143_2;
bra.uni $L__BB143_1;
$L__BB143_1:
ld.param.u32 %r7, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_nv12_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 255;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 255;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 255;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 255;
add.s32 %r46, %r42, %r45;
cvt.u16.u32 %rs1, %r44;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
cvt.u16.u32 %rs4, %r46;
add.s16 %rs5, %rs4, 2;
shr.u16 %rs6, %rs5, 2;
mul.lo.s16 %rs7, %rs3, 257;
cvt.s64.s32 %rd10, %r2;
cvt.s64.s32 %rd11, %r5;
shr.u64 %rd12, %rd11, 1;
mul.lo.s64 %rd13, %rd12, %rd10;
cvt.s64.s32 %rd14, %r1;
add.s64 %rd15, %rd13, %rd14;
shl.b64 %rd16, %rd15, 1;
add.s64 %rd17, %rd2, %rd16;
st.global.u16 [%rd17], %rs7;
mul.lo.s16 %rs8, %rs6, 257;
add.s64 %rd18, %rd1, %rd16;
st.global.u16 [%rd18], %rs8;
$L__BB143_2:
ret;
}
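//
// NOTE: for semi-planar NV12 the chroma samples are interleaved in one
// texture, so the kernel above issues only four fetches and splits each
// result: component .x (%r17 etc., masked with 255) accumulates into the U
// sum and component .y (%r18 etc.) into the V sum. A sketch of one tap,
// reusing the assumed names from the earlier reconstruction:
//
//     uchar2 uv = tex2D<uchar2>(src_tex, xi - dx, yi - dy);  // one fetch, both channels
//     sum_u += uv.x;
//     sum_v += uv.y;
//     // ... repeated for the other three taps; both sums are then rounded,
//     // widened with * 257, and stored to separate 16-bit planes.
//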
// .globl Subsample_Bilinear_yuv444p_yuv444p16le
.visible .entry Subsample_Bilinear_yuv444p_yuv444p16le(
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_0,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_1,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_2,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_3,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_4,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_5,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_6,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_param_7,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_param_8,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_param_9,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_param_10,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_param_11,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_param_12,
.param .f32 Subsample_Bilinear_yuv444p_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<40>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB144_2;
bra.uni $L__BB144_1;
$L__BB144_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 255;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 255;
add.s32 %r39, %r37, %r38;
cvt.u16.u32 %rs1, %r39;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs4;
$L__BB144_2:
ret;
}
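//
// NOTE: this yuv444p kernel body coincides with the yuv420p one; the chroma
// geometry differs only in how the host sizes the planes and textures, not in
// the per-pixel arithmetic. The module carries one specialization per
// (source format, destination format) pair, which is why near-identical
// bodies repeat under different entry names.
//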
// .globl Subsample_Bilinear_yuv444p_yuv444p16le_uv
.visible .entry Subsample_Bilinear_yuv444p_yuv444p16le_uv(
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<63>;
.reg .f32 %f<41>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB145_2;
bra.uni $L__BB145_1;
$L__BB145_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv444p_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 255;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 255;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 255;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 255;
add.s32 %r55, %r53, %r54;
cvt.u16.u32 %rs1, %r55;
add.s16 %rs2, %rs1, 2;
shr.u16 %rs3, %rs2, 2;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd15, %r2;
cvt.s64.s32 %rd16, %r5;
shr.u64 %rd17, %rd16, 1;
mul.lo.s64 %rd18, %rd17, %rd15;
cvt.s64.s32 %rd19, %r1;
add.s64 %rd20, %rd18, %rd19;
shl.b64 %rd21, %rd20, 1;
add.s64 %rd22, %rd2, %rd21;
st.global.u16 [%rd22], %rs4;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r56, %r33, 255;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r57, %r37, 255;
add.s32 %r58, %r56, %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r59, %r41, 255;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r61, %r45, 255;
add.s32 %r62, %r60, %r61;
cvt.u16.u32 %rs5, %r62;
add.s16 %rs6, %rs5, 2;
shr.u16 %rs7, %rs6, 2;
mul.lo.s16 %rs8, %rs7, 257;
add.s64 %rd23, %rd1, %rd21;
st.global.u16 [%rd23], %rs8;
$L__BB145_2:
ret;
}
// .globl Subsample_Bilinear_p010le_yuv444p16le
.visible .entry Subsample_Bilinear_p010le_yuv444p16le(
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_0,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_1,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_2,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_3,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_4,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_5,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_6,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_param_7,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_param_8,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_param_9,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_param_10,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_param_11,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_param_12,
.param .f32 Subsample_Bilinear_p010le_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB146_2;
bra.uni $L__BB146_1;
$L__BB146_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p010le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.u16.u32 %rs1, %r41;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %rs3;
$L__BB146_2:
ret;
}
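//
// NOTE: P010 stores 10-bit samples MSB-aligned in 16-bit words. After the
// rounded average, "shr.u16 %rs2, %rs1, 10; or.b16 %rs3, %rs2, %rs1"
// replicates the top bits into the six low zero bits, the usual
// bit-replication trick for expanding MSB-aligned values to full 16-bit
// range. As a sketch:
//
//     unsigned short v = (unsigned short)((sum + 2) >> 2); // 10 significant high bits
//     unsigned short out = v | (v >> 10);                  // fill the low 6 bits
//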
// .globl Subsample_Bilinear_p010le_yuv444p16le_uv
.visible .entry Subsample_Bilinear_p010le_yuv444p16le_uv(
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bilinear_p010le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bilinear_p010le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bilinear_p010le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<7>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<19>;
ld.param.u32 %r4, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB147_2;
bra.uni $L__BB147_1;
$L__BB147_1:
ld.param.u32 %r7, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_p010le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.u16.u32 %rs1, %r49;
cvt.u16.u32 %rs2, %r50;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
cvt.s64.s32 %rd10, %r2;
cvt.s64.s32 %rd11, %r5;
shr.u64 %rd12, %rd11, 1;
mul.lo.s64 %rd13, %rd12, %rd10;
cvt.s64.s32 %rd14, %r1;
add.s64 %rd15, %rd13, %rd14;
shl.b64 %rd16, %rd15, 1;
add.s64 %rd17, %rd2, %rd16;
st.global.u16 [%rd17], %rs4;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
add.s64 %rd18, %rd1, %rd16;
st.global.u16 [%rd18], %rs6;
$L__BB147_2:
ret;
}
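//
// NOTE: the P010 chroma kernel above combines the two patterns already seen:
// the NV12-style interleaved read (components .x and .y, masked with 65535)
// and the v | (v >> 10) bit replication, applied once per channel before the
// two 16-bit stores.
//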
// .globl Subsample_Bilinear_p016le_yuv444p16le
.visible .entry Subsample_Bilinear_p016le_yuv444p16le(
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_0,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_1,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_2,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_3,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_4,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_5,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_6,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_param_7,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_param_8,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_param_9,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_param_10,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_param_11,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_param_12,
.param .f32 Subsample_Bilinear_p016le_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB148_2;
bra.uni $L__BB148_1;
$L__BB148_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_p016le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
$L__BB148_2:
ret;
}
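//
// NOTE: P016 is already full-range 16-bit, so no widening step is needed; the
// rounded average in %r41 is stored directly, with st.global.u16 keeping only
// the low 16 bits of the 32-bit register.
//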
// .globl Subsample_Bilinear_p016le_yuv444p16le_uv
.visible .entry Subsample_Bilinear_p016le_yuv444p16le_uv(
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bilinear_p016le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bilinear_p016le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bilinear_p016le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<51>;
.reg .f32 %f<33>;
.reg .b64 %rd<19>;
ld.param.u32 %r4, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB149_2;
bra.uni $L__BB149_1;
$L__BB149_1:
ld.param.u32 %r7, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bilinear_p016le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 65535;
and.b32 %r37, %r22, 65535;
add.s32 %r38, %r36, %r37;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f5, %f8}];
// end inline asm
and.b32 %r39, %r25, 65535;
add.s32 %r40, %r35, %r39;
and.b32 %r41, %r26, 65535;
add.s32 %r42, %r38, %r41;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f7, %f8}];
// end inline asm
and.b32 %r43, %r29, 65535;
add.s32 %r44, %r40, %r43;
and.b32 %r45, %r30, 65535;
add.s32 %r46, %r42, %r45;
add.s32 %r47, %r44, 2;
add.s32 %r48, %r46, 2;
shr.u32 %r49, %r47, 2;
shr.u32 %r50, %r48, 2;
cvt.s64.s32 %rd10, %r2;
cvt.s64.s32 %rd11, %r5;
shr.u64 %rd12, %rd11, 1;
mul.lo.s64 %rd13, %rd12, %rd10;
cvt.s64.s32 %rd14, %r1;
add.s64 %rd15, %rd13, %rd14;
shl.b64 %rd16, %rd15, 1;
add.s64 %rd17, %rd2, %rd16;
st.global.u16 [%rd17], %r49;
add.s64 %rd18, %rd1, %rd16;
st.global.u16 [%rd18], %r50;
$L__BB149_2:
ret;
}
// .globl Subsample_Bilinear_yuv444p16le_yuv444p16le
.visible .entry Subsample_Bilinear_yuv444p16le_yuv444p16le(
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_yuv444p16le_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<42>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB150_2;
bra.uni $L__BB150_1;
$L__BB150_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_yuv444p16le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 65535;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 65535;
add.s32 %r35, %r33, %r34;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r36, %r25, 65535;
add.s32 %r37, %r35, %r36;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r38, %r29, 65535;
add.s32 %r39, %r37, %r38;
add.s32 %r40, %r39, 2;
shr.u32 %r41, %r40, 2;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 1;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 1;
add.s64 %rd15, %rd1, %rd14;
st.global.u16 [%rd15], %r41;
$L__BB150_2:
ret;
}
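//
// NOTE: the 16-bit-to-16-bit kernels here and in the _uv variant that follows
// are pure resamplers; their bodies match the p016le path, since both read
// and write full-range 16-bit samples with no depth conversion.
//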
// .globl Subsample_Bilinear_yuv444p16le_yuv444p16le_uv
.visible .entry Subsample_Bilinear_yuv444p16le_yuv444p16le_uv(
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<4>;
.reg .b32 %r<67>;
.reg .f32 %f<41>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB151_2;
bra.uni $L__BB151_1;
$L__BB151_1:
ld.param.u32 %r7, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_10];
ld.param.u64 %rd11, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bilinear_yuv444p16le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f17, %r6;
cvt.rn.f32.s32 %f18, %r3;
div.rn.f32 %f19, %f17, %f18;
cvt.rn.f32.s32 %f20, %r7;
cvt.rn.f32.s32 %f21, %r4;
div.rn.f32 %f22, %f20, %f21;
add.f32 %f23, %f19, 0fBF800000;
mul.f32 %f24, %f23, 0f3F000000;
max.f32 %f25, %f24, 0f00000000;
min.f32 %f26, %f25, 0f3F800000;
add.f32 %f27, %f22, 0fBF800000;
mul.f32 %f28, %f27, 0f3F000000;
max.f32 %f29, %f28, 0f00000000;
min.f32 %f30, %f29, 0f3F800000;
cvt.rn.f32.s32 %f31, %r2;
add.f32 %f32, %f31, 0f3F000000;
cvt.rn.f32.s32 %f33, %r1;
add.f32 %f34, %f33, 0f3F000000;
add.f32 %f35, %f26, 0f3F000000;
div.rn.f32 %f36, %f26, %f35;
add.f32 %f37, %f30, 0f3F000000;
div.rn.f32 %f38, %f30, %f37;
neg.f32 %f39, %f36;
fma.rn.f32 %f5, %f19, %f34, %f39;
neg.f32 %f40, %f38;
fma.rn.f32 %f4, %f22, %f32, %f40;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f5, %f4}];
// end inline asm
and.b32 %r49, %r17, 65535;
fma.rn.f32 %f7, %f19, %f34, %f36;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f7, %f4}];
// end inline asm
and.b32 %r50, %r21, 65535;
add.s32 %r51, %r49, %r50;
fma.rn.f32 %f8, %f22, %f32, %f38;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f5, %f8}];
// end inline asm
and.b32 %r52, %r25, 65535;
add.s32 %r53, %r51, %r52;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f7, %f8}];
// end inline asm
and.b32 %r54, %r29, 65535;
add.s32 %r55, %r53, %r54;
add.s32 %r56, %r55, 2;
shr.u32 %r57, %r56, 2;
cvt.s64.s32 %rd15, %r2;
cvt.s64.s32 %rd16, %r5;
shr.u64 %rd17, %rd16, 1;
mul.lo.s64 %rd18, %rd17, %rd15;
cvt.s64.s32 %rd19, %r1;
add.s64 %rd20, %rd18, %rd19;
shl.b64 %rd21, %rd20, 1;
add.s64 %rd22, %rd2, %rd21;
st.global.u16 [%rd22], %r57;
// begin inline asm
tex.2d.v4.u32.f32 {%r33, %r34, %r35, %r36}, [%rd11, {%f5, %f4}];
// end inline asm
and.b32 %r58, %r33, 65535;
// begin inline asm
tex.2d.v4.u32.f32 {%r37, %r38, %r39, %r40}, [%rd11, {%f7, %f4}];
// end inline asm
and.b32 %r59, %r37, 65535;
add.s32 %r60, %r58, %r59;
// begin inline asm
tex.2d.v4.u32.f32 {%r41, %r42, %r43, %r44}, [%rd11, {%f5, %f8}];
// end inline asm
and.b32 %r61, %r41, 65535;
add.s32 %r62, %r60, %r61;
// begin inline asm
tex.2d.v4.u32.f32 {%r45, %r46, %r47, %r48}, [%rd11, {%f7, %f8}];
// end inline asm
and.b32 %r63, %r45, 65535;
add.s32 %r64, %r62, %r63;
add.s32 %r65, %r64, 2;
shr.u32 %r66, %r65, 2;
add.s64 %rd23, %rd1, %rd21;
st.global.u16 [%rd23], %r66;
$L__BB151_2:
ret;
}
// .globl Subsample_Bilinear_bgr0_bgr0
.visible .entry Subsample_Bilinear_bgr0_bgr0(
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_0,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_1,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_2,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_3,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_4,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_5,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_6,
.param .u64 Subsample_Bilinear_bgr0_bgr0_param_7,
.param .u32 Subsample_Bilinear_bgr0_bgr0_param_8,
.param .u32 Subsample_Bilinear_bgr0_bgr0_param_9,
.param .u32 Subsample_Bilinear_bgr0_bgr0_param_10,
.param .u32 Subsample_Bilinear_bgr0_bgr0_param_11,
.param .u32 Subsample_Bilinear_bgr0_bgr0_param_12,
.param .f32 Subsample_Bilinear_bgr0_bgr0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<69>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_bgr0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_bgr0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB152_2;
bra.uni $L__BB152_1;
$L__BB152_1:
ld.param.u32 %r7, [Subsample_Bilinear_bgr0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_bgr0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_bgr0_bgr0_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_bgr0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_bgr0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
and.b32 %r39, %r19, 255;
and.b32 %r40, %r23, 255;
add.s32 %r41, %r39, %r40;
and.b32 %r42, %r20, 255;
and.b32 %r43, %r24, 255;
add.s32 %r44, %r42, %r43;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r45, %r25, 255;
add.s32 %r46, %r35, %r45;
and.b32 %r47, %r26, 255;
add.s32 %r48, %r38, %r47;
and.b32 %r49, %r27, 255;
add.s32 %r50, %r41, %r49;
and.b32 %r51, %r28, 255;
add.s32 %r52, %r44, %r51;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r53, %r29, 255;
add.s32 %r54, %r46, %r53;
and.b32 %r55, %r30, 255;
add.s32 %r56, %r48, %r55;
and.b32 %r57, %r31, 255;
add.s32 %r58, %r50, %r57;
and.b32 %r59, %r32, 255;
add.s32 %r60, %r52, %r59;
add.s32 %r61, %r54, 2;
add.s32 %r62, %r56, 2;
add.s32 %r63, %r58, 2;
add.s32 %r64, %r60, 2;
shr.u32 %r65, %r61, 2;
shr.u32 %r66, %r62, 2;
shr.u32 %r67, %r63, 2;
shr.u32 %r68, %r64, 2;
cvt.u16.u32 %rs1, %r65;
cvt.u16.u32 %rs2, %r66;
cvt.u16.u32 %rs3, %r67;
cvt.u16.u32 %rs4, %r68;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u8 [%rd15], {%rs1, %rs2, %rs3, %rs4};
$L__BB152_2:
ret;
}
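//
// NOTE: the packed-RGB kernel above filters all four bytes of each pixel:
// components .x through .w are masked and accumulated separately, the four
// rounded averages are stored with a single st.global.v4.u8, and the byte
// pitch is divided by 4 (the shr.u64 by 2) because each pixel is one 32-bit
// word. A sketch of the per-pixel tail, under the same assumed names:
//
//     uchar4 p = tex2D<uchar4>(src_tex, xi - dx, yi - dy);  // one of four taps
//     sx += p.x; sy += p.y; sz += p.z; sw += p.w;           // per-channel sums
//     // ... after all four taps:
//     uchar4 *row = (uchar4 *)(dst + yo * dst_pitch_bytes);
//     row[xo] = make_uchar4((sx + 2) >> 2, (sy + 2) >> 2,
//                           (sz + 2) >> 2, (sw + 2) >> 2);
//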
// .globl Subsample_Bilinear_bgr0_bgr0_uv
.visible .entry Subsample_Bilinear_bgr0_bgr0_uv(
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_0,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_1,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_2,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_3,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_4,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_5,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_6,
.param .u64 Subsample_Bilinear_bgr0_bgr0_uv_param_7,
.param .u32 Subsample_Bilinear_bgr0_bgr0_uv_param_8,
.param .u32 Subsample_Bilinear_bgr0_bgr0_uv_param_9,
.param .u32 Subsample_Bilinear_bgr0_bgr0_uv_param_10,
.param .u32 Subsample_Bilinear_bgr0_bgr0_uv_param_11,
.param .u32 Subsample_Bilinear_bgr0_bgr0_uv_param_12,
.param .f32 Subsample_Bilinear_bgr0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
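//
// NOTE: packed RGB formats have no separate chroma plane, so the _uv entry
// point above compiles to a bare ret. It presumably exists only so the
// host-side launcher can resolve a _uv symbol for every format pair through a
// uniform naming scheme.
//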
// .globl Subsample_Bilinear_rgb0_rgb0
.visible .entry Subsample_Bilinear_rgb0_rgb0(
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_0,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_1,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_2,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_3,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_4,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_5,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_6,
.param .u64 Subsample_Bilinear_rgb0_rgb0_param_7,
.param .u32 Subsample_Bilinear_rgb0_rgb0_param_8,
.param .u32 Subsample_Bilinear_rgb0_rgb0_param_9,
.param .u32 Subsample_Bilinear_rgb0_rgb0_param_10,
.param .u32 Subsample_Bilinear_rgb0_rgb0_param_11,
.param .u32 Subsample_Bilinear_rgb0_rgb0_param_12,
.param .f32 Subsample_Bilinear_rgb0_rgb0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<69>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_rgb0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_rgb0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB154_2;
bra.uni $L__BB154_1;
$L__BB154_1:
ld.param.u32 %r7, [Subsample_Bilinear_rgb0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_rgb0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_rgb0_rgb0_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_rgb0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_rgb0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
and.b32 %r39, %r19, 255;
and.b32 %r40, %r23, 255;
add.s32 %r41, %r39, %r40;
and.b32 %r42, %r20, 255;
and.b32 %r43, %r24, 255;
add.s32 %r44, %r42, %r43;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r45, %r25, 255;
add.s32 %r46, %r35, %r45;
and.b32 %r47, %r26, 255;
add.s32 %r48, %r38, %r47;
and.b32 %r49, %r27, 255;
add.s32 %r50, %r41, %r49;
and.b32 %r51, %r28, 255;
add.s32 %r52, %r44, %r51;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r53, %r29, 255;
add.s32 %r54, %r46, %r53;
and.b32 %r55, %r30, 255;
add.s32 %r56, %r48, %r55;
and.b32 %r57, %r31, 255;
add.s32 %r58, %r50, %r57;
and.b32 %r59, %r32, 255;
add.s32 %r60, %r52, %r59;
add.s32 %r61, %r54, 2;
add.s32 %r62, %r56, 2;
add.s32 %r63, %r58, 2;
add.s32 %r64, %r60, 2;
shr.u32 %r65, %r61, 2;
shr.u32 %r66, %r62, 2;
shr.u32 %r67, %r63, 2;
shr.u32 %r68, %r64, 2;
cvt.u16.u32 %rs1, %r65;
cvt.u16.u32 %rs2, %r66;
cvt.u16.u32 %rs3, %r67;
cvt.u16.u32 %rs4, %r68;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u8 [%rd15], {%rs1, %rs2, %rs3, %rs4};
$L__BB154_2:
ret;
}
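// Subsample_Bilinear_rgb0_rgb0: same bilinear body as the kernel above;
// the packed store keeps the source channel order
// ({%rs1, %rs2, %rs3, %rs4}), so no swizzle is needed.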
// .globl Subsample_Bilinear_rgb0_rgb0_uv
.visible .entry Subsample_Bilinear_rgb0_rgb0_uv(
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_0,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_1,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_2,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_3,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_4,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_5,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_6,
.param .u64 Subsample_Bilinear_rgb0_rgb0_uv_param_7,
.param .u32 Subsample_Bilinear_rgb0_rgb0_uv_param_8,
.param .u32 Subsample_Bilinear_rgb0_rgb0_uv_param_9,
.param .u32 Subsample_Bilinear_rgb0_rgb0_uv_param_10,
.param .u32 Subsample_Bilinear_rgb0_rgb0_uv_param_11,
.param .u32 Subsample_Bilinear_rgb0_rgb0_uv_param_12,
.param .f32 Subsample_Bilinear_rgb0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
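// No-op *_uv stub, as above (packed RGB has no chroma plane).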
// .globl Subsample_Bilinear_bgr0_rgb0
.visible .entry Subsample_Bilinear_bgr0_rgb0(
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_0,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_1,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_2,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_3,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_4,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_5,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_6,
.param .u64 Subsample_Bilinear_bgr0_rgb0_param_7,
.param .u32 Subsample_Bilinear_bgr0_rgb0_param_8,
.param .u32 Subsample_Bilinear_bgr0_rgb0_param_9,
.param .u32 Subsample_Bilinear_bgr0_rgb0_param_10,
.param .u32 Subsample_Bilinear_bgr0_rgb0_param_11,
.param .u32 Subsample_Bilinear_bgr0_rgb0_param_12,
.param .f32 Subsample_Bilinear_bgr0_rgb0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<69>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_bgr0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_bgr0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB156_2;
bra.uni $L__BB156_1;
$L__BB156_1:
ld.param.u32 %r7, [Subsample_Bilinear_bgr0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_bgr0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_bgr0_rgb0_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_bgr0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_bgr0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
and.b32 %r39, %r19, 255;
and.b32 %r40, %r23, 255;
add.s32 %r41, %r39, %r40;
and.b32 %r42, %r20, 255;
and.b32 %r43, %r24, 255;
add.s32 %r44, %r42, %r43;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r45, %r25, 255;
add.s32 %r46, %r35, %r45;
and.b32 %r47, %r26, 255;
add.s32 %r48, %r38, %r47;
and.b32 %r49, %r27, 255;
add.s32 %r50, %r41, %r49;
and.b32 %r51, %r28, 255;
add.s32 %r52, %r44, %r51;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r53, %r29, 255;
add.s32 %r54, %r46, %r53;
and.b32 %r55, %r30, 255;
add.s32 %r56, %r48, %r55;
and.b32 %r57, %r31, 255;
add.s32 %r58, %r50, %r57;
and.b32 %r59, %r32, 255;
add.s32 %r60, %r52, %r59;
add.s32 %r61, %r54, 2;
add.s32 %r62, %r56, 2;
add.s32 %r63, %r58, 2;
add.s32 %r64, %r60, 2;
shr.u32 %r65, %r61, 2;
shr.u32 %r66, %r62, 2;
shr.u32 %r67, %r63, 2;
shr.u32 %r68, %r64, 2;
cvt.u16.u32 %rs1, %r65;
cvt.u16.u32 %rs2, %r66;
cvt.u16.u32 %rs3, %r67;
cvt.u16.u32 %rs4, %r68;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u8 [%rd15], {%rs3, %rs2, %rs1, %rs4};
$L__BB156_2:
ret;
}
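// Subsample_Bilinear_bgr0_rgb0: identical arithmetic, but the final
// store writes {%rs3, %rs2, %rs1, %rs4}, swapping the first and third
// channels to convert between the bgr0 and rgb0 packed layouts.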
// .globl Subsample_Bilinear_bgr0_rgb0_uv
.visible .entry Subsample_Bilinear_bgr0_rgb0_uv(
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_0,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_1,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_2,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_3,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_4,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_5,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_6,
.param .u64 Subsample_Bilinear_bgr0_rgb0_uv_param_7,
.param .u32 Subsample_Bilinear_bgr0_rgb0_uv_param_8,
.param .u32 Subsample_Bilinear_bgr0_rgb0_uv_param_9,
.param .u32 Subsample_Bilinear_bgr0_rgb0_uv_param_10,
.param .u32 Subsample_Bilinear_bgr0_rgb0_uv_param_11,
.param .u32 Subsample_Bilinear_bgr0_rgb0_uv_param_12,
.param .f32 Subsample_Bilinear_bgr0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
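// No-op *_uv stub (packed RGB source and destination, no chroma plane).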
// .globl Subsample_Bilinear_rgb0_bgr0
.visible .entry Subsample_Bilinear_rgb0_bgr0(
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_0,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_1,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_2,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_3,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_4,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_5,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_6,
.param .u64 Subsample_Bilinear_rgb0_bgr0_param_7,
.param .u32 Subsample_Bilinear_rgb0_bgr0_param_8,
.param .u32 Subsample_Bilinear_rgb0_bgr0_param_9,
.param .u32 Subsample_Bilinear_rgb0_bgr0_param_10,
.param .u32 Subsample_Bilinear_rgb0_bgr0_param_11,
.param .u32 Subsample_Bilinear_rgb0_bgr0_param_12,
.param .f32 Subsample_Bilinear_rgb0_bgr0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<5>;
.reg .b32 %r<69>;
.reg .f32 %f<33>;
.reg .b64 %rd<16>;
ld.param.u32 %r4, [Subsample_Bilinear_rgb0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Bilinear_rgb0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB158_2;
bra.uni $L__BB158_1;
$L__BB158_1:
ld.param.u32 %r7, [Subsample_Bilinear_rgb0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Bilinear_rgb0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Bilinear_rgb0_bgr0_param_10];
ld.param.u64 %rd4, [Subsample_Bilinear_rgb0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Bilinear_rgb0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f9, %r6;
cvt.rn.f32.s32 %f10, %r3;
div.rn.f32 %f11, %f9, %f10;
cvt.rn.f32.s32 %f12, %r7;
cvt.rn.f32.s32 %f13, %r4;
div.rn.f32 %f14, %f12, %f13;
add.f32 %f15, %f11, 0fBF800000;
mul.f32 %f16, %f15, 0f3F000000;
max.f32 %f17, %f16, 0f00000000;
min.f32 %f18, %f17, 0f3F800000;
add.f32 %f19, %f14, 0fBF800000;
mul.f32 %f20, %f19, 0f3F000000;
max.f32 %f21, %f20, 0f00000000;
min.f32 %f22, %f21, 0f3F800000;
cvt.rn.f32.s32 %f23, %r2;
add.f32 %f24, %f23, 0f3F000000;
cvt.rn.f32.s32 %f25, %r1;
add.f32 %f26, %f25, 0f3F000000;
add.f32 %f27, %f18, 0f3F000000;
div.rn.f32 %f28, %f18, %f27;
add.f32 %f29, %f22, 0f3F000000;
div.rn.f32 %f30, %f22, %f29;
neg.f32 %f31, %f28;
fma.rn.f32 %f5, %f11, %f26, %f31;
neg.f32 %f32, %f30;
fma.rn.f32 %f4, %f14, %f24, %f32;
// begin inline asm
tex.2d.v4.u32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f5, %f4}];
// end inline asm
and.b32 %r33, %r17, 255;
fma.rn.f32 %f7, %f11, %f26, %f28;
// begin inline asm
tex.2d.v4.u32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f7, %f4}];
// end inline asm
and.b32 %r34, %r21, 255;
add.s32 %r35, %r33, %r34;
and.b32 %r36, %r18, 255;
and.b32 %r37, %r22, 255;
add.s32 %r38, %r36, %r37;
and.b32 %r39, %r19, 255;
and.b32 %r40, %r23, 255;
add.s32 %r41, %r39, %r40;
and.b32 %r42, %r20, 255;
and.b32 %r43, %r24, 255;
add.s32 %r44, %r42, %r43;
fma.rn.f32 %f8, %f14, %f24, %f30;
// begin inline asm
tex.2d.v4.u32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f5, %f8}];
// end inline asm
and.b32 %r45, %r25, 255;
add.s32 %r46, %r35, %r45;
and.b32 %r47, %r26, 255;
add.s32 %r48, %r38, %r47;
and.b32 %r49, %r27, 255;
add.s32 %r50, %r41, %r49;
and.b32 %r51, %r28, 255;
add.s32 %r52, %r44, %r51;
// begin inline asm
tex.2d.v4.u32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f7, %f8}];
// end inline asm
and.b32 %r53, %r29, 255;
add.s32 %r54, %r46, %r53;
and.b32 %r55, %r30, 255;
add.s32 %r56, %r48, %r55;
and.b32 %r57, %r31, 255;
add.s32 %r58, %r50, %r57;
and.b32 %r59, %r32, 255;
add.s32 %r60, %r52, %r59;
add.s32 %r61, %r54, 2;
add.s32 %r62, %r56, 2;
add.s32 %r63, %r58, 2;
add.s32 %r64, %r60, 2;
shr.u32 %r65, %r61, 2;
shr.u32 %r66, %r62, 2;
shr.u32 %r67, %r63, 2;
shr.u32 %r68, %r64, 2;
cvt.u16.u32 %rs1, %r65;
cvt.u16.u32 %rs2, %r66;
cvt.u16.u32 %rs3, %r67;
cvt.u16.u32 %rs4, %r68;
cvt.s64.s32 %rd8, %r2;
cvt.s64.s32 %rd9, %r5;
shr.u64 %rd10, %rd9, 2;
mul.lo.s64 %rd11, %rd10, %rd8;
cvt.s64.s32 %rd12, %r1;
add.s64 %rd13, %rd11, %rd12;
shl.b64 %rd14, %rd13, 2;
add.s64 %rd15, %rd1, %rd14;
st.global.v4.u8 [%rd15], {%rs3, %rs2, %rs1, %rs4};
$L__BB158_2:
ret;
}
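// Subsample_Bilinear_rgb0_bgr0: the mirror conversion of bgr0_rgb0
// above, with the same swizzled store {%rs3, %rs2, %rs1, %rs4}.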
// .globl Subsample_Bilinear_rgb0_bgr0_uv
.visible .entry Subsample_Bilinear_rgb0_bgr0_uv(
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_0,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_1,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_2,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_3,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_4,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_5,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_6,
.param .u64 Subsample_Bilinear_rgb0_bgr0_uv_param_7,
.param .u32 Subsample_Bilinear_rgb0_bgr0_uv_param_8,
.param .u32 Subsample_Bilinear_rgb0_bgr0_uv_param_9,
.param .u32 Subsample_Bilinear_rgb0_bgr0_uv_param_10,
.param .u32 Subsample_Bilinear_rgb0_bgr0_uv_param_11,
.param .u32 Subsample_Bilinear_rgb0_bgr0_uv_param_12,
.param .f32 Subsample_Bilinear_rgb0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
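// No-op *_uv stub, as above.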
// .globl Subsample_Bicubic_yuv420p_yuv420p
.visible .entry Subsample_Bicubic_yuv420p_yuv420p(
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_0,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_1,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_2,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_3,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_4,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_5,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_6,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_param_7,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_param_8,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_param_9,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_param_10,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_param_11,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_param_12,
.param .f32 Subsample_Bicubic_yuv420p_yuv420p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB160_2;
bra.uni $L__BB160_1;
$L__BB160_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_yuv420p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv420p_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB160_2:
ret;
}
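//
// Note on the Subsample_Bicubic_* family: param_13 carries the filter's
// free coefficient. The comparison against 0f497423F0 (exactly
// 999999.0f, evidently an "unset" sentinel; this value and the kernel
// naming are consistent with a CUDA video-scaling filter, though the
// dump itself does not identify its source) selects
//   A = (param == 999999.0f) ? 0.0f : -param
// The source position per axis is p = (coord + 0.5) * scale - 0.5;
// with d = p - floor(p), the four per-axis tap weights follow the
// standard bicubic convolution (Keys) kernel
//   |t| <= 1:      w(t) = (A + 2)|t|^3 - (A + 3)|t|^2 + 1
//   1 < |t| <= 2:  w(t) = A * (|t|^3 - 5|t|^2 + 8|t| - 4)
// evaluated at t = d + 1, d, and 1 - d, with the fourth weight taken as
// 1 - (w0 + w1 + w2) so the row sums to one. Sixteen tex.2d.v4.f32.f32
// fetches (presumably normalized-float texture reads, hence the final
// multiply by 0f437F0000 = 255.0) are blended as a separable 4x4 filter
// and the result is truncated (cvt.rzi) to one u8. A CUDA-level sketch
// of the weight helpers, reconstructed from the FMA chains above; the
// names are illustrative:
//
//   __device__ float bicubic_outer(float A, float t)   // 1 <= t < 2
//   { return A * (((t - 5.0f) * t + 8.0f) * t - 4.0f); }
//
//   __device__ float bicubic_inner(float A, float t)   // 0 <= t < 1
//   { return ((A + 2.0f) * t - (A + 3.0f)) * t * t + 1.0f; }
//
//   // taps at floor(p)-1 .. floor(p)+2, with d = p - floor(p):
//   // w0 = bicubic_outer(A, d + 1.0f);
//   // w1 = bicubic_inner(A, d);
//   // w2 = bicubic_inner(A, 1.0f - d);
//   // w3 = 1.0f - w0 - w1 - w2;   // weights normalized by construction
//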
// .globl Subsample_Bicubic_yuv420p_yuv420p_uv
.visible .entry Subsample_Bicubic_yuv420p_yuv420p_uv(
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_0,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_1,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_2,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_3,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_4,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_5,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_6,
.param .u64 Subsample_Bicubic_yuv420p_yuv420p_uv_param_7,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_uv_param_8,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_uv_param_9,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_uv_param_10,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_uv_param_11,
.param .u32 Subsample_Bicubic_yuv420p_yuv420p_uv_param_12,
.param .f32 Subsample_Bicubic_yuv420p_yuv420p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB161_2;
bra.uni $L__BB161_1;
$L__BB161_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv420p_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.wide.s32 %rd39, %r2, %r5;
cvt.s64.s32 %rd40, %r1;
add.s64 %rd41, %rd39, %rd40;
add.s64 %rd42, %rd2, %rd41;
st.global.u8 [%rd42], %rs1;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f190;
add.s64 %rd43, %rd1, %rd41;
st.global.u8 [%rd43], %rs2;
$L__BB161_2:
ret;
}
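// The _uv variant runs the same 4x4 bicubic blend twice with shared
// coordinates and weights: once against the U-plane texture (param_1)
// storing through param_5, and once against the V-plane texture
// (param_2) storing through param_6.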
// .globl Subsample_Bicubic_nv12_yuv420p
.visible .entry Subsample_Bicubic_nv12_yuv420p(
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_0,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_1,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_2,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_3,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_4,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_5,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_6,
.param .u64 Subsample_Bicubic_nv12_yuv420p_param_7,
.param .u32 Subsample_Bicubic_nv12_yuv420p_param_8,
.param .u32 Subsample_Bicubic_nv12_yuv420p_param_9,
.param .u32 Subsample_Bicubic_nv12_yuv420p_param_10,
.param .u32 Subsample_Bicubic_nv12_yuv420p_param_11,
.param .u32 Subsample_Bicubic_nv12_yuv420p_param_12,
.param .f32 Subsample_Bicubic_nv12_yuv420p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB162_2;
bra.uni $L__BB162_1;
$L__BB162_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_yuv420p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB162_2:
ret;
}
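// Subsample_Bicubic_nv12_yuv420p: effectively the same body as the
// yuv420p luma kernel above. The luma plane layout is identical in
// nv12 and yuv420p, so only the chroma (_uv) path differs.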
// .globl Subsample_Bicubic_nv12_yuv420p_uv
.visible .entry Subsample_Bicubic_nv12_yuv420p_uv(
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_0,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_1,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_2,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_3,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_4,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_5,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_6,
.param .u64 Subsample_Bicubic_nv12_yuv420p_uv_param_7,
.param .u32 Subsample_Bicubic_nv12_yuv420p_uv_param_8,
.param .u32 Subsample_Bicubic_nv12_yuv420p_uv_param_9,
.param .u32 Subsample_Bicubic_nv12_yuv420p_uv_param_10,
.param .u32 Subsample_Bicubic_nv12_yuv420p_uv_param_11,
.param .u32 Subsample_Bicubic_nv12_yuv420p_uv_param_12,
.param .f32 Subsample_Bicubic_nv12_yuv420p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB163_2;
bra.uni $L__BB163_1;
$L__BB163_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_yuv420p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_nv12_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_nv12_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f437F0000;
mul.f32 %f158, %f156, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs1;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs2;
$L__BB163_2:
ret;
}
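// The nv12 _uv variant samples a single interleaved chroma texture
// (param_1) and uses two components of each fetch (%r17/%r18, i.e. the
// .x/.y channels carrying U and V), running the bicubic blend on both
// in lockstep before storing U and V to separate planes (param_5 and
// param_6). This is the deinterleaving half of the nv12 -> yuv420p
// conversion.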
// .globl Subsample_Bicubic_yuv444p_yuv420p
.visible .entry Subsample_Bicubic_yuv444p_yuv420p(
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_0,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_1,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_2,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_3,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_4,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_5,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_6,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_param_7,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_param_8,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_param_9,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_param_10,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_param_11,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_param_12,
.param .f32 Subsample_Bicubic_yuv444p_yuv420p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB164_2;
bra.uni $L__BB164_1;
$L__BB164_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_yuv420p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB164_2:
ret;
}
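// Subsample_Bicubic_yuv444p_yuv420p: again the shared bicubic luma
// body; the yuv444p source format appears to matter only for how the
// host binds the chroma textures consumed by the _uv kernel below.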
// .globl Subsample_Bicubic_yuv444p_yuv420p_uv
.visible .entry Subsample_Bicubic_yuv444p_yuv420p_uv(
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p_yuv420p_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p_yuv420p_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p_yuv420p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB165_2;
bra.uni $L__BB165_1;
$L__BB165_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv444p_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.wide.s32 %rd39, %r2, %r5;
cvt.s64.s32 %rd40, %r1;
add.s64 %rd41, %rd39, %rd40;
add.s64 %rd42, %rd2, %rd41;
st.global.u8 [%rd42], %rs1;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f190;
add.s64 %rd43, %rd1, %rd41;
st.global.u8 [%rd43], %rs2;
$L__BB165_2:
ret;
}
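//
// Annotation (inferred): the fma chain above (and repeated in every
// bicubic kernel below) evaluates the standard two-branch bicubic
// polynomial per axis. Decoding the hex immediates: 0f3F000000 = 0.5 and
// 0fBF000000 = -0.5 (half-texel centering), 0fC0A00000 = -5, 0f41000000
// = 8, 0fC0800000 = -4, 0f40000000 = 2, 0f40400000 = 3, 0f3F800000 = 1,
// and 0f497423F0 = 999999.0, a "default parameter" sentinel that forces
// A = 0. A minimal CUDA sketch of the weight computation under those
// assumptions; the names are illustrative, not taken from this module:
/*
#include <cuda_runtime.h>   // for float4

__device__ float4 bicubic_weights(float t, float param)
{
    // selp.f32 above: the 999999.0f sentinel selects A = 0, otherwise
    // the user parameter is negated.
    const float A = (param == 999999.0f) ? 0.0f : -param;
    float4 w;
    // tap at distance t+1 (1 < |x| < 2 branch): A|x|^3 - 5A|x|^2 + 8A|x| - 4A
    w.x = ((A * (t + 1.0f) - 5.0f * A) * (t + 1.0f) + 8.0f * A) * (t + 1.0f)
          - 4.0f * A;
    // taps at distance t and 1-t (|x| <= 1 branch): (A+2)|x|^3 - (A+3)|x|^2 + 1
    w.y = ((A + 2.0f) * t - (A + 3.0f)) * t * t + 1.0f;
    w.z = ((A + 2.0f) * (1.0f - t) - (A + 3.0f)) * (1.0f - t) * (1.0f - t) + 1.0f;
    // fourth tap chosen so the weights sum to 1 (the trailing sub.f32 chain)
    w.w = 1.0f - w.x - w.y - w.z;
    return w;
}
*/
//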
// .globl Subsample_Bicubic_p010le_yuv420p
.visible .entry Subsample_Bicubic_p010le_yuv420p(
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_0,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_1,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_2,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_3,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_4,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_5,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_6,
.param .u64 Subsample_Bicubic_p010le_yuv420p_param_7,
.param .u32 Subsample_Bicubic_p010le_yuv420p_param_8,
.param .u32 Subsample_Bicubic_p010le_yuv420p_param_9,
.param .u32 Subsample_Bicubic_p010le_yuv420p_param_10,
.param .u32 Subsample_Bicubic_p010le_yuv420p_param_11,
.param .u32 Subsample_Bicubic_p010le_yuv420p_param_12,
.param .f32 Subsample_Bicubic_p010le_yuv420p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB166_2;
bra.uni $L__BB166_1;
$L__BB166_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_yuv420p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB166_2:
ret;
}
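//
// Annotation (inferred): the p010le path samples the source as a
// normalized 16-bit texture, so the filtered result is rescaled by
// 65535.0 (0f477FFF00), truncated toward zero to u16, and the high byte
// is kept (shr.u16 by 8), reducing the 16-bit sample to the 8-bit
// yuv420p range. An illustrative CUDA equivalent of that store sequence,
// with assumed names:
/*
__device__ void store_8bit_from_16bit(unsigned char *dst, int idx, float sum)
{
    unsigned short v16 = (unsigned short)(sum * 65535.0f); // mul.f32 + cvt.rzi.u16.f32
    dst[idx] = (unsigned char)(v16 >> 8);                  // shr.u16 8, st.global.u8
}
*/
//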
// .globl Subsample_Bicubic_p010le_yuv420p_uv
.visible .entry Subsample_Bicubic_p010le_yuv420p_uv(
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_0,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_1,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_2,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_3,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_4,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_5,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_6,
.param .u64 Subsample_Bicubic_p010le_yuv420p_uv_param_7,
.param .u32 Subsample_Bicubic_p010le_yuv420p_uv_param_8,
.param .u32 Subsample_Bicubic_p010le_yuv420p_uv_param_9,
.param .u32 Subsample_Bicubic_p010le_yuv420p_uv_param_10,
.param .u32 Subsample_Bicubic_p010le_yuv420p_uv_param_11,
.param .u32 Subsample_Bicubic_p010le_yuv420p_uv_param_12,
.param .f32 Subsample_Bicubic_p010le_yuv420p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB167_2;
bra.uni $L__BB167_1;
$L__BB167_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_yuv420p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_p010le_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_p010le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB167_2:
ret;
}
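//
// Annotation (inferred): for p010le chroma the U and V samples are
// interleaved in one texture, so each tex.2d fetch yields both: the first
// returned register (%r17, %r21, ...) feeds one accumulator chain and the
// second (%r18, %r22, ...) the other, advanced in lock-step and stored to
// the two planes from params 5 and 6. A sketch of one tap under those
// assumptions, names illustrative:
/*
#include <cuda_runtime.h>   // for float2 and tex2D<>

__device__ void accumulate_uv_tap(cudaTextureObject_t src_tex_uv,
                                  float x, float y, float w,
                                  float *sum_u, float *sum_v)
{
    float2 uv = tex2D<float2>(src_tex_uv, x, y); // one fetch, both channels
    *sum_u += w * uv.x;  // mirrors the first accumulator chain
    *sum_v += w * uv.y;  // mirrors the second accumulator chain
}
*/
//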
// .globl Subsample_Bicubic_p016le_yuv420p
.visible .entry Subsample_Bicubic_p016le_yuv420p(
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_0,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_1,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_2,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_3,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_4,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_5,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_6,
.param .u64 Subsample_Bicubic_p016le_yuv420p_param_7,
.param .u32 Subsample_Bicubic_p016le_yuv420p_param_8,
.param .u32 Subsample_Bicubic_p016le_yuv420p_param_9,
.param .u32 Subsample_Bicubic_p016le_yuv420p_param_10,
.param .u32 Subsample_Bicubic_p016le_yuv420p_param_11,
.param .u32 Subsample_Bicubic_p016le_yuv420p_param_12,
.param .f32 Subsample_Bicubic_p016le_yuv420p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB168_2;
bra.uni $L__BB168_1;
$L__BB168_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_yuv420p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB168_2:
ret;
}
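//
// Annotation (inferred): the p016le luma body above appears to be the
// same instruction stream as the p010le one; both formats are sampled as
// normalized 16-bit textures, so only the entry name and labels differ.
//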
// .globl Subsample_Bicubic_p016le_yuv420p_uv
.visible .entry Subsample_Bicubic_p016le_yuv420p_uv(
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_0,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_1,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_2,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_3,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_4,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_5,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_6,
.param .u64 Subsample_Bicubic_p016le_yuv420p_uv_param_7,
.param .u32 Subsample_Bicubic_p016le_yuv420p_uv_param_8,
.param .u32 Subsample_Bicubic_p016le_yuv420p_uv_param_9,
.param .u32 Subsample_Bicubic_p016le_yuv420p_uv_param_10,
.param .u32 Subsample_Bicubic_p016le_yuv420p_uv_param_11,
.param .u32 Subsample_Bicubic_p016le_yuv420p_uv_param_12,
.param .f32 Subsample_Bicubic_p016le_yuv420p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB169_2;
bra.uni $L__BB169_1;
$L__BB169_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_yuv420p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_p016le_yuv420p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_p016le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB169_2:
ret;
}
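//
// Annotation (inferred): likewise, this p016le chroma body appears to
// match the p010le "_uv" body: one interleaved 16-bit chroma fetch per
// tap, two accumulator chains advanced in lock-step, and the same
// 65535.0-scale, shift-by-8 reduction to 8-bit output.
//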
// .globl Subsample_Bicubic_yuv444p16le_yuv420p
.visible .entry Subsample_Bicubic_yuv444p16le_yuv420p(
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_yuv420p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB170_2;
bra.uni $L__BB170_1;
$L__BB170_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_yuv420p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_yuv420p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_yuv420p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p16le_yuv420p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB170_2:
ret;
}
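//
// Annotation (inferred): the yuv444p16le luma kernel above reuses the
// 16-bit sampling and 65535.0-scale, shift-by-8 store path. Its "_uv"
// companion below differs from the p010le/p016le chroma kernels in that
// it samples two planar chroma textures (params 1 and 2) instead of one
// interleaved texture, which is why its register budget grows
// (%r<145>, %f<191>, %rd<44>).
//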
// .globl Subsample_Bicubic_yuv444p16le_yuv420p_uv
.visible .entry Subsample_Bicubic_yuv444p16le_yuv420p_uv(
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB171_2;
bra.uni $L__BB171_1;
$L__BB171_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv444p16le_yuv420p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f153;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd39, %r2, %r5;
cvt.s64.s32 %rd40, %r1;
add.s64 %rd41, %rd39, %rd40;
add.s64 %rd42, %rd2, %rd41;
st.global.u8 [%rd42], %rs2;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f190;
shr.u16 %rs4, %rs3, 8;
add.s64 %rd43, %rd1, %rd41;
st.global.u8 [%rd43], %rs4;
$L__BB171_2:
ret;
}
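//
// Annotation (inferred): the yuv420p -> nv12 luma kernel below is the
// 8-bit case: the source is sampled as a normalized 8-bit texture and the
// filtered value is rescaled by 255.0 (0f437F0000) before the truncating
// u16 convert and byte store, with no shift, since source and destination
// share the same bit depth.
//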
// .globl Subsample_Bicubic_yuv420p_nv12
.visible .entry Subsample_Bicubic_yuv420p_nv12(
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_0,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_1,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_2,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_3,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_4,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_5,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_6,
.param .u64 Subsample_Bicubic_yuv420p_nv12_param_7,
.param .u32 Subsample_Bicubic_yuv420p_nv12_param_8,
.param .u32 Subsample_Bicubic_yuv420p_nv12_param_9,
.param .u32 Subsample_Bicubic_yuv420p_nv12_param_10,
.param .u32 Subsample_Bicubic_yuv420p_nv12_param_11,
.param .u32 Subsample_Bicubic_yuv420p_nv12_param_12,
.param .f32 Subsample_Bicubic_yuv420p_nv12_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB172_2;
bra.uni $L__BB172_1;
$L__BB172_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_nv12_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv420p_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB172_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_nv12_uv
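//
// Subsample_Bicubic_yuv420p_nv12_uv: chroma companion of the kernel above.
// It runs the same bicubic filter twice, once over the U texture (param_1,
// held in %rd5) and once over the V texture (param_2, %rd21), then stores
// the two bytes interleaved (st.global.v2.u8) into the nv12 UV plane. The
// byte offset is built from param_10 >> 1 and a final shl by 1, i.e.
// row * pitch + 2 * col assuming an even pitch.
//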
.visible .entry Subsample_Bicubic_yuv420p_nv12_uv(
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_0,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_1,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_2,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_3,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_4,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_5,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_6,
.param .u64 Subsample_Bicubic_yuv420p_nv12_uv_param_7,
.param .u32 Subsample_Bicubic_yuv420p_nv12_uv_param_8,
.param .u32 Subsample_Bicubic_yuv420p_nv12_uv_param_9,
.param .u32 Subsample_Bicubic_yuv420p_nv12_uv_param_10,
.param .u32 Subsample_Bicubic_yuv420p_nv12_uv_param_11,
.param .u32 Subsample_Bicubic_yuv420p_nv12_uv_param_12,
.param .f32 Subsample_Bicubic_yuv420p_nv12_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB173_2;
bra.uni $L__BB173_1;
$L__BB173_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_nv12_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_nv12_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv420p_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv420p_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f190;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 1;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 1;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u8 [%rd44], {%rs1, %rs2};
$L__BB173_2:
ret;
}
// .globl Subsample_Bicubic_nv12_nv12
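//
// Subsample_Bicubic_nv12_nv12: luma pass for nv12-to-nv12 scaling. The
// body is identical to Subsample_Bicubic_yuv420p_nv12 above, since both
// formats carry the same 8-bit luma plane; only the chroma companion
// differs.
//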
.visible .entry Subsample_Bicubic_nv12_nv12(
.param .u64 Subsample_Bicubic_nv12_nv12_param_0,
.param .u64 Subsample_Bicubic_nv12_nv12_param_1,
.param .u64 Subsample_Bicubic_nv12_nv12_param_2,
.param .u64 Subsample_Bicubic_nv12_nv12_param_3,
.param .u64 Subsample_Bicubic_nv12_nv12_param_4,
.param .u64 Subsample_Bicubic_nv12_nv12_param_5,
.param .u64 Subsample_Bicubic_nv12_nv12_param_6,
.param .u64 Subsample_Bicubic_nv12_nv12_param_7,
.param .u32 Subsample_Bicubic_nv12_nv12_param_8,
.param .u32 Subsample_Bicubic_nv12_nv12_param_9,
.param .u32 Subsample_Bicubic_nv12_nv12_param_10,
.param .u32 Subsample_Bicubic_nv12_nv12_param_11,
.param .u32 Subsample_Bicubic_nv12_nv12_param_12,
.param .f32 Subsample_Bicubic_nv12_nv12_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB174_2;
bra.uni $L__BB174_1;
$L__BB174_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_nv12_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB174_2:
ret;
}
// .globl Subsample_Bicubic_nv12_nv12_uv
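//
// Subsample_Bicubic_nv12_nv12_uv: chroma pass reading a single interleaved
// UV texture (param_1) instead of two planar ones. Each tex fetch returns
// U and V together (%r17/%r18 and so on, reinterpreted as floats via
// mov.b32), so every filter step below is duplicated pairwise, and the
// resulting pair is stored interleaved with st.global.v2.u8.
//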
.visible .entry Subsample_Bicubic_nv12_nv12_uv(
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_0,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_1,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_2,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_3,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_4,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_5,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_6,
.param .u64 Subsample_Bicubic_nv12_nv12_uv_param_7,
.param .u32 Subsample_Bicubic_nv12_nv12_uv_param_8,
.param .u32 Subsample_Bicubic_nv12_nv12_uv_param_9,
.param .u32 Subsample_Bicubic_nv12_nv12_uv_param_10,
.param .u32 Subsample_Bicubic_nv12_nv12_uv_param_11,
.param .u32 Subsample_Bicubic_nv12_nv12_uv_param_12,
.param .f32 Subsample_Bicubic_nv12_nv12_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB175_2;
bra.uni $L__BB175_1;
$L__BB175_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_nv12_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f437F0000;
mul.f32 %f158, %f156, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u8 [%rd27], {%rs1, %rs2};
$L__BB175_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_nv12
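//
// Subsample_Bicubic_yuv444p_nv12: luma pass for yuv444p input. Same body
// as the other 8-bit luma kernels; the 4:4:4-to-4:2:0 chroma downsampling
// is left to the _uv companion below and falls out of the source and
// destination dimensions supplied in params 8-12.
//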
.visible .entry Subsample_Bicubic_yuv444p_nv12(
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_0,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_1,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_2,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_3,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_4,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_5,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_6,
.param .u64 Subsample_Bicubic_yuv444p_nv12_param_7,
.param .u32 Subsample_Bicubic_yuv444p_nv12_param_8,
.param .u32 Subsample_Bicubic_yuv444p_nv12_param_9,
.param .u32 Subsample_Bicubic_yuv444p_nv12_param_10,
.param .u32 Subsample_Bicubic_yuv444p_nv12_param_11,
.param .u32 Subsample_Bicubic_yuv444p_nv12_param_12,
.param .f32 Subsample_Bicubic_yuv444p_nv12_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB176_2;
bra.uni $L__BB176_1;
$L__BB176_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_nv12_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB176_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_nv12_uv
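//
// Subsample_Bicubic_yuv444p_nv12_uv: chroma pass over the two full
// resolution yuv444p chroma textures (param_1 and param_2). The body
// matches Subsample_Bicubic_yuv420p_nv12_uv; only the dimension ratio
// passed in by the host differs.
//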
.visible .entry Subsample_Bicubic_yuv444p_nv12_uv(
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p_nv12_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p_nv12_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p_nv12_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p_nv12_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p_nv12_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p_nv12_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p_nv12_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB177_2;
bra.uni $L__BB177_1;
$L__BB177_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_nv12_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_nv12_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv444p_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f190;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 1;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 1;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u8 [%rd44], {%rs1, %rs2};
$L__BB177_2:
ret;
}
// .globl Subsample_Bicubic_p010le_nv12
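//
// Subsample_Bicubic_p010le_nv12: luma pass converting a 16-bit p010le
// source to 8-bit nv12 output. The filter is unchanged; only the store
// path differs, scaling by 65535.0 (0f477FFF00) instead of 255.0,
// truncating to u16, and keeping the high byte (shr.u16 ..., 8) as the
// 8-bit result.
//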
.visible .entry Subsample_Bicubic_p010le_nv12(
.param .u64 Subsample_Bicubic_p010le_nv12_param_0,
.param .u64 Subsample_Bicubic_p010le_nv12_param_1,
.param .u64 Subsample_Bicubic_p010le_nv12_param_2,
.param .u64 Subsample_Bicubic_p010le_nv12_param_3,
.param .u64 Subsample_Bicubic_p010le_nv12_param_4,
.param .u64 Subsample_Bicubic_p010le_nv12_param_5,
.param .u64 Subsample_Bicubic_p010le_nv12_param_6,
.param .u64 Subsample_Bicubic_p010le_nv12_param_7,
.param .u32 Subsample_Bicubic_p010le_nv12_param_8,
.param .u32 Subsample_Bicubic_p010le_nv12_param_9,
.param .u32 Subsample_Bicubic_p010le_nv12_param_10,
.param .u32 Subsample_Bicubic_p010le_nv12_param_11,
.param .u32 Subsample_Bicubic_p010le_nv12_param_12,
.param .f32 Subsample_Bicubic_p010le_nv12_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB178_2;
bra.uni $L__BB178_1;
$L__BB178_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_nv12_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB178_2:
ret;
}
// .globl Subsample_Bicubic_p010le_nv12_uv
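//
// Subsample_Bicubic_p010le_nv12_uv: chroma pass reading the interleaved
// 16-bit UV texture (param_1), filtering U and V pairwise like the
// nv12_nv12_uv kernel, then narrowing both channels with the same
// 65535.0-scale-and-shift before the interleaved two-byte store.
//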
.visible .entry Subsample_Bicubic_p010le_nv12_uv(
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_0,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_1,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_2,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_3,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_4,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_5,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_6,
.param .u64 Subsample_Bicubic_p010le_nv12_uv_param_7,
.param .u32 Subsample_Bicubic_p010le_nv12_uv_param_8,
.param .u32 Subsample_Bicubic_p010le_nv12_uv_param_9,
.param .u32 Subsample_Bicubic_p010le_nv12_uv_param_10,
.param .u32 Subsample_Bicubic_p010le_nv12_uv_param_11,
.param .u32 Subsample_Bicubic_p010le_nv12_uv_param_12,
.param .f32 Subsample_Bicubic_p010le_nv12_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB179_2;
bra.uni $L__BB179_1;
$L__BB179_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_nv12_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
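// Scale the blended results to 16-bit range (0f477FFF00 = 65535.0), truncate,
// and keep the high byte: the 16-bit -> 8-bit depth reduction. The byte
// offset, ((pitch >> 1) * y + x) << 1 with pitch = param_10, lands on one
// interleaved {U,V} pair per pixel of the nv12 chroma plane.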
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 8;
shr.u16 %rs4, %rs2, 8;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u8 [%rd27], {%rs3, %rs4};
$L__BB179_2:
ret;
}
// .globl Subsample_Bicubic_p016le_nv12
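// Luma kernel, p016le (16-bit) -> nv12 (8-bit). Same structure as above:
// bounds check, bicubic weights, 4x4 gather (one channel per fetch here),
// then value * 65535.0 truncated and reduced to its high byte.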
.visible .entry Subsample_Bicubic_p016le_nv12(
.param .u64 Subsample_Bicubic_p016le_nv12_param_0,
.param .u64 Subsample_Bicubic_p016le_nv12_param_1,
.param .u64 Subsample_Bicubic_p016le_nv12_param_2,
.param .u64 Subsample_Bicubic_p016le_nv12_param_3,
.param .u64 Subsample_Bicubic_p016le_nv12_param_4,
.param .u64 Subsample_Bicubic_p016le_nv12_param_5,
.param .u64 Subsample_Bicubic_p016le_nv12_param_6,
.param .u64 Subsample_Bicubic_p016le_nv12_param_7,
.param .u32 Subsample_Bicubic_p016le_nv12_param_8,
.param .u32 Subsample_Bicubic_p016le_nv12_param_9,
.param .u32 Subsample_Bicubic_p016le_nv12_param_10,
.param .u32 Subsample_Bicubic_p016le_nv12_param_11,
.param .u32 Subsample_Bicubic_p016le_nv12_param_12,
.param .f32 Subsample_Bicubic_p016le_nv12_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB180_2;
bra.uni $L__BB180_1;
$L__BB180_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_nv12_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB180_2:
ret;
}
// .globl Subsample_Bicubic_p016le_nv12_uv
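// Chroma kernel, p016le -> nv12. Source UV is interleaved, so a single
// texture (param_1) supplies both channels; the result is stored as one
// 8-bit {U,V} pair via st.global.v2.u8.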
.visible .entry Subsample_Bicubic_p016le_nv12_uv(
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_0,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_1,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_2,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_3,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_4,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_5,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_6,
.param .u64 Subsample_Bicubic_p016le_nv12_uv_param_7,
.param .u32 Subsample_Bicubic_p016le_nv12_uv_param_8,
.param .u32 Subsample_Bicubic_p016le_nv12_uv_param_9,
.param .u32 Subsample_Bicubic_p016le_nv12_uv_param_10,
.param .u32 Subsample_Bicubic_p016le_nv12_uv_param_11,
.param .u32 Subsample_Bicubic_p016le_nv12_uv_param_12,
.param .f32 Subsample_Bicubic_p016le_nv12_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB181_2;
bra.uni $L__BB181_1;
$L__BB181_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_nv12_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_nv12_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 8;
shr.u16 %rs4, %rs2, 8;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u8 [%rd27], {%rs3, %rs4};
$L__BB181_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_nv12
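// Luma kernel, yuv444p16le -> nv12. Instruction-for-instruction the same as
// the p016le luma kernel above; the source layout presumably only affects
// how the host binds the source textures.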
.visible .entry Subsample_Bicubic_yuv444p16le_nv12(
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_nv12_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB182_2;
bra.uni $L__BB182_1;
$L__BB182_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_nv12_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_nv12_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_nv12_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p16le_nv12_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB182_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_nv12_uv
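// Chroma kernel, yuv444p16le -> nv12. U and V come from two separate planar
// textures (param_1 and param_2), so the 16-tap gather runs twice before the
// two bytes are interleaved into a single {U,V} store.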
.visible .entry Subsample_Bicubic_yuv444p16le_nv12_uv(
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_nv12_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_nv12_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_nv12_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB183_2;
bra.uni $L__BB183_1;
$L__BB183_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_nv12_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f153;
shr.u16 %rs2, %rs1, 8;
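// Second gather: identical coordinates and weights, now sampling the V-plane
// texture (%rd21, loaded from param_2).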
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f190;
shr.u16 %rs4, %rs3, 8;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 1;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 1;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u8 [%rd44], {%rs2, %rs4};
$L__BB183_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_yuv444p
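// Luma kernel, yuv420p -> yuv444p: the full-8-bit path. The blended value is
// scaled by 255.0 (0f437F0000) and stored directly, with no high-byte step.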
.visible .entry Subsample_Bicubic_yuv420p_yuv444p(
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_0,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_1,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_2,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_3,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_4,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_5,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_6,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_param_7,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_param_8,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_param_9,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_param_10,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_param_11,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_param_12,
.param .f32 Subsample_Bicubic_yuv420p_yuv444p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB184_2;
bra.uni $L__BB184_1;
$L__BB184_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_yuv444p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv420p_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB184_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_yuv444p_uv
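// Chroma kernel, yuv420p -> yuv444p: planar in, planar out. Two source
// textures (param_1, param_2) and two destination planes (param_5, param_6),
// each written with its own u8 store.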
.visible .entry Subsample_Bicubic_yuv420p_yuv444p_uv(
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_0,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_1,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_2,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_3,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_4,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_5,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_6,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p_uv_param_7,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_uv_param_8,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_uv_param_9,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_uv_param_10,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_uv_param_11,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p_uv_param_12,
.param .f32 Subsample_Bicubic_yuv420p_yuv444p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB185_2;
bra.uni $L__BB185_1;
$L__BB185_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv420p_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.wide.s32 %rd39, %r2, %r5;
cvt.s64.s32 %rd40, %r1;
add.s64 %rd41, %rd39, %rd40;
add.s64 %rd42, %rd2, %rd41;
st.global.u8 [%rd42], %rs1;
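// Repeat the identical gather for the second source plane and write the
// result at the same offset in the other destination plane.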
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f190;
add.s64 %rd43, %rd1, %rd41;
st.global.u8 [%rd43], %rs2;
$L__BB185_2:
ret;
}
// .globl Subsample_Bicubic_nv12_yuv444p
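// Luma kernel, nv12 -> yuv444p; follows the same 8-bit path as the
// yuv420p -> yuv444p luma kernel above.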
.visible .entry Subsample_Bicubic_nv12_yuv444p(
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_0,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_1,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_2,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_3,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_4,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_5,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_6,
.param .u64 Subsample_Bicubic_nv12_yuv444p_param_7,
.param .u32 Subsample_Bicubic_nv12_yuv444p_param_8,
.param .u32 Subsample_Bicubic_nv12_yuv444p_param_9,
.param .u32 Subsample_Bicubic_nv12_yuv444p_param_10,
.param .u32 Subsample_Bicubic_nv12_yuv444p_param_11,
.param .u32 Subsample_Bicubic_nv12_yuv444p_param_12,
.param .f32 Subsample_Bicubic_nv12_yuv444p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB186_2;
bra.uni $L__BB186_1;
$L__BB186_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_yuv444p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
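// Rows blended with the vertical weights, scaled by 255.0 (0f437F0000),
// truncated to an integer, and stored as a single byte at
// %rd1 + y * %r5 + x (%r5 is presumably the destination pitch).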
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB186_2:
ret;
}
// .globl Subsample_Bicubic_nv12_yuv444p_uv
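// The kernels below repeat the coordinate/weight computation above verbatim;
// only the source format and store width differ. This UV variant samples the
// two-channel texture from param_1 and carries both components through the
// same weighted sums in parallel: the .x channel is stored to the plane from
// param_5 and the .y channel to the plane from param_6.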
.visible .entry Subsample_Bicubic_nv12_yuv444p_uv(
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_0,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_1,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_2,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_3,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_4,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_5,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_6,
.param .u64 Subsample_Bicubic_nv12_yuv444p_uv_param_7,
.param .u32 Subsample_Bicubic_nv12_yuv444p_uv_param_8,
.param .u32 Subsample_Bicubic_nv12_yuv444p_uv_param_9,
.param .u32 Subsample_Bicubic_nv12_yuv444p_uv_param_10,
.param .u32 Subsample_Bicubic_nv12_yuv444p_uv_param_11,
.param .u32 Subsample_Bicubic_nv12_yuv444p_uv_param_12,
.param .f32 Subsample_Bicubic_nv12_yuv444p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB187_2;
bra.uni $L__BB187_1;
$L__BB187_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_yuv444p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_nv12_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_nv12_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f437F0000;
mul.f32 %f158, %f156, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs1;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs2;
$L__BB187_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_yuv444p
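// Plain 8-bit planar variant: one channel in, one byte out; the body is
// identical to the single-channel kernel above.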
.visible .entry Subsample_Bicubic_yuv444p_yuv444p(
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_0,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_1,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_2,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_3,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_4,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_5,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_6,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_param_7,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_param_8,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_param_9,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_param_10,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_param_11,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_param_12,
.param .f32 Subsample_Bicubic_yuv444p_yuv444p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB188_2;
bra.uni $L__BB188_1;
$L__BB188_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_yuv444p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB188_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_yuv444p_uv
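// Planar-source UV variant: the same 4x4 footprint is fetched twice, from
// the separate single-channel textures in param_1 (%rd7) and param_2 (%rd23),
// reusing the coordinates and weights, with one byte stored per output plane.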
.visible .entry Subsample_Bicubic_yuv444p_yuv444p_uv(
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p_yuv444p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB189_2;
bra.uni $L__BB189_1;
$L__BB189_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv444p_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.wide.s32 %rd39, %r2, %r5;
cvt.s64.s32 %rd40, %r1;
add.s64 %rd41, %rd39, %rd40;
add.s64 %rd42, %rd2, %rd41;
st.global.u8 [%rd42], %rs1;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f190;
add.s64 %rd43, %rd1, %rd41;
st.global.u8 [%rd43], %rs2;
$L__BB189_2:
ret;
}
// .globl Subsample_Bicubic_p010le_yuv444p
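// p010le source: identical sampling, but the blended result is scaled by
// 65535.0 (0f477FF00 is 65535.0f in the 0f477FFF00 constant below) and the
// converted value is shifted right by 8, so only the high byte is stored:
// a 16-bit to 8-bit depth reduction.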
.visible .entry Subsample_Bicubic_p010le_yuv444p(
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_0,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_1,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_2,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_3,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_4,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_5,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_6,
.param .u64 Subsample_Bicubic_p010le_yuv444p_param_7,
.param .u32 Subsample_Bicubic_p010le_yuv444p_param_8,
.param .u32 Subsample_Bicubic_p010le_yuv444p_param_9,
.param .u32 Subsample_Bicubic_p010le_yuv444p_param_10,
.param .u32 Subsample_Bicubic_p010le_yuv444p_param_11,
.param .u32 Subsample_Bicubic_p010le_yuv444p_param_12,
.param .f32 Subsample_Bicubic_p010le_yuv444p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB190_2;
bra.uni $L__BB190_1;
$L__BB190_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_yuv444p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB190_2:
ret;
}
// .globl Subsample_Bicubic_p010le_yuv444p_uv
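// UV counterpart of the p010le kernel above: both channels take the 65535.0
// scale and high-byte store, one byte to each destination plane.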
.visible .entry Subsample_Bicubic_p010le_yuv444p_uv(
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_0,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_1,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_2,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_3,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_4,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_5,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_6,
.param .u64 Subsample_Bicubic_p010le_yuv444p_uv_param_7,
.param .u32 Subsample_Bicubic_p010le_yuv444p_uv_param_8,
.param .u32 Subsample_Bicubic_p010le_yuv444p_uv_param_9,
.param .u32 Subsample_Bicubic_p010le_yuv444p_uv_param_10,
.param .u32 Subsample_Bicubic_p010le_yuv444p_uv_param_11,
.param .u32 Subsample_Bicubic_p010le_yuv444p_uv_param_12,
.param .f32 Subsample_Bicubic_p010le_yuv444p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB191_2;
bra.uni $L__BB191_1;
$L__BB191_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_yuv444p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_p010le_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_p010le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB191_2:
ret;
}
// .globl Subsample_Bicubic_p016le_yuv444p
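// p016le variant; the body matches the p010le kernel instruction for
// instruction, as both formats are sampled as 16-bit texels.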
.visible .entry Subsample_Bicubic_p016le_yuv444p(
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_0,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_1,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_2,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_3,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_4,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_5,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_6,
.param .u64 Subsample_Bicubic_p016le_yuv444p_param_7,
.param .u32 Subsample_Bicubic_p016le_yuv444p_param_8,
.param .u32 Subsample_Bicubic_p016le_yuv444p_param_9,
.param .u32 Subsample_Bicubic_p016le_yuv444p_param_10,
.param .u32 Subsample_Bicubic_p016le_yuv444p_param_11,
.param .u32 Subsample_Bicubic_p016le_yuv444p_param_12,
.param .f32 Subsample_Bicubic_p016le_yuv444p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB192_2;
bra.uni $L__BB192_1;
$L__BB192_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_yuv444p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB192_2:
ret;
}
// .globl Subsample_Bicubic_p016le_yuv444p_uv
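// p016le UV variant, matching the p010le UV kernel above.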
.visible .entry Subsample_Bicubic_p016le_yuv444p_uv(
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_0,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_1,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_2,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_3,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_4,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_5,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_6,
.param .u64 Subsample_Bicubic_p016le_yuv444p_uv_param_7,
.param .u32 Subsample_Bicubic_p016le_yuv444p_uv_param_8,
.param .u32 Subsample_Bicubic_p016le_yuv444p_uv_param_9,
.param .u32 Subsample_Bicubic_p016le_yuv444p_uv_param_10,
.param .u32 Subsample_Bicubic_p016le_yuv444p_uv_param_11,
.param .u32 Subsample_Bicubic_p016le_yuv444p_uv_param_12,
.param .f32 Subsample_Bicubic_p016le_yuv444p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB193_2;
bra.uni $L__BB193_1;
$L__BB193_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_yuv444p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_p016le_yuv444p_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_p016le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB193_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_yuv444p
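// 16-bit planar (yuv444p16le) source: same coordinate and weight setup,
// presumably ending in the same 65535.0 scale and high-byte store as the
// p016le kernels above.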
.visible .entry Subsample_Bicubic_yuv444p16le_yuv444p(
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_yuv444p_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB194_2;
bra.uni $L__BB194_1;
$L__BB194_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_yuv444p_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_yuv444p_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_yuv444p_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p16le_yuv444p_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
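// Coordinate mapping above: the destination pixel is projected into source
// space as src = (dst + 0.5) * (src_dim / dst_dim) - 0.5 (0f3F000000 = 0.5,
// 0fBF000000 = -0.5); %f4/%f11 hold the floor of that position and
// %f46/%f47 the fractional parts. The block below evaluates the separable
// bicubic (Keys) weights for both fractions; the identical sequence recurs
// in every bicubic kernel in this module. Comparing param 13 against
// 0f497423F0 (999999.0f, apparently a "use default" sentinel) selects
// A = 0.0, otherwise A = -param. The constants -5.0 (0fC0A00000), 8.0
// (0f41000000), -4.0 (0fC0800000), 2.0, and 3.0 implement
//   w(1+s) = ((A*(1+s) - 5A)*(1+s) + 8A)*(1+s) - 4A
//   w(s)   = ((A+2)*s - (A+3))*s*s + 1
// evaluated at s and 1-s; the fourth weight is 1 minus the other three, so
// each set of four weights sums to 1. The 16 fetches that follow cover the
// 4x4 window from floor-1 (offset 0fBF800000 = -1.0) to floor+2
// (0f40000000 = 2.0).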
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
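// %f92, %f100, %f108, and %f116 are the horizontally blended results for
// the four source rows; the fma chain below blends them vertically with
// the second weight set, completing the separable 4x4 bicubic filter.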
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB194_2:
ret;
}
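//
// Subsample_Bicubic_yuv444p16le_yuv444p_uv: chroma variant of the kernel
// above. One thread samples both chroma planes: params 1 and 2 are the U
// and V source textures, params 5 and 6 the planar destinations (yuv444p
// keeps chroma planar, so the two 8-bit results go to separate planes at
// the same y*pitch + x offset). The bicubic weights are computed once and
// reused for both planes.
//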
// .globl Subsample_Bicubic_yuv444p16le_yuv444p_uv
.visible .entry Subsample_Bicubic_yuv444p16le_yuv444p_uv(
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB195_2;
bra.uni $L__BB195_1;
$L__BB195_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv444p16le_yuv444p_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f153;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd39, %r2, %r5;
cvt.s64.s32 %rd40, %r1;
add.s64 %rd41, %rd39, %rd40;
add.s64 %rd42, %rd2, %rd41;
st.global.u8 [%rd42], %rs2;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f190;
shr.u16 %rs4, %rs3, 8;
add.s64 %rd43, %rd1, %rd41;
st.global.u8 [%rd43], %rs4;
$L__BB195_2:
ret;
}
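//
// Subsample_Bicubic_yuv420p_p010le: luma plane, 8-bit planar source to
// 10-bit p010le. The blended value is scaled by 255.0 (0f437F0000) and
// truncated to u16; multiplying by 257 (0x101) replicates the 8-bit value
// into both bytes, and and.b16 with -64 (0xFFC0) keeps the top 10 bits,
// matching p010le's convention of carrying 10 significant bits in the MSBs
// of a 16-bit word. The byte pitch is halved (shr 1) to index 16-bit
// elements, and the element index is shifted back (shl 1) to a byte offset.
//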
// .globl Subsample_Bicubic_yuv420p_p010le
.visible .entry Subsample_Bicubic_yuv420p_p010le(
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_0,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_1,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_2,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_3,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_4,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_5,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_6,
.param .u64 Subsample_Bicubic_yuv420p_p010le_param_7,
.param .u32 Subsample_Bicubic_yuv420p_p010le_param_8,
.param .u32 Subsample_Bicubic_yuv420p_p010le_param_9,
.param .u32 Subsample_Bicubic_yuv420p_p010le_param_10,
.param .u32 Subsample_Bicubic_yuv420p_p010le_param_11,
.param .u32 Subsample_Bicubic_yuv420p_p010le_param_12,
.param .f32 Subsample_Bicubic_yuv420p_p010le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB196_2;
bra.uni $L__BB196_1;
$L__BB196_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_p010le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv420p_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB196_2:
ret;
}
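//
// Subsample_Bicubic_yuv420p_p010le_uv: chroma, planar 8-bit U and V sources
// (textures in params 1 and 2) to the interleaved UV plane of p010le. Both
// channels are blended with shared weights, expanded to 10 bits as in the
// luma kernel, and written as a single st.global.v2.u16 pair; the byte
// pitch is divided by 4 (shr 2) to count UV pairs, and the pair index is
// shifted back (shl 2) to a byte offset.
//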
// .globl Subsample_Bicubic_yuv420p_p010le_uv
.visible .entry Subsample_Bicubic_yuv420p_p010le_uv(
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv420p_p010le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv420p_p010le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv420p_p010le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv420p_p010le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv420p_p010le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv420p_p010le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv420p_p010le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<7>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB197_2;
bra.uni $L__BB197_1;
$L__BB197_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_p010le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_p010le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv420p_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv420p_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs4, %f190;
mul.lo.s16 %rs5, %rs4, 257;
and.b16 %rs6, %rs5, -64;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs3, %rs6};
$L__BB197_2:
ret;
}
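//
// Subsample_Bicubic_nv12_p010le: luma plane. nv12 luma is the same 8-bit
// plane as yuv420p luma, so the body matches the yuv420p_p010le luma kernel
// above (255.0 scale, *257 expansion, 0xFFC0 mask, 16-bit store).
//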
// .globl Subsample_Bicubic_nv12_p010le
.visible .entry Subsample_Bicubic_nv12_p010le(
.param .u64 Subsample_Bicubic_nv12_p010le_param_0,
.param .u64 Subsample_Bicubic_nv12_p010le_param_1,
.param .u64 Subsample_Bicubic_nv12_p010le_param_2,
.param .u64 Subsample_Bicubic_nv12_p010le_param_3,
.param .u64 Subsample_Bicubic_nv12_p010le_param_4,
.param .u64 Subsample_Bicubic_nv12_p010le_param_5,
.param .u64 Subsample_Bicubic_nv12_p010le_param_6,
.param .u64 Subsample_Bicubic_nv12_p010le_param_7,
.param .u32 Subsample_Bicubic_nv12_p010le_param_8,
.param .u32 Subsample_Bicubic_nv12_p010le_param_9,
.param .u32 Subsample_Bicubic_nv12_p010le_param_10,
.param .u32 Subsample_Bicubic_nv12_p010le_param_11,
.param .u32 Subsample_Bicubic_nv12_p010le_param_12,
.param .f32 Subsample_Bicubic_nv12_p010le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB198_2;
bra.uni $L__BB198_1;
$L__BB198_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_p010le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB198_2:
ret;
}
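//
// Subsample_Bicubic_nv12_p010le_uv: chroma. nv12 chroma is already
// interleaved, so a single texture (param 1) is sampled and each fetch
// yields U in the .x component and V in the .y component (hence the paired
// mov.b32 from %r17/%r18 and so on). Both channels run through the bicubic
// blend in parallel, are expanded to 10 bits, and are stored as one
// st.global.v2.u16 UV pair.
//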
// .globl Subsample_Bicubic_nv12_p010le_uv
.visible .entry Subsample_Bicubic_nv12_p010le_uv(
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_0,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_1,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_2,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_3,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_4,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_5,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_6,
.param .u64 Subsample_Bicubic_nv12_p010le_uv_param_7,
.param .u32 Subsample_Bicubic_nv12_p010le_uv_param_8,
.param .u32 Subsample_Bicubic_nv12_p010le_uv_param_9,
.param .u32 Subsample_Bicubic_nv12_p010le_uv_param_10,
.param .u32 Subsample_Bicubic_nv12_p010le_uv_param_11,
.param .u32 Subsample_Bicubic_nv12_p010le_uv_param_12,
.param .f32 Subsample_Bicubic_nv12_p010le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<7>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB199_2;
bra.uni $L__BB199_1;
$L__BB199_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_p010le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f437F0000;
mul.f32 %f158, %f156, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
mul.lo.s16 %rs3, %rs1, 257;
and.b16 %rs4, %rs3, -64;
mul.lo.s16 %rs5, %rs2, 257;
and.b16 %rs6, %rs5, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs4, %rs6};
$L__BB199_2:
ret;
}
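//
// Subsample_Bicubic_yuv444p_p010le: luma plane, 8-bit planar source to
// 10-bit p010le; the body is the same as the other 8-bit *_p010le luma
// kernels, the format distinction mattering only for the chroma variant
// that follows.
//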
// .globl Subsample_Bicubic_yuv444p_p010le
.visible .entry Subsample_Bicubic_yuv444p_p010le(
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_0,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_1,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_2,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_3,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_4,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_5,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_6,
.param .u64 Subsample_Bicubic_yuv444p_p010le_param_7,
.param .u32 Subsample_Bicubic_yuv444p_p010le_param_8,
.param .u32 Subsample_Bicubic_yuv444p_p010le_param_9,
.param .u32 Subsample_Bicubic_yuv444p_p010le_param_10,
.param .u32 Subsample_Bicubic_yuv444p_p010le_param_11,
.param .u32 Subsample_Bicubic_yuv444p_p010le_param_12,
.param .f32 Subsample_Bicubic_yuv444p_p010le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB200_2;
bra.uni $L__BB200_1;
$L__BB200_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_p010le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB200_2:
ret;
}
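//
// Subsample_Bicubic_yuv444p_p010le_uv: chroma, full-resolution planar U and
// V sources (textures in params 1 and 2) to the interleaved UV plane of
// p010le, presumably following the same store pattern as
// yuv420p_p010le_uv above.
//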
// .globl Subsample_Bicubic_yuv444p_p010le_uv
.visible .entry Subsample_Bicubic_yuv444p_p010le_uv(
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p_p010le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p_p010le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p_p010le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p_p010le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p_p010le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p_p010le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p_p010le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<7>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB201_2;
bra.uni $L__BB201_1;
$L__BB201_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_p010le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_p010le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv444p_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
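// 0f497423F0 is exactly 999999.0, apparently a "parameter unset" sentinel:
// the bicubic coefficient becomes A = (param == 999999.0) ? 0.0 : -param.
// With A = 0 the outer-tap weights vanish and the filter degenerates to a
// two-tap cubic Hermite blend.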
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
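// Per-axis bicubic weights (Keys cubic convolution) for fractional offset d:
//   w0 = A*s^3 - 5A*s^2 + 8A*s - 4A   with s = d+1   (tap at -1)
//   w1 = (A+2)*d^3 - (A+3)*d^2 + 1                   (tap at  0)
//   w2 = w1 evaluated at 1-d                         (tap at +1)
//   w3 = 1 - w0 - w1 - w2                            (tap at +2)
// %f88/%f94/%f99/%f102 hold the x weights, %f106/%f109/%f113/%f116 the y
// weights.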
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
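// 4x4 tap window: columns %f2/%f4/%f6/%f8 are fx-1..fx+2 and rows
// %f3/%f11/%f19/%f27 are fy-1..fy+2. Each group of four fetches below is
// reduced with the x weights, and the four row sums are then combined with
// the y weights.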
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
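// First channel done: %rs3 holds the packed U sample. The same 16 taps and
// weights are now applied to the second texture (%rd21) for V.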
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs4, %f190;
mul.lo.s16 %rs5, %rs4, 257;
and.b16 %rs6, %rs5, -64;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs3, %rs6};
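// Interleaved store: param_10 >> 2 is the row stride in UV pairs, and the
// element offset is shifted left by 2 because each u16 pair occupies 4 bytes.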
$L__BB201_2:
ret;
}
// .globl Subsample_Bicubic_p010le_p010le
.visible .entry Subsample_Bicubic_p010le_p010le(
.param .u64 Subsample_Bicubic_p010le_p010le_param_0,
.param .u64 Subsample_Bicubic_p010le_p010le_param_1,
.param .u64 Subsample_Bicubic_p010le_p010le_param_2,
.param .u64 Subsample_Bicubic_p010le_p010le_param_3,
.param .u64 Subsample_Bicubic_p010le_p010le_param_4,
.param .u64 Subsample_Bicubic_p010le_p010le_param_5,
.param .u64 Subsample_Bicubic_p010le_p010le_param_6,
.param .u64 Subsample_Bicubic_p010le_p010le_param_7,
.param .u32 Subsample_Bicubic_p010le_p010le_param_8,
.param .u32 Subsample_Bicubic_p010le_p010le_param_9,
.param .u32 Subsample_Bicubic_p010le_p010le_param_10,
.param .u32 Subsample_Bicubic_p010le_p010le_param_11,
.param .u32 Subsample_Bicubic_p010le_p010le_param_12,
.param .f32 Subsample_Bicubic_p010le_p010le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB202_2;
bra.uni $L__BB202_1;
$L__BB202_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_p010le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
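// 16-bit to 16-bit path: the result is rescaled by 65535.0 (0f477FFF00) and
// truncated with no low-bit mask, presumably because a p010le source already
// carries zeros in its 6 LSBs.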
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB202_2:
ret;
}
// .globl Subsample_Bicubic_p010le_p010le_uv
.visible .entry Subsample_Bicubic_p010le_p010le_uv(
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_0,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_1,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_2,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_3,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_4,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_5,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_6,
.param .u64 Subsample_Bicubic_p010le_p010le_uv_param_7,
.param .u32 Subsample_Bicubic_p010le_p010le_uv_param_8,
.param .u32 Subsample_Bicubic_p010le_p010le_uv_param_9,
.param .u32 Subsample_Bicubic_p010le_p010le_uv_param_10,
.param .u32 Subsample_Bicubic_p010le_p010le_uv_param_11,
.param .u32 Subsample_Bicubic_p010le_p010le_uv_param_12,
.param .f32 Subsample_Bicubic_p010le_p010le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB203_2;
bra.uni $L__BB203_1;
$L__BB203_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_p010le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
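// Packed-chroma source: each fetch returns both channels from one texture,
// component x (%r17) carrying the first chroma channel and component y
// (%r18) the second, so every tap feeds two parallel weight chains.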
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs1, %rs2};
$L__BB203_2:
ret;
}
// .globl Subsample_Bicubic_p016le_p010le
.visible .entry Subsample_Bicubic_p016le_p010le(
.param .u64 Subsample_Bicubic_p016le_p010le_param_0,
.param .u64 Subsample_Bicubic_p016le_p010le_param_1,
.param .u64 Subsample_Bicubic_p016le_p010le_param_2,
.param .u64 Subsample_Bicubic_p016le_p010le_param_3,
.param .u64 Subsample_Bicubic_p016le_p010le_param_4,
.param .u64 Subsample_Bicubic_p016le_p010le_param_5,
.param .u64 Subsample_Bicubic_p016le_p010le_param_6,
.param .u64 Subsample_Bicubic_p016le_p010le_param_7,
.param .u32 Subsample_Bicubic_p016le_p010le_param_8,
.param .u32 Subsample_Bicubic_p016le_p010le_param_9,
.param .u32 Subsample_Bicubic_p016le_p010le_param_10,
.param .u32 Subsample_Bicubic_p016le_p010le_param_11,
.param .u32 Subsample_Bicubic_p016le_p010le_param_12,
.param .f32 Subsample_Bicubic_p016le_p010le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB204_2;
bra.uni $L__BB204_1;
$L__BB204_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_p010le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
and.b16 %rs2, %rs1, -64;
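// Full-range 16-bit result: scaled by 65535.0, then and.b16 with -64 clears
// the 6 LSBs so the stored sample matches p010le's 10-bits-in-MSBs layout.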
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB204_2:
ret;
}
// .globl Subsample_Bicubic_p016le_p010le_uv
.visible .entry Subsample_Bicubic_p016le_p010le_uv(
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_0,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_1,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_2,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_3,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_4,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_5,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_6,
.param .u64 Subsample_Bicubic_p016le_p010le_uv_param_7,
.param .u32 Subsample_Bicubic_p016le_p010le_uv_param_8,
.param .u32 Subsample_Bicubic_p016le_p010le_uv_param_9,
.param .u32 Subsample_Bicubic_p016le_p010le_uv_param_10,
.param .u32 Subsample_Bicubic_p016le_p010le_uv_param_11,
.param .u32 Subsample_Bicubic_p016le_p010le_uv_param_12,
.param .f32 Subsample_Bicubic_p016le_p010le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB205_2;
bra.uni $L__BB205_1;
$L__BB205_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_p010le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_p010le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
and.b16 %rs3, %rs1, -64;
and.b16 %rs4, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs3, %rs4};
$L__BB205_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_p010le
.visible .entry Subsample_Bicubic_yuv444p16le_p010le(
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_p010le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
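// Body identical to Subsample_Bicubic_p016le_p010le above: for this planar
// 16-bit source only the bound texture differs, so the generated code is the
// same apart from the parameter names.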
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB206_2;
bra.uni $L__BB206_1;
$L__BB206_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_p010le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_p010le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_p010le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p16le_p010le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
and.b16 %rs2, %rs1, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB206_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_p010le_uv
.visible .entry Subsample_Bicubic_yuv444p16le_p010le_uv(
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_p010le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_p010le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_p010le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
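// Planar _uv variant of the 16-bit path: as in the yuv444p case above, U and
// V come from separate textures (param_1, param_2), but the samples are
// already 16-bit, so the results are masked to the 10 MSBs rather than
// widened via *257.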
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB207_2;
bra.uni $L__BB207_1;
$L__BB207_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_p010le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f153;
and.b16 %rs2, %rs1, -64;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f190;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs2, %rs4};
$L__BB207_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_p016le
.visible .entry Subsample_Bicubic_yuv420p_p016le(
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_0,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_1,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_2,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_3,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_4,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_5,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_6,
.param .u64 Subsample_Bicubic_yuv420p_p016le_param_7,
.param .u32 Subsample_Bicubic_yuv420p_p016le_param_8,
.param .u32 Subsample_Bicubic_yuv420p_p016le_param_9,
.param .u32 Subsample_Bicubic_yuv420p_p016le_param_10,
.param .u32 Subsample_Bicubic_yuv420p_p016le_param_11,
.param .u32 Subsample_Bicubic_yuv420p_p016le_param_12,
.param .f32 Subsample_Bicubic_yuv420p_p016le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
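// yuv420p (8-bit planar) luma to p016le: the same bicubic body, reading the
// source from param_0.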
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB208_2;
bra.uni $L__BB208_1;
$L__BB208_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_p016le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv420p_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
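// Keys bicubic weights, one axis at a time, via Horner's rule. With A chosen
// below (A = 0 when param 13 equals the 999999.0 sentinel, A = -param
// otherwise, 0f497423F0 = 999999.0):
//   w(s) = (A+2)s^3 - (A+3)s^2 + 1         for s in [0,1)  (inner taps)
//   w(s) = A s^3 - 5A s^2 + 8A s - 4A      for s in [1,2)  (outer taps)
// Three weights are evaluated directly at s = t+1, t, and 1-t; the fourth is
// obtained as 1 minus the sum of the other three, since the weights form a
// partition of unity. The block then repeats for the vertical fraction.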
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
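// 4x4 tap window: fetch the 16 texels at x = xi-1 .. xi+2, y = yi-1 .. yi+2,
// reduce each row with the horizontal weights, then blend the four row
// results with the vertical weights.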
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
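// Pack for p016le: scale to 8-bit range (0f437F0000 = 255.0), truncate to
// u16, then multiply by 257 (= 0x0101) to replicate the byte into both
// halves, expanding the 8-bit sample to full 16-bit range. The row index uses
// pitch/2 because destination pixels are 2 bytes wide.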
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB208_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_p016le_uv
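//
// Chroma variant for planar sources: params 1 and 2 (%rd5 and %rd21) are two
// separate chroma textures, so the full 4x4 bicubic pass runs twice, once per
// plane. The two 16-bit results are stored interleaved with st.global.v2.u16;
// the row index uses pitch/4 because each CbCr pair occupies 4 bytes.
//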
.visible .entry Subsample_Bicubic_yuv420p_p016le_uv(
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv420p_p016le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv420p_p016le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv420p_p016le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv420p_p016le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv420p_p016le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv420p_p016le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv420p_p016le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB209_2;
bra.uni $L__BB209_1;
$L__BB209_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_p016le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_p016le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv420p_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv420p_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.lo.s16 %rs2, %rs1, 257;
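// Second pass: repeat the same 4x4 bicubic filter with the same weights on
// the second chroma texture (%rd21).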
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f190;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs2, %rs4};
$L__BB209_2:
ret;
}
// .globl Subsample_Bicubic_nv12_p016le
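//
// nv12 -> p016le luma: matches the yuv420p kernel above register for
// register, since both formats carry an 8-bit Y plane; only the entry name
// differs.
//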
.visible .entry Subsample_Bicubic_nv12_p016le(
.param .u64 Subsample_Bicubic_nv12_p016le_param_0,
.param .u64 Subsample_Bicubic_nv12_p016le_param_1,
.param .u64 Subsample_Bicubic_nv12_p016le_param_2,
.param .u64 Subsample_Bicubic_nv12_p016le_param_3,
.param .u64 Subsample_Bicubic_nv12_p016le_param_4,
.param .u64 Subsample_Bicubic_nv12_p016le_param_5,
.param .u64 Subsample_Bicubic_nv12_p016le_param_6,
.param .u64 Subsample_Bicubic_nv12_p016le_param_7,
.param .u32 Subsample_Bicubic_nv12_p016le_param_8,
.param .u32 Subsample_Bicubic_nv12_p016le_param_9,
.param .u32 Subsample_Bicubic_nv12_p016le_param_10,
.param .u32 Subsample_Bicubic_nv12_p016le_param_11,
.param .u32 Subsample_Bicubic_nv12_p016le_param_12,
.param .f32 Subsample_Bicubic_nv12_p016le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB210_2;
bra.uni $L__BB210_1;
$L__BB210_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_p016le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB210_2:
ret;
}
// .globl Subsample_Bicubic_nv12_p016le_uv
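//
// Semi-planar chroma: nv12 stores CbCr interleaved, so a single texture
// (param 1) is sampled and each fetch returns both chroma channels in its .x
// and .y components (hence the paired mov.b32 from the first two texture
// registers). One set of 16 fetches filters both channels at once, unlike the
// two-pass planar kernels.
//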
.visible .entry Subsample_Bicubic_nv12_p016le_uv(
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_0,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_1,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_2,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_3,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_4,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_5,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_6,
.param .u64 Subsample_Bicubic_nv12_p016le_uv_param_7,
.param .u32 Subsample_Bicubic_nv12_p016le_uv_param_8,
.param .u32 Subsample_Bicubic_nv12_p016le_uv_param_9,
.param .u32 Subsample_Bicubic_nv12_p016le_uv_param_10,
.param .u32 Subsample_Bicubic_nv12_p016le_uv_param_11,
.param .u32 Subsample_Bicubic_nv12_p016le_uv_param_12,
.param .f32 Subsample_Bicubic_nv12_p016le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB211_2;
bra.uni $L__BB211_1;
$L__BB211_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_p016le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f437F0000;
mul.f32 %f158, %f156, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
mul.lo.s16 %rs3, %rs1, 257;
mul.lo.s16 %rs4, %rs2, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs3, %rs4};
$L__BB211_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_p016le
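//
// yuv444p -> p016le luma: same 8-bit luma body as the yuv420p and nv12
// kernels above.
//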
.visible .entry Subsample_Bicubic_yuv444p_p016le(
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_0,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_1,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_2,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_3,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_4,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_5,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_6,
.param .u64 Subsample_Bicubic_yuv444p_p016le_param_7,
.param .u32 Subsample_Bicubic_yuv444p_p016le_param_8,
.param .u32 Subsample_Bicubic_yuv444p_p016le_param_9,
.param .u32 Subsample_Bicubic_yuv444p_p016le_param_10,
.param .u32 Subsample_Bicubic_yuv444p_p016le_param_11,
.param .u32 Subsample_Bicubic_yuv444p_p016le_param_12,
.param .f32 Subsample_Bicubic_yuv444p_p016le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB212_2;
bra.uni $L__BB212_1;
$L__BB212_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_p016le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB212_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_p016le_uv
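//
// yuv444p chroma: two-texture planar structure identical to the yuv420p _uv
// kernel. The absence of chroma subsampling in the source only changes the
// width/height values passed in params 8-12, presumably set by the host.
//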
.visible .entry Subsample_Bicubic_yuv444p_p016le_uv(
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p_p016le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p_p016le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p_p016le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p_p016le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p_p016le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p_p016le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p_p016le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB213_2;
bra.uni $L__BB213_1;
$L__BB213_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_p016le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_p016le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv444p_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.lo.s16 %rs2, %rs1, 257;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f190;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs2, %rs4};
$L__BB213_2:
ret;
}
// .globl Subsample_Bicubic_p010le_p016le
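//
// p010le -> p016le luma: the only 16-bit source format in this group. The
// filter body is unchanged; the difference is in the output packing at the
// end of the kernel.
//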
.visible .entry Subsample_Bicubic_p010le_p016le(
.param .u64 Subsample_Bicubic_p010le_p016le_param_0,
.param .u64 Subsample_Bicubic_p010le_p016le_param_1,
.param .u64 Subsample_Bicubic_p010le_p016le_param_2,
.param .u64 Subsample_Bicubic_p010le_p016le_param_3,
.param .u64 Subsample_Bicubic_p010le_p016le_param_4,
.param .u64 Subsample_Bicubic_p010le_p016le_param_5,
.param .u64 Subsample_Bicubic_p010le_p016le_param_6,
.param .u64 Subsample_Bicubic_p010le_p016le_param_7,
.param .u32 Subsample_Bicubic_p010le_p016le_param_8,
.param .u32 Subsample_Bicubic_p010le_p016le_param_9,
.param .u32 Subsample_Bicubic_p010le_p016le_param_10,
.param .u32 Subsample_Bicubic_p010le_p016le_param_11,
.param .u32 Subsample_Bicubic_p010le_p016le_param_12,
.param .f32 Subsample_Bicubic_p010le_p016le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB214_2;
bra.uni $L__BB214_1;
$L__BB214_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_p016le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
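// Pack for p016le from a 10-bit source: scale by 65535.0 (0f477FFF00) to get
// the MSB-aligned 10-bit value v, then OR in (v >> 10) to replicate the top
// bits into the low bits, a standard 10-to-16-bit range expansion.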
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB214_2:
ret;
}
// .globl Subsample_Bicubic_p010le_p016le_uv
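//
// p010le chroma: single interleaved CbCr texture (param 1); the filter
// structure matches the nv12 _uv kernel above.
//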
.visible .entry Subsample_Bicubic_p010le_p016le_uv(
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_0,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_1,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_2,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_3,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_4,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_5,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_6,
.param .u64 Subsample_Bicubic_p010le_p016le_uv_param_7,
.param .u32 Subsample_Bicubic_p010le_p016le_uv_param_8,
.param .u32 Subsample_Bicubic_p010le_p016le_uv_param_9,
.param .u32 Subsample_Bicubic_p010le_p016le_uv_param_10,
.param .u32 Subsample_Bicubic_p010le_p016le_uv_param_11,
.param .u32 Subsample_Bicubic_p010le_p016le_uv_param_12,
.param .f32 Subsample_Bicubic_p010le_p016le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<7>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB215_2;
bra.uni $L__BB215_1;
$L__BB215_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_p016le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
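// p010le carries its 10 significant bits in the MSBs of each u16; OR-ing in
// (v >> 10) is the usual MSB-replication idiom, expanding the value to full
// 16-bit range for the p016le output.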
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs4, %rs6};
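// %r5 is the destination pitch in bytes: >> 2 converts it to ushort2
// elements for the row stride, and << 2 converts the element index back to
// a byte offset before the packed {U, V} store.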
$L__BB215_2:
ret;
}
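// ---------------------------------------------------------------------------
// Note on the Subsample_Bicubic_* family: every body in this cache follows
// one scheme: map the destination pixel into source space, fetch the 4x4
// texel neighbourhood around floor(coord), and reduce it with separable Keys
// cubic weights. Below is a minimal CUDA sketch of that scheme, reconstructed
// from the register flow above; function and parameter names are illustrative
// assumptions, not symbols recovered from this binary.
//
//   __device__ static float keys_w(float A, float t) // t = tap distance >= 0
//   {
//       if (t <= 1.0f)                               // inner taps, t in [0,1]
//           return ((A + 2.0f) * t - (A + 3.0f)) * t * t + 1.0f;
//       return ((A * t - 5.0f * A) * t + 8.0f * A) * t - 4.0f * A; // t in [1,2]
//   }
//
//   __global__ void subsample_bicubic_u16(cudaTextureObject_t src,
//                                         unsigned short *dst,
//                                         int dst_w, int dst_h, int pitch,
//                                         int src_w, int src_h, float param)
//   {
//       int x = blockIdx.x * blockDim.x + threadIdx.x;
//       int y = blockIdx.y * blockDim.y + threadIdx.y;
//       if (x >= dst_w || y >= dst_h)
//           return;
//       float A  = (param == 999999.0f) ? 0.0f : -param; // sentinel default
//       float xi = (float)src_w / dst_w * (x + 0.5f) - 0.5f;
//       float yi = (float)src_h / dst_h * (y + 0.5f) - 0.5f;
//       float px = floorf(xi), fx = xi - px;
//       float py = floorf(yi), fy = yi - py;
//       float wx[4], wy[4], acc = 0.0f;
//       wx[0] = keys_w(A, 1.0f + fx);                // tap at px - 1
//       wx[1] = keys_w(A, fx);                       // tap at px
//       wx[2] = keys_w(A, 1.0f - fx);                // tap at px + 1
//       wx[3] = 1.0f - wx[0] - wx[1] - wx[2];        // tap at px + 2
//       wy[0] = keys_w(A, 1.0f + fy);
//       wy[1] = keys_w(A, fy);
//       wy[2] = keys_w(A, 1.0f - fy);
//       wy[3] = 1.0f - wy[0] - wy[1] - wy[2];
//       for (int j = 0; j < 4; j++)                  // 16 point-sampled taps
//           for (int i = 0; i < 4; i++)
//               acc += wy[j] * wx[i] *
//                      tex2D<float>(src, px - 1.0f + i, py - 1.0f + j);
//       dst[y * (pitch / 2) + x] = (unsigned short)(acc * 65535.0f);
//   }
//
// The per-format kernels below differ only in how many components they fetch
// per tap and in the final scale/store step: 65535.0 vs 255.0 then * 257,
// single u16 vs packed v2.u16, one destination plane vs two.
// ---------------------------------------------------------------------------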
// .globl Subsample_Bicubic_p016le_p016le
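// Luma variant: texture param_0, one component used per tap; the filtered
// value is scaled by 65535.0 (0f477FFF00) and stored as a single u16
// (pitch >> 1 elements per row, index << 1 back to bytes).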
.visible .entry Subsample_Bicubic_p016le_p016le(
.param .u64 Subsample_Bicubic_p016le_p016le_param_0,
.param .u64 Subsample_Bicubic_p016le_p016le_param_1,
.param .u64 Subsample_Bicubic_p016le_p016le_param_2,
.param .u64 Subsample_Bicubic_p016le_p016le_param_3,
.param .u64 Subsample_Bicubic_p016le_p016le_param_4,
.param .u64 Subsample_Bicubic_p016le_p016le_param_5,
.param .u64 Subsample_Bicubic_p016le_p016le_param_6,
.param .u64 Subsample_Bicubic_p016le_p016le_param_7,
.param .u32 Subsample_Bicubic_p016le_p016le_param_8,
.param .u32 Subsample_Bicubic_p016le_p016le_param_9,
.param .u32 Subsample_Bicubic_p016le_p016le_param_10,
.param .u32 Subsample_Bicubic_p016le_p016le_param_11,
.param .u32 Subsample_Bicubic_p016le_p016le_param_12,
.param .f32 Subsample_Bicubic_p016le_p016le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB216_2;
bra.uni $L__BB216_1;
$L__BB216_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_p016le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB216_2:
ret;
}
// .globl Subsample_Bicubic_p016le_p016le_uv
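// Interleaved-chroma variant: each fetch returns a {U, V} pair and the
// reduction runs on both components in lockstep, ending in one packed
// v2.u16 store. p016le is already full 16-bit, so there is no bit
// replication here (contrast the p010le source variant above).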
.visible .entry Subsample_Bicubic_p016le_p016le_uv(
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_0,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_1,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_2,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_3,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_4,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_5,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_6,
.param .u64 Subsample_Bicubic_p016le_p016le_uv_param_7,
.param .u32 Subsample_Bicubic_p016le_p016le_uv_param_8,
.param .u32 Subsample_Bicubic_p016le_p016le_uv_param_9,
.param .u32 Subsample_Bicubic_p016le_p016le_uv_param_10,
.param .u32 Subsample_Bicubic_p016le_p016le_uv_param_11,
.param .u32 Subsample_Bicubic_p016le_p016le_uv_param_12,
.param .f32 Subsample_Bicubic_p016le_p016le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB217_2;
bra.uni $L__BB217_1;
$L__BB217_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_p016le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_p016le_uv_param_1];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs1, %rs2};
$L__BB217_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_p016le
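// Body is identical to the p016le -> p016le luma kernel above: the planar
// 16-bit source reads the same way through the normalized-float texture.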
.visible .entry Subsample_Bicubic_yuv444p16le_p016le(
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_p016le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB218_2;
bra.uni $L__BB218_1;
$L__BB218_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_p016le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_p016le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_p016le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p16le_p016le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB218_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_p016le_uv
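// yuv444p16le keeps U and V in separate planes, so this variant samples two
// textures (param_1 and param_2) with the same weight set and packs the two
// results into a single {U, V} store for the semi-planar output.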
.visible .entry Subsample_Bicubic_yuv444p16le_p016le_uv(
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_p016le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_p016le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_p016le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB219_2;
bra.uni $L__BB219_1;
$L__BB219_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_2];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_p016le_uv_param_5];
cvta.to.global.u64 %rd1, %rd4;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f153;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f477FFF00;
cvt.rzi.u16.f32 %rs2, %f190;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs1, %rs2};
$L__BB219_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_yuv444p16le
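// 8-bit source, 16-bit planar output: the filtered value is scaled by
// 255.0 (0f437F0000), truncated, then multiplied by 257 (0x101), which
// replicates the byte into both halves of the u16 (0x00AB -> 0xABAB,
// so 255 maps to 65535).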
.visible .entry Subsample_Bicubic_yuv420p_yuv444p16le(
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_0,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_1,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_2,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_3,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_4,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_5,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_6,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_param_7,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_param_8,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_param_9,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_param_10,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_param_11,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_param_12,
.param .f32 Subsample_Bicubic_yuv420p_yuv444p16le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB220_2;
bra.uni $L__BB220_1;
$L__BB220_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_yuv444p16le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv420p_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv420p_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB220_2:
ret;
}
// .globl Subsample_Bicubic_yuv420p_yuv444p16le_uv
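// Planar chroma to planar 16-bit: two source textures (param_1, param_2)
// and two u16 stores at the same element offset into the two destination
// planes (param_5 and param_6).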
.visible .entry Subsample_Bicubic_yuv420p_yuv444p16le_uv(
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<48>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB221_2;
bra.uni $L__BB221_1;
$L__BB221_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv420p_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd39, %r2;
cvt.s64.s32 %rd40, %r5;
shr.u64 %rd41, %rd40, 1;
mul.lo.s64 %rd42, %rd41, %rd39;
cvt.s64.s32 %rd43, %r1;
add.s64 %rd44, %rd42, %rd43;
shl.b64 %rd45, %rd44, 1;
add.s64 %rd46, %rd2, %rd45;
st.global.u16 [%rd46], %rs2;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f190;
mul.lo.s16 %rs4, %rs3, 257;
add.s64 %rd47, %rd1, %rd45;
st.global.u16 [%rd47], %rs4;
$L__BB221_2:
ret;
}
// .globl Subsample_Bicubic_nv12_yuv444p16le
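// Matches the yuv420p -> yuv444p16le luma kernel above: nv12 and yuv420p
// share the same 8-bit luma plane layout, so the generated body is identical.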
.visible .entry Subsample_Bicubic_nv12_yuv444p16le(
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_0,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_1,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_2,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_3,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_4,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_5,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_6,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_param_7,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_param_8,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_param_9,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_param_10,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_param_11,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_param_12,
.param .f32 Subsample_Bicubic_nv12_yuv444p16le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB222_2;
bra.uni $L__BB222_1;
$L__BB222_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_yuv444p16le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_nv12_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
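// Separable 4x4 filter: fetch the 16 neighbours with tex.2d at the tap
// coordinates {%f2,%f4,%f6,%f8} x {%f3,%f11,%f19,%f27} (base-1 .. base+2),
// reduce each row horizontally with the X weights, then blend the four
// row sums vertically with the Y weights.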
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
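// Pack and store: the texture read is a normalized float, so *255.0
// (0f437F0000) recovers the 8-bit value, cvt.rzi truncates toward zero,
// and *257 replicates the byte into both halves (v*257 == v<<8 | v),
// i.e. 8-bit full scale becomes 16-bit full scale. Destination byte
// offset is ((pitch/2)*y + x) << 1, i.e. pitch*y + 2*x.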
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB222_2:
ret;
}
// .globl Subsample_Bicubic_nv12_yuv444p16le_uv
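// UV variant: nv12 chroma is interleaved, so each texture fetch returns a
// Cb/Cr pair ({%r17,%r18}, ...) and the whole filter runs as two parallel
// accumulator chains; the results land at the same offset in two separate
// output planes, presumably U (param_5, via %rd2) and V (param_6, via %rd1).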
.visible .entry Subsample_Bicubic_nv12_yuv444p16le_uv(
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bicubic_nv12_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bicubic_nv12_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bicubic_nv12_yuv444p16le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<31>;
ld.param.u32 %r4, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB223_2;
bra.uni $L__BB223_1;
$L__BB223_1:
ld.param.f32 %f1, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_nv12_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
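// Same packing as the luma kernel, done twice: both channels are scaled by
// 255.0 and byte-replicated with *257 before the two 16-bit stores.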
mul.f32 %f157, %f155, 0f437F0000;
mul.f32 %f158, %f156, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
mul.lo.s16 %rs3, %rs1, 257;
cvt.s64.s32 %rd22, %r2;
cvt.s64.s32 %rd23, %r5;
shr.u64 %rd24, %rd23, 1;
mul.lo.s64 %rd25, %rd24, %rd22;
cvt.s64.s32 %rd26, %r1;
add.s64 %rd27, %rd25, %rd26;
shl.b64 %rd28, %rd27, 1;
add.s64 %rd29, %rd2, %rd28;
st.global.u16 [%rd29], %rs3;
mul.lo.s16 %rs4, %rs2, 257;
add.s64 %rd30, %rd1, %rd28;
st.global.u16 [%rd30], %rs4;
$L__BB223_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_yuv444p16le
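// Same filter body as Subsample_Bicubic_nv12_yuv444p16le; the difference
// is presumably only which source texture the host binds (planar 8-bit
// luma rather than nv12), since the PTX is otherwise identical.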
.visible .entry Subsample_Bicubic_yuv444p_yuv444p16le(
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_0,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_1,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_2,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_3,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_4,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_5,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_6,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_param_7,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_param_8,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_param_9,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_param_10,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_param_11,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_param_12,
.param .f32 Subsample_Bicubic_yuv444p_yuv444p16le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB224_2;
bra.uni $L__BB224_1;
$L__BB224_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_yuv444p16le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f121;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB224_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p_yuv444p16le_uv
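// Planar-chroma variant: U and V come from two distinct textures (param_1
// in %rd7, param_2 in %rd23), so the full 16-fetch pipeline runs twice
// back to back; hence the larger register budget (%r<145>, %f<191>)
// compared with the interleaved nv12 UV kernel.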
.visible .entry Subsample_Bicubic_yuv444p_yuv444p16le_uv(
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<48>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB225_2;
bra.uni $L__BB225_1;
$L__BB225_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv444p_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f153;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd39, %r2;
cvt.s64.s32 %rd40, %r5;
shr.u64 %rd41, %rd40, 1;
mul.lo.s64 %rd42, %rd41, %rd39;
cvt.s64.s32 %rd43, %r1;
add.s64 %rd44, %rd42, %rd43;
shl.b64 %rd45, %rd44, 1;
add.s64 %rd46, %rd2, %rd45;
st.global.u16 [%rd46], %rs2;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f190;
mul.lo.s16 %rs4, %rs3, 257;
add.s64 %rd47, %rd1, %rd45;
st.global.u16 [%rd47], %rs4;
$L__BB225_2:
ret;
}
// .globl Subsample_Bicubic_p010le_yuv444p16le
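// p010le source: samples are 10-bit, MSB-aligned in 16 bits. The output
// scale is therefore 65535.0 (0f477FFF00) instead of 255.0, and the
// shr/or pair after conversion replicates the six most significant bits
// into the six zero LSBs, spreading the 10-bit range over full 16 bits.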
.visible .entry Subsample_Bicubic_p010le_yuv444p16le(
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_0,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_1,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_2,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_3,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_4,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_5,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_6,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_param_7,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_param_8,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_param_9,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_param_10,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_param_11,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_param_12,
.param .f32 Subsample_Bicubic_p010le_yuv444p16le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB226_2;
bra.uni $L__BB226_1;
$L__BB226_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_yuv444p16le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p010le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
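// v = (u16)(result * 65535.0); store v | (v >> 10), the bit-replication
// step described above.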
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB226_2:
ret;
}
// .globl Subsample_Bicubic_p010le_yuv444p16le_uv
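// Interleaved 10-bit chroma: same dual-accumulator structure as the nv12
// UV kernel, with the p010le scale-and-replicate store applied per channel.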
.visible .entry Subsample_Bicubic_p010le_yuv444p16le_uv(
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bicubic_p010le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bicubic_p010le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bicubic_p010le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<7>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<31>;
ld.param.u32 %r4, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB227_2;
bra.uni $L__BB227_1;
$L__BB227_1:
ld.param.f32 %f1, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_p010le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
cvt.s64.s32 %rd22, %r2;
cvt.s64.s32 %rd23, %r5;
shr.u64 %rd24, %rd23, 1;
mul.lo.s64 %rd25, %rd24, %rd22;
cvt.s64.s32 %rd26, %r1;
add.s64 %rd27, %rd25, %rd26;
shl.b64 %rd28, %rd27, 1;
add.s64 %rd29, %rd2, %rd28;
st.global.u16 [%rd29], %rs4;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
add.s64 %rd30, %rd1, %rd28;
st.global.u16 [%rd30], %rs6;
$L__BB227_2:
ret;
}
// .globl Subsample_Bicubic_p016le_yuv444p16le
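// p016le source is already full 16-bit, so the result is scaled by 65535.0
// and stored directly; no bit-replication step is needed.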
.visible .entry Subsample_Bicubic_p016le_yuv444p16le(
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_0,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_1,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_2,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_3,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_4,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_5,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_6,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_param_7,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_param_8,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_param_9,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_param_10,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_param_11,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_param_12,
.param .f32 Subsample_Bicubic_p016le_yuv444p16le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB228_2;
bra.uni $L__BB228_1;
$L__BB228_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_yuv444p16le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_p016le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB228_2:
ret;
}
// .globl Subsample_Bicubic_p016le_yuv444p16le_uv
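// 16-bit interleaved chroma: dual accumulators as in the other UV kernels,
// with direct 16-bit stores for both channels.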
.visible .entry Subsample_Bicubic_p016le_yuv444p16le_uv(
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bicubic_p016le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bicubic_p016le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bicubic_p016le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<159>;
.reg .b64 %rd<31>;
ld.param.u32 %r4, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB229_2;
bra.uni $L__BB229_1;
$L__BB229_1:
ld.param.f32 %f1, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_1];
ld.param.u64 %rd4, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd4;
ld.param.u64 %rd5, [Subsample_Bicubic_p016le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd5;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r18;
mov.b32 %f86, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f4, %f3}];
// end inline asm
mov.b32 %f87, %r22;
mov.b32 %f88, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f6, %f3}];
// end inline asm
mov.b32 %f89, %r26;
mov.b32 %f90, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f8, %f3}];
// end inline asm
mov.b32 %f91, %r30;
mov.b32 %f92, %r29;
mul.f32 %f93, %f62, %f88;
mul.f32 %f94, %f62, %f87;
fma.rn.f32 %f95, %f56, %f86, %f93;
fma.rn.f32 %f96, %f56, %f85, %f94;
fma.rn.f32 %f97, %f67, %f90, %f95;
fma.rn.f32 %f98, %f67, %f89, %f96;
fma.rn.f32 %f99, %f70, %f92, %f97;
fma.rn.f32 %f100, %f70, %f91, %f98;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f2, %f11}];
// end inline asm
mov.b32 %f101, %r34;
mov.b32 %f102, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f4, %f11}];
// end inline asm
mov.b32 %f103, %r38;
mov.b32 %f104, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f6, %f11}];
// end inline asm
mov.b32 %f105, %r42;
mov.b32 %f106, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f8, %f11}];
// end inline asm
mov.b32 %f107, %r46;
mov.b32 %f108, %r45;
mul.f32 %f109, %f62, %f104;
mul.f32 %f110, %f62, %f103;
fma.rn.f32 %f111, %f56, %f102, %f109;
fma.rn.f32 %f112, %f56, %f101, %f110;
fma.rn.f32 %f113, %f67, %f106, %f111;
fma.rn.f32 %f114, %f67, %f105, %f112;
fma.rn.f32 %f115, %f70, %f108, %f113;
fma.rn.f32 %f116, %f70, %f107, %f114;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f2, %f19}];
// end inline asm
mov.b32 %f117, %r50;
mov.b32 %f118, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f4, %f19}];
// end inline asm
mov.b32 %f119, %r54;
mov.b32 %f120, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f6, %f19}];
// end inline asm
mov.b32 %f121, %r58;
mov.b32 %f122, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f8, %f19}];
// end inline asm
mov.b32 %f123, %r62;
mov.b32 %f124, %r61;
mul.f32 %f125, %f62, %f120;
mul.f32 %f126, %f62, %f119;
fma.rn.f32 %f127, %f56, %f118, %f125;
fma.rn.f32 %f128, %f56, %f117, %f126;
fma.rn.f32 %f129, %f67, %f122, %f127;
fma.rn.f32 %f130, %f67, %f121, %f128;
fma.rn.f32 %f131, %f70, %f124, %f129;
fma.rn.f32 %f132, %f70, %f123, %f130;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f2, %f27}];
// end inline asm
mov.b32 %f133, %r66;
mov.b32 %f134, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f4, %f27}];
// end inline asm
mov.b32 %f135, %r70;
mov.b32 %f136, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f6, %f27}];
// end inline asm
mov.b32 %f137, %r74;
mov.b32 %f138, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f8, %f27}];
// end inline asm
mov.b32 %f139, %r78;
mov.b32 %f140, %r77;
mul.f32 %f141, %f62, %f136;
mul.f32 %f142, %f62, %f135;
fma.rn.f32 %f143, %f56, %f134, %f141;
fma.rn.f32 %f144, %f56, %f133, %f142;
fma.rn.f32 %f145, %f67, %f138, %f143;
fma.rn.f32 %f146, %f67, %f137, %f144;
fma.rn.f32 %f147, %f70, %f140, %f145;
fma.rn.f32 %f148, %f70, %f139, %f146;
mul.f32 %f149, %f77, %f115;
mul.f32 %f150, %f77, %f116;
fma.rn.f32 %f151, %f74, %f99, %f149;
fma.rn.f32 %f152, %f74, %f100, %f150;
fma.rn.f32 %f153, %f81, %f131, %f151;
fma.rn.f32 %f154, %f81, %f132, %f152;
fma.rn.f32 %f155, %f84, %f147, %f153;
fma.rn.f32 %f156, %f84, %f148, %f154;
mul.f32 %f157, %f155, 0f477FFF00;
mul.f32 %f158, %f156, 0f477FFF00;
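// Scale both blended channels to the 16-bit range (0f477FFF00 = 65535.0),
// truncate to u16, and store U and V to their planes. pitch (param_10) is
// in bytes, so it is halved to index 16-bit elements and the element
// offset is re-doubled into a byte offset below.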
cvt.rzi.u16.f32 %rs1, %f157;
cvt.rzi.u16.f32 %rs2, %f158;
cvt.s64.s32 %rd22, %r2;
cvt.s64.s32 %rd23, %r5;
shr.u64 %rd24, %rd23, 1;
mul.lo.s64 %rd25, %rd24, %rd22;
cvt.s64.s32 %rd26, %r1;
add.s64 %rd27, %rd25, %rd26;
shl.b64 %rd28, %rd27, 1;
add.s64 %rd29, %rd2, %rd28;
st.global.u16 [%rd29], %rs1;
add.s64 %rd30, %rd1, %rd28;
st.global.u16 [%rd30], %rs2;
$L__BB229_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_yuv444p16le
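// Bicubic rescale of one 16-bit planar YUV plane per launch. The kernel
// naming suggests this cache entry was compiled from FFmpeg's CUDA scale
// filter; that provenance is an inference, not recorded in the listing.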
.visible .entry Subsample_Bicubic_yuv444p16le_yuv444p16le(
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_yuv444p16le_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<122>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB230_2;
bra.uni $L__BB230_1;
$L__BB230_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_yuv444p16le_yuv444p16le_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
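// Map the destination pixel center to source coordinates, per axis:
//   src = (dst + 0.5) * (src_dim / dst_dim) - 0.5
// (0f3F000000 = 0.5, 0fBF000000 = -0.5), then split into the integer tap
// base (floor, via cvt.rmi) and the fractional offset used for weighting.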
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
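// param_13 appears to be an algorithm parameter: it is compared against a
// sentinel (0f497423F0 = 999999.0f); when equal, the cubic coefficient A
// falls back to 0.0, otherwise A = -param_13 (%f49).
//
// The FMA chains below evaluate a Keys-style cubic convolution kernel. A
// hedged reconstruction of the four tap weights for fractional t in [0,1):
//   w0 = A*(t+1)^3 - 5A*(t+1)^2 + 8A*(t+1) - 4A   // outer tap, 1 < |x| < 2
//   w1 = (A+2)*t^3 - (A+3)*t^2 + 1                // inner tap, |x| <= 1
//   w2 = (A+2)*(1-t)^3 - (A+3)*(1-t)^2 + 1
//   w3 = 1 - w0 - w1 - w2                         // residual outer tap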
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
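// x-direction weights: %f56, %f62, %f67, %f70; y-direction weights: %f74,
// %f77, %f81, %f84 (the same polynomial evaluated on the vertical
// fraction).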
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
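// Fetch the 4x4 tap neighborhood at columns {xb-1, xb, xb+1, xb+2} and
// rows {yb-1, yb, yb+1, yb+2}, where xb/yb are the floored source
// coordinates. Each row is blended horizontally with the x-weights, then
// the four row results are blended vertically with the y-weights.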
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f86, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f87, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f88, %r29;
mul.f32 %f89, %f62, %f86;
fma.rn.f32 %f90, %f56, %f85, %f89;
fma.rn.f32 %f91, %f67, %f87, %f90;
fma.rn.f32 %f92, %f70, %f88, %f91;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f93, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f94, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f95, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f96, %r45;
mul.f32 %f97, %f62, %f94;
fma.rn.f32 %f98, %f56, %f93, %f97;
fma.rn.f32 %f99, %f67, %f95, %f98;
fma.rn.f32 %f100, %f70, %f96, %f99;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f101, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f102, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f103, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f104, %r61;
mul.f32 %f105, %f62, %f102;
fma.rn.f32 %f106, %f56, %f101, %f105;
fma.rn.f32 %f107, %f67, %f103, %f106;
fma.rn.f32 %f108, %f70, %f104, %f107;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f109, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f110, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f111, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f112, %r77;
mul.f32 %f113, %f62, %f110;
fma.rn.f32 %f114, %f56, %f109, %f113;
fma.rn.f32 %f115, %f67, %f111, %f114;
fma.rn.f32 %f116, %f70, %f112, %f115;
mul.f32 %f117, %f77, %f100;
fma.rn.f32 %f118, %f74, %f92, %f117;
fma.rn.f32 %f119, %f81, %f108, %f118;
fma.rn.f32 %f120, %f84, %f116, %f119;
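// Scale the blended sample to the 16-bit range (0f477FFF00 = 65535.0),
// truncate to u16, and store at (x, y). pitch (param_10, in bytes) is
// halved to index 16-bit elements, then the element offset is re-doubled.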
mul.f32 %f121, %f120, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f121;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB230_2:
ret;
}
// .globl Subsample_Bicubic_yuv444p16le_yuv444p16le_uv
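// Chroma variant: same math as the luma kernel above, but it samples two
// source textures (param_1 and param_2) and writes two destination planes
// (param_5 and param_6), running one full interpolation pass per channel.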
.visible .entry Subsample_Bicubic_yuv444p16le_yuv444p16le_uv(
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<191>;
.reg .b64 %rd<48>;
ld.param.u32 %r4, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB231_2;
bra.uni $L__BB231_1;
$L__BB231_1:
ld.param.f32 %f1, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_10];
ld.param.u64 %rd23, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_2];
ld.param.u64 %rd7, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_1];
ld.param.u64 %rd5, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_6];
cvta.to.global.u64 %rd1, %rd5;
ld.param.u64 %rd6, [Subsample_Bicubic_yuv444p16le_yuv444p16le_uv_param_5];
cvta.to.global.u64 %rd2, %rd6;
cvt.rn.f32.s32 %f66, %r6;
cvt.rn.f32.s32 %f67, %r3;
div.rn.f32 %f68, %f66, %f67;
cvt.rn.f32.s32 %f69, %r7;
cvt.rn.f32.s32 %f70, %r4;
div.rn.f32 %f71, %f69, %f70;
cvt.rn.f32.s32 %f72, %r1;
add.f32 %f73, %f72, 0f3F000000;
fma.rn.f32 %f74, %f68, %f73, 0fBF000000;
cvt.rn.f32.s32 %f75, %r2;
add.f32 %f76, %f75, 0f3F000000;
fma.rn.f32 %f77, %f71, %f76, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f74;
cvt.rmi.f32.f32 %f11, %f77;
sub.f32 %f78, %f74, %f4;
sub.f32 %f79, %f77, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f80, %f1;
selp.f32 %f81, 0f00000000, %f80, %p4;
add.f32 %f82, %f78, 0f3F800000;
mul.f32 %f83, %f81, 0fC0A00000;
fma.rn.f32 %f84, %f81, %f82, %f83;
mul.f32 %f85, %f81, 0f41000000;
fma.rn.f32 %f86, %f82, %f84, %f85;
mul.f32 %f87, %f81, 0fC0800000;
fma.rn.f32 %f88, %f82, %f86, %f87;
add.f32 %f89, %f81, 0f40000000;
add.f32 %f90, %f81, 0f40400000;
neg.f32 %f91, %f90;
fma.rn.f32 %f92, %f89, %f78, %f91;
mul.f32 %f93, %f78, %f92;
fma.rn.f32 %f94, %f78, %f93, 0f3F800000;
mov.f32 %f95, 0f3F800000;
sub.f32 %f96, %f95, %f78;
fma.rn.f32 %f97, %f89, %f96, %f91;
mul.f32 %f98, %f96, %f97;
fma.rn.f32 %f99, %f96, %f98, 0f3F800000;
sub.f32 %f100, %f95, %f88;
sub.f32 %f101, %f100, %f94;
sub.f32 %f102, %f101, %f99;
add.f32 %f103, %f79, 0f3F800000;
fma.rn.f32 %f104, %f81, %f103, %f83;
fma.rn.f32 %f105, %f103, %f104, %f85;
fma.rn.f32 %f106, %f103, %f105, %f87;
fma.rn.f32 %f107, %f89, %f79, %f91;
mul.f32 %f108, %f79, %f107;
fma.rn.f32 %f109, %f79, %f108, 0f3F800000;
sub.f32 %f110, %f95, %f79;
fma.rn.f32 %f111, %f89, %f110, %f91;
mul.f32 %f112, %f110, %f111;
fma.rn.f32 %f113, %f110, %f112, 0f3F800000;
sub.f32 %f114, %f95, %f106;
sub.f32 %f115, %f114, %f109;
sub.f32 %f116, %f115, %f113;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd7, {%f2, %f3}];
// end inline asm
mov.b32 %f117, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd7, {%f4, %f3}];
// end inline asm
mov.b32 %f118, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd7, {%f6, %f3}];
// end inline asm
mov.b32 %f119, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd7, {%f8, %f3}];
// end inline asm
mov.b32 %f120, %r29;
mul.f32 %f121, %f94, %f118;
fma.rn.f32 %f122, %f88, %f117, %f121;
fma.rn.f32 %f123, %f99, %f119, %f122;
fma.rn.f32 %f124, %f102, %f120, %f123;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd7, {%f2, %f11}];
// end inline asm
mov.b32 %f125, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd7, {%f4, %f11}];
// end inline asm
mov.b32 %f126, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd7, {%f6, %f11}];
// end inline asm
mov.b32 %f127, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd7, {%f8, %f11}];
// end inline asm
mov.b32 %f128, %r45;
mul.f32 %f129, %f94, %f126;
fma.rn.f32 %f130, %f88, %f125, %f129;
fma.rn.f32 %f131, %f99, %f127, %f130;
fma.rn.f32 %f132, %f102, %f128, %f131;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd7, {%f2, %f19}];
// end inline asm
mov.b32 %f133, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd7, {%f4, %f19}];
// end inline asm
mov.b32 %f134, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd7, {%f6, %f19}];
// end inline asm
mov.b32 %f135, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd7, {%f8, %f19}];
// end inline asm
mov.b32 %f136, %r61;
mul.f32 %f137, %f94, %f134;
fma.rn.f32 %f138, %f88, %f133, %f137;
fma.rn.f32 %f139, %f99, %f135, %f138;
fma.rn.f32 %f140, %f102, %f136, %f139;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd7, {%f2, %f27}];
// end inline asm
mov.b32 %f141, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd7, {%f4, %f27}];
// end inline asm
mov.b32 %f142, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd7, {%f6, %f27}];
// end inline asm
mov.b32 %f143, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd7, {%f8, %f27}];
// end inline asm
mov.b32 %f144, %r77;
mul.f32 %f145, %f94, %f142;
fma.rn.f32 %f146, %f88, %f141, %f145;
fma.rn.f32 %f147, %f99, %f143, %f146;
fma.rn.f32 %f148, %f102, %f144, %f147;
mul.f32 %f149, %f109, %f132;
fma.rn.f32 %f150, %f106, %f124, %f149;
fma.rn.f32 %f151, %f113, %f140, %f150;
fma.rn.f32 %f152, %f116, %f148, %f151;
mul.f32 %f153, %f152, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f153;
cvt.s64.s32 %rd39, %r2;
cvt.s64.s32 %rd40, %r5;
shr.u64 %rd41, %rd40, 1;
mul.lo.s64 %rd42, %rd41, %rd39;
cvt.s64.s32 %rd43, %r1;
add.s64 %rd44, %rd42, %rd43;
shl.b64 %rd45, %rd44, 1;
add.s64 %rd46, %rd2, %rd45;
st.global.u16 [%rd46], %rs1;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd23, {%f2, %f3}];
// end inline asm
mov.b32 %f154, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd23, {%f4, %f3}];
// end inline asm
mov.b32 %f155, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd23, {%f6, %f3}];
// end inline asm
mov.b32 %f156, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd23, {%f8, %f3}];
// end inline asm
mov.b32 %f157, %r93;
mul.f32 %f158, %f94, %f155;
fma.rn.f32 %f159, %f88, %f154, %f158;
fma.rn.f32 %f160, %f99, %f156, %f159;
fma.rn.f32 %f161, %f102, %f157, %f160;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd23, {%f2, %f11}];
// end inline asm
mov.b32 %f162, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd23, {%f4, %f11}];
// end inline asm
mov.b32 %f163, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd23, {%f6, %f11}];
// end inline asm
mov.b32 %f164, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd23, {%f8, %f11}];
// end inline asm
mov.b32 %f165, %r109;
mul.f32 %f166, %f94, %f163;
fma.rn.f32 %f167, %f88, %f162, %f166;
fma.rn.f32 %f168, %f99, %f164, %f167;
fma.rn.f32 %f169, %f102, %f165, %f168;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd23, {%f2, %f19}];
// end inline asm
mov.b32 %f170, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd23, {%f4, %f19}];
// end inline asm
mov.b32 %f171, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd23, {%f6, %f19}];
// end inline asm
mov.b32 %f172, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd23, {%f8, %f19}];
// end inline asm
mov.b32 %f173, %r125;
mul.f32 %f174, %f94, %f171;
fma.rn.f32 %f175, %f88, %f170, %f174;
fma.rn.f32 %f176, %f99, %f172, %f175;
fma.rn.f32 %f177, %f102, %f173, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd23, {%f2, %f27}];
// end inline asm
mov.b32 %f178, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd23, {%f4, %f27}];
// end inline asm
mov.b32 %f179, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd23, {%f6, %f27}];
// end inline asm
mov.b32 %f180, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd23, {%f8, %f27}];
// end inline asm
mov.b32 %f181, %r141;
mul.f32 %f182, %f94, %f179;
fma.rn.f32 %f183, %f88, %f178, %f182;
fma.rn.f32 %f184, %f99, %f180, %f183;
fma.rn.f32 %f185, %f102, %f181, %f184;
mul.f32 %f186, %f109, %f169;
fma.rn.f32 %f187, %f106, %f161, %f186;
fma.rn.f32 %f188, %f113, %f177, %f187;
fma.rn.f32 %f189, %f116, %f185, %f188;
mul.f32 %f190, %f189, 0f477FFF00;
cvt.rzi.u16.f32 %rs2, %f190;
add.s64 %rd47, %rd1, %rd45;
st.global.u16 [%rd47], %rs2;
$L__BB231_2:
ret;
}
// .globl Subsample_Bicubic_bgr0_bgr0
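// Packed 8-bit path: all four components of each texel are interpolated,
// scaled by 255.0 (0f437F0000), and stored as a single v4.u8; the byte
// pitch is divided by 4 to index 4-byte pixels.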
.visible .entry Subsample_Bicubic_bgr0_bgr0(
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_0,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_1,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_2,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_3,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_4,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_5,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_6,
.param .u64 Subsample_Bicubic_bgr0_bgr0_param_7,
.param .u32 Subsample_Bicubic_bgr0_bgr0_param_8,
.param .u32 Subsample_Bicubic_bgr0_bgr0_param_9,
.param .u32 Subsample_Bicubic_bgr0_bgr0_param_10,
.param .u32 Subsample_Bicubic_bgr0_bgr0_param_11,
.param .u32 Subsample_Bicubic_bgr0_bgr0_param_12,
.param .f32 Subsample_Bicubic_bgr0_bgr0_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<233>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_bgr0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_bgr0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB232_2;
bra.uni $L__BB232_1;
$L__BB232_1:
ld.param.f32 %f1, [Subsample_Bicubic_bgr0_bgr0_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_bgr0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_bgr0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_bgr0_bgr0_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_bgr0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_bgr0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r20;
mov.b32 %f86, %r19;
mov.b32 %f87, %r18;
mov.b32 %f88, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f89, %r24;
mov.b32 %f90, %r23;
mov.b32 %f91, %r22;
mov.b32 %f92, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f93, %r28;
mov.b32 %f94, %r27;
mov.b32 %f95, %r26;
mov.b32 %f96, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f97, %r32;
mov.b32 %f98, %r31;
mov.b32 %f99, %r30;
mov.b32 %f100, %r29;
mul.f32 %f101, %f62, %f92;
mul.f32 %f102, %f62, %f91;
mul.f32 %f103, %f62, %f90;
mul.f32 %f104, %f62, %f89;
fma.rn.f32 %f105, %f56, %f88, %f101;
fma.rn.f32 %f106, %f56, %f87, %f102;
fma.rn.f32 %f107, %f56, %f86, %f103;
fma.rn.f32 %f108, %f56, %f85, %f104;
fma.rn.f32 %f109, %f67, %f96, %f105;
fma.rn.f32 %f110, %f67, %f95, %f106;
fma.rn.f32 %f111, %f67, %f94, %f107;
fma.rn.f32 %f112, %f67, %f93, %f108;
fma.rn.f32 %f113, %f70, %f100, %f109;
fma.rn.f32 %f114, %f70, %f99, %f110;
fma.rn.f32 %f115, %f70, %f98, %f111;
fma.rn.f32 %f116, %f70, %f97, %f112;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f117, %r36;
mov.b32 %f118, %r35;
mov.b32 %f119, %r34;
mov.b32 %f120, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f121, %r40;
mov.b32 %f122, %r39;
mov.b32 %f123, %r38;
mov.b32 %f124, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f125, %r44;
mov.b32 %f126, %r43;
mov.b32 %f127, %r42;
mov.b32 %f128, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f129, %r48;
mov.b32 %f130, %r47;
mov.b32 %f131, %r46;
mov.b32 %f132, %r45;
mul.f32 %f133, %f62, %f124;
mul.f32 %f134, %f62, %f123;
mul.f32 %f135, %f62, %f122;
mul.f32 %f136, %f62, %f121;
fma.rn.f32 %f137, %f56, %f120, %f133;
fma.rn.f32 %f138, %f56, %f119, %f134;
fma.rn.f32 %f139, %f56, %f118, %f135;
fma.rn.f32 %f140, %f56, %f117, %f136;
fma.rn.f32 %f141, %f67, %f128, %f137;
fma.rn.f32 %f142, %f67, %f127, %f138;
fma.rn.f32 %f143, %f67, %f126, %f139;
fma.rn.f32 %f144, %f67, %f125, %f140;
fma.rn.f32 %f145, %f70, %f132, %f141;
fma.rn.f32 %f146, %f70, %f131, %f142;
fma.rn.f32 %f147, %f70, %f130, %f143;
fma.rn.f32 %f148, %f70, %f129, %f144;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f149, %r52;
mov.b32 %f150, %r51;
mov.b32 %f151, %r50;
mov.b32 %f152, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f153, %r56;
mov.b32 %f154, %r55;
mov.b32 %f155, %r54;
mov.b32 %f156, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f157, %r60;
mov.b32 %f158, %r59;
mov.b32 %f159, %r58;
mov.b32 %f160, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f161, %r64;
mov.b32 %f162, %r63;
mov.b32 %f163, %r62;
mov.b32 %f164, %r61;
mul.f32 %f165, %f62, %f156;
mul.f32 %f166, %f62, %f155;
mul.f32 %f167, %f62, %f154;
mul.f32 %f168, %f62, %f153;
fma.rn.f32 %f169, %f56, %f152, %f165;
fma.rn.f32 %f170, %f56, %f151, %f166;
fma.rn.f32 %f171, %f56, %f150, %f167;
fma.rn.f32 %f172, %f56, %f149, %f168;
fma.rn.f32 %f173, %f67, %f160, %f169;
fma.rn.f32 %f174, %f67, %f159, %f170;
fma.rn.f32 %f175, %f67, %f158, %f171;
fma.rn.f32 %f176, %f67, %f157, %f172;
fma.rn.f32 %f177, %f70, %f164, %f173;
fma.rn.f32 %f178, %f70, %f163, %f174;
fma.rn.f32 %f179, %f70, %f162, %f175;
fma.rn.f32 %f180, %f70, %f161, %f176;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f181, %r68;
mov.b32 %f182, %r67;
mov.b32 %f183, %r66;
mov.b32 %f184, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f185, %r72;
mov.b32 %f186, %r71;
mov.b32 %f187, %r70;
mov.b32 %f188, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f189, %r76;
mov.b32 %f190, %r75;
mov.b32 %f191, %r74;
mov.b32 %f192, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f193, %r80;
mov.b32 %f194, %r79;
mov.b32 %f195, %r78;
mov.b32 %f196, %r77;
mul.f32 %f197, %f62, %f188;
mul.f32 %f198, %f62, %f187;
mul.f32 %f199, %f62, %f186;
mul.f32 %f200, %f62, %f185;
fma.rn.f32 %f201, %f56, %f184, %f197;
fma.rn.f32 %f202, %f56, %f183, %f198;
fma.rn.f32 %f203, %f56, %f182, %f199;
fma.rn.f32 %f204, %f56, %f181, %f200;
fma.rn.f32 %f205, %f67, %f192, %f201;
fma.rn.f32 %f206, %f67, %f191, %f202;
fma.rn.f32 %f207, %f67, %f190, %f203;
fma.rn.f32 %f208, %f67, %f189, %f204;
fma.rn.f32 %f209, %f70, %f196, %f205;
fma.rn.f32 %f210, %f70, %f195, %f206;
fma.rn.f32 %f211, %f70, %f194, %f207;
fma.rn.f32 %f212, %f70, %f193, %f208;
mul.f32 %f213, %f77, %f145;
mul.f32 %f214, %f77, %f146;
mul.f32 %f215, %f77, %f147;
mul.f32 %f216, %f77, %f148;
fma.rn.f32 %f217, %f74, %f113, %f213;
fma.rn.f32 %f218, %f74, %f114, %f214;
fma.rn.f32 %f219, %f74, %f115, %f215;
fma.rn.f32 %f220, %f74, %f116, %f216;
fma.rn.f32 %f221, %f81, %f177, %f217;
fma.rn.f32 %f222, %f81, %f178, %f218;
fma.rn.f32 %f223, %f81, %f179, %f219;
fma.rn.f32 %f224, %f81, %f180, %f220;
fma.rn.f32 %f225, %f84, %f209, %f221;
fma.rn.f32 %f226, %f84, %f210, %f222;
fma.rn.f32 %f227, %f84, %f211, %f223;
fma.rn.f32 %f228, %f84, %f212, %f224;
mul.f32 %f229, %f225, 0f437F0000;
mul.f32 %f230, %f226, 0f437F0000;
mul.f32 %f231, %f227, 0f437F0000;
mul.f32 %f232, %f228, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f229;
cvt.rzi.u16.f32 %rs2, %f230;
cvt.rzi.u16.f32 %rs3, %f231;
cvt.rzi.u16.f32 %rs4, %f232;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v4.u8 [%rd27], {%rs1, %rs2, %rs3, %rs4};
$L__BB232_2:
ret;
}
// .globl Subsample_Bicubic_bgr0_bgr0_uv
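// Packed RGB formats have no separate chroma plane, so this _uv entry
// point compiles to an empty stub.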
.visible .entry Subsample_Bicubic_bgr0_bgr0_uv(
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_0,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_1,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_2,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_3,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_4,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_5,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_6,
.param .u64 Subsample_Bicubic_bgr0_bgr0_uv_param_7,
.param .u32 Subsample_Bicubic_bgr0_bgr0_uv_param_8,
.param .u32 Subsample_Bicubic_bgr0_bgr0_uv_param_9,
.param .u32 Subsample_Bicubic_bgr0_bgr0_uv_param_10,
.param .u32 Subsample_Bicubic_bgr0_bgr0_uv_param_11,
.param .u32 Subsample_Bicubic_bgr0_bgr0_uv_param_12,
.param .f32 Subsample_Bicubic_bgr0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Bicubic_rgb0_rgb0
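// Identical body to Subsample_Bicubic_bgr0_bgr0 above: when source and
// destination channel orders match, no swizzle is needed at the store.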
.visible .entry Subsample_Bicubic_rgb0_rgb0(
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_0,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_1,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_2,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_3,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_4,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_5,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_6,
.param .u64 Subsample_Bicubic_rgb0_rgb0_param_7,
.param .u32 Subsample_Bicubic_rgb0_rgb0_param_8,
.param .u32 Subsample_Bicubic_rgb0_rgb0_param_9,
.param .u32 Subsample_Bicubic_rgb0_rgb0_param_10,
.param .u32 Subsample_Bicubic_rgb0_rgb0_param_11,
.param .u32 Subsample_Bicubic_rgb0_rgb0_param_12,
.param .f32 Subsample_Bicubic_rgb0_rgb0_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<233>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_rgb0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_rgb0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB234_2;
bra.uni $L__BB234_1;
$L__BB234_1:
ld.param.f32 %f1, [Subsample_Bicubic_rgb0_rgb0_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_rgb0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_rgb0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_rgb0_rgb0_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_rgb0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_rgb0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r20;
mov.b32 %f86, %r19;
mov.b32 %f87, %r18;
mov.b32 %f88, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f89, %r24;
mov.b32 %f90, %r23;
mov.b32 %f91, %r22;
mov.b32 %f92, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f93, %r28;
mov.b32 %f94, %r27;
mov.b32 %f95, %r26;
mov.b32 %f96, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f97, %r32;
mov.b32 %f98, %r31;
mov.b32 %f99, %r30;
mov.b32 %f100, %r29;
mul.f32 %f101, %f62, %f92;
mul.f32 %f102, %f62, %f91;
mul.f32 %f103, %f62, %f90;
mul.f32 %f104, %f62, %f89;
fma.rn.f32 %f105, %f56, %f88, %f101;
fma.rn.f32 %f106, %f56, %f87, %f102;
fma.rn.f32 %f107, %f56, %f86, %f103;
fma.rn.f32 %f108, %f56, %f85, %f104;
fma.rn.f32 %f109, %f67, %f96, %f105;
fma.rn.f32 %f110, %f67, %f95, %f106;
fma.rn.f32 %f111, %f67, %f94, %f107;
fma.rn.f32 %f112, %f67, %f93, %f108;
fma.rn.f32 %f113, %f70, %f100, %f109;
fma.rn.f32 %f114, %f70, %f99, %f110;
fma.rn.f32 %f115, %f70, %f98, %f111;
fma.rn.f32 %f116, %f70, %f97, %f112;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f117, %r36;
mov.b32 %f118, %r35;
mov.b32 %f119, %r34;
mov.b32 %f120, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f121, %r40;
mov.b32 %f122, %r39;
mov.b32 %f123, %r38;
mov.b32 %f124, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f125, %r44;
mov.b32 %f126, %r43;
mov.b32 %f127, %r42;
mov.b32 %f128, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f129, %r48;
mov.b32 %f130, %r47;
mov.b32 %f131, %r46;
mov.b32 %f132, %r45;
mul.f32 %f133, %f62, %f124;
mul.f32 %f134, %f62, %f123;
mul.f32 %f135, %f62, %f122;
mul.f32 %f136, %f62, %f121;
fma.rn.f32 %f137, %f56, %f120, %f133;
fma.rn.f32 %f138, %f56, %f119, %f134;
fma.rn.f32 %f139, %f56, %f118, %f135;
fma.rn.f32 %f140, %f56, %f117, %f136;
fma.rn.f32 %f141, %f67, %f128, %f137;
fma.rn.f32 %f142, %f67, %f127, %f138;
fma.rn.f32 %f143, %f67, %f126, %f139;
fma.rn.f32 %f144, %f67, %f125, %f140;
fma.rn.f32 %f145, %f70, %f132, %f141;
fma.rn.f32 %f146, %f70, %f131, %f142;
fma.rn.f32 %f147, %f70, %f130, %f143;
fma.rn.f32 %f148, %f70, %f129, %f144;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f149, %r52;
mov.b32 %f150, %r51;
mov.b32 %f151, %r50;
mov.b32 %f152, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f153, %r56;
mov.b32 %f154, %r55;
mov.b32 %f155, %r54;
mov.b32 %f156, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f157, %r60;
mov.b32 %f158, %r59;
mov.b32 %f159, %r58;
mov.b32 %f160, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f161, %r64;
mov.b32 %f162, %r63;
mov.b32 %f163, %r62;
mov.b32 %f164, %r61;
mul.f32 %f165, %f62, %f156;
mul.f32 %f166, %f62, %f155;
mul.f32 %f167, %f62, %f154;
mul.f32 %f168, %f62, %f153;
fma.rn.f32 %f169, %f56, %f152, %f165;
fma.rn.f32 %f170, %f56, %f151, %f166;
fma.rn.f32 %f171, %f56, %f150, %f167;
fma.rn.f32 %f172, %f56, %f149, %f168;
fma.rn.f32 %f173, %f67, %f160, %f169;
fma.rn.f32 %f174, %f67, %f159, %f170;
fma.rn.f32 %f175, %f67, %f158, %f171;
fma.rn.f32 %f176, %f67, %f157, %f172;
fma.rn.f32 %f177, %f70, %f164, %f173;
fma.rn.f32 %f178, %f70, %f163, %f174;
fma.rn.f32 %f179, %f70, %f162, %f175;
fma.rn.f32 %f180, %f70, %f161, %f176;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f181, %r68;
mov.b32 %f182, %r67;
mov.b32 %f183, %r66;
mov.b32 %f184, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f185, %r72;
mov.b32 %f186, %r71;
mov.b32 %f187, %r70;
mov.b32 %f188, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f189, %r76;
mov.b32 %f190, %r75;
mov.b32 %f191, %r74;
mov.b32 %f192, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f193, %r80;
mov.b32 %f194, %r79;
mov.b32 %f195, %r78;
mov.b32 %f196, %r77;
mul.f32 %f197, %f62, %f188;
mul.f32 %f198, %f62, %f187;
mul.f32 %f199, %f62, %f186;
mul.f32 %f200, %f62, %f185;
fma.rn.f32 %f201, %f56, %f184, %f197;
fma.rn.f32 %f202, %f56, %f183, %f198;
fma.rn.f32 %f203, %f56, %f182, %f199;
fma.rn.f32 %f204, %f56, %f181, %f200;
fma.rn.f32 %f205, %f67, %f192, %f201;
fma.rn.f32 %f206, %f67, %f191, %f202;
fma.rn.f32 %f207, %f67, %f190, %f203;
fma.rn.f32 %f208, %f67, %f189, %f204;
fma.rn.f32 %f209, %f70, %f196, %f205;
fma.rn.f32 %f210, %f70, %f195, %f206;
fma.rn.f32 %f211, %f70, %f194, %f207;
fma.rn.f32 %f212, %f70, %f193, %f208;
mul.f32 %f213, %f77, %f145;
mul.f32 %f214, %f77, %f146;
mul.f32 %f215, %f77, %f147;
mul.f32 %f216, %f77, %f148;
fma.rn.f32 %f217, %f74, %f113, %f213;
fma.rn.f32 %f218, %f74, %f114, %f214;
fma.rn.f32 %f219, %f74, %f115, %f215;
fma.rn.f32 %f220, %f74, %f116, %f216;
fma.rn.f32 %f221, %f81, %f177, %f217;
fma.rn.f32 %f222, %f81, %f178, %f218;
fma.rn.f32 %f223, %f81, %f179, %f219;
fma.rn.f32 %f224, %f81, %f180, %f220;
fma.rn.f32 %f225, %f84, %f209, %f221;
fma.rn.f32 %f226, %f84, %f210, %f222;
fma.rn.f32 %f227, %f84, %f211, %f223;
fma.rn.f32 %f228, %f84, %f212, %f224;
mul.f32 %f229, %f225, 0f437F0000;
mul.f32 %f230, %f226, 0f437F0000;
mul.f32 %f231, %f227, 0f437F0000;
mul.f32 %f232, %f228, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f229;
cvt.rzi.u16.f32 %rs2, %f230;
cvt.rzi.u16.f32 %rs3, %f231;
cvt.rzi.u16.f32 %rs4, %f232;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v4.u8 [%rd27], {%rs1, %rs2, %rs3, %rs4};
$L__BB234_2:
ret;
}
// .globl Subsample_Bicubic_rgb0_rgb0_uv
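// Empty chroma stub, as for the other packed RGB formats.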
.visible .entry Subsample_Bicubic_rgb0_rgb0_uv(
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_0,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_1,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_2,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_3,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_4,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_5,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_6,
.param .u64 Subsample_Bicubic_rgb0_rgb0_uv_param_7,
.param .u32 Subsample_Bicubic_rgb0_rgb0_uv_param_8,
.param .u32 Subsample_Bicubic_rgb0_rgb0_uv_param_9,
.param .u32 Subsample_Bicubic_rgb0_rgb0_uv_param_10,
.param .u32 Subsample_Bicubic_rgb0_rgb0_uv_param_11,
.param .u32 Subsample_Bicubic_rgb0_rgb0_uv_param_12,
.param .f32 Subsample_Bicubic_rgb0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Bicubic_bgr0_rgb0
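// Same interpolation as the packed kernels above; the only difference is
// the swizzled final store, which exchanges the first and third channels
// to convert between BGR0 and RGB0 ordering.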
.visible .entry Subsample_Bicubic_bgr0_rgb0(
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_0,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_1,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_2,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_3,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_4,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_5,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_6,
.param .u64 Subsample_Bicubic_bgr0_rgb0_param_7,
.param .u32 Subsample_Bicubic_bgr0_rgb0_param_8,
.param .u32 Subsample_Bicubic_bgr0_rgb0_param_9,
.param .u32 Subsample_Bicubic_bgr0_rgb0_param_10,
.param .u32 Subsample_Bicubic_bgr0_rgb0_param_11,
.param .u32 Subsample_Bicubic_bgr0_rgb0_param_12,
.param .f32 Subsample_Bicubic_bgr0_rgb0_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<233>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_bgr0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_bgr0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB236_2;
bra.uni $L__BB236_1;
$L__BB236_1:
ld.param.f32 %f1, [Subsample_Bicubic_bgr0_rgb0_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_bgr0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_bgr0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_bgr0_rgb0_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_bgr0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_bgr0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r20;
mov.b32 %f86, %r19;
mov.b32 %f87, %r18;
mov.b32 %f88, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f89, %r24;
mov.b32 %f90, %r23;
mov.b32 %f91, %r22;
mov.b32 %f92, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f93, %r28;
mov.b32 %f94, %r27;
mov.b32 %f95, %r26;
mov.b32 %f96, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f97, %r32;
mov.b32 %f98, %r31;
mov.b32 %f99, %r30;
mov.b32 %f100, %r29;
mul.f32 %f101, %f62, %f92;
mul.f32 %f102, %f62, %f91;
mul.f32 %f103, %f62, %f90;
mul.f32 %f104, %f62, %f89;
fma.rn.f32 %f105, %f56, %f88, %f101;
fma.rn.f32 %f106, %f56, %f87, %f102;
fma.rn.f32 %f107, %f56, %f86, %f103;
fma.rn.f32 %f108, %f56, %f85, %f104;
fma.rn.f32 %f109, %f67, %f96, %f105;
fma.rn.f32 %f110, %f67, %f95, %f106;
fma.rn.f32 %f111, %f67, %f94, %f107;
fma.rn.f32 %f112, %f67, %f93, %f108;
fma.rn.f32 %f113, %f70, %f100, %f109;
fma.rn.f32 %f114, %f70, %f99, %f110;
fma.rn.f32 %f115, %f70, %f98, %f111;
fma.rn.f32 %f116, %f70, %f97, %f112;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f117, %r36;
mov.b32 %f118, %r35;
mov.b32 %f119, %r34;
mov.b32 %f120, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f121, %r40;
mov.b32 %f122, %r39;
mov.b32 %f123, %r38;
mov.b32 %f124, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f125, %r44;
mov.b32 %f126, %r43;
mov.b32 %f127, %r42;
mov.b32 %f128, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f129, %r48;
mov.b32 %f130, %r47;
mov.b32 %f131, %r46;
mov.b32 %f132, %r45;
mul.f32 %f133, %f62, %f124;
mul.f32 %f134, %f62, %f123;
mul.f32 %f135, %f62, %f122;
mul.f32 %f136, %f62, %f121;
fma.rn.f32 %f137, %f56, %f120, %f133;
fma.rn.f32 %f138, %f56, %f119, %f134;
fma.rn.f32 %f139, %f56, %f118, %f135;
fma.rn.f32 %f140, %f56, %f117, %f136;
fma.rn.f32 %f141, %f67, %f128, %f137;
fma.rn.f32 %f142, %f67, %f127, %f138;
fma.rn.f32 %f143, %f67, %f126, %f139;
fma.rn.f32 %f144, %f67, %f125, %f140;
fma.rn.f32 %f145, %f70, %f132, %f141;
fma.rn.f32 %f146, %f70, %f131, %f142;
fma.rn.f32 %f147, %f70, %f130, %f143;
fma.rn.f32 %f148, %f70, %f129, %f144;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f149, %r52;
mov.b32 %f150, %r51;
mov.b32 %f151, %r50;
mov.b32 %f152, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f153, %r56;
mov.b32 %f154, %r55;
mov.b32 %f155, %r54;
mov.b32 %f156, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f157, %r60;
mov.b32 %f158, %r59;
mov.b32 %f159, %r58;
mov.b32 %f160, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f161, %r64;
mov.b32 %f162, %r63;
mov.b32 %f163, %r62;
mov.b32 %f164, %r61;
mul.f32 %f165, %f62, %f156;
mul.f32 %f166, %f62, %f155;
mul.f32 %f167, %f62, %f154;
mul.f32 %f168, %f62, %f153;
fma.rn.f32 %f169, %f56, %f152, %f165;
fma.rn.f32 %f170, %f56, %f151, %f166;
fma.rn.f32 %f171, %f56, %f150, %f167;
fma.rn.f32 %f172, %f56, %f149, %f168;
fma.rn.f32 %f173, %f67, %f160, %f169;
fma.rn.f32 %f174, %f67, %f159, %f170;
fma.rn.f32 %f175, %f67, %f158, %f171;
fma.rn.f32 %f176, %f67, %f157, %f172;
fma.rn.f32 %f177, %f70, %f164, %f173;
fma.rn.f32 %f178, %f70, %f163, %f174;
fma.rn.f32 %f179, %f70, %f162, %f175;
fma.rn.f32 %f180, %f70, %f161, %f176;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f181, %r68;
mov.b32 %f182, %r67;
mov.b32 %f183, %r66;
mov.b32 %f184, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f185, %r72;
mov.b32 %f186, %r71;
mov.b32 %f187, %r70;
mov.b32 %f188, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f189, %r76;
mov.b32 %f190, %r75;
mov.b32 %f191, %r74;
mov.b32 %f192, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f193, %r80;
mov.b32 %f194, %r79;
mov.b32 %f195, %r78;
mov.b32 %f196, %r77;
mul.f32 %f197, %f62, %f188;
mul.f32 %f198, %f62, %f187;
mul.f32 %f199, %f62, %f186;
mul.f32 %f200, %f62, %f185;
fma.rn.f32 %f201, %f56, %f184, %f197;
fma.rn.f32 %f202, %f56, %f183, %f198;
fma.rn.f32 %f203, %f56, %f182, %f199;
fma.rn.f32 %f204, %f56, %f181, %f200;
fma.rn.f32 %f205, %f67, %f192, %f201;
fma.rn.f32 %f206, %f67, %f191, %f202;
fma.rn.f32 %f207, %f67, %f190, %f203;
fma.rn.f32 %f208, %f67, %f189, %f204;
fma.rn.f32 %f209, %f70, %f196, %f205;
fma.rn.f32 %f210, %f70, %f195, %f206;
fma.rn.f32 %f211, %f70, %f194, %f207;
fma.rn.f32 %f212, %f70, %f193, %f208;
mul.f32 %f213, %f77, %f145;
mul.f32 %f214, %f77, %f146;
mul.f32 %f215, %f77, %f147;
mul.f32 %f216, %f77, %f148;
fma.rn.f32 %f217, %f74, %f113, %f213;
fma.rn.f32 %f218, %f74, %f114, %f214;
fma.rn.f32 %f219, %f74, %f115, %f215;
fma.rn.f32 %f220, %f74, %f116, %f216;
fma.rn.f32 %f221, %f81, %f177, %f217;
fma.rn.f32 %f222, %f81, %f178, %f218;
fma.rn.f32 %f223, %f81, %f179, %f219;
fma.rn.f32 %f224, %f81, %f180, %f220;
fma.rn.f32 %f225, %f84, %f209, %f221;
fma.rn.f32 %f226, %f84, %f210, %f222;
fma.rn.f32 %f227, %f84, %f211, %f223;
fma.rn.f32 %f228, %f84, %f212, %f224;
mul.f32 %f229, %f225, 0f437F0000;
mul.f32 %f230, %f226, 0f437F0000;
mul.f32 %f231, %f227, 0f437F0000;
mul.f32 %f232, %f228, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f229;
cvt.rzi.u16.f32 %rs2, %f230;
cvt.rzi.u16.f32 %rs3, %f231;
cvt.rzi.u16.f32 %rs4, %f232;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
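// Swizzled store: {%rs3, %rs2, %rs1, %rs4} exchanges components 0 and 2
// (B <-> R) relative to the straight-through kernels above.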
st.global.v4.u8 [%rd27], {%rs3, %rs2, %rs1, %rs4};
$L__BB236_2:
ret;
}
// .globl Subsample_Bicubic_bgr0_rgb0_uv
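// Empty chroma stub.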
.visible .entry Subsample_Bicubic_bgr0_rgb0_uv(
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_0,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_1,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_2,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_3,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_4,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_5,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_6,
.param .u64 Subsample_Bicubic_bgr0_rgb0_uv_param_7,
.param .u32 Subsample_Bicubic_bgr0_rgb0_uv_param_8,
.param .u32 Subsample_Bicubic_bgr0_rgb0_uv_param_9,
.param .u32 Subsample_Bicubic_bgr0_rgb0_uv_param_10,
.param .u32 Subsample_Bicubic_bgr0_rgb0_uv_param_11,
.param .u32 Subsample_Bicubic_bgr0_rgb0_uv_param_12,
.param .f32 Subsample_Bicubic_bgr0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Bicubic_rgb0_bgr0
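// Mirror of the bgr0_rgb0 conversion above; presumably the same R/B swap
// occurs at its final store.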
.visible .entry Subsample_Bicubic_rgb0_bgr0(
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_0,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_1,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_2,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_3,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_4,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_5,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_6,
.param .u64 Subsample_Bicubic_rgb0_bgr0_param_7,
.param .u32 Subsample_Bicubic_rgb0_bgr0_param_8,
.param .u32 Subsample_Bicubic_rgb0_bgr0_param_9,
.param .u32 Subsample_Bicubic_rgb0_bgr0_param_10,
.param .u32 Subsample_Bicubic_rgb0_bgr0_param_11,
.param .u32 Subsample_Bicubic_rgb0_bgr0_param_12,
.param .f32 Subsample_Bicubic_rgb0_bgr0_param_13
)
{
.reg .pred %p<5>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<233>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Bicubic_rgb0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Bicubic_rgb0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB238_2;
bra.uni $L__BB238_1;
$L__BB238_1:
ld.param.f32 %f1, [Subsample_Bicubic_rgb0_bgr0_param_13];
ld.param.u32 %r7, [Subsample_Bicubic_rgb0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Bicubic_rgb0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Bicubic_rgb0_bgr0_param_10];
ld.param.u64 %rd4, [Subsample_Bicubic_rgb0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Bicubic_rgb0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvt.rn.f32.s32 %f34, %r6;
cvt.rn.f32.s32 %f35, %r3;
div.rn.f32 %f36, %f34, %f35;
cvt.rn.f32.s32 %f37, %r7;
cvt.rn.f32.s32 %f38, %r4;
div.rn.f32 %f39, %f37, %f38;
cvt.rn.f32.s32 %f40, %r1;
add.f32 %f41, %f40, 0f3F000000;
fma.rn.f32 %f42, %f36, %f41, 0fBF000000;
cvt.rn.f32.s32 %f43, %r2;
add.f32 %f44, %f43, 0f3F000000;
fma.rn.f32 %f45, %f39, %f44, 0fBF000000;
cvt.rmi.f32.f32 %f4, %f42;
cvt.rmi.f32.f32 %f11, %f45;
sub.f32 %f46, %f42, %f4;
sub.f32 %f47, %f45, %f11;
setp.eq.f32 %p4, %f1, 0f497423F0;
neg.f32 %f48, %f1;
selp.f32 %f49, 0f00000000, %f48, %p4;
add.f32 %f50, %f46, 0f3F800000;
mul.f32 %f51, %f49, 0fC0A00000;
fma.rn.f32 %f52, %f49, %f50, %f51;
mul.f32 %f53, %f49, 0f41000000;
fma.rn.f32 %f54, %f50, %f52, %f53;
mul.f32 %f55, %f49, 0fC0800000;
fma.rn.f32 %f56, %f50, %f54, %f55;
add.f32 %f57, %f49, 0f40000000;
add.f32 %f58, %f49, 0f40400000;
neg.f32 %f59, %f58;
fma.rn.f32 %f60, %f57, %f46, %f59;
mul.f32 %f61, %f46, %f60;
fma.rn.f32 %f62, %f46, %f61, 0f3F800000;
mov.f32 %f63, 0f3F800000;
sub.f32 %f64, %f63, %f46;
fma.rn.f32 %f65, %f57, %f64, %f59;
mul.f32 %f66, %f64, %f65;
fma.rn.f32 %f67, %f64, %f66, 0f3F800000;
sub.f32 %f68, %f63, %f56;
sub.f32 %f69, %f68, %f62;
sub.f32 %f70, %f69, %f67;
add.f32 %f71, %f47, 0f3F800000;
fma.rn.f32 %f72, %f49, %f71, %f51;
fma.rn.f32 %f73, %f71, %f72, %f53;
fma.rn.f32 %f74, %f71, %f73, %f55;
fma.rn.f32 %f75, %f57, %f47, %f59;
mul.f32 %f76, %f47, %f75;
fma.rn.f32 %f77, %f47, %f76, 0f3F800000;
sub.f32 %f78, %f63, %f47;
fma.rn.f32 %f79, %f57, %f78, %f59;
mul.f32 %f80, %f78, %f79;
fma.rn.f32 %f81, %f78, %f80, 0f3F800000;
sub.f32 %f82, %f63, %f74;
sub.f32 %f83, %f82, %f77;
sub.f32 %f84, %f83, %f81;
add.f32 %f2, %f4, 0fBF800000;
add.f32 %f3, %f11, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f2, %f3}];
// end inline asm
mov.b32 %f85, %r20;
mov.b32 %f86, %r19;
mov.b32 %f87, %r18;
mov.b32 %f88, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f4, %f3}];
// end inline asm
mov.b32 %f89, %r24;
mov.b32 %f90, %r23;
mov.b32 %f91, %r22;
mov.b32 %f92, %r21;
add.f32 %f6, %f4, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f6, %f3}];
// end inline asm
mov.b32 %f93, %r28;
mov.b32 %f94, %r27;
mov.b32 %f95, %r26;
mov.b32 %f96, %r25;
add.f32 %f8, %f4, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f8, %f3}];
// end inline asm
mov.b32 %f97, %r32;
mov.b32 %f98, %r31;
mov.b32 %f99, %r30;
mov.b32 %f100, %r29;
mul.f32 %f101, %f62, %f92;
mul.f32 %f102, %f62, %f91;
mul.f32 %f103, %f62, %f90;
mul.f32 %f104, %f62, %f89;
fma.rn.f32 %f105, %f56, %f88, %f101;
fma.rn.f32 %f106, %f56, %f87, %f102;
fma.rn.f32 %f107, %f56, %f86, %f103;
fma.rn.f32 %f108, %f56, %f85, %f104;
fma.rn.f32 %f109, %f67, %f96, %f105;
fma.rn.f32 %f110, %f67, %f95, %f106;
fma.rn.f32 %f111, %f67, %f94, %f107;
fma.rn.f32 %f112, %f67, %f93, %f108;
fma.rn.f32 %f113, %f70, %f100, %f109;
fma.rn.f32 %f114, %f70, %f99, %f110;
fma.rn.f32 %f115, %f70, %f98, %f111;
fma.rn.f32 %f116, %f70, %f97, %f112;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f2, %f11}];
// end inline asm
mov.b32 %f117, %r36;
mov.b32 %f118, %r35;
mov.b32 %f119, %r34;
mov.b32 %f120, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f4, %f11}];
// end inline asm
mov.b32 %f121, %r40;
mov.b32 %f122, %r39;
mov.b32 %f123, %r38;
mov.b32 %f124, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f6, %f11}];
// end inline asm
mov.b32 %f125, %r44;
mov.b32 %f126, %r43;
mov.b32 %f127, %r42;
mov.b32 %f128, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f8, %f11}];
// end inline asm
mov.b32 %f129, %r48;
mov.b32 %f130, %r47;
mov.b32 %f131, %r46;
mov.b32 %f132, %r45;
mul.f32 %f133, %f62, %f124;
mul.f32 %f134, %f62, %f123;
mul.f32 %f135, %f62, %f122;
mul.f32 %f136, %f62, %f121;
fma.rn.f32 %f137, %f56, %f120, %f133;
fma.rn.f32 %f138, %f56, %f119, %f134;
fma.rn.f32 %f139, %f56, %f118, %f135;
fma.rn.f32 %f140, %f56, %f117, %f136;
fma.rn.f32 %f141, %f67, %f128, %f137;
fma.rn.f32 %f142, %f67, %f127, %f138;
fma.rn.f32 %f143, %f67, %f126, %f139;
fma.rn.f32 %f144, %f67, %f125, %f140;
fma.rn.f32 %f145, %f70, %f132, %f141;
fma.rn.f32 %f146, %f70, %f131, %f142;
fma.rn.f32 %f147, %f70, %f130, %f143;
fma.rn.f32 %f148, %f70, %f129, %f144;
add.f32 %f19, %f11, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f2, %f19}];
// end inline asm
mov.b32 %f149, %r52;
mov.b32 %f150, %r51;
mov.b32 %f151, %r50;
mov.b32 %f152, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f4, %f19}];
// end inline asm
mov.b32 %f153, %r56;
mov.b32 %f154, %r55;
mov.b32 %f155, %r54;
mov.b32 %f156, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f6, %f19}];
// end inline asm
mov.b32 %f157, %r60;
mov.b32 %f158, %r59;
mov.b32 %f159, %r58;
mov.b32 %f160, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f8, %f19}];
// end inline asm
mov.b32 %f161, %r64;
mov.b32 %f162, %r63;
mov.b32 %f163, %r62;
mov.b32 %f164, %r61;
mul.f32 %f165, %f62, %f156;
mul.f32 %f166, %f62, %f155;
mul.f32 %f167, %f62, %f154;
mul.f32 %f168, %f62, %f153;
fma.rn.f32 %f169, %f56, %f152, %f165;
fma.rn.f32 %f170, %f56, %f151, %f166;
fma.rn.f32 %f171, %f56, %f150, %f167;
fma.rn.f32 %f172, %f56, %f149, %f168;
fma.rn.f32 %f173, %f67, %f160, %f169;
fma.rn.f32 %f174, %f67, %f159, %f170;
fma.rn.f32 %f175, %f67, %f158, %f171;
fma.rn.f32 %f176, %f67, %f157, %f172;
fma.rn.f32 %f177, %f70, %f164, %f173;
fma.rn.f32 %f178, %f70, %f163, %f174;
fma.rn.f32 %f179, %f70, %f162, %f175;
fma.rn.f32 %f180, %f70, %f161, %f176;
add.f32 %f27, %f11, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f2, %f27}];
// end inline asm
mov.b32 %f181, %r68;
mov.b32 %f182, %r67;
mov.b32 %f183, %r66;
mov.b32 %f184, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f4, %f27}];
// end inline asm
mov.b32 %f185, %r72;
mov.b32 %f186, %r71;
mov.b32 %f187, %r70;
mov.b32 %f188, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f6, %f27}];
// end inline asm
mov.b32 %f189, %r76;
mov.b32 %f190, %r75;
mov.b32 %f191, %r74;
mov.b32 %f192, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f8, %f27}];
// end inline asm
mov.b32 %f193, %r80;
mov.b32 %f194, %r79;
mov.b32 %f195, %r78;
mov.b32 %f196, %r77;
mul.f32 %f197, %f62, %f188;
mul.f32 %f198, %f62, %f187;
mul.f32 %f199, %f62, %f186;
mul.f32 %f200, %f62, %f185;
fma.rn.f32 %f201, %f56, %f184, %f197;
fma.rn.f32 %f202, %f56, %f183, %f198;
fma.rn.f32 %f203, %f56, %f182, %f199;
fma.rn.f32 %f204, %f56, %f181, %f200;
fma.rn.f32 %f205, %f67, %f192, %f201;
fma.rn.f32 %f206, %f67, %f191, %f202;
fma.rn.f32 %f207, %f67, %f190, %f203;
fma.rn.f32 %f208, %f67, %f189, %f204;
fma.rn.f32 %f209, %f70, %f196, %f205;
fma.rn.f32 %f210, %f70, %f195, %f206;
fma.rn.f32 %f211, %f70, %f194, %f207;
fma.rn.f32 %f212, %f70, %f193, %f208;
mul.f32 %f213, %f77, %f145;
mul.f32 %f214, %f77, %f146;
mul.f32 %f215, %f77, %f147;
mul.f32 %f216, %f77, %f148;
fma.rn.f32 %f217, %f74, %f113, %f213;
fma.rn.f32 %f218, %f74, %f114, %f214;
fma.rn.f32 %f219, %f74, %f115, %f215;
fma.rn.f32 %f220, %f74, %f116, %f216;
fma.rn.f32 %f221, %f81, %f177, %f217;
fma.rn.f32 %f222, %f81, %f178, %f218;
fma.rn.f32 %f223, %f81, %f179, %f219;
fma.rn.f32 %f224, %f81, %f180, %f220;
fma.rn.f32 %f225, %f84, %f209, %f221;
fma.rn.f32 %f226, %f84, %f210, %f222;
fma.rn.f32 %f227, %f84, %f211, %f223;
fma.rn.f32 %f228, %f84, %f212, %f224;
mul.f32 %f229, %f225, 0f437F0000;
mul.f32 %f230, %f226, 0f437F0000;
mul.f32 %f231, %f227, 0f437F0000;
mul.f32 %f232, %f228, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f229;
cvt.rzi.u16.f32 %rs2, %f230;
cvt.rzi.u16.f32 %rs3, %f231;
cvt.rzi.u16.f32 %rs4, %f232;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
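// param_10 is the pitch in bytes; >>2 turns it into 4-byte pixels before the
// row*pitch+col address math. 0f437F0000 above is 255.0, so the normalized
// result is rescaled and truncated to bytes, and the store below swaps the
// first and third channels (rgb0 -> bgr0) while keeping alpha in place.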
st.global.v4.u8 [%rd27], {%rs3, %rs2, %rs1, %rs4};
$L__BB238_2:
ret;
}
// .globl Subsample_Bicubic_rgb0_bgr0_uv
.visible .entry Subsample_Bicubic_rgb0_bgr0_uv(
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_0,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_1,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_2,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_3,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_4,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_5,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_6,
.param .u64 Subsample_Bicubic_rgb0_bgr0_uv_param_7,
.param .u32 Subsample_Bicubic_rgb0_bgr0_uv_param_8,
.param .u32 Subsample_Bicubic_rgb0_bgr0_uv_param_9,
.param .u32 Subsample_Bicubic_rgb0_bgr0_uv_param_10,
.param .u32 Subsample_Bicubic_rgb0_bgr0_uv_param_11,
.param .u32 Subsample_Bicubic_rgb0_bgr0_uv_param_12,
.param .f32 Subsample_Bicubic_rgb0_bgr0_uv_param_13
)
{
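// Empty stub: presumably packed rgb0/bgr0 frames carry no separate chroma
// plane, so the _uv pass has nothing to do.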
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Lanczos_yuv420p_yuv420p
.visible .entry Subsample_Lanczos_yuv420p_yuv420p(
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_0,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_1,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_2,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_3,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_4,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_5,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_6,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_param_7,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_param_8,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_param_9,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_param_10,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_param_11,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_param_12,
.param .f32 Subsample_Lanczos_yuv420p_yuv420p_param_13
)
{
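// Lanczos (a = 2) scaler for the luma plane: one output byte per thread.
// Eight tap weights (four horizontal from the x fraction, four vertical from
// the y fraction) are each computed in a predicate-guarded block below,
// because the sinc-based weight needs the special case w(0) = 1.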
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB240_18;
bra.uni $L__BB240_1;
$L__BB240_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_yuv420p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB240_3;
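// Lanczos-2 tap weight for x != 0:
//   L(x) = sinc(x) * sinc(x/2) = sin(pi*x) * sin(pi*x/2) / ((pi*x)^2 / 2)
// with %f4 = pi*x, %f8 = pi*x/2 and %f9 = (pi*x)^2 / 2; the guarded blocks
// that follow repeat this pattern for the remaining seven taps.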
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB240_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB240_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB240_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB240_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB240_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB240_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB240_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB240_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB240_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB240_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB240_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv420p_yuv420p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB240_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB240_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_yuv420p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB240_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB240_17:
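// Normalize: each group of four weights is divided by its sum, so the
// horizontal and vertical filters both sum to 1 before the 4x4 gather.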
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
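// Rescale the filtered value to 0..255 (0f437F0000 = 255.0), truncate it to
// a byte and store at dst + y*pitch + x (param_10 is the pitch in bytes).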
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB240_18:
ret;
}
// .globl Subsample_Lanczos_yuv420p_yuv420p_uv
.visible .entry Subsample_Lanczos_yuv420p_yuv420p_uv(
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_0,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_1,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_2,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_3,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_4,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_5,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_6,
.param .u64 Subsample_Lanczos_yuv420p_yuv420p_uv_param_7,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_uv_param_8,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_uv_param_9,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_uv_param_10,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_uv_param_11,
.param .u32 Subsample_Lanczos_yuv420p_yuv420p_uv_param_12,
.param .f32 Subsample_Lanczos_yuv420p_yuv420p_uv_param_13
)
{
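// Chroma pass for yuv420p: the same Lanczos-2 filter is applied to two
// planar sources in sequence, apparently U first (texture param_1, written
// to param_5) and then V (texture param_2, written to param_6).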
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB241_34;
bra.uni $L__BB241_1;
$L__BB241_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB241_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB241_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB241_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB241_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB241_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB241_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB241_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB241_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB241_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB241_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB241_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB241_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB241_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB241_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB241_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB241_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f208;
mul.wide.s32 %rd24, %r2, %r5;
cvt.s64.s32 %rd25, %r1;
add.s64 %rd3, %rd24, %rd25;
add.s64 %rd26, %rd2, %rd3;
st.global.u8 [%rd26], %rs1;
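// Second chroma plane: the sin/div weight blocks are recomputed under the
// same predicates (%p4..%p11), likely rematerialized rather than kept live
// across the first store, before the param_2 texture is sampled.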
mov.f32 %f331, %f338;
@%p4 bra $L__BB241_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB241_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB241_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB241_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB241_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB241_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB241_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB241_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB241_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB241_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB241_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB241_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB241_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB241_31:
ld.param.u64 %rd27, [Subsample_Lanczos_yuv420p_yuv420p_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB241_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB241_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd27, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd27, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd27, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd27, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd27, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd27, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd27, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd27, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd27, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd27, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd27, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd27, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd27, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd27, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd27, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd27, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f322;
add.s64 %rd43, %rd1, %rd3;
st.global.u8 [%rd43], %rs2;
$L__BB241_34:
ret;
}
// .globl Subsample_Lanczos_nv12_yuv420p
.visible .entry Subsample_Lanczos_nv12_yuv420p(
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_0,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_1,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_2,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_3,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_4,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_5,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_6,
.param .u64 Subsample_Lanczos_nv12_yuv420p_param_7,
.param .u32 Subsample_Lanczos_nv12_yuv420p_param_8,
.param .u32 Subsample_Lanczos_nv12_yuv420p_param_9,
.param .u32 Subsample_Lanczos_nv12_yuv420p_param_10,
.param .u32 Subsample_Lanczos_nv12_yuv420p_param_11,
.param .u32 Subsample_Lanczos_nv12_yuv420p_param_12,
.param .f32 Subsample_Lanczos_nv12_yuv420p_param_13
)
{
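// Luma pass for nv12 input: structurally identical to the yuv420p luma
// kernel above, since the Y plane has the same layout in both formats.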
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB242_18;
bra.uni $L__BB242_1;
$L__BB242_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_yuv420p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB242_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB242_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB242_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB242_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB242_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB242_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB242_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB242_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB242_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB242_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB242_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB242_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_yuv420p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB242_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB242_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_yuv420p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB242_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB242_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB242_18:
ret;
}
// .globl Subsample_Lanczos_nv12_yuv420p_uv
.visible .entry Subsample_Lanczos_nv12_yuv420p_uv(
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_0,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_1,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_2,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_3,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_4,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_5,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_6,
.param .u64 Subsample_Lanczos_nv12_yuv420p_uv_param_7,
.param .u32 Subsample_Lanczos_nv12_yuv420p_uv_param_8,
.param .u32 Subsample_Lanczos_nv12_yuv420p_uv_param_9,
.param .u32 Subsample_Lanczos_nv12_yuv420p_uv_param_10,
.param .u32 Subsample_Lanczos_nv12_yuv420p_uv_param_11,
.param .u32 Subsample_Lanczos_nv12_yuv420p_uv_param_12,
.param .f32 Subsample_Lanczos_nv12_yuv420p_uv_param_13
)
{
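// Chroma pass for nv12 input: each fetch returns an interleaved UV texel, so
// two components per sample (%r17/%r18, ...) are filtered side by side and
// the results are stored as single bytes to two destinations, deinterleaving
// the chroma into planar U and V outputs.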
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB243_18;
bra.uni $L__BB243_1;
$L__BB243_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_yuv420p_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB243_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB243_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB243_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB243_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB243_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB243_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB243_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB243_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB243_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB243_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB243_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB243_13:
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_yuv420p_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_nv12_yuv420p_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB243_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB243_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_nv12_yuv420p_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB243_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB243_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f437F0000;
mul.f32 %f222, %f220, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs1;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs2;
$L__BB243_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_yuv420p
.visible .entry Subsample_Lanczos_yuv444p_yuv420p(
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_0,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_1,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_2,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_3,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_4,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_5,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_6,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_param_7,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_param_8,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_param_9,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_param_10,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_param_11,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_param_12,
.param .f32 Subsample_Lanczos_yuv444p_yuv420p_param_13
)
{
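// Luma pass for yuv444p input: the filter body matches the yuv420p and nv12
// luma kernels; only the entry name, and hence the host-side dispatch,
// differs.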
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB244_18;
bra.uni $L__BB244_1;
$L__BB244_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_yuv420p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB244_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB244_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB244_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB244_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB244_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB244_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB244_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB244_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB244_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB244_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB244_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB244_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p_yuv420p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB244_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB244_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_yuv420p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB244_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB244_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
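// editor note: %f184 is the fully filtered luma sample in [0,1]; the code
// below scales it by 255.0 (0f437F0000), truncates to an integer, and
// stores a single u8 at dst + y*pitch + x (pitch comes from param_10 in %r5).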
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB244_18:
ret;
}
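// --- editor note ----------------------------------------------------------
// Every Subsample_Lanczos_* entry in this listing shares one template: map
// the destination pixel back into source texel space, build four Lanczos-2
// tap weights per axis from the fractional offset, normalise each group,
// gather a 4x4 texture neighbourhood, and write the weighted sum as u8.
// A minimal CUDA-level sketch of the weight function, reconstructed from the
// PTX instruction stream (this is NOT the original source; the name is
// invented for illustration):
//
//   __device__ static float lanczos2_weight(float pix) // pix = pi * x
//   {
//       if (pix == 0.0f)
//           return 1.0f;                    // L(0) = 1, avoids 0/0
//       // L(x) = sin(pi*x) * sin(pi*x/2) / ((pi*x)^2 / 2)
//       // sin.approx.f32 in the PTX corresponds to __sinf() here
//       return __sinf(pix) * __sinf(pix * 0.5f) / (pix * pix * 0.5f);
//   }
//
// The four horizontal taps use pix = pi*(frac+1), pi*frac, pi*(frac-1),
// pi*(frac-2); the vertical taps repeat the pattern on the y fraction.
// ---------------------------------------------------------------------------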
// .globl Subsample_Lanczos_yuv444p_yuv420p_uv
.visible .entry Subsample_Lanczos_yuv444p_yuv420p_uv(
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p_yuv420p_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p_yuv420p_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p_yuv420p_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
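// editor note: %r1/%r2 are the global destination x/y
// (ntid * ctaid + tid per axis); the guard below exits early for threads
// outside the dst_width x dst_height grid (params 8 and 9).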
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB245_34;
bra.uni $L__BB245_1;
$L__BB245_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
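// editor note: %f75 maps dst x into source texel space as
// scale * (x + 0.5) - 0.5, with scale = param_11 / param_8
// (presumably src_width / dst_width).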
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
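// editor note: %f79 is the fractional part of the source x; %f4..%f7 become
// pi (0f40490FDB) times the signed distance to each of the four horizontal
// taps: frac+1, frac, frac-1, frac-2.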
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB245_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
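// editor note: guarded Lanczos-2 weight: when the tap distance is zero the
// weight keeps the preloaded 1.0 (%f338); otherwise
// w = sin(pix) * sin(pix/2) / (pix*pix/2), using sin.approx.f32 as the
// hardware fast-sine. The same guarded pattern repeats for all eight taps.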
$L__BB245_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB245_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB245_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB245_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB245_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB245_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB245_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB245_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB245_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB245_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB245_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB245_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB245_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB245_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB245_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
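// editor note: the four horizontal weights (%f161..%f164) and the four
// vertical weights (%f168..%f171) are normalised so each group sums to 1.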
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
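// editor note: %f179 is the first of four row sums: each row gathers four
// texels via tex.2d at x-1, x, x+1, x+2 and combines them with the
// horizontal weights; the four row sums are blended with the vertical
// weights further below.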
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f208;
mul.wide.s32 %rd24, %r2, %r5;
cvt.s64.s32 %rd25, %r1;
add.s64 %rd3, %rd24, %rd25;
add.s64 %rd26, %rd2, %rd3;
st.global.u8 [%rd26], %rs1;
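// editor note: first output plane done (texture param_1 -> plane param_5).
// The weights are re-materialised below and the same 4x4 gather is repeated
// on texture param_2, writing the second plane (param_6) at the same
// byte offset %rd3 = y*pitch + x.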
mov.f32 %f331, %f338;
@%p4 bra $L__BB245_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB245_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB245_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB245_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB245_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB245_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB245_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB245_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB245_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB245_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB245_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB245_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB245_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB245_31:
ld.param.u64 %rd27, [Subsample_Lanczos_yuv444p_yuv420p_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB245_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB245_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd27, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd27, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd27, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd27, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd27, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd27, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd27, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd27, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd27, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd27, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd27, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd27, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd27, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd27, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd27, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd27, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f322;
add.s64 %rd43, %rd1, %rd3;
st.global.u8 [%rd43], %rs2;
$L__BB245_34:
ret;
}
// .globl Subsample_Lanczos_p010le_yuv420p
.visible .entry Subsample_Lanczos_p010le_yuv420p(
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_0,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_1,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_2,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_3,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_4,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_5,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_6,
.param .u64 Subsample_Lanczos_p010le_yuv420p_param_7,
.param .u32 Subsample_Lanczos_p010le_yuv420p_param_8,
.param .u32 Subsample_Lanczos_p010le_yuv420p_param_9,
.param .u32 Subsample_Lanczos_p010le_yuv420p_param_10,
.param .u32 Subsample_Lanczos_p010le_yuv420p_param_11,
.param .u32 Subsample_Lanczos_p010le_yuv420p_param_12,
.param .f32 Subsample_Lanczos_p010le_yuv420p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB246_18;
bra.uni $L__BB246_1;
$L__BB246_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_yuv420p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB246_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB246_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB246_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB246_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB246_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB246_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB246_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB246_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB246_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB246_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB246_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB246_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_yuv420p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB246_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB246_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_yuv420p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB246_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB246_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
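// editor note: 16-bit source path: the filtered value is scaled by 65535.0
// (0f477FFF00), truncated, and only the high byte (shr 8) is stored,
// narrowing p010le samples to the 8-bit yuv420p destination.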
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB246_18:
ret;
}
// .globl Subsample_Lanczos_p010le_yuv420p_uv
.visible .entry Subsample_Lanczos_p010le_yuv420p_uv(
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_0,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_1,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_2,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_3,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_4,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_5,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_6,
.param .u64 Subsample_Lanczos_p010le_yuv420p_uv_param_7,
.param .u32 Subsample_Lanczos_p010le_yuv420p_uv_param_8,
.param .u32 Subsample_Lanczos_p010le_yuv420p_uv_param_9,
.param .u32 Subsample_Lanczos_p010le_yuv420p_uv_param_10,
.param .u32 Subsample_Lanczos_p010le_yuv420p_uv_param_11,
.param .u32 Subsample_Lanczos_p010le_yuv420p_uv_param_12,
.param .f32 Subsample_Lanczos_p010le_yuv420p_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB247_18;
bra.uni $L__BB247_1;
$L__BB247_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_yuv420p_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB247_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB247_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB247_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB247_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB247_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB247_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB247_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB247_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB247_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB247_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB247_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB247_13:
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_yuv420p_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_p010le_yuv420p_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB247_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB247_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_p010le_yuv420p_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB247_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB247_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
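// editor note: p010le chroma is interleaved, so each tex.2d fetch below
// yields U in .x (%r17, %r21, ...) and V in .y (%r18, %r22, ...); both
// channels are filtered side by side through duplicated mul/fma chains.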
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
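// editor note: the two filtered chroma channels are written as high bytes
// to separate planes, param_5 (via %rd2) and param_6 (via %rd1), both at
// offset y*pitch + x.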
$L__BB247_18:
ret;
}
// .globl Subsample_Lanczos_p016le_yuv420p
.visible .entry Subsample_Lanczos_p016le_yuv420p(
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_0,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_1,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_2,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_3,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_4,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_5,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_6,
.param .u64 Subsample_Lanczos_p016le_yuv420p_param_7,
.param .u32 Subsample_Lanczos_p016le_yuv420p_param_8,
.param .u32 Subsample_Lanczos_p016le_yuv420p_param_9,
.param .u32 Subsample_Lanczos_p016le_yuv420p_param_10,
.param .u32 Subsample_Lanczos_p016le_yuv420p_param_11,
.param .u32 Subsample_Lanczos_p016le_yuv420p_param_12,
.param .f32 Subsample_Lanczos_p016le_yuv420p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB248_18;
bra.uni $L__BB248_1;
$L__BB248_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_yuv420p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB248_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB248_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB248_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB248_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB248_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB248_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB248_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB248_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB248_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB248_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB248_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB248_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_yuv420p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB248_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB248_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_yuv420p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB248_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB248_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB248_18:
ret;
}
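// editor note: the p016le pair below matches the p010le pair above
// instruction-for-instruction apart from parameter names; both treat the
// source as normalised 16-bit samples and appear to differ only in the
// texture format they were instantiated for.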
// .globl Subsample_Lanczos_p016le_yuv420p_uv
.visible .entry Subsample_Lanczos_p016le_yuv420p_uv(
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_0,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_1,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_2,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_3,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_4,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_5,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_6,
.param .u64 Subsample_Lanczos_p016le_yuv420p_uv_param_7,
.param .u32 Subsample_Lanczos_p016le_yuv420p_uv_param_8,
.param .u32 Subsample_Lanczos_p016le_yuv420p_uv_param_9,
.param .u32 Subsample_Lanczos_p016le_yuv420p_uv_param_10,
.param .u32 Subsample_Lanczos_p016le_yuv420p_uv_param_11,
.param .u32 Subsample_Lanczos_p016le_yuv420p_uv_param_12,
.param .f32 Subsample_Lanczos_p016le_yuv420p_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB249_18;
bra.uni $L__BB249_1;
$L__BB249_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_yuv420p_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB249_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB249_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB249_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB249_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB249_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB249_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB249_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB249_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB249_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB249_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB249_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB249_13:
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_yuv420p_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_p016le_yuv420p_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB249_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB249_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_yuv420p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_p016le_yuv420p_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB249_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB249_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB249_18:
ret;
}
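// editor note: the yuv444p16le -> yuv420p luma kernel below instantiates
// the same Lanczos template for 16-bit planar input; the listing ends
// mid-kernel, so its tail is not shown here.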
// .globl Subsample_Lanczos_yuv444p16le_yuv420p
.visible .entry Subsample_Lanczos_yuv444p16le_yuv420p(
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_yuv420p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_yuv420p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_yuv420p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB250_18;
bra.uni $L__BB250_1;
$L__BB250_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_yuv420p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_yuv420p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB250_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB250_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB250_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB250_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB250_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB250_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB250_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB250_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB250_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB250_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB250_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB250_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p16le_yuv420p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB250_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB250_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_yuv420p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_yuv420p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB250_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB250_17:
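// normalize: each of the four horizontal (and four vertical) tap weights is
// divided by its group's sum so the filter sums to 1 on each axis.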
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
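// 16 texture fetches sample the 4x4 neighborhood at x-1..x+2, y-1..y+2
// relative to floor(src); only the first component (.x) of each
// tex.2d.v4 result is used.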
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
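// output: scale the filtered value by 65535.0 (0f477FFF00), truncate to u16,
// keep the high byte (>>8) to narrow 16-bit to 8-bit, and store at
// dst(param_4) + y*param_10 + x.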
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB250_18:
ret;
}
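//
// For reference, a minimal CUDA sketch of the weight computation the blocks
// above appear to inline; the function and parameter names are hypothetical
// and not taken from this module:
//
//   #include <math_constants.h>            // CUDART_PI_F
//
//   // Lanczos window with a = 2: sinc(d) * sinc(d / 2) for |d| < 2.
//   __device__ float lanczos2_weight(float d)
//   {
//       float pd = d * CUDART_PI_F;        // pi * d (0f40490FDB above)
//       if (pd == 0.0f)
//           return 1.0f;                   // limit of sinc at 0
//       // sin(pi*d) * sin(pi*d/2) / ((pi*d)^2 / 2); __sinf maps to sin.approx
//       return __sinf(pd) * __sinf(pd * 0.5f) / (pd * pd * 0.5f);
//   }
//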
// .globl Subsample_Lanczos_yuv444p16le_yuv420p_uv
.visible .entry Subsample_Lanczos_yuv444p16le_yuv420p_uv(
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<44>;
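// chroma variant: produces one U and one V output byte per thread. The
// source U and V planes are read from separate textures (param_1 and
// param_2) and written to separate destination planes (param_5, param_6).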
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB251_34;
bra.uni $L__BB251_1;
$L__BB251_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB251_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB251_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB251_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB251_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB251_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB251_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB251_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB251_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB251_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB251_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB251_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB251_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB251_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB251_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB251_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB251_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f208;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd24, %r2, %r5;
cvt.s64.s32 %rd25, %r1;
add.s64 %rd3, %rd24, %rd25;
add.s64 %rd26, %rd2, %rd3;
st.global.u8 [%rd26], %rs2;
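// U sample written; the same eight tap weights are recomputed below and the
// 4x4 filtering is repeated against the second source texture (param_2)
// to produce the V sample, stored at the same offset in the param_6 plane.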
mov.f32 %f331, %f338;
@%p4 bra $L__BB251_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB251_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB251_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB251_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB251_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB251_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB251_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB251_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB251_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB251_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB251_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB251_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB251_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB251_31:
ld.param.u64 %rd27, [Subsample_Lanczos_yuv444p16le_yuv420p_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB251_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB251_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd27, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd27, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd27, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd27, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd27, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd27, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd27, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd27, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd27, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd27, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd27, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd27, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd27, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd27, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd27, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd27, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f322;
shr.u16 %rs4, %rs3, 8;
add.s64 %rd43, %rd1, %rd3;
st.global.u8 [%rd43], %rs4;
$L__BB251_34:
ret;
}
// .globl Subsample_Lanczos_yuv420p_nv12
.visible .entry Subsample_Lanczos_yuv420p_nv12(
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_0,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_1,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_2,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_3,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_4,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_5,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_6,
.param .u64 Subsample_Lanczos_yuv420p_nv12_param_7,
.param .u32 Subsample_Lanczos_yuv420p_nv12_param_8,
.param .u32 Subsample_Lanczos_yuv420p_nv12_param_9,
.param .u32 Subsample_Lanczos_yuv420p_nv12_param_10,
.param .u32 Subsample_Lanczos_yuv420p_nv12_param_11,
.param .u32 Subsample_Lanczos_yuv420p_nv12_param_12,
.param .f32 Subsample_Lanczos_yuv420p_nv12_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
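// luma kernel for the 8-bit yuv420p -> nv12 path; same filtering structure
// as the kernel above, differing only in the final scaling (255.0 instead
// of 65535.0 followed by >>8).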
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB252_18;
bra.uni $L__BB252_1;
$L__BB252_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_nv12_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB252_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB252_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB252_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB252_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB252_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB252_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB252_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB252_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB252_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB252_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB252_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB252_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv420p_nv12_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB252_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB252_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_nv12_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB252_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB252_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
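// 8-bit output: scale by 255.0 (0f437F0000) and truncate; no high-byte
// shift is needed for an 8-bit destination.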
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB252_18:
ret;
}
// .globl Subsample_Lanczos_yuv420p_nv12_uv
.visible .entry Subsample_Lanczos_yuv420p_nv12_uv(
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_0,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_1,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_2,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_3,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_4,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_5,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_6,
.param .u64 Subsample_Lanczos_yuv420p_nv12_uv_param_7,
.param .u32 Subsample_Lanczos_yuv420p_nv12_uv_param_8,
.param .u32 Subsample_Lanczos_yuv420p_nv12_uv_param_9,
.param .u32 Subsample_Lanczos_yuv420p_nv12_uv_param_10,
.param .u32 Subsample_Lanczos_yuv420p_nv12_uv_param_11,
.param .u32 Subsample_Lanczos_yuv420p_nv12_uv_param_12,
.param .f32 Subsample_Lanczos_yuv420p_nv12_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
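// chroma kernel for yuv420p -> nv12: U (param_1) and V (param_2) are read
// from separate planar textures, filtered with the same (recomputed) tap
// weights, and stored as one interleaved {U, V} byte pair in the nv12
// destination (param_5).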
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB253_34;
bra.uni $L__BB253_1;
$L__BB253_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_nv12_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB253_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB253_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB253_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB253_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB253_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB253_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB253_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB253_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB253_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB253_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB253_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB253_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB253_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB253_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv420p_nv12_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB253_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB253_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB253_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB253_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB253_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB253_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB253_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB253_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB253_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB253_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB253_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB253_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB253_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB253_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_nv12_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB253_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB253_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_nv12_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv420p_nv12_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
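// reinterpret the 16 buffered U-plane fetch results (tex writes b32
// registers) as floats before the weighted accumulation below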
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB253_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB253_33:
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f319;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f370;
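// interleaved store: byte offset = ((param_10 >> 1) * y + x) * 2 into the
// destination chroma plane, writing the {U, V} pair with a single v2.u8 store.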
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 1;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 1;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u8 [%rd44], {%rs1, %rs2};
$L__BB253_34:
ret;
}
// .globl Subsample_Lanczos_nv12_nv12
.visible .entry Subsample_Lanczos_nv12_nv12(
.param .u64 Subsample_Lanczos_nv12_nv12_param_0,
.param .u64 Subsample_Lanczos_nv12_nv12_param_1,
.param .u64 Subsample_Lanczos_nv12_nv12_param_2,
.param .u64 Subsample_Lanczos_nv12_nv12_param_3,
.param .u64 Subsample_Lanczos_nv12_nv12_param_4,
.param .u64 Subsample_Lanczos_nv12_nv12_param_5,
.param .u64 Subsample_Lanczos_nv12_nv12_param_6,
.param .u64 Subsample_Lanczos_nv12_nv12_param_7,
.param .u32 Subsample_Lanczos_nv12_nv12_param_8,
.param .u32 Subsample_Lanczos_nv12_nv12_param_9,
.param .u32 Subsample_Lanczos_nv12_nv12_param_10,
.param .u32 Subsample_Lanczos_nv12_nv12_param_11,
.param .u32 Subsample_Lanczos_nv12_nv12_param_12,
.param .f32 Subsample_Lanczos_nv12_nv12_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
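// Y-plane kernel for nv12 -> nv12; the body matches the yuv420p -> nv12
// luma kernel above apart from label and parameter naming.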
ld.param.u32 %r4, [Subsample_Lanczos_nv12_nv12_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB254_18;
bra.uni $L__BB254_1;
$L__BB254_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_nv12_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_nv12_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB254_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB254_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB254_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB254_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB254_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB254_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB254_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB254_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB254_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB254_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB254_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB254_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_nv12_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB254_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB254_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_nv12_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB254_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB254_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB254_18:
ret;
}
// .globl Subsample_Lanczos_nv12_nv12_uv
.visible .entry Subsample_Lanczos_nv12_nv12_uv(
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_0,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_1,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_2,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_3,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_4,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_5,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_6,
.param .u64 Subsample_Lanczos_nv12_nv12_uv_param_7,
.param .u32 Subsample_Lanczos_nv12_nv12_uv_param_8,
.param .u32 Subsample_Lanczos_nv12_nv12_uv_param_9,
.param .u32 Subsample_Lanczos_nv12_nv12_uv_param_10,
.param .u32 Subsample_Lanczos_nv12_nv12_uv_param_11,
.param .u32 Subsample_Lanczos_nv12_nv12_uv_param_12,
.param .f32 Subsample_Lanczos_nv12_nv12_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
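// chroma kernel for nv12 -> nv12: the source chroma texture is interleaved,
// so each tex.2d.v4 fetch apparently yields U and V in its first two
// components; both channels are filtered with the same weights and written
// back as an interleaved {U, V} pair via st.global.v2.u8.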
ld.param.u32 %r4, [Subsample_Lanczos_nv12_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB255_18;
bra.uni $L__BB255_1;
$L__BB255_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_nv12_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB255_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB255_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB255_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB255_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB255_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB255_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB255_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB255_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB255_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB255_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB255_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB255_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_nv12_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB255_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB255_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_nv12_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB255_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB255_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f437F0000;
mul.f32 %f222, %f220, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u8 [%rd27], {%rs1, %rs2};
$L__BB255_18:
ret;
}
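//
// Editor's note: the Subsample_Lanczos_* kernels above and below all share the
// same tap-weight computation. Each guarded block ($L__BB*_3 .. $L__BB*_17)
// evaluates a Lanczos a=2 weight w(x) = sinc(x) * sinc(x/2) at one of four
// integer offsets around the fractional sample position: f = pi*x
// (0f40490FDB), numerator sin(f) * sin(f/2) via sin.approx.f32, denominator
// f*f*0.5, and w = 1 when f == 0 (the branch that skips the division).
// A minimal CUDA sketch of that weight function, assuming the fast
// single-precision intrinsic that lowers to sin.approx.f32; the identifier
// lanczos2_weight is illustrative, not recovered from the original source:
//
//     __device__ float lanczos2_weight(float x)
//     {
//         float f = x * 3.14159265f;            // pi * x (0f40490FDB)
//         if (f == 0.0f)
//             return 1.0f;                      // sinc limit at x = 0
//         // sinc(x) * sinc(x/2) = sin(pi x) * sin(pi x/2) / (pi^2 x^2 / 2)
//         return __sinf(f) * __sinf(f * 0.5f) / (f * f * 0.5f);
//     }
//
// The four horizontal and four vertical weights are then normalized by their
// respective sums (the div.rn.f32 runs after each $L__BB*_17) before the
// 4x4 block of texture taps is combined.
//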
// .globl Subsample_Lanczos_yuv444p_nv12
.visible .entry Subsample_Lanczos_yuv444p_nv12(
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_0,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_1,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_2,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_3,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_4,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_5,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_6,
.param .u64 Subsample_Lanczos_yuv444p_nv12_param_7,
.param .u32 Subsample_Lanczos_yuv444p_nv12_param_8,
.param .u32 Subsample_Lanczos_yuv444p_nv12_param_9,
.param .u32 Subsample_Lanczos_yuv444p_nv12_param_10,
.param .u32 Subsample_Lanczos_yuv444p_nv12_param_11,
.param .u32 Subsample_Lanczos_yuv444p_nv12_param_12,
.param .f32 Subsample_Lanczos_yuv444p_nv12_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_nv12_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB256_18;
bra.uni $L__BB256_1;
$L__BB256_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_nv12_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_nv12_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB256_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB256_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB256_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB256_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB256_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB256_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB256_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB256_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB256_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB256_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB256_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB256_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p_nv12_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB256_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB256_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_nv12_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB256_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB256_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB256_18:
ret;
}
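//
// Editor's note: the single-channel kernels such as the one above apply the
// normalized weights as a separable 4x4 filter: four tex.2d fetches per row
// are reduced with mul/fma against the horizontal weights, the four row sums
// are reduced again against the vertical weights, and the result is scaled by
// 255.0 (0f437F0000), truncated, and stored as one byte at
// dst + y*dst_pitch + x (mul.wide.s32 %rd20, %r2, %r5). A sketch of the same
// reduction under the assumption of a single-channel float texture object;
// all identifiers are illustrative:
//
//     float sum = 0.0f;
//     for (int j = 0; j < 4; j++) {
//         float row = 0.0f;
//         for (int i = 0; i < 4; i++)        // taps at -1, 0, +1, +2
//             row += wx[i] * tex2D<float>(src, px + i - 1, py + j - 1);
//         sum += wy[j] * row;
//     }
//     dst[y * dst_pitch + x] = (unsigned char)(sum * 255.0f);
//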
// .globl Subsample_Lanczos_yuv444p_nv12_uv
.visible .entry Subsample_Lanczos_yuv444p_nv12_uv(
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p_nv12_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p_nv12_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p_nv12_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p_nv12_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p_nv12_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p_nv12_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p_nv12_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB257_34;
bra.uni $L__BB257_1;
$L__BB257_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_nv12_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB257_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB257_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB257_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB257_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB257_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB257_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB257_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB257_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB257_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB257_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB257_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB257_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB257_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB257_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv444p_nv12_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB257_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB257_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB257_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB257_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB257_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB257_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB257_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB257_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB257_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB257_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB257_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB257_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB257_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB257_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_nv12_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB257_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB257_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_nv12_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv444p_nv12_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB257_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB257_33:
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f319;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f370;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 1;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 1;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u8 [%rd44], {%rs1, %rs2};
$L__BB257_34:
ret;
}
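//
// Editor's note: the *_uv variants run the whole weight-and-filter pipeline
// once per chroma channel, which is why their register budgets roughly double
// (.reg .f32 %f<387> here against %f<194> in the luma kernel). A yuv444p
// source keeps U and V in separate textures (param_1 and param_2), so each
// channel is filtered from its own 4x4 footprint; the two result bytes are
// then written with one interleaved store. The address arithmetic ahead of
// st.global.v2.u8 forms an element index of (dst_pitch/2)*y + x over 2-byte
// UV pairs. A sketch, assuming dst_pitch is the chroma plane's byte pitch and
// uchar2 maps onto the v2.u8 store; names are illustrative:
//
//     uchar2 *uv = (uchar2 *)dst_uv;
//     uv[(dst_pitch / 2) * y + x] =
//         make_uchar2((unsigned char)(u * 255.0f),
//                     (unsigned char)(v * 255.0f));
//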
// .globl Subsample_Lanczos_p010le_nv12
.visible .entry Subsample_Lanczos_p010le_nv12(
.param .u64 Subsample_Lanczos_p010le_nv12_param_0,
.param .u64 Subsample_Lanczos_p010le_nv12_param_1,
.param .u64 Subsample_Lanczos_p010le_nv12_param_2,
.param .u64 Subsample_Lanczos_p010le_nv12_param_3,
.param .u64 Subsample_Lanczos_p010le_nv12_param_4,
.param .u64 Subsample_Lanczos_p010le_nv12_param_5,
.param .u64 Subsample_Lanczos_p010le_nv12_param_6,
.param .u64 Subsample_Lanczos_p010le_nv12_param_7,
.param .u32 Subsample_Lanczos_p010le_nv12_param_8,
.param .u32 Subsample_Lanczos_p010le_nv12_param_9,
.param .u32 Subsample_Lanczos_p010le_nv12_param_10,
.param .u32 Subsample_Lanczos_p010le_nv12_param_11,
.param .u32 Subsample_Lanczos_p010le_nv12_param_12,
.param .f32 Subsample_Lanczos_p010le_nv12_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB258_18;
bra.uni $L__BB258_1;
$L__BB258_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_nv12_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB258_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB258_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB258_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB258_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB258_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB258_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB258_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB258_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB258_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB258_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB258_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB258_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_nv12_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB258_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB258_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_nv12_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB258_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB258_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB258_18:
ret;
}
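//
// Editor's note: the p010le/p016le source kernels differ from the 8-bit ones
// only at the output stage: the normalized result is scaled by 65535.0
// (0f477FFF00) instead of 255.0, truncated to u16, then shifted right by 8
// (shr.u16) so only the high byte reaches the 8-bit nv12 destination. A
// one-line sketch of that conversion; variable names are illustrative:
//
//     dst[y * dst_pitch + x] =
//         (unsigned char)((unsigned short)(sum * 65535.0f) >> 8);
//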
// .globl Subsample_Lanczos_p010le_nv12_uv
.visible .entry Subsample_Lanczos_p010le_nv12_uv(
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_0,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_1,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_2,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_3,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_4,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_5,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_6,
.param .u64 Subsample_Lanczos_p010le_nv12_uv_param_7,
.param .u32 Subsample_Lanczos_p010le_nv12_uv_param_8,
.param .u32 Subsample_Lanczos_p010le_nv12_uv_param_9,
.param .u32 Subsample_Lanczos_p010le_nv12_uv_param_10,
.param .u32 Subsample_Lanczos_p010le_nv12_uv_param_11,
.param .u32 Subsample_Lanczos_p010le_nv12_uv_param_12,
.param .f32 Subsample_Lanczos_p010le_nv12_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB259_18;
bra.uni $L__BB259_1;
$L__BB259_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_nv12_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB259_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB259_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB259_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB259_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB259_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB259_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB259_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB259_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB259_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB259_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB259_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB259_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_nv12_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB259_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB259_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_nv12_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB259_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB259_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 8;
shr.u16 %rs4, %rs2, 8;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u8 [%rd27], {%rs3, %rs4};
$L__BB259_18:
ret;
}
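//
// Editor's note: when the chroma source is itself interleaved (the nv12,
// p010le, and p016le *_uv kernels), each tex.2d.v4.f32.f32 fetch returns U in
// the first component and V in the second (%r17/%r18 and so on), and the
// kernel carries two accumulators in lock-step, one mul/fma pair per fetch.
// A sketch of one such step, assuming a two-channel float texture object;
// names are illustrative:
//
//     float2 s = tex2D<float2>(src_uv, sx, sy);  // one fetch, both channels
//     acc_u = fmaf(w, s.x, acc_u);
//     acc_v = fmaf(w, s.y, acc_v);
//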
// .globl Subsample_Lanczos_p016le_nv12
.visible .entry Subsample_Lanczos_p016le_nv12(
.param .u64 Subsample_Lanczos_p016le_nv12_param_0,
.param .u64 Subsample_Lanczos_p016le_nv12_param_1,
.param .u64 Subsample_Lanczos_p016le_nv12_param_2,
.param .u64 Subsample_Lanczos_p016le_nv12_param_3,
.param .u64 Subsample_Lanczos_p016le_nv12_param_4,
.param .u64 Subsample_Lanczos_p016le_nv12_param_5,
.param .u64 Subsample_Lanczos_p016le_nv12_param_6,
.param .u64 Subsample_Lanczos_p016le_nv12_param_7,
.param .u32 Subsample_Lanczos_p016le_nv12_param_8,
.param .u32 Subsample_Lanczos_p016le_nv12_param_9,
.param .u32 Subsample_Lanczos_p016le_nv12_param_10,
.param .u32 Subsample_Lanczos_p016le_nv12_param_11,
.param .u32 Subsample_Lanczos_p016le_nv12_param_12,
.param .f32 Subsample_Lanczos_p016le_nv12_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB260_18;
bra.uni $L__BB260_1;
$L__BB260_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_nv12_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB260_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB260_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB260_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB260_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB260_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB260_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB260_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB260_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB260_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB260_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB260_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB260_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_nv12_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB260_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB260_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_nv12_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB260_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB260_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB260_18:
ret;
}
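//
// Editor's note: the p016le kernel bodies generated here are identical to the
// p010le ones (same 0f477FFF00 scale and shr.u16 by 8); apparently the
// texture unit normalizes both 16-bit little-endian layouts to [0,1], so only
// the entry names differ.
//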
// .globl Subsample_Lanczos_p016le_nv12_uv
.visible .entry Subsample_Lanczos_p016le_nv12_uv(
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_0,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_1,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_2,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_3,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_4,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_5,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_6,
.param .u64 Subsample_Lanczos_p016le_nv12_uv_param_7,
.param .u32 Subsample_Lanczos_p016le_nv12_uv_param_8,
.param .u32 Subsample_Lanczos_p016le_nv12_uv_param_9,
.param .u32 Subsample_Lanczos_p016le_nv12_uv_param_10,
.param .u32 Subsample_Lanczos_p016le_nv12_uv_param_11,
.param .u32 Subsample_Lanczos_p016le_nv12_uv_param_12,
.param .f32 Subsample_Lanczos_p016le_nv12_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB261_18;
bra.uni $L__BB261_1;
$L__BB261_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_nv12_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB261_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB261_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB261_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB261_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB261_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB261_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB261_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB261_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB261_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB261_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB261_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB261_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_nv12_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB261_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB261_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_nv12_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_nv12_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB261_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB261_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
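// Horizontal (%f138-%f141) and vertical (%f145-%f148) weights are
// normalized so each set of four sums to 1. What follows is a 4x4 gather
// of texture taps centered on (floor(x)-1 .. +2, floor(y)-1 .. +2); each
// fetch returns two chroma channels (.x and .y, presumably U and V), and
// each row of taps is reduced with fused multiply-adds.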
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
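// Scale the filtered values to 16-bit range (0f477FFF00 = 65535.0),
// truncate, and keep the high byte of each, giving 8-bit U and V; the
// pair is stored interleaved at byte offset ((%r5/2)*y + x)*2, with %r5
// presumably the destination pitch.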
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 8;
shr.u16 %rs4, %rs2, 8;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u8 [%rd27], {%rs3, %rs4};
$L__BB261_18:
ret;
}
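// Luma variant: the same weight pipeline, but a single channel (.x) is
// fetched per tap, reduced once, scaled by 65535.0 and narrowed to the
// high byte for an 8-bit destination plane.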
// .globl Subsample_Lanczos_yuv444p16le_nv12
.visible .entry Subsample_Lanczos_yuv444p16le_nv12(
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_nv12_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_nv12_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_nv12_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB262_18;
bra.uni $L__BB262_1;
$L__BB262_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_nv12_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_nv12_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB262_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB262_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB262_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB262_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB262_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB262_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB262_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB262_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB262_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB262_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB262_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB262_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p16le_nv12_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB262_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB262_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_nv12_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_nv12_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB262_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB262_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB262_18:
ret;
}
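// Chroma variant reading U and V from two separate planar 16-bit textures
// (param_1 and param_2), each filtered to 8 bits and stored as an
// interleaved UV pair. Note the weight evaluation is emitted twice
// ($L__BB263_1-17 and _17-33): the compiler apparently did not share the
// sinc results across the two planes' passes.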
// .globl Subsample_Lanczos_yuv444p16le_nv12_uv
.visible .entry Subsample_Lanczos_yuv444p16le_nv12_uv(
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_nv12_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_nv12_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_nv12_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB263_34;
bra.uni $L__BB263_1;
$L__BB263_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB263_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB263_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB263_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB263_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB263_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB263_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB263_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB263_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB263_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB263_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB263_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB263_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB263_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB263_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB263_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB263_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
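// All 16 taps for the first (U) plane were prefetched above; the guarded
// sin.approx blocks below recompute the same Lanczos weights
// (%f379-%f386) for the second (V) plane's pass.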
mov.f32 %f379, %f386;
@%p4 bra $L__BB263_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB263_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB263_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB263_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB263_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB263_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB263_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB263_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB263_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB263_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB263_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB263_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB263_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB263_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv444p16le_nv12_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB263_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB263_33:
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f319;
shr.u16 %rs2, %rs1, 8;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f370;
shr.u16 %rs4, %rs3, 8;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 1;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 1;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u8 [%rd44], {%rs2, %rs4};
$L__BB263_34:
ret;
}
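// 8-bit-to-8-bit luma variant: identical filtering, but the result is
// scaled by 255.0 (0f437F0000) and stored directly, with no
// 16-to-8-bit shift.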
// .globl Subsample_Lanczos_yuv420p_yuv444p
.visible .entry Subsample_Lanczos_yuv420p_yuv444p(
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_0,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_1,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_2,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_3,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_4,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_5,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_6,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_param_7,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_param_8,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_param_9,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_param_10,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_param_11,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_param_12,
.param .f32 Subsample_Lanczos_yuv420p_yuv444p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB264_18;
bra.uni $L__BB264_1;
$L__BB264_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_yuv444p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB264_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB264_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB264_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB264_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB264_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB264_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB264_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB264_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB264_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB264_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB264_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB264_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv420p_yuv444p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB264_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB264_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_yuv444p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB264_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB264_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB264_18:
ret;
}
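// Chroma variant with planar input and planar output: U and V are sampled
// from two textures (param_1, param_2) and written as single bytes to two
// destination planes (param_5, param_6), again via duplicated weight
// passes.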
// .globl Subsample_Lanczos_yuv420p_yuv444p_uv
.visible .entry Subsample_Lanczos_yuv420p_yuv444p_uv(
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_0,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_1,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_2,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_3,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_4,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_5,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_6,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p_uv_param_7,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_uv_param_8,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_uv_param_9,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_uv_param_10,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_uv_param_11,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p_uv_param_12,
.param .f32 Subsample_Lanczos_yuv420p_yuv444p_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB265_34;
bra.uni $L__BB265_1;
$L__BB265_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB265_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB265_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB265_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB265_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB265_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB265_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB265_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB265_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB265_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB265_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB265_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB265_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB265_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB265_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB265_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB265_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f208;
mul.wide.s32 %rd24, %r2, %r5;
cvt.s64.s32 %rd25, %r1;
add.s64 %rd3, %rd24, %rd25;
add.s64 %rd26, %rd2, %rd3;
st.global.u8 [%rd26], %rs1;
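// First chroma plane (U) written; the weights are now re-evaluated for
// the second (V) plane, mirroring the duplication in the other _uv
// kernels.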
mov.f32 %f331, %f338;
@%p4 bra $L__BB265_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB265_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB265_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB265_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB265_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB265_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB265_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB265_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB265_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB265_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB265_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB265_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB265_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB265_31:
ld.param.u64 %rd27, [Subsample_Lanczos_yuv420p_yuv444p_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB265_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB265_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd27, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd27, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd27, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd27, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd27, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd27, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd27, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd27, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd27, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd27, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd27, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd27, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd27, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd27, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd27, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd27, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f322;
add.s64 %rd43, %rd1, %rd3;
st.global.u8 [%rd43], %rs2;
$L__BB265_34:
ret;
}
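// Luma (Y) variant for an interleaved-chroma (nv12) source: the same
// single-channel filtering pipeline; the single %rs register suggests a
// direct 8-bit store with no high-byte shift.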
// .globl Subsample_Lanczos_nv12_yuv444p
.visible .entry Subsample_Lanczos_nv12_yuv444p(
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_0,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_1,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_2,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_3,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_4,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_5,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_6,
.param .u64 Subsample_Lanczos_nv12_yuv444p_param_7,
.param .u32 Subsample_Lanczos_nv12_yuv444p_param_8,
.param .u32 Subsample_Lanczos_nv12_yuv444p_param_9,
.param .u32 Subsample_Lanczos_nv12_yuv444p_param_10,
.param .u32 Subsample_Lanczos_nv12_yuv444p_param_11,
.param .u32 Subsample_Lanczos_nv12_yuv444p_param_12,
.param .f32 Subsample_Lanczos_nv12_yuv444p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB266_18;
bra.uni $L__BB266_1;
$L__BB266_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_yuv444p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB266_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB266_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB266_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB266_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB266_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB266_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB266_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB266_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB266_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB266_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB266_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB266_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_yuv444p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB266_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB266_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_yuv444p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB266_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB266_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
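// NOTE (annotation): the four horizontal weights (%f186-%f189) and the
// four vertical weights (%f190-%f193) are each divided by their sum, so
// the taps along each axis sum to 1 and flat input is preserved.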
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
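// NOTE (annotation): the 16 fetches above and below cover the 4x4 tap
// neighborhood (x in floor-1..floor+2, y in yfloor-1..yfloor+2). Each row
// of four texels is reduced with the normalized horizontal weights
// (%f138-%f141), and the four row sums are then combined with the
// vertical weights (%f145-%f148).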
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
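// NOTE (annotation): the filtered value in [0,1] is scaled by 255.0
// (0f437F0000), truncated to an integer, and the byte is stored at
// dst + dst_y*dst_pitch + dst_x.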
$L__BB266_18:
ret;
}
// .globl Subsample_Lanczos_nv12_yuv444p_uv
.visible .entry Subsample_Lanczos_nv12_yuv444p_uv(
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_0,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_1,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_2,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_3,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_4,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_5,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_6,
.param .u64 Subsample_Lanczos_nv12_yuv444p_uv_param_7,
.param .u32 Subsample_Lanczos_nv12_yuv444p_uv_param_8,
.param .u32 Subsample_Lanczos_nv12_yuv444p_uv_param_9,
.param .u32 Subsample_Lanczos_nv12_yuv444p_uv_param_10,
.param .u32 Subsample_Lanczos_nv12_yuv444p_uv_param_11,
.param .u32 Subsample_Lanczos_nv12_yuv444p_uv_param_12,
.param .f32 Subsample_Lanczos_nv12_yuv444p_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<27>;
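// NOTE (annotation): UV variant for interleaved nv12 chroma. Each texture
// fetch returns a U/V pair in its first two components, so the weighted
// sums below are carried twice in parallel, and the two result bytes are
// stored to the separate U and V destination planes (params 5 and 6).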
ld.param.u32 %r4, [Subsample_Lanczos_nv12_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB267_18;
bra.uni $L__BB267_1;
$L__BB267_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_yuv444p_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB267_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB267_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB267_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB267_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB267_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB267_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB267_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB267_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB267_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB267_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB267_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB267_13:
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_yuv444p_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_nv12_yuv444p_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB267_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB267_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_nv12_yuv444p_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB267_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB267_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f437F0000;
mul.f32 %f222, %f220, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs1;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs2;
$L__BB267_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_yuv444p
.visible .entry Subsample_Lanczos_yuv444p_yuv444p(
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_0,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_1,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_2,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_3,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_4,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_5,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_6,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_param_7,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_param_8,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_param_9,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_param_10,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_param_11,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_param_12,
.param .f32 Subsample_Lanczos_yuv444p_yuv444p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
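// NOTE (annotation): for planar yuv444p the luma body is the same
// Lanczos a=2 instruction sequence as the nv12 luma kernel above; only
// the parameter symbols differ.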
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB268_18;
bra.uni $L__BB268_1;
$L__BB268_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_yuv444p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB268_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB268_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB268_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB268_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB268_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB268_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB268_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB268_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB268_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB268_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB268_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB268_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p_yuv444p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB268_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB268_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_yuv444p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB268_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB268_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs1;
$L__BB268_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_yuv444p_uv
.visible .entry Subsample_Lanczos_yuv444p_yuv444p_uv(
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p_yuv444p_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<44>;
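// NOTE (annotation): planar-chroma UV variant. U and V come from separate
// textures (params 1 and 2), so the body runs the 4x4 Lanczos gather
// twice: the first pass filters U from the param_1 texture into the
// param_5 plane, then the tap weights are re-evaluated (blocks
// $L__BB269_19 through _31) and a second pass filters V from param_2 into
// param_6.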
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB269_34;
bra.uni $L__BB269_1;
$L__BB269_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB269_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB269_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB269_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB269_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB269_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB269_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB269_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB269_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB269_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB269_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB269_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB269_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB269_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB269_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB269_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB269_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f208;
mul.wide.s32 %rd24, %r2, %r5;
cvt.s64.s32 %rd25, %r1;
add.s64 %rd3, %rd24, %rd25;
add.s64 %rd26, %rd2, %rd3;
st.global.u8 [%rd26], %rs1;
mov.f32 %f331, %f338;
@%p4 bra $L__BB269_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB269_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB269_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB269_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB269_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB269_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB269_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB269_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB269_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB269_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB269_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB269_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB269_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB269_31:
ld.param.u64 %rd27, [Subsample_Lanczos_yuv444p_yuv444p_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB269_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB269_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd27, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd27, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd27, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd27, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd27, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd27, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd27, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd27, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd27, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd27, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd27, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd27, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd27, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd27, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd27, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd27, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f437F0000;
cvt.rzi.u16.f32 %rs2, %f322;
add.s64 %rd43, %rd1, %rd3;
st.global.u8 [%rd43], %rs2;
$L__BB269_34:
ret;
}
// .globl Subsample_Lanczos_p010le_yuv444p
.visible .entry Subsample_Lanczos_p010le_yuv444p(
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_0,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_1,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_2,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_3,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_4,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_5,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_6,
.param .u64 Subsample_Lanczos_p010le_yuv444p_param_7,
.param .u32 Subsample_Lanczos_p010le_yuv444p_param_8,
.param .u32 Subsample_Lanczos_p010le_yuv444p_param_9,
.param .u32 Subsample_Lanczos_p010le_yuv444p_param_10,
.param .u32 Subsample_Lanczos_p010le_yuv444p_param_11,
.param .u32 Subsample_Lanczos_p010le_yuv444p_param_12,
.param .f32 Subsample_Lanczos_p010le_yuv444p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
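// NOTE (annotation): p010le source. The body differs from the 8-bit
// kernels only in the output conversion: the result is scaled by 65535.0
// (0f477FFF00), truncated to u16, and shifted right by 8 so the stored
// byte keeps the top 8 bits of the 16-bit sample.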
ld.param.u32 %r4, [Subsample_Lanczos_p010le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB270_18;
bra.uni $L__BB270_1;
$L__BB270_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_yuv444p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB270_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB270_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB270_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB270_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB270_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB270_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB270_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB270_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB270_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB270_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB270_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB270_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_yuv444p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB270_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB270_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_yuv444p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB270_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB270_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB270_18:
ret;
}
// .globl Subsample_Lanczos_p010le_yuv444p_uv
.visible .entry Subsample_Lanczos_p010le_yuv444p_uv(
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_0,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_1,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_2,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_3,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_4,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_5,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_6,
.param .u64 Subsample_Lanczos_p010le_yuv444p_uv_param_7,
.param .u32 Subsample_Lanczos_p010le_yuv444p_uv_param_8,
.param .u32 Subsample_Lanczos_p010le_yuv444p_uv_param_9,
.param .u32 Subsample_Lanczos_p010le_yuv444p_uv_param_10,
.param .u32 Subsample_Lanczos_p010le_yuv444p_uv_param_11,
.param .u32 Subsample_Lanczos_p010le_yuv444p_uv_param_12,
.param .f32 Subsample_Lanczos_p010le_yuv444p_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<27>;
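// NOTE (annotation): p010le UV variant; interleaved chroma is handled as
// in the nv12 UV kernel, with the 16-bit scale-and-shift conversion of
// the p010le luma kernel above applied to both channels before the byte
// stores.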
ld.param.u32 %r4, [Subsample_Lanczos_p010le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB271_18;
bra.uni $L__BB271_1;
$L__BB271_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_yuv444p_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB271_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB271_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB271_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB271_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB271_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB271_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB271_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB271_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB271_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB271_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB271_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB271_13:
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_yuv444p_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_p010le_yuv444p_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB271_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB271_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_p010le_yuv444p_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB271_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB271_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB271_18:
ret;
}
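// Subsample_Lanczos_p016le_yuv444p: Lanczos (a=2) rescale of the first
// (luma) plane, reading a p016le source texture as normalized floats and
// writing 8-bit yuv444p output. One thread computes one destination pixel;
// the structure below mirrors the other Lanczos kernels in this listing.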
// .globl Subsample_Lanczos_p016le_yuv444p
.visible .entry Subsample_Lanczos_p016le_yuv444p(
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_0,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_1,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_2,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_3,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_4,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_5,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_6,
.param .u64 Subsample_Lanczos_p016le_yuv444p_param_7,
.param .u32 Subsample_Lanczos_p016le_yuv444p_param_8,
.param .u32 Subsample_Lanczos_p016le_yuv444p_param_9,
.param .u32 Subsample_Lanczos_p016le_yuv444p_param_10,
.param .u32 Subsample_Lanczos_p016le_yuv444p_param_11,
.param .u32 Subsample_Lanczos_p016le_yuv444p_param_12,
.param .f32 Subsample_Lanczos_p016le_yuv444p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB272_18;
bra.uni $L__BB272_1;
$L__BB272_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_yuv444p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
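// Map the destination pixel to source coordinates: src = (dst + 0.5) * scale
// - 0.5, where scale = src_dim / dst_dim (0f3F000000 = 0.5f, 0fBF000000 =
// -0.5f). cvt.rmi takes floor(src_x); the fractional part drives the filter
// weights, with 0f40490FDB = pi as f32.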
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB272_3;
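// Lanczos a=2 tap weight: for t = pi*d the weight is
// sin(t) * sin(t/2) / (t*t/2), i.e. sinc(d) * sinc(d/2), computed with the
// hardware approximate sine. The branch above substitutes 1.0 when t == 0,
// where the expression is 0/0. The same pattern repeats for the other seven
// tap distances (four horizontal, four vertical).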
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB272_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB272_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB272_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB272_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB272_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB272_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB272_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB272_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB272_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB272_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB272_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_yuv444p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB272_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB272_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_yuv444p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB272_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB272_17:
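// Normalize the weights: %f186-%f189 (horizontal) and %f190-%f193 (vertical)
// are each divided by their sum so every 4-tap pass sums to one.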
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
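// Gather the 4x4 source neighborhood at x in {floor-1 .. floor+2} and
// y in {floor-1 .. floor+2} (0f3F800000 = 1.0f, 0f40000000 = 2.0f), then
// apply the separable filter: an FMA chain per row, followed by one
// vertical FMA chain over the four row results.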
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
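// Same 65535-scale / high-byte store as in the kernel above: the output
// byte goes to param_4's plane at offset y * pitch (param_10) + x.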
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB272_18:
ret;
}
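// Subsample_Lanczos_p016le_yuv444p_uv: the chroma counterpart. Each texture
// fetch returns the interleaved U/V pair (components .x and .y), so the
// mul/fma chains are duplicated to filter both channels at once; the two
// results are stored to the separate U (param_5) and V (param_6) planes.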
// .globl Subsample_Lanczos_p016le_yuv444p_uv
.visible .entry Subsample_Lanczos_p016le_yuv444p_uv(
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_0,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_1,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_2,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_3,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_4,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_5,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_6,
.param .u64 Subsample_Lanczos_p016le_yuv444p_uv_param_7,
.param .u32 Subsample_Lanczos_p016le_yuv444p_uv_param_8,
.param .u32 Subsample_Lanczos_p016le_yuv444p_uv_param_9,
.param .u32 Subsample_Lanczos_p016le_yuv444p_uv_param_10,
.param .u32 Subsample_Lanczos_p016le_yuv444p_uv_param_11,
.param .u32 Subsample_Lanczos_p016le_yuv444p_uv_param_12,
.param .f32 Subsample_Lanczos_p016le_yuv444p_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<27>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB273_18;
bra.uni $L__BB273_1;
$L__BB273_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_yuv444p_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB273_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB273_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB273_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB273_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB273_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB273_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB273_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB273_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB273_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB273_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB273_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB273_13:
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_yuv444p_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_p016le_yuv444p_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB273_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB273_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_yuv444p_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_p016le_yuv444p_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB273_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB273_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 8;
mul.wide.s32 %rd22, %r2, %r5;
cvt.s64.s32 %rd23, %r1;
add.s64 %rd24, %rd22, %rd23;
add.s64 %rd25, %rd2, %rd24;
st.global.u8 [%rd25], %rs3;
shr.u16 %rs4, %rs2, 8;
add.s64 %rd26, %rd1, %rd24;
st.global.u8 [%rd26], %rs4;
$L__BB273_18:
ret;
}
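// Subsample_Lanczos_yuv444p16le_yuv444p: structurally identical filtering
// to the p016le luma kernel above; only the texture bound by the host side
// differs, and both are read back as normalized f32.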
// .globl Subsample_Lanczos_yuv444p16le_yuv444p
.visible .entry Subsample_Lanczos_yuv444p16le_yuv444p(
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_yuv444p_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<24>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_yuv444p_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_yuv444p_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB274_18;
bra.uni $L__BB274_1;
$L__BB274_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_yuv444p_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_yuv444p_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB274_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB274_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB274_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB274_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB274_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB274_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB274_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB274_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB274_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB274_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB274_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB274_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p16le_yuv444p_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB274_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB274_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_yuv444p_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_yuv444p_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB274_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB274_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd20, %r2, %r5;
cvt.s64.s32 %rd21, %r1;
add.s64 %rd22, %rd20, %rd21;
add.s64 %rd23, %rd1, %rd22;
st.global.u8 [%rd23], %rs2;
$L__BB274_18:
ret;
}
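// Subsample_Lanczos_yuv444p16le_yuv444p_uv: with a planar 16-bit source,
// U (the param_1 texture) and V (the param_2 texture) are filtered one
// after the other. The weight computation appears twice (blocks _1.._17 and
// _19.._33): the values seem to be rematerialized for the second plane
// rather than kept live across the first store.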
// .globl Subsample_Lanczos_yuv444p16le_yuv444p_uv
.visible .entry Subsample_Lanczos_yuv444p16le_yuv444p_uv(
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<44>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB275_34;
bra.uni $L__BB275_1;
$L__BB275_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB275_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB275_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB275_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB275_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB275_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB275_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB275_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB275_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB275_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB275_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB275_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB275_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB275_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB275_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB275_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB275_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f208;
shr.u16 %rs2, %rs1, 8;
mul.wide.s32 %rd24, %r2, %r5;
cvt.s64.s32 %rd25, %r1;
add.s64 %rd3, %rd24, %rd25;
add.s64 %rd26, %rd2, %rd3;
st.global.u8 [%rd26], %rs2;
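// First (U) plane stored; the code below recomputes the eight raw weights
// before filtering the V plane.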
mov.f32 %f331, %f338;
@%p4 bra $L__BB275_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB275_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB275_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB275_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB275_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB275_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB275_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB275_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB275_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB275_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB275_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB275_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB275_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB275_31:
ld.param.u64 %rd27, [Subsample_Lanczos_yuv444p16le_yuv444p_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB275_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB275_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd27, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd27, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd27, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd27, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd27, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd27, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd27, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd27, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd27, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd27, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd27, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd27, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd27, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd27, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd27, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd27, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f322;
shr.u16 %rs4, %rs3, 8;
add.s64 %rd43, %rd1, %rd3;
st.global.u8 [%rd43], %rs4;
$L__BB275_34:
ret;
}
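// Subsample_Lanczos_yuv420p_p010le: 8-bit planar source up to 10-bit P010.
// The filtering matches the kernels above; only the store sequence at the
// end differs, packing the result into the high bits of a 16-bit word.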
// .globl Subsample_Lanczos_yuv420p_p010le
.visible .entry Subsample_Lanczos_yuv420p_p010le(
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_0,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_1,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_2,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_3,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_4,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_5,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_6,
.param .u64 Subsample_Lanczos_yuv420p_p010le_param_7,
.param .u32 Subsample_Lanczos_yuv420p_p010le_param_8,
.param .u32 Subsample_Lanczos_yuv420p_p010le_param_9,
.param .u32 Subsample_Lanczos_yuv420p_p010le_param_10,
.param .u32 Subsample_Lanczos_yuv420p_p010le_param_11,
.param .u32 Subsample_Lanczos_yuv420p_p010le_param_12,
.param .f32 Subsample_Lanczos_yuv420p_p010le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB276_18;
bra.uni $L__BB276_1;
$L__BB276_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_p010le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB276_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB276_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB276_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB276_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB276_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB276_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB276_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB276_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB276_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB276_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB276_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB276_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv420p_p010le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB276_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB276_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_p010le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB276_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB276_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
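// P010 store: 0f437F0000 is 255.0f, so %rs1 is the truncated 8-bit value;
// multiplying by 257 replicates the byte into both halves of the u16
// (v*257 == v<<8 | v), and the and with -64 (0xFFC0) keeps the top 10 bits.
// The element address is y * (param_10 >> 1) + x, doubled to a byte offset.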
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB276_18:
ret;
}
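// Subsample_Lanczos_yuv420p_p010le_uv: filters the planar 8-bit U (param_1)
// and V (param_2) sources with the same twice-computed-weights pattern as
// the yuv444p16le chroma kernel.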
// .globl Subsample_Lanczos_yuv420p_p010le_uv
.visible .entry Subsample_Lanczos_yuv420p_p010le_uv(
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv420p_p010le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv420p_p010le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv420p_p010le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv420p_p010le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv420p_p010le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv420p_p010le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv420p_p010le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<7>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB277_34;
bra.uni $L__BB277_1;
$L__BB277_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_p010le_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB277_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB277_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB277_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB277_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB277_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB277_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB277_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB277_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB277_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB277_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB277_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB277_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB277_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB277_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv420p_p010le_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB277_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB277_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB277_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB277_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB277_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB277_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB277_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB277_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB277_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB277_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB277_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB277_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB277_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB277_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_p010le_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB277_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB277_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_p010le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv420p_p010le_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB277_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB277_33:
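// Normalize the four horizontal and four vertical tap weights so each axis
// sums to 1, then accumulate the separable 4x4 filter over the first
// chroma plane's samples.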
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
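// Convert to 8-bit range (x * 255), then widen 8 -> 16 bit (x * 257) and
// mask the low 6 bits: p010le keeps 10 significant bits in the MSBs.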
mul.f32 %f319, %f318, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f319;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f437F0000;
cvt.rzi.u16.f32 %rs4, %f370;
mul.lo.s16 %rs5, %rs4, 257;
and.b16 %rs6, %rs5, -64;
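// dst offset: (pitch / 4 bytes per UV pair) * y + x, scaled back to bytes;
// the U and V results are stored as one interleaved 32-bit pair.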
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs3, %rs6};
$L__BB277_34:
ret;
}
// .globl Subsample_Lanczos_nv12_p010le
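// Lanczos-2 luma rescale, 8-bit nv12 source -> 10-bit p010le destination.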
.visible .entry Subsample_Lanczos_nv12_p010le(
.param .u64 Subsample_Lanczos_nv12_p010le_param_0,
.param .u64 Subsample_Lanczos_nv12_p010le_param_1,
.param .u64 Subsample_Lanczos_nv12_p010le_param_2,
.param .u64 Subsample_Lanczos_nv12_p010le_param_3,
.param .u64 Subsample_Lanczos_nv12_p010le_param_4,
.param .u64 Subsample_Lanczos_nv12_p010le_param_5,
.param .u64 Subsample_Lanczos_nv12_p010le_param_6,
.param .u64 Subsample_Lanczos_nv12_p010le_param_7,
.param .u32 Subsample_Lanczos_nv12_p010le_param_8,
.param .u32 Subsample_Lanczos_nv12_p010le_param_9,
.param .u32 Subsample_Lanczos_nv12_p010le_param_10,
.param .u32 Subsample_Lanczos_nv12_p010le_param_11,
.param .u32 Subsample_Lanczos_nv12_p010le_param_12,
.param .f32 Subsample_Lanczos_nv12_p010le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_p010le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB278_18;
bra.uni $L__BB278_1;
$L__BB278_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_p010le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_p010le_param_11];
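// Scale factors: params 11/12 divided by params 8/9 (apparently src over
// dst dimensions); the output pixel centre then maps to
// src = (dst + 0.5) * scale - 0.5.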
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB278_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB278_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB278_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB278_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB278_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB278_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB278_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB278_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB278_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB278_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB278_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB278_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_p010le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB278_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB278_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_p010le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB278_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB278_17:
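// Weights f186..f189 are the horizontal taps, f190..f193 the vertical taps;
// each set is divided by its sum so the filter preserves overall brightness.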
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB278_18:
ret;
}
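//
// Reconstructed CUDA-level sketch of the kernels above (illustrative names,
// derived from this PTX rather than taken from the original source):
//
//   __device__ float lanczos2(float x)           // tap weight, window a = 2
//   {
//       float px = x * 3.14159265f;              // 0f40490FDB
//       if (px == 0.0f)
//           return 1.0f;                         // sinc(0) * sinc(0)
//       return __sinf(px) * __sinf(px * 0.5f) / (0.5f * px * px);
//   }
//
//   // Per output pixel: src = (dst + 0.5f) * scale - 0.5f; take four taps
//   // per axis at offsets -1..+2 from floorf(src), normalize each axis'
//   // weights to sum to 1, and accumulate 16 tex2D fetches separably.
//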
// .globl Subsample_Lanczos_nv12_p010le_uv
.visible .entry Subsample_Lanczos_nv12_p010le_uv(
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_0,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_1,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_2,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_3,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_4,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_5,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_6,
.param .u64 Subsample_Lanczos_nv12_p010le_uv_param_7,
.param .u32 Subsample_Lanczos_nv12_p010le_uv_param_8,
.param .u32 Subsample_Lanczos_nv12_p010le_uv_param_9,
.param .u32 Subsample_Lanczos_nv12_p010le_uv_param_10,
.param .u32 Subsample_Lanczos_nv12_p010le_uv_param_11,
.param .u32 Subsample_Lanczos_nv12_p010le_uv_param_12,
.param .f32 Subsample_Lanczos_nv12_p010le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<7>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB279_18;
bra.uni $L__BB279_1;
$L__BB279_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_p010le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB279_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB279_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB279_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB279_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB279_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB279_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB279_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB279_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB279_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB279_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB279_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB279_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_p010le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB279_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB279_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_p010le_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB279_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB279_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
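// nv12 stores U and V interleaved, so each tex2D fetch below yields a
// (U, V) pair; both channels run through the same 4x4 filter side by side.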
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f437F0000;
mul.f32 %f222, %f220, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
mul.lo.s16 %rs3, %rs1, 257;
and.b16 %rs4, %rs3, -64;
mul.lo.s16 %rs5, %rs2, 257;
and.b16 %rs6, %rs5, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs4, %rs6};
$L__BB279_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_p010le
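// Same filter body as the nv12 luma kernel above; only the kernel name and
// the parameter bindings differ.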
.visible .entry Subsample_Lanczos_yuv444p_p010le(
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_0,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_1,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_2,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_3,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_4,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_5,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_6,
.param .u64 Subsample_Lanczos_yuv444p_p010le_param_7,
.param .u32 Subsample_Lanczos_yuv444p_p010le_param_8,
.param .u32 Subsample_Lanczos_yuv444p_p010le_param_9,
.param .u32 Subsample_Lanczos_yuv444p_p010le_param_10,
.param .u32 Subsample_Lanczos_yuv444p_p010le_param_11,
.param .u32 Subsample_Lanczos_yuv444p_p010le_param_12,
.param .f32 Subsample_Lanczos_yuv444p_p010le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_p010le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB280_18;
bra.uni $L__BB280_1;
$L__BB280_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_p010le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_p010le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB280_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB280_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB280_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB280_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB280_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB280_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB280_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB280_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB280_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB280_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB280_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB280_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p_p010le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB280_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB280_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_p010le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB280_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB280_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB280_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_p010le_uv
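// Chroma variant reading U and V from two separate full-resolution planes
// (params 1 and 2), hence the doubled register budget and the 32 fetches.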
.visible .entry Subsample_Lanczos_yuv444p_p010le_uv(
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p_p010le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p_p010le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p_p010le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p_p010le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p_p010le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p_p010le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p_p010le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<7>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB281_34;
bra.uni $L__BB281_1;
$L__BB281_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_p010le_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB281_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB281_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB281_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB281_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB281_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB281_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB281_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB281_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB281_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB281_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB281_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB281_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB281_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB281_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv444p_p010le_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB281_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB281_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB281_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB281_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB281_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB281_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB281_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB281_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB281_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB281_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB281_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB281_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB281_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB281_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_p010le_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB281_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB281_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_p010le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv444p_p010le_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
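// Unpack the .x component of each of the 16 fetches from the first plane.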
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB281_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB281_33:
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f319;
mul.lo.s16 %rs2, %rs1, 257;
and.b16 %rs3, %rs2, -64;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f437F0000;
cvt.rzi.u16.f32 %rs4, %f370;
mul.lo.s16 %rs5, %rs4, 257;
and.b16 %rs6, %rs5, -64;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs3, %rs6};
$L__BB281_34:
ret;
}
// .globl Subsample_Lanczos_p010le_p010le
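// 16-bit -> 16-bit luma path: same filter, but the result is scaled by
// 65535 and stored directly, with no 8 -> 10 bit widening step.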
.visible .entry Subsample_Lanczos_p010le_p010le(
.param .u64 Subsample_Lanczos_p010le_p010le_param_0,
.param .u64 Subsample_Lanczos_p010le_p010le_param_1,
.param .u64 Subsample_Lanczos_p010le_p010le_param_2,
.param .u64 Subsample_Lanczos_p010le_p010le_param_3,
.param .u64 Subsample_Lanczos_p010le_p010le_param_4,
.param .u64 Subsample_Lanczos_p010le_p010le_param_5,
.param .u64 Subsample_Lanczos_p010le_p010le_param_6,
.param .u64 Subsample_Lanczos_p010le_p010le_param_7,
.param .u32 Subsample_Lanczos_p010le_p010le_param_8,
.param .u32 Subsample_Lanczos_p010le_p010le_param_9,
.param .u32 Subsample_Lanczos_p010le_p010le_param_10,
.param .u32 Subsample_Lanczos_p010le_p010le_param_11,
.param .u32 Subsample_Lanczos_p010le_p010le_param_12,
.param .f32 Subsample_Lanczos_p010le_p010le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB282_18;
bra.uni $L__BB282_1;
$L__BB282_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_p010le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB282_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB282_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB282_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB282_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB282_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB282_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB282_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB282_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB282_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB282_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB282_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB282_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_p010le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB282_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB282_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_p010le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB282_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB282_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
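// Scale the filtered value to the 16-bit range (0f477FFF00 = 65535.0),
// truncate to u16, and store at byte offset ((pitch >> 1) * y + x) << 1;
// param_10 appears to be the destination pitch in bytes.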
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB282_18:
ret;
}
// .globl Subsample_Lanczos_p010le_p010le_uv
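// Interleaved-chroma variant of the kernel above: each thread filters the
// U and V channels of the semi-planar p010le UV plane in lockstep (the
// .x/.y components of every texel, e.g. %r17/%r18) and writes the pair
// with a single v2.u16 store.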
.visible .entry Subsample_Lanczos_p010le_p010le_uv(
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_0,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_1,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_2,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_3,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_4,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_5,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_6,
.param .u64 Subsample_Lanczos_p010le_p010le_uv_param_7,
.param .u32 Subsample_Lanczos_p010le_p010le_uv_param_8,
.param .u32 Subsample_Lanczos_p010le_p010le_uv_param_9,
.param .u32 Subsample_Lanczos_p010le_p010le_uv_param_10,
.param .u32 Subsample_Lanczos_p010le_p010le_uv_param_11,
.param .u32 Subsample_Lanczos_p010le_p010le_uv_param_12,
.param .f32 Subsample_Lanczos_p010le_p010le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
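// Standard launch preamble: global x = ntid.x*ctaid.x + tid.x, y likewise;
// threads outside the destination extent (param_8 x param_9) exit early.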
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB283_18;
bra.uni $L__BB283_1;
$L__BB283_1:
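// Map the destination pixel to source coordinates: scale = src/dst size
// (param_11/param_8 horizontally, param_12/param_9 vertically; param_11
// and param_12 are presumably the source dimensions), then
// srcx = scale_x*(x+0.5) - 0.5. cvt.rmi (floor) and the fractional part
// drive the Lanczos phase. 0f3F000000 = 0.5, 0fBF000000 = -0.5.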
ld.param.u32 %r7, [Subsample_Lanczos_p010le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_p010le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB283_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB283_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB283_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB283_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB283_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB283_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB283_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB283_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB283_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB283_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB283_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB283_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_p010le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB283_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB283_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_p010le_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB283_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB283_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
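// Both chroma channels are scaled to 16 bits and stored as one u16 pair;
// pitch >> 2 converts bytes to UV pairs per row, << 2 back to a byte offset
// (4 bytes per interleaved UV sample).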
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs1, %rs2};
$L__BB283_18:
ret;
}
// .globl Subsample_Lanczos_p016le_p010le
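// Format-converting variant: reads 16-bit p016le luma and writes p010le.
// The filter body is identical to the p010le->p010le kernel; only a final
// mask to the 10-bit container layout is added before the store.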
.visible .entry Subsample_Lanczos_p016le_p010le(
.param .u64 Subsample_Lanczos_p016le_p010le_param_0,
.param .u64 Subsample_Lanczos_p016le_p010le_param_1,
.param .u64 Subsample_Lanczos_p016le_p010le_param_2,
.param .u64 Subsample_Lanczos_p016le_p010le_param_3,
.param .u64 Subsample_Lanczos_p016le_p010le_param_4,
.param .u64 Subsample_Lanczos_p016le_p010le_param_5,
.param .u64 Subsample_Lanczos_p016le_p010le_param_6,
.param .u64 Subsample_Lanczos_p016le_p010le_param_7,
.param .u32 Subsample_Lanczos_p016le_p010le_param_8,
.param .u32 Subsample_Lanczos_p016le_p010le_param_9,
.param .u32 Subsample_Lanczos_p016le_p010le_param_10,
.param .u32 Subsample_Lanczos_p016le_p010le_param_11,
.param .u32 Subsample_Lanczos_p016le_p010le_param_12,
.param .f32 Subsample_Lanczos_p016le_p010le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB284_18;
bra.uni $L__BB284_1;
$L__BB284_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_p010le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB284_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB284_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB284_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB284_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB284_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB284_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB284_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB284_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB284_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB284_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB284_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB284_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_p010le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB284_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB284_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_p010le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB284_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB284_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
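// p010le keeps its 10 significant bits in the high end of each 16-bit
// word: the and with -64 (0xFFC0) clears the 6 low bits before storing.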
and.b16 %rs2, %rs1, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB284_18:
ret;
}
// .globl Subsample_Lanczos_p016le_p010le_uv
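// Chroma counterpart of the kernel above: filters the interleaved p016le
// UV plane, masks both channels to 0xFFC0 for p010le, and stores them as
// one v2.u16 pair.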
.visible .entry Subsample_Lanczos_p016le_p010le_uv(
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_0,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_1,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_2,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_3,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_4,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_5,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_6,
.param .u64 Subsample_Lanczos_p016le_p010le_uv_param_7,
.param .u32 Subsample_Lanczos_p016le_p010le_uv_param_8,
.param .u32 Subsample_Lanczos_p016le_p010le_uv_param_9,
.param .u32 Subsample_Lanczos_p016le_p010le_uv_param_10,
.param .u32 Subsample_Lanczos_p016le_p010le_uv_param_11,
.param .u32 Subsample_Lanczos_p016le_p010le_uv_param_12,
.param .f32 Subsample_Lanczos_p016le_p010le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB285_18;
bra.uni $L__BB285_1;
$L__BB285_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_p010le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB285_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB285_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB285_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB285_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB285_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB285_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB285_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB285_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB285_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB285_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB285_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB285_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_p010le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB285_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB285_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_p010le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_p010le_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB285_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB285_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
and.b16 %rs3, %rs1, -64;
and.b16 %rs4, %rs2, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs3, %rs4};
$L__BB285_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p16le_p010le
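// 16-bit 4:4:4 planar source to p010le luma. The PTX body matches the
// p016le variant above; the per-format difference presumably lives in how
// the host binds the source texture (param_0) rather than in the kernel.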
.visible .entry Subsample_Lanczos_yuv444p16le_p010le(
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_p010le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_p010le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_p010le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB286_18;
bra.uni $L__BB286_1;
$L__BB286_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_p010le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_p010le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB286_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB286_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB286_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB286_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB286_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB286_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB286_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB286_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB286_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB286_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB286_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB286_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p16le_p010le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB286_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB286_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_p010le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_p010le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB286_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB286_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
and.b16 %rs2, %rs1, -64;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB286_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p16le_p010le_uv
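// Planar-to-interleaved chroma: U and V come from two separate source
// textures (param_1 via %rd5 and param_2 via %rd21), so this variant is
// scheduled differently -- the sinc weights are evaluated twice
// (%f371-%f378, then %f379-%f386, reusing predicates %p4-%p11) around the
// two 4x4 fetch groups, and the two masked results are packed into a
// single v2.u16 store.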
.visible .entry Subsample_Lanczos_yuv444p16le_p010le_uv(
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_p010le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_p010le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_p010le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB287_34;
bra.uni $L__BB287_1;
$L__BB287_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB287_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB287_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB287_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB287_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB287_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB287_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB287_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB287_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB287_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB287_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB287_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB287_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB287_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB287_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB287_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB287_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB287_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB287_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB287_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB287_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB287_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB287_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB287_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB287_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB287_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB287_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB287_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB287_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB287_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB287_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv444p16le_p010le_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB287_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB287_33:
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f319;
and.b16 %rs2, %rs1, -64;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f477FFF00;
cvt.rzi.u16.f32 %rs3, %f370;
and.b16 %rs4, %rs3, -64;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs2, %rs4};
$L__BB287_34:
ret;
}
// .globl Subsample_Lanczos_yuv420p_p016le
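// 8-bit 4:2:0 source resampled to 16-bit p016le luma. The filter body is
// unchanged because tex.2d.v4.f32.f32 returns texels as normalized f32
// regardless of source bit depth; only the epilogue scaling/store differs
// per output format.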
.visible .entry Subsample_Lanczos_yuv420p_p016le(
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_0,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_1,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_2,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_3,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_4,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_5,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_6,
.param .u64 Subsample_Lanczos_yuv420p_p016le_param_7,
.param .u32 Subsample_Lanczos_yuv420p_p016le_param_8,
.param .u32 Subsample_Lanczos_yuv420p_p016le_param_9,
.param .u32 Subsample_Lanczos_yuv420p_p016le_param_10,
.param .u32 Subsample_Lanczos_yuv420p_p016le_param_11,
.param .u32 Subsample_Lanczos_yuv420p_p016le_param_12,
.param .f32 Subsample_Lanczos_yuv420p_p016le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB288_18;
bra.uni $L__BB288_1;
$L__BB288_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_p016le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB288_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB288_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB288_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB288_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB288_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB288_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB288_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB288_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB288_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB288_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB288_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB288_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv420p_p016le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB288_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB288_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_p016le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB288_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB288_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB288_18:
ret;
}
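//
// Subsample_Lanczos_yuv420p_p016le above handles the luma plane: 16
// tex.2d fetches over the 4x4 window, one component used per fetch,
// followed by the separable weighted sum. The result is scaled by
// 255.0 (0f437F0000), truncated to u16, and multiplied by 257 so an
// 8-bit value fills the full 16-bit range (255 * 257 = 65535) for the
// p016le destination. param_10 is evidently the destination pitch in
// bytes; it is shifted right by 1 to index 16-bit elements before the
// single u16 store.
//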
// .globl Subsample_Lanczos_yuv420p_p016le_uv
.visible .entry Subsample_Lanczos_yuv420p_p016le_uv(
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv420p_p016le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv420p_p016le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv420p_p016le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv420p_p016le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv420p_p016le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv420p_p016le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv420p_p016le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB289_34;
bra.uni $L__BB289_1;
$L__BB289_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_p016le_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB289_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB289_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB289_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB289_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB289_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB289_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB289_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB289_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB289_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB289_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB289_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB289_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB289_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB289_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv420p_p016le_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB289_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB289_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB289_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB289_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB289_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB289_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB289_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB289_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB289_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB289_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB289_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB289_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB289_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB289_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_p016le_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB289_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB289_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_p016le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv420p_p016le_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB289_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB289_33:
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f319;
mul.lo.s16 %rs2, %rs1, 257;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f370;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs2, %rs4};
$L__BB289_34:
ret;
}
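//
// Subsample_Lanczos_yuv420p_p016le_uv above samples two planar chroma
// textures (param_1 and param_2) with the same 16-point pattern and
// stores the pair as one interleaved v2.u16 (the pitch is taken as
// param_10 >> 2, i.e. 4 bytes per UV pair). The Lanczos weights are
// computed twice, once before and once after the first block of
// fetches; the second set (%f379..%f386) is numerically identical to
// the first, apparently a rematerialization by the register allocator
// to limit live ranges across the fetch sequence.
//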
// .globl Subsample_Lanczos_nv12_p016le
.visible .entry Subsample_Lanczos_nv12_p016le(
.param .u64 Subsample_Lanczos_nv12_p016le_param_0,
.param .u64 Subsample_Lanczos_nv12_p016le_param_1,
.param .u64 Subsample_Lanczos_nv12_p016le_param_2,
.param .u64 Subsample_Lanczos_nv12_p016le_param_3,
.param .u64 Subsample_Lanczos_nv12_p016le_param_4,
.param .u64 Subsample_Lanczos_nv12_p016le_param_5,
.param .u64 Subsample_Lanczos_nv12_p016le_param_6,
.param .u64 Subsample_Lanczos_nv12_p016le_param_7,
.param .u32 Subsample_Lanczos_nv12_p016le_param_8,
.param .u32 Subsample_Lanczos_nv12_p016le_param_9,
.param .u32 Subsample_Lanczos_nv12_p016le_param_10,
.param .u32 Subsample_Lanczos_nv12_p016le_param_11,
.param .u32 Subsample_Lanczos_nv12_p016le_param_12,
.param .f32 Subsample_Lanczos_nv12_p016le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_p016le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB290_18;
bra.uni $L__BB290_1;
$L__BB290_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_p016le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_p016le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB290_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB290_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB290_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB290_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB290_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB290_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB290_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB290_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB290_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB290_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB290_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB290_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_p016le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB290_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB290_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_p016le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB290_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB290_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB290_18:
ret;
}
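//
// Subsample_Lanczos_nv12_p016le above appears identical to the yuv420p
// luma kernel apart from its name, labels, and parameter symbols: nv12
// and yuv420p share the same 8-bit luma plane, so the same resampling
// body is emitted for both.
//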
// .globl Subsample_Lanczos_nv12_p016le_uv
.visible .entry Subsample_Lanczos_nv12_p016le_uv(
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_0,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_1,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_2,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_3,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_4,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_5,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_6,
.param .u64 Subsample_Lanczos_nv12_p016le_uv_param_7,
.param .u32 Subsample_Lanczos_nv12_p016le_uv_param_8,
.param .u32 Subsample_Lanczos_nv12_p016le_uv_param_9,
.param .u32 Subsample_Lanczos_nv12_p016le_uv_param_10,
.param .u32 Subsample_Lanczos_nv12_p016le_uv_param_11,
.param .u32 Subsample_Lanczos_nv12_p016le_uv_param_12,
.param .f32 Subsample_Lanczos_nv12_p016le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_nv12_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB291_18;
bra.uni $L__BB291_1;
$L__BB291_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_p016le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB291_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB291_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB291_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB291_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB291_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB291_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB291_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB291_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB291_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB291_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB291_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB291_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_p016le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB291_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB291_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_p016le_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB291_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB291_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f437F0000;
mul.f32 %f222, %f220, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
mul.lo.s16 %rs3, %rs1, 257;
mul.lo.s16 %rs4, %rs2, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs3, %rs4};
$L__BB291_18:
ret;
}
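//
// Subsample_Lanczos_nv12_p016le_uv above reads nv12's interleaved
// chroma through a single texture (param_1): each fetch yields both
// the U and V channels (two mov.b32 per fetch), so 16 fetches cover
// the full UV pair. Both channels run through the same normalized
// weights, are expanded with the same 255.0 / *257 pattern, and are
// stored together as one v2.u16.
//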
// .globl Subsample_Lanczos_yuv444p_p016le
.visible .entry Subsample_Lanczos_yuv444p_p016le(
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_0,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_1,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_2,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_3,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_4,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_5,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_6,
.param .u64 Subsample_Lanczos_yuv444p_p016le_param_7,
.param .u32 Subsample_Lanczos_yuv444p_p016le_param_8,
.param .u32 Subsample_Lanczos_yuv444p_p016le_param_9,
.param .u32 Subsample_Lanczos_yuv444p_p016le_param_10,
.param .u32 Subsample_Lanczos_yuv444p_p016le_param_11,
.param .u32 Subsample_Lanczos_yuv444p_p016le_param_12,
.param .f32 Subsample_Lanczos_yuv444p_p016le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_p016le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB292_18;
bra.uni $L__BB292_1;
$L__BB292_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_p016le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_p016le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB292_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB292_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB292_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB292_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB292_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB292_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB292_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB292_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB292_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB292_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB292_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB292_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p_p016le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB292_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB292_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_p016le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB292_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB292_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB292_18:
ret;
}
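//
// Subsample_Lanczos_yuv444p_p016le above again matches the other
// 8-bit planar luma kernels: the sampling math itself does not depend
// on the chroma layout, which presumably only changes the grid sizes
// and plane parameters supplied by the host at launch time.
//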
// .globl Subsample_Lanczos_yuv444p_p016le_uv
.visible .entry Subsample_Lanczos_yuv444p_p016le_uv(
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p_p016le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p_p016le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p_p016le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p_p016le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p_p016le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p_p016le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p_p016le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB293_34;
bra.uni $L__BB293_1;
$L__BB293_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_p016le_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB293_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB293_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB293_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB293_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB293_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB293_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB293_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB293_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB293_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB293_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB293_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB293_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB293_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB293_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv444p_p016le_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB293_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB293_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
mov.f32 %f379, %f386;
@%p4 bra $L__BB293_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB293_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB293_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB293_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB293_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB293_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB293_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB293_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB293_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB293_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB293_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB293_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_p016le_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB293_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB293_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_p016le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv444p_p016le_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB293_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB293_33:
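// Normalize both 4-weight sets by their sums and blend the 16 samples of
// the first plane (separable: horizontal FMAs per row, then the vertical
// weights), then repeat the 16 fetches on the second chroma plane
// (%rd21 = param_2). Each 8-bit result is widened to 16 bits as
// trunc(acc * 255) * 257 (0f437F0000 = 255.0) and the U/V pair is stored
// interleaved with st.global.v2.u16.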
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f319;
mul.lo.s16 %rs2, %rs1, 257;
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f370;
mul.lo.s16 %rs4, %rs3, 257;
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs2, %rs4};
$L__BB293_34:
ret;
}
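// End of Subsample_Lanczos_yuv444p_p016le_uv: planar 8-bit U/V resampled
// with a 4x4 Lanczos-2 kernel into interleaved 16-bit p016le chroma. The
// naming and structure match FFmpeg's vf_scale_cuda kernels, though the
// original source is not part of this dump. A minimal source-level sketch
// of the tap-weight computation that every kernel below inlines eight
// times (function name and signature are hypothetical reconstructions,
// not recovered source):
__device__ static float lanczos2_weight(float x) // hypothetical name
{
    float t = x * 3.14159265f;          // 0f40490FDB = pi
    if (t == 0.0f)                      // the predicate guard in the PTX
        return 1.0f;
    // sinc(x) * sinc(x/2) == sin(t) * sin(t/2) / (t*t/2);
    // sin.approx.f32 corresponds to the __sinf() intrinsic
    return __sinf(t) * __sinf(t * 0.5f) / (t * t * 0.5f);
}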
// .globl Subsample_Lanczos_p010le_p016le
.visible .entry Subsample_Lanczos_p010le_p016le(
.param .u64 Subsample_Lanczos_p010le_p016le_param_0,
.param .u64 Subsample_Lanczos_p010le_p016le_param_1,
.param .u64 Subsample_Lanczos_p010le_p016le_param_2,
.param .u64 Subsample_Lanczos_p010le_p016le_param_3,
.param .u64 Subsample_Lanczos_p010le_p016le_param_4,
.param .u64 Subsample_Lanczos_p010le_p016le_param_5,
.param .u64 Subsample_Lanczos_p010le_p016le_param_6,
.param .u64 Subsample_Lanczos_p010le_p016le_param_7,
.param .u32 Subsample_Lanczos_p010le_p016le_param_8,
.param .u32 Subsample_Lanczos_p010le_p016le_param_9,
.param .u32 Subsample_Lanczos_p010le_p016le_param_10,
.param .u32 Subsample_Lanczos_p010le_p016le_param_11,
.param .u32 Subsample_Lanczos_p010le_p016le_param_12,
.param .f32 Subsample_Lanczos_p010le_p016le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB294_18;
bra.uni $L__BB294_1;
$L__BB294_1:
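// Map the destination pixel to source coordinates:
// src = (dst + 0.5) * (src_dim / dst_dim) - 0.5
// (0f3F000000 = 0.5, 0fBF000000 = -0.5); cvt.rmi floors the result and
// the fractional remainder drives the tap weights.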
ld.param.u32 %r7, [Subsample_Lanczos_p010le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_p016le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB294_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB294_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB294_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB294_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB294_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB294_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB294_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB294_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB294_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB294_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB294_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB294_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_p016le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB294_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB294_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_p016le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB294_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB294_17:
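// All eight weights are ready: normalize the horizontal set (%f186-%f189)
// and the vertical set (%f190-%f193) by their sums, then fetch the 4x4
// neighborhood and accumulate row by row.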
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
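// Final conversion: acc * 65535 (0f477FFF00), truncate to u16, then
// v | (v >> 10) replicates the top bits into the low 6 bits that the
// 10-bit p010le source leaves at zero. The store address is
// base + ((pitch / 2) * y + x) * 2; param_10 is the pitch in bytes,
// halved to index 16-bit samples.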
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB294_18:
ret;
}
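// End of Subsample_Lanczos_p010le_p016le (luma plane). A minimal CUDA
// sketch of the 10-bit -> 16-bit widening performed just above (the
// helper name to_p016 is hypothetical, not recovered source):
__device__ static unsigned short to_p016(float acc) // hypothetical name
{
    // mul 0f477FFF00 (= 65535.0f) + cvt.rzi.u16.f32 (truncate toward zero)
    unsigned short v = (unsigned short)(acc * 65535.0f);
    // shr.u16/or.b16: fill the 6 low bits left empty by 10-bit data,
    // reaching full 16-bit scale
    return v | (v >> 10);
}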
// .globl Subsample_Lanczos_p010le_p016le_uv
.visible .entry Subsample_Lanczos_p010le_p016le_uv(
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_0,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_1,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_2,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_3,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_4,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_5,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_6,
.param .u64 Subsample_Lanczos_p010le_p016le_uv_param_7,
.param .u32 Subsample_Lanczos_p010le_p016le_uv_param_8,
.param .u32 Subsample_Lanczos_p010le_p016le_uv_param_9,
.param .u32 Subsample_Lanczos_p010le_p016le_uv_param_10,
.param .u32 Subsample_Lanczos_p010le_p016le_uv_param_11,
.param .u32 Subsample_Lanczos_p010le_p016le_uv_param_12,
.param .f32 Subsample_Lanczos_p010le_p016le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<7>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB295_18;
bra.uni $L__BB295_1;
$L__BB295_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_p016le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB295_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB295_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB295_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB295_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB295_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB295_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB295_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB295_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB295_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB295_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB295_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB295_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_p016le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB295_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB295_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_p016le_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB295_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB295_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs4, %rs6};
$L__BB295_18:
ret;
}
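// End of Subsample_Lanczos_p010le_p016le_uv: the semi-planar source stores
// CbCr interleaved, so a single texture (param_1) is fetched and both
// returned components (%r17/%r18 and so on) are filtered with the same
// weights before the paired st.global.v2.u16. A sketch of the separable
// 4x4 accumulation that the compiler has fully unrolled above (all names
// are hypothetical; wx/wy are the normalized weights, x0/y0 the floored
// source coordinates):
__device__ static float sample_lanczos2(cudaTextureObject_t tex,
                                        float x0, float y0,
                                        const float wx[4], const float wy[4])
{
    float acc = 0.0f;
    for (int j = 0; j < 4; j++) {
        float row = 0.0f;              // horizontal pass: one fma per tap
        for (int i = 0; i < 4; i++)
            row = fmaf(wx[i], tex2D<float>(tex, x0 - 1.0f + i, y0 - 1.0f + j), row);
        acc = fmaf(wy[j], row, acc);   // vertical pass
    }
    return acc;
}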
// .globl Subsample_Lanczos_p016le_p016le
.visible .entry Subsample_Lanczos_p016le_p016le(
.param .u64 Subsample_Lanczos_p016le_p016le_param_0,
.param .u64 Subsample_Lanczos_p016le_p016le_param_1,
.param .u64 Subsample_Lanczos_p016le_p016le_param_2,
.param .u64 Subsample_Lanczos_p016le_p016le_param_3,
.param .u64 Subsample_Lanczos_p016le_p016le_param_4,
.param .u64 Subsample_Lanczos_p016le_p016le_param_5,
.param .u64 Subsample_Lanczos_p016le_p016le_param_6,
.param .u64 Subsample_Lanczos_p016le_p016le_param_7,
.param .u32 Subsample_Lanczos_p016le_p016le_param_8,
.param .u32 Subsample_Lanczos_p016le_p016le_param_9,
.param .u32 Subsample_Lanczos_p016le_p016le_param_10,
.param .u32 Subsample_Lanczos_p016le_p016le_param_11,
.param .u32 Subsample_Lanczos_p016le_p016le_param_12,
.param .f32 Subsample_Lanczos_p016le_p016le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB296_18;
bra.uni $L__BB296_1;
$L__BB296_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_p016le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB296_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB296_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB296_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB296_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB296_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB296_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB296_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB296_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB296_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB296_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB296_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB296_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_p016le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB296_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB296_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_p016le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB296_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB296_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB296_18:
ret;
}
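// End of Subsample_Lanczos_p016le_p016le. Identical to the p010le luma
// kernel except for the conversion: the 16-bit source is already full
// scale, so the result is just trunc(acc * 65535) with no shr/or bit
// replication.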
// .globl Subsample_Lanczos_p016le_p016le_uv
.visible .entry Subsample_Lanczos_p016le_p016le_uv(
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_0,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_1,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_2,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_3,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_4,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_5,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_6,
.param .u64 Subsample_Lanczos_p016le_p016le_uv_param_7,
.param .u32 Subsample_Lanczos_p016le_p016le_uv_param_8,
.param .u32 Subsample_Lanczos_p016le_p016le_uv_param_9,
.param .u32 Subsample_Lanczos_p016le_p016le_uv_param_10,
.param .u32 Subsample_Lanczos_p016le_p016le_uv_param_11,
.param .u32 Subsample_Lanczos_p016le_p016le_uv_param_12,
.param .f32 Subsample_Lanczos_p016le_p016le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB297_18;
bra.uni $L__BB297_1;
$L__BB297_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_p016le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB297_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB297_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB297_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB297_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB297_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB297_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB297_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB297_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB297_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB297_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB297_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB297_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_p016le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB297_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB297_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_p016le_uv_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_p016le_uv_param_1];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB297_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB297_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 2;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 2;
add.s64 %rd27, %rd1, %rd26;
st.global.v2.u16 [%rd27], {%rs1, %rs2};
$L__BB297_18:
ret;
}
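// End of Subsample_Lanczos_p016le_p016le_uv: interleaved-chroma variant of
// the kernel above; two accumulators share one set of weights and the pair
// is stored with st.global.v2.u16 at base + ((pitch / 4) * y + x) * 4.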
// .globl Subsample_Lanczos_yuv444p16le_p016le
.visible .entry Subsample_Lanczos_yuv444p16le_p016le(
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_p016le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_p016le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_p016le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB298_18;
bra.uni $L__BB298_1;
$L__BB298_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_p016le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_p016le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB298_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB298_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB298_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB298_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB298_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB298_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB298_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB298_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB298_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB298_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB298_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB298_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p16le_p016le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB298_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB298_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_p016le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_p016le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB298_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB298_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB298_18:
ret;
}
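// End of Subsample_Lanczos_yuv444p16le_p016le. The _uv kernel that follows
// reads U and V from separate planar textures (param_1 here and, by
// analogy with the yuv444p variant earlier, presumably param_2), which is
// why its register budget roughly doubles (.reg .f32 %f<387>,
// .reg .pred %p<20>) and the weight computations stay live across both
// plane passes.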
// .globl Subsample_Lanczos_yuv444p16le_p016le_uv
.visible .entry Subsample_Lanczos_yuv444p16le_p016le_uv(
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_p016le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_p016le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_p016le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<387>;
.reg .b64 %rd<45>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB299_34;
bra.uni $L__BB299_1;
$L__BB299_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_11];
cvt.rn.f32.s32 %f131, %r6;
cvt.rn.f32.s32 %f132, %r3;
div.rn.f32 %f133, %f131, %f132;
cvt.rn.f32.s32 %f134, %r7;
cvt.rn.f32.s32 %f135, %r4;
div.rn.f32 %f136, %f134, %f135;
cvt.rn.f32.s32 %f137, %r1;
add.f32 %f138, %f137, 0f3F000000;
fma.rn.f32 %f139, %f133, %f138, 0fBF000000;
cvt.rn.f32.s32 %f140, %r2;
add.f32 %f141, %f140, 0f3F000000;
cvt.rmi.f32.f32 %f255, %f139;
sub.f32 %f143, %f139, %f255;
add.f32 %f144, %f143, 0f3F800000;
mul.f32 %f4, %f144, 0f40490FDB;
mul.f32 %f5, %f143, 0f40490FDB;
add.f32 %f145, %f143, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f147, %f4, %f4;
mul.f32 %f9, %f147, 0f3F000000;
mov.f32 %f386, 0f3F800000;
mov.f32 %f371, %f386;
@%p4 bra $L__BB299_3;
sin.approx.f32 %f148, %f4;
sin.approx.f32 %f149, %f8;
mul.f32 %f150, %f148, %f149;
div.rn.f32 %f371, %f150, %f9;
$L__BB299_3:
fma.rn.f32 %f142, %f136, %f141, 0fBF000000;
add.f32 %f146, %f143, 0fC0000000;
mul.f32 %f6, %f145, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f152, %f5, %f5;
mul.f32 %f13, %f152, 0f3F000000;
mov.f32 %f372, %f386;
@%p5 bra $L__BB299_5;
sin.approx.f32 %f153, %f5;
sin.approx.f32 %f154, %f12;
mul.f32 %f155, %f153, %f154;
div.rn.f32 %f372, %f155, %f13;
$L__BB299_5:
cvt.rmi.f32.f32 %f262, %f142;
mul.f32 %f7, %f146, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f157, %f6, %f6;
mul.f32 %f17, %f157, 0f3F000000;
mov.f32 %f373, %f386;
@%p6 bra $L__BB299_7;
sin.approx.f32 %f158, %f6;
sin.approx.f32 %f159, %f16;
mul.f32 %f160, %f158, %f159;
div.rn.f32 %f373, %f160, %f17;
$L__BB299_7:
sub.f32 %f3, %f142, %f262;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f162, %f7, %f7;
mul.f32 %f21, %f162, 0f3F000000;
mov.f32 %f374, %f386;
@%p7 bra $L__BB299_9;
sin.approx.f32 %f163, %f7;
sin.approx.f32 %f164, %f20;
mul.f32 %f165, %f163, %f164;
div.rn.f32 %f374, %f165, %f21;
$L__BB299_9:
add.f32 %f167, %f3, 0f3F800000;
mul.f32 %f24, %f167, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f168, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f170, %f24, %f24;
mul.f32 %f29, %f170, 0f3F000000;
mov.f32 %f375, %f386;
@%p8 bra $L__BB299_11;
sin.approx.f32 %f171, %f24;
sin.approx.f32 %f172, %f28;
mul.f32 %f173, %f171, %f172;
div.rn.f32 %f375, %f173, %f29;
$L__BB299_11:
add.f32 %f169, %f3, 0fC0000000;
mul.f32 %f26, %f168, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f175, %f25, %f25;
mul.f32 %f33, %f175, 0f3F000000;
mov.f32 %f376, %f386;
@%p9 bra $L__BB299_13;
sin.approx.f32 %f176, %f25;
sin.approx.f32 %f177, %f32;
mul.f32 %f178, %f176, %f177;
div.rn.f32 %f376, %f178, %f33;
$L__BB299_13:
mul.f32 %f27, %f169, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f180, %f26, %f26;
mul.f32 %f37, %f180, 0f3F000000;
mov.f32 %f377, %f386;
@%p10 bra $L__BB299_15;
sin.approx.f32 %f181, %f26;
sin.approx.f32 %f182, %f36;
mul.f32 %f183, %f181, %f182;
div.rn.f32 %f377, %f183, %f37;
$L__BB299_15:
ld.param.u64 %rd5, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_1];
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f185, %f27, %f27;
mul.f32 %f41, %f185, 0f3F000000;
mov.f32 %f378, %f386;
@%p11 bra $L__BB299_17;
sin.approx.f32 %f186, %f27;
sin.approx.f32 %f187, %f40;
mul.f32 %f188, %f186, %f187;
div.rn.f32 %f378, %f188, %f41;
$L__BB299_17:
add.f32 %f253, %f255, 0fBF800000;
add.f32 %f254, %f262, 0fBF800000;
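// Gather the 4x4 tap neighborhood from the first source texture (%rd5,
// param_1): x and y coordinates run over floor-1, floor, floor+1,
// floor+2 (0f3F800000 = 1.0, 0f40000000 = 2.0).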
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd5, {%f253, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd5, {%f255, %f254}];
// end inline asm
add.f32 %f257, %f255, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd5, {%f257, %f254}];
// end inline asm
add.f32 %f259, %f255, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd5, {%f259, %f254}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd5, {%f253, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd5, {%f255, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd5, {%f257, %f262}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd5, {%f259, %f262}];
// end inline asm
add.f32 %f270, %f262, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd5, {%f253, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd5, {%f255, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd5, {%f257, %f270}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd5, {%f259, %f270}];
// end inline asm
add.f32 %f278, %f262, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd5, {%f253, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd5, {%f255, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd5, {%f257, %f278}];
// end inline asm
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd5, {%f259, %f278}];
// end inline asm
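// The compiler re-evaluates the eight weights into a second register
// set (%f379..%f386), apparently rematerializing them rather than
// keeping more values live across the fetches above; this copy feeds
// the second channel.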
mov.f32 %f379, %f386;
@%p4 bra $L__BB299_19;
sin.approx.f32 %f222, %f4;
sin.approx.f32 %f223, %f8;
mul.f32 %f224, %f222, %f223;
div.rn.f32 %f379, %f224, %f9;
$L__BB299_19:
mov.f32 %f380, %f386;
@%p5 bra $L__BB299_21;
sin.approx.f32 %f226, %f5;
sin.approx.f32 %f227, %f12;
mul.f32 %f228, %f226, %f227;
div.rn.f32 %f380, %f228, %f13;
$L__BB299_21:
mov.f32 %f381, %f386;
@%p6 bra $L__BB299_23;
sin.approx.f32 %f230, %f6;
sin.approx.f32 %f231, %f16;
mul.f32 %f232, %f230, %f231;
div.rn.f32 %f381, %f232, %f17;
$L__BB299_23:
mov.f32 %f382, %f386;
@%p7 bra $L__BB299_25;
sin.approx.f32 %f234, %f7;
sin.approx.f32 %f235, %f20;
mul.f32 %f236, %f234, %f235;
div.rn.f32 %f382, %f236, %f21;
$L__BB299_25:
mov.f32 %f383, %f386;
@%p8 bra $L__BB299_27;
sin.approx.f32 %f238, %f24;
sin.approx.f32 %f239, %f28;
mul.f32 %f240, %f238, %f239;
div.rn.f32 %f383, %f240, %f29;
$L__BB299_27:
mov.f32 %f384, %f386;
@%p9 bra $L__BB299_29;
sin.approx.f32 %f242, %f25;
sin.approx.f32 %f243, %f32;
mul.f32 %f244, %f242, %f243;
div.rn.f32 %f384, %f244, %f33;
$L__BB299_29:
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_5];
mov.f32 %f385, %f386;
@%p10 bra $L__BB299_31;
sin.approx.f32 %f246, %f26;
sin.approx.f32 %f247, %f36;
mul.f32 %f248, %f246, %f247;
div.rn.f32 %f385, %f248, %f37;
$L__BB299_31:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_10];
ld.param.u64 %rd21, [Subsample_Lanczos_yuv444p16le_p016le_uv_param_2];
cvta.to.global.u64 %rd1, %rd4;
mov.b32 %f46, %r17;
mov.b32 %f50, %r21;
mov.b32 %f55, %r25;
mov.b32 %f60, %r29;
mov.b32 %f64, %r33;
mov.b32 %f68, %r37;
mov.b32 %f72, %r41;
mov.b32 %f76, %r45;
mov.b32 %f81, %r49;
mov.b32 %f85, %r53;
mov.b32 %f89, %r57;
mov.b32 %f93, %r61;
mov.b32 %f98, %r65;
mov.b32 %f102, %r69;
mov.b32 %f106, %r73;
mov.b32 %f110, %r77;
@%p11 bra $L__BB299_33;
sin.approx.f32 %f250, %f27;
sin.approx.f32 %f251, %f40;
mul.f32 %f252, %f250, %f251;
div.rn.f32 %f386, %f252, %f41;
$L__BB299_33:
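// First channel: normalize the x weights (%f371..%f374) and y weights
// (%f375..%f378) by their sums, then filter separably -- each row of
// four taps is reduced with the x weights and the four row results are
// combined with the y weights.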
add.f32 %f285, %f375, %f376;
add.f32 %f286, %f285, %f377;
add.f32 %f287, %f286, %f378;
div.rn.f32 %f288, %f375, %f287;
add.f32 %f289, %f371, %f372;
add.f32 %f290, %f289, %f373;
add.f32 %f291, %f290, %f374;
div.rn.f32 %f292, %f371, %f291;
div.rn.f32 %f293, %f372, %f291;
mul.f32 %f294, %f293, %f50;
fma.rn.f32 %f295, %f292, %f46, %f294;
div.rn.f32 %f296, %f373, %f291;
fma.rn.f32 %f297, %f296, %f55, %f295;
div.rn.f32 %f298, %f374, %f291;
fma.rn.f32 %f299, %f298, %f60, %f297;
div.rn.f32 %f300, %f376, %f287;
mul.f32 %f301, %f293, %f68;
fma.rn.f32 %f302, %f292, %f64, %f301;
fma.rn.f32 %f303, %f296, %f72, %f302;
fma.rn.f32 %f304, %f298, %f76, %f303;
mul.f32 %f305, %f300, %f304;
fma.rn.f32 %f306, %f288, %f299, %f305;
div.rn.f32 %f307, %f377, %f287;
mul.f32 %f308, %f293, %f85;
fma.rn.f32 %f309, %f292, %f81, %f308;
fma.rn.f32 %f310, %f296, %f89, %f309;
fma.rn.f32 %f311, %f298, %f93, %f310;
fma.rn.f32 %f312, %f307, %f311, %f306;
div.rn.f32 %f313, %f378, %f287;
mul.f32 %f314, %f293, %f102;
fma.rn.f32 %f315, %f292, %f98, %f314;
fma.rn.f32 %f316, %f296, %f106, %f315;
fma.rn.f32 %f317, %f298, %f110, %f316;
fma.rn.f32 %f318, %f313, %f317, %f312;
mul.f32 %f319, %f318, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f319;
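// Channel one done: scaled by 0f477FFF00 (65535.0) to full 16-bit
// range and truncated. The second texture (%rd21, param_2) is filtered
// next with the second weight set, normalized the same way.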
add.f32 %f320, %f379, %f380;
add.f32 %f321, %f320, %f381;
add.f32 %f322, %f321, %f382;
div.rn.f32 %f323, %f382, %f322;
div.rn.f32 %f324, %f381, %f322;
div.rn.f32 %f325, %f380, %f322;
div.rn.f32 %f326, %f379, %f322;
add.f32 %f327, %f383, %f384;
add.f32 %f328, %f327, %f385;
add.f32 %f329, %f328, %f386;
div.rn.f32 %f330, %f383, %f329;
div.rn.f32 %f331, %f384, %f329;
div.rn.f32 %f332, %f385, %f329;
div.rn.f32 %f333, %f386, %f329;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd21, {%f253, %f254}];
// end inline asm
mov.b32 %f334, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd21, {%f255, %f254}];
// end inline asm
mov.b32 %f335, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd21, {%f257, %f254}];
// end inline asm
mov.b32 %f336, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd21, {%f259, %f254}];
// end inline asm
mov.b32 %f337, %r93;
mul.f32 %f338, %f325, %f335;
fma.rn.f32 %f339, %f326, %f334, %f338;
fma.rn.f32 %f340, %f324, %f336, %f339;
fma.rn.f32 %f341, %f323, %f337, %f340;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd21, {%f253, %f262}];
// end inline asm
mov.b32 %f342, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd21, {%f255, %f262}];
// end inline asm
mov.b32 %f343, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd21, {%f257, %f262}];
// end inline asm
mov.b32 %f344, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd21, {%f259, %f262}];
// end inline asm
mov.b32 %f345, %r109;
mul.f32 %f346, %f325, %f343;
fma.rn.f32 %f347, %f326, %f342, %f346;
fma.rn.f32 %f348, %f324, %f344, %f347;
fma.rn.f32 %f349, %f323, %f345, %f348;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd21, {%f253, %f270}];
// end inline asm
mov.b32 %f350, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd21, {%f255, %f270}];
// end inline asm
mov.b32 %f351, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd21, {%f257, %f270}];
// end inline asm
mov.b32 %f352, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd21, {%f259, %f270}];
// end inline asm
mov.b32 %f353, %r125;
mul.f32 %f354, %f325, %f351;
fma.rn.f32 %f355, %f326, %f350, %f354;
fma.rn.f32 %f356, %f324, %f352, %f355;
fma.rn.f32 %f357, %f323, %f353, %f356;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd21, {%f253, %f278}];
// end inline asm
mov.b32 %f358, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd21, {%f255, %f278}];
// end inline asm
mov.b32 %f359, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd21, {%f257, %f278}];
// end inline asm
mov.b32 %f360, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd21, {%f259, %f278}];
// end inline asm
mov.b32 %f361, %r141;
mul.f32 %f362, %f325, %f359;
fma.rn.f32 %f363, %f326, %f358, %f362;
fma.rn.f32 %f364, %f324, %f360, %f363;
fma.rn.f32 %f365, %f323, %f361, %f364;
mul.f32 %f366, %f331, %f349;
fma.rn.f32 %f367, %f330, %f341, %f366;
fma.rn.f32 %f368, %f332, %f357, %f367;
fma.rn.f32 %f369, %f333, %f365, %f368;
mul.f32 %f370, %f369, 0f477FFF00;
cvt.rzi.u16.f32 %rs2, %f370;
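// Interleaved store: byte offset = (y * (pitch / 4) + x) * 4 into the
// destination plane (param_5), one {U,V} pair of u16 samples per
// output pixel, matching p016le's chroma layout.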
cvt.s64.s32 %rd37, %r2;
cvt.s64.s32 %rd38, %r5;
shr.u64 %rd39, %rd38, 2;
mul.lo.s64 %rd40, %rd39, %rd37;
cvt.s64.s32 %rd41, %r1;
add.s64 %rd42, %rd40, %rd41;
shl.b64 %rd43, %rd42, 2;
add.s64 %rd44, %rd1, %rd43;
st.global.v2.u16 [%rd44], {%rs1, %rs2};
$L__BB299_34:
ret;
}
// .globl Subsample_Lanczos_yuv420p_yuv444p16le
.visible .entry Subsample_Lanczos_yuv420p_yuv444p16le(
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_0,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_1,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_2,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_3,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_4,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_5,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_6,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_param_7,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_param_8,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_param_9,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_param_10,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_param_11,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_param_12,
.param .f32 Subsample_Lanczos_yuv420p_yuv444p16le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
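//
// Annotation (inferred): same Lanczos a=2 scheme for a single plane,
// scaling 8-bit yuv420p luma (texture param_0) into the 16-bit luma
// plane of a yuv444p16le destination (param_4); the structure mirrors
// the kernel above with one texture read path and one store.
//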
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB300_18;
bra.uni $L__BB300_1;
$L__BB300_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_yuv444p16le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB300_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB300_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB300_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB300_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB300_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB300_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB300_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB300_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB300_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB300_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB300_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB300_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv420p_yuv444p16le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB300_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB300_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv420p_yuv444p16le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB300_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB300_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
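// 8-bit widening: the filtered value is scaled by 0f437F0000 (255.0),
// truncated to v in 0..255, and expanded to 16 bits as v * 257 =
// (v << 8) | v; the store below uses 2-byte elements with a row stride
// of pitch / 2.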
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB300_18:
ret;
}
// .globl Subsample_Lanczos_yuv420p_yuv444p16le_uv
.visible .entry Subsample_Lanczos_yuv420p_yuv444p16le_uv(
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<49>;
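//
// Annotation (inferred): chroma variant of the kernel above. U and V
// of the yuv420p source come from two separate textures (param_1,
// param_2) and are written to two separate 16-bit planes (param_5,
// param_6); one set of filter weights serves both channels.
//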
ld.param.u32 %r4, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB301_34;
bra.uni $L__BB301_1;
$L__BB301_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB301_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB301_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB301_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB301_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB301_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB301_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB301_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB301_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB301_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB301_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB301_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB301_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB301_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB301_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB301_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB301_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f208;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd24, %r2;
cvt.s64.s32 %rd25, %r5;
shr.u64 %rd26, %rd25, 1;
mul.lo.s64 %rd27, %rd26, %rd24;
cvt.s64.s32 %rd28, %r1;
add.s64 %rd3, %rd27, %rd28;
shl.b64 %rd29, %rd3, 1;
add.s64 %rd30, %rd2, %rd29;
st.global.u16 [%rd30], %rs2;
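// U plane stored; the V pass follows. The weights are re-evaluated
// into %f331..%f338 (mirroring %f323..%f330) rather than kept live
// through the first store, presumably a register-pressure trade-off.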
mov.f32 %f331, %f338;
@%p4 bra $L__BB301_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB301_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB301_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB301_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB301_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB301_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB301_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB301_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB301_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB301_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB301_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB301_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB301_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB301_31:
ld.param.u64 %rd31, [Subsample_Lanczos_yuv420p_yuv444p16le_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB301_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB301_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd31, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd31, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd31, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd31, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd31, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd31, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd31, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd31, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd31, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd31, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd31, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd31, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd31, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd31, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd31, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd31, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f322;
mul.lo.s16 %rs4, %rs3, 257;
add.s64 %rd48, %rd1, %rd29;
st.global.u16 [%rd48], %rs4;
$L__BB301_34:
ret;
}
// .globl Subsample_Lanczos_nv12_yuv444p16le
.visible .entry Subsample_Lanczos_nv12_yuv444p16le(
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_0,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_1,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_2,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_3,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_4,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_5,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_6,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_param_7,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_param_8,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_param_9,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_param_10,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_param_11,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_param_12,
.param .f32 Subsample_Lanczos_nv12_yuv444p16le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
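//
// Annotation (inferred): luma path for an nv12 source, structurally
// identical to the yuv420p luma kernel above since the two formats
// share the same 8-bit luma plane.
//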
ld.param.u32 %r4, [Subsample_Lanczos_nv12_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB302_18;
bra.uni $L__BB302_1;
$L__BB302_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_yuv444p16le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB302_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB302_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB302_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB302_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB302_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB302_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB302_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB302_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB302_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB302_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB302_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB302_13:
ld.param.u64 %rd3, [Subsample_Lanczos_nv12_yuv444p16le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB302_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB302_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_yuv444p16le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB302_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB302_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB302_18:
ret;
}
// .globl Subsample_Lanczos_nv12_yuv444p16le_uv
.visible .entry Subsample_Lanczos_nv12_yuv444p16le_uv(
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_0,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_1,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_2,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_3,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_4,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_5,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_6,
.param .u64 Subsample_Lanczos_nv12_yuv444p16le_uv_param_7,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_uv_param_8,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_uv_param_9,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_uv_param_10,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_uv_param_11,
.param .u32 Subsample_Lanczos_nv12_yuv444p16le_uv_param_12,
.param .f32 Subsample_Lanczos_nv12_yuv444p16le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<5>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<31>;
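//
// Annotation (inferred): chroma path for an nv12 source. The CbCr
// plane is interleaved, so each fetch of texture param_1 yields both
// chroma samples per tap (first component = Cb, second = Cr), and the
// filter carries the two accumulations side by side.
//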
ld.param.u32 %r4, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB303_18;
bra.uni $L__BB303_1;
$L__BB303_1:
ld.param.u32 %r7, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB303_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB303_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB303_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB303_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB303_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB303_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB303_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB303_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB303_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB303_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB303_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB303_13:
ld.param.u64 %rd4, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB303_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB303_15:
ld.param.u32 %r5, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_nv12_yuv444p16le_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB303_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB303_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f437F0000;
mul.f32 %f222, %f220, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
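// Both channels scaled by 255.0, truncated, and widened by * 257: Cb
// is stored through param_5 (%rd2) and Cr through param_6 (%rd1) at
// the same 2-byte-element offset, yielding two planar 16-bit outputs.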
mul.lo.s16 %rs3, %rs1, 257;
cvt.s64.s32 %rd22, %r2;
cvt.s64.s32 %rd23, %r5;
shr.u64 %rd24, %rd23, 1;
mul.lo.s64 %rd25, %rd24, %rd22;
cvt.s64.s32 %rd26, %r1;
add.s64 %rd27, %rd25, %rd26;
shl.b64 %rd28, %rd27, 1;
add.s64 %rd29, %rd2, %rd28;
st.global.u16 [%rd29], %rs3;
mul.lo.s16 %rs4, %rs2, 257;
add.s64 %rd30, %rd1, %rd28;
st.global.u16 [%rd30], %rs4;
$L__BB303_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_yuv444p16le
.visible .entry Subsample_Lanczos_yuv444p_yuv444p16le(
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_0,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_1,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_2,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_3,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_4,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_5,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_6,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_param_7,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_param_8,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_param_9,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_param_10,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_param_11,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_param_12,
.param .f32 Subsample_Lanczos_yuv444p_yuv444p16le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
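//
// Annotation (inferred): luma path for a yuv444p source, following the
// same single-plane 8-bit-to-16-bit Lanczos a=2 structure as the
// kernels above.
//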
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB304_18;
bra.uni $L__BB304_1;
$L__BB304_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_yuv444p16le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB304_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB304_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB304_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB304_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB304_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB304_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB304_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB304_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB304_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB304_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB304_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB304_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p_yuv444p16le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB304_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB304_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p_yuv444p16le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB304_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB304_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f185;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs2;
$L__BB304_18:
ret;
}
// .globl Subsample_Lanczos_yuv444p_yuv444p16le_uv
.visible .entry Subsample_Lanczos_yuv444p_yuv444p16le_uv(
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<5>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<49>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB305_34;
bra.uni $L__BB305_1;
$L__BB305_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB305_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB305_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB305_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB305_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB305_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB305_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB305_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB305_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB305_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB305_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB305_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB305_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB305_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB305_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB305_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB305_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f208;
mul.lo.s16 %rs2, %rs1, 257;
cvt.s64.s32 %rd24, %r2;
cvt.s64.s32 %rd25, %r5;
shr.u64 %rd26, %rd25, 1;
mul.lo.s64 %rd27, %rd26, %rd24;
cvt.s64.s32 %rd28, %r1;
add.s64 %rd3, %rd27, %rd28;
shl.b64 %rd29, %rd3, 1;
add.s64 %rd30, %rd2, %rd29;
st.global.u16 [%rd30], %rs2;
mov.f32 %f331, %f338;
@%p4 bra $L__BB305_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB305_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB305_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB305_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB305_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB305_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB305_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB305_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB305_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB305_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB305_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB305_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB305_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB305_31:
ld.param.u64 %rd31, [Subsample_Lanczos_yuv444p_yuv444p16le_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB305_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB305_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd31, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd31, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd31, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd31, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd31, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd31, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd31, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd31, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd31, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd31, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd31, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd31, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd31, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd31, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd31, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd31, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f437F0000;
cvt.rzi.u16.f32 %rs3, %f322;
mul.lo.s16 %rs4, %rs3, 257;
add.s64 %rd48, %rd1, %rd29;
st.global.u16 [%rd48], %rs4;
$L__BB305_34:
ret;
}
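// Subsample_Lanczos_yuv444p_yuv444p16le_uv (above): the same filter applied
// to both chroma planes in one launch. The weights are evaluated twice
// (%f323..%f330, then %f331..%f338 after the first store) rather than
// reused, and a second texture (param_2) feeds the second output plane
// (param_6).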
// .globl Subsample_Lanczos_p010le_yuv444p16le
.visible .entry Subsample_Lanczos_p010le_yuv444p16le(
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_0,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_1,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_2,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_3,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_4,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_5,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_6,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_param_7,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_param_8,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_param_9,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_param_10,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_param_11,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_param_12,
.param .f32 Subsample_Lanczos_p010le_yuv444p16le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<4>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB306_18;
bra.uni $L__BB306_1;
$L__BB306_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_yuv444p16le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB306_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB306_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB306_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB306_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB306_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB306_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB306_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB306_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB306_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB306_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB306_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB306_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p010le_yuv444p16le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB306_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB306_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_yuv444p16le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB306_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB306_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
shr.u16 %rs2, %rs1, 10;
or.b16 %rs3, %rs2, %rs1;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs3;
$L__BB306_18:
ret;
}
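// Subsample_Lanczos_p010le_yuv444p16le (above): the filtering body is
// identical to the yuv444p variant; only the final conversion differs
// (10-bit-in-MSBs source, full-range 16-bit output).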
// .globl Subsample_Lanczos_p010le_yuv444p16le_uv
.visible .entry Subsample_Lanczos_p010le_yuv444p16le_uv(
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Lanczos_p010le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Lanczos_p010le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Lanczos_p010le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<7>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<31>;
ld.param.u32 %r4, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB307_18;
bra.uni $L__BB307_1;
$L__BB307_1:
ld.param.u32 %r7, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB307_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB307_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB307_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB307_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB307_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB307_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB307_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB307_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB307_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB307_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB307_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB307_13:
ld.param.u64 %rd4, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB307_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB307_15:
ld.param.u32 %r5, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_p010le_yuv444p16le_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB307_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB307_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
shr.u16 %rs3, %rs1, 10;
or.b16 %rs4, %rs3, %rs1;
cvt.s64.s32 %rd22, %r2;
cvt.s64.s32 %rd23, %r5;
shr.u64 %rd24, %rd23, 1;
mul.lo.s64 %rd25, %rd24, %rd22;
cvt.s64.s32 %rd26, %r1;
add.s64 %rd27, %rd25, %rd26;
shl.b64 %rd28, %rd27, 1;
add.s64 %rd29, %rd2, %rd28;
st.global.u16 [%rd29], %rs4;
shr.u16 %rs5, %rs2, 10;
or.b16 %rs6, %rs5, %rs2;
add.s64 %rd30, %rd1, %rd28;
st.global.u16 [%rd30], %rs6;
$L__BB307_18:
ret;
}
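// Subsample_Lanczos_p010le_yuv444p16le_uv (above): the source chroma is
// semi-planar, so each tex.2d fetch yields U in .x (%r17) and V in .y
// (%r18); one pass of shared weights blends both channels and writes two
// planar 16-bit outputs (param_5 and param_6), each with the >>10 bit
// replication.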
// .globl Subsample_Lanczos_p016le_yuv444p16le
.visible .entry Subsample_Lanczos_p016le_yuv444p16le(
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_0,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_1,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_2,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_3,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_4,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_5,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_6,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_param_7,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_param_8,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_param_9,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_param_10,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_param_11,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_param_12,
.param .f32 Subsample_Lanczos_p016le_yuv444p16le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB308_18;
bra.uni $L__BB308_1;
$L__BB308_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_yuv444p16le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB308_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB308_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB308_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB308_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB308_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB308_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB308_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB308_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB308_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB308_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB308_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB308_13:
ld.param.u64 %rd3, [Subsample_Lanczos_p016le_yuv444p16le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB308_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB308_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_yuv444p16le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB308_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB308_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB308_18:
ret;
}
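// Subsample_Lanczos_p016le_yuv444p16le (above): p016le is already full
// 16-bit, so the result is scaled by 65535.0 and stored directly, with no
// bit-replication step.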
// .globl Subsample_Lanczos_p016le_yuv444p16le_uv
.visible .entry Subsample_Lanczos_p016le_yuv444p16le_uv(
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Lanczos_p016le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Lanczos_p016le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Lanczos_p016le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<3>;
.reg .b32 %r<81>;
.reg .f32 %f<231>;
.reg .b64 %rd<31>;
ld.param.u32 %r4, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB309_18;
bra.uni $L__BB309_1;
$L__BB309_1:
ld.param.u32 %r7, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f230, 0f3F800000;
mov.f32 %f223, %f230;
@%p4 bra $L__BB309_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f223, %f64, %f9;
$L__BB309_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f224, %f230;
@%p5 bra $L__BB309_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f224, %f69, %f13;
$L__BB309_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f225, %f230;
@%p6 bra $L__BB309_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f225, %f74, %f17;
$L__BB309_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f226, %f230;
@%p7 bra $L__BB309_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f226, %f79, %f21;
$L__BB309_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f227, %f230;
@%p8 bra $L__BB309_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f227, %f87, %f29;
$L__BB309_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f228, %f230;
@%p9 bra $L__BB309_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f228, %f92, %f33;
$L__BB309_13:
ld.param.u64 %rd4, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_6];
ld.param.u64 %rd5, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_5];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f229, %f230;
@%p10 bra $L__BB309_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f229, %f97, %f37;
$L__BB309_15:
ld.param.u32 %r5, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_10];
ld.param.u64 %rd6, [Subsample_Lanczos_p016le_yuv444p16le_uv_param_1];
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB309_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f230, %f102, %f41;
$L__BB309_17:
add.f32 %f135, %f223, %f224;
add.f32 %f136, %f135, %f225;
add.f32 %f137, %f136, %f226;
div.rn.f32 %f138, %f226, %f137;
div.rn.f32 %f139, %f225, %f137;
div.rn.f32 %f140, %f224, %f137;
div.rn.f32 %f141, %f223, %f137;
add.f32 %f142, %f227, %f228;
add.f32 %f143, %f142, %f229;
add.f32 %f144, %f143, %f230;
div.rn.f32 %f145, %f227, %f144;
div.rn.f32 %f146, %f228, %f144;
div.rn.f32 %f147, %f229, %f144;
div.rn.f32 %f148, %f230, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd6, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r18;
mov.b32 %f150, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd6, {%f105, %f104}];
// end inline asm
mov.b32 %f151, %r22;
mov.b32 %f152, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd6, {%f107, %f104}];
// end inline asm
mov.b32 %f153, %r26;
mov.b32 %f154, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd6, {%f109, %f104}];
// end inline asm
mov.b32 %f155, %r30;
mov.b32 %f156, %r29;
mul.f32 %f157, %f140, %f152;
mul.f32 %f158, %f140, %f151;
fma.rn.f32 %f159, %f141, %f150, %f157;
fma.rn.f32 %f160, %f141, %f149, %f158;
fma.rn.f32 %f161, %f139, %f154, %f159;
fma.rn.f32 %f162, %f139, %f153, %f160;
fma.rn.f32 %f163, %f138, %f156, %f161;
fma.rn.f32 %f164, %f138, %f155, %f162;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd6, {%f103, %f112}];
// end inline asm
mov.b32 %f165, %r34;
mov.b32 %f166, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd6, {%f105, %f112}];
// end inline asm
mov.b32 %f167, %r38;
mov.b32 %f168, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd6, {%f107, %f112}];
// end inline asm
mov.b32 %f169, %r42;
mov.b32 %f170, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd6, {%f109, %f112}];
// end inline asm
mov.b32 %f171, %r46;
mov.b32 %f172, %r45;
mul.f32 %f173, %f140, %f168;
mul.f32 %f174, %f140, %f167;
fma.rn.f32 %f175, %f141, %f166, %f173;
fma.rn.f32 %f176, %f141, %f165, %f174;
fma.rn.f32 %f177, %f139, %f170, %f175;
fma.rn.f32 %f178, %f139, %f169, %f176;
fma.rn.f32 %f179, %f138, %f172, %f177;
fma.rn.f32 %f180, %f138, %f171, %f178;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd6, {%f103, %f120}];
// end inline asm
mov.b32 %f181, %r50;
mov.b32 %f182, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd6, {%f105, %f120}];
// end inline asm
mov.b32 %f183, %r54;
mov.b32 %f184, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd6, {%f107, %f120}];
// end inline asm
mov.b32 %f185, %r58;
mov.b32 %f186, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd6, {%f109, %f120}];
// end inline asm
mov.b32 %f187, %r62;
mov.b32 %f188, %r61;
mul.f32 %f189, %f140, %f184;
mul.f32 %f190, %f140, %f183;
fma.rn.f32 %f191, %f141, %f182, %f189;
fma.rn.f32 %f192, %f141, %f181, %f190;
fma.rn.f32 %f193, %f139, %f186, %f191;
fma.rn.f32 %f194, %f139, %f185, %f192;
fma.rn.f32 %f195, %f138, %f188, %f193;
fma.rn.f32 %f196, %f138, %f187, %f194;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd6, {%f103, %f128}];
// end inline asm
mov.b32 %f197, %r66;
mov.b32 %f198, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd6, {%f105, %f128}];
// end inline asm
mov.b32 %f199, %r70;
mov.b32 %f200, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd6, {%f107, %f128}];
// end inline asm
mov.b32 %f201, %r74;
mov.b32 %f202, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd6, {%f109, %f128}];
// end inline asm
mov.b32 %f203, %r78;
mov.b32 %f204, %r77;
mul.f32 %f205, %f140, %f200;
mul.f32 %f206, %f140, %f199;
fma.rn.f32 %f207, %f141, %f198, %f205;
fma.rn.f32 %f208, %f141, %f197, %f206;
fma.rn.f32 %f209, %f139, %f202, %f207;
fma.rn.f32 %f210, %f139, %f201, %f208;
fma.rn.f32 %f211, %f138, %f204, %f209;
fma.rn.f32 %f212, %f138, %f203, %f210;
mul.f32 %f213, %f146, %f179;
mul.f32 %f214, %f146, %f180;
fma.rn.f32 %f215, %f145, %f163, %f213;
fma.rn.f32 %f216, %f145, %f164, %f214;
fma.rn.f32 %f217, %f147, %f195, %f215;
fma.rn.f32 %f218, %f147, %f196, %f216;
fma.rn.f32 %f219, %f148, %f211, %f217;
fma.rn.f32 %f220, %f148, %f212, %f218;
mul.f32 %f221, %f219, 0f477FFF00;
mul.f32 %f222, %f220, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f221;
cvt.rzi.u16.f32 %rs2, %f222;
cvt.s64.s32 %rd22, %r2;
cvt.s64.s32 %rd23, %r5;
shr.u64 %rd24, %rd23, 1;
mul.lo.s64 %rd25, %rd24, %rd22;
cvt.s64.s32 %rd26, %r1;
add.s64 %rd27, %rd25, %rd26;
shl.b64 %rd28, %rd27, 1;
add.s64 %rd29, %rd2, %rd28;
st.global.u16 [%rd29], %rs1;
add.s64 %rd30, %rd1, %rd28;
st.global.u16 [%rd30], %rs2;
$L__BB309_18:
ret;
}
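//
// NOTE: the Subsample_Lanczos_* kernels below all evaluate a 4-tap
// Lanczos-2 window. In the PTX this is the repeated pattern
//     mul.f32  %fN, %fd, 0f40490FDB;   // theta = pi * d  (0f40490FDB = pi)
//     sin.approx.f32 ...               // sin(theta), sin(theta/2)
//     div.rn.f32 ...                   // / (theta*theta * 0.5)
// i.e. w(d) = 2*sin(theta)*sin(theta/2)/theta^2 with w(0) = 1, which is
// exactly sinc(d)*sinc(d/2). A hedged CUDA reconstruction of that weight
// function (the name lanczos2 is illustrative, not the original source):
//
//     __device__ static float lanczos2(float d)   // d in source pixels
//     {
//         if (d == 0.0f)
//             return 1.0f;
//         float t = d * 3.14159265f;              // pi, 0f40490FDB
//         // __sinf maps to sin.approx.f32, matching the PTX above
//         return 2.0f * __sinf(t) * __sinf(t * 0.5f) / (t * t);
//     }
//
// The four taps are taken at distances t+1, t, t-1, t-2 from the sample
// point and then normalized by their sum, as in the div.rn.f32 runs below.
//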
// .globl Subsample_Lanczos_yuv444p16le_yuv444p16le
.visible .entry Subsample_Lanczos_yuv444p16le_yuv444p16le(
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_yuv444p16le_param_13
)
{
.reg .pred %p<12>;
.reg .b16 %rs<2>;
.reg .b32 %r<81>;
.reg .f32 %f<194>;
.reg .b64 %rd<28>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB310_18;
bra.uni $L__BB310_1;
$L__BB310_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_11];
cvt.rn.f32.s32 %f45, %r6;
cvt.rn.f32.s32 %f46, %r3;
div.rn.f32 %f47, %f45, %f46;
cvt.rn.f32.s32 %f48, %r7;
cvt.rn.f32.s32 %f49, %r4;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r1;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f47, %f52, 0fBF000000;
cvt.rn.f32.s32 %f54, %r2;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mov.f32 %f193, 0f3F800000;
mov.f32 %f186, %f193;
@%p4 bra $L__BB310_3;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f186, %f64, %f9;
$L__BB310_3:
fma.rn.f32 %f56, %f50, %f55, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mov.f32 %f187, %f193;
@%p5 bra $L__BB310_5;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f187, %f69, %f13;
$L__BB310_5:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mov.f32 %f188, %f193;
@%p6 bra $L__BB310_7;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f188, %f74, %f17;
$L__BB310_7:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p7, %f7, 0f00000000;
mov.f32 %f189, %f193;
@%p7 bra $L__BB310_9;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f189, %f79, %f21;
$L__BB310_9:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mov.f32 %f190, %f193;
@%p8 bra $L__BB310_11;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f190, %f87, %f29;
$L__BB310_11:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mov.f32 %f191, %f193;
@%p9 bra $L__BB310_13;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f191, %f92, %f33;
$L__BB310_13:
ld.param.u64 %rd3, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_4];
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mov.f32 %f192, %f193;
@%p10 bra $L__BB310_15;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f192, %f97, %f37;
$L__BB310_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_10];
ld.param.u64 %rd4, [Subsample_Lanczos_yuv444p16le_yuv444p16le_param_0];
cvta.to.global.u64 %rd1, %rd3;
setp.eq.f32 %p11, %f27, 0f00000000;
@%p11 bra $L__BB310_17;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f193, %f102, %f41;
$L__BB310_17:
add.f32 %f135, %f186, %f187;
add.f32 %f136, %f135, %f188;
add.f32 %f137, %f136, %f189;
div.rn.f32 %f138, %f189, %f137;
div.rn.f32 %f139, %f188, %f137;
div.rn.f32 %f140, %f187, %f137;
div.rn.f32 %f141, %f186, %f137;
add.f32 %f142, %f190, %f191;
add.f32 %f143, %f142, %f192;
add.f32 %f144, %f143, %f193;
div.rn.f32 %f145, %f190, %f144;
div.rn.f32 %f146, %f191, %f144;
div.rn.f32 %f147, %f192, %f144;
div.rn.f32 %f148, %f193, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd4, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd4, {%f105, %f104}];
// end inline asm
mov.b32 %f150, %r21;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd4, {%f107, %f104}];
// end inline asm
mov.b32 %f151, %r25;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd4, {%f109, %f104}];
// end inline asm
mov.b32 %f152, %r29;
mul.f32 %f153, %f140, %f150;
fma.rn.f32 %f154, %f141, %f149, %f153;
fma.rn.f32 %f155, %f139, %f151, %f154;
fma.rn.f32 %f156, %f138, %f152, %f155;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd4, {%f103, %f112}];
// end inline asm
mov.b32 %f157, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd4, {%f105, %f112}];
// end inline asm
mov.b32 %f158, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd4, {%f107, %f112}];
// end inline asm
mov.b32 %f159, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd4, {%f109, %f112}];
// end inline asm
mov.b32 %f160, %r45;
mul.f32 %f161, %f140, %f158;
fma.rn.f32 %f162, %f141, %f157, %f161;
fma.rn.f32 %f163, %f139, %f159, %f162;
fma.rn.f32 %f164, %f138, %f160, %f163;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd4, {%f103, %f120}];
// end inline asm
mov.b32 %f165, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd4, {%f105, %f120}];
// end inline asm
mov.b32 %f166, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd4, {%f107, %f120}];
// end inline asm
mov.b32 %f167, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd4, {%f109, %f120}];
// end inline asm
mov.b32 %f168, %r61;
mul.f32 %f169, %f140, %f166;
fma.rn.f32 %f170, %f141, %f165, %f169;
fma.rn.f32 %f171, %f139, %f167, %f170;
fma.rn.f32 %f172, %f138, %f168, %f171;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd4, {%f103, %f128}];
// end inline asm
mov.b32 %f173, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd4, {%f105, %f128}];
// end inline asm
mov.b32 %f174, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd4, {%f107, %f128}];
// end inline asm
mov.b32 %f175, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd4, {%f109, %f128}];
// end inline asm
mov.b32 %f176, %r77;
mul.f32 %f177, %f140, %f174;
fma.rn.f32 %f178, %f141, %f173, %f177;
fma.rn.f32 %f179, %f139, %f175, %f178;
fma.rn.f32 %f180, %f138, %f176, %f179;
mul.f32 %f181, %f146, %f164;
fma.rn.f32 %f182, %f145, %f156, %f181;
fma.rn.f32 %f183, %f147, %f172, %f182;
fma.rn.f32 %f184, %f148, %f180, %f183;
mul.f32 %f185, %f184, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f185;
cvt.s64.s32 %rd20, %r2;
cvt.s64.s32 %rd21, %r5;
shr.u64 %rd22, %rd21, 1;
mul.lo.s64 %rd23, %rd22, %rd20;
cvt.s64.s32 %rd24, %r1;
add.s64 %rd25, %rd23, %rd24;
shl.b64 %rd26, %rd25, 1;
add.s64 %rd27, %rd1, %rd26;
st.global.u16 [%rd27], %rs1;
$L__BB310_18:
ret;
}
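//
// NOTE: Subsample_Lanczos_yuv444p16le_yuv444p16le above fetches a 4x4
// neighbourhood around (floor(sx), floor(sy)) at offsets -1..+2, blends each
// row with the normalized horizontal weights, blends the row results with
// the vertical weights, then scales by 0f477FFF00 (65535.0) for the 16-bit
// store. The destination address uses the byte pitch in param_10:
// ((pitch >> 1) * y + x) << 1, i.e. element pitch times row plus column,
// converted back to bytes. A hedged CUDA sketch of the gather (tex, wx, wy,
// fx, fy are illustrative names, not the original source):
//
//     float acc = 0.0f;
//     for (int j = -1; j <= 2; j++) {              // vertical taps
//         float row = 0.0f;
//         for (int i = -1; i <= 2; i++)            // horizontal taps
//             row += wx[i + 1] * tex2D<float>(tex, fx + i, fy + j);
//         acc += wy[j + 1] * row;
//     }
//     // cvt.rzi.u16.f32 truncates toward zero, like this cast
//     dst[(pitch / 2) * y + x] = (unsigned short)(acc * 65535.0f);
//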
// .globl Subsample_Lanczos_yuv444p16le_yuv444p16le_uv
.visible .entry Subsample_Lanczos_yuv444p16le_yuv444p16le_uv(
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_0,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_1,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_2,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_3,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_4,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_5,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_6,
.param .u64 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_7,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_8,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_9,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_10,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_11,
.param .u32 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_12,
.param .f32 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_13
)
{
.reg .pred %p<20>;
.reg .b16 %rs<3>;
.reg .b32 %r<145>;
.reg .f32 %f<339>;
.reg .b64 %rd<49>;
ld.param.u32 %r4, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB311_34;
bra.uni $L__BB311_1;
$L__BB311_1:
ld.param.u32 %r7, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_11];
cvt.rn.f32.s32 %f67, %r6;
cvt.rn.f32.s32 %f68, %r3;
div.rn.f32 %f69, %f67, %f68;
cvt.rn.f32.s32 %f70, %r7;
cvt.rn.f32.s32 %f71, %r4;
div.rn.f32 %f72, %f70, %f71;
cvt.rn.f32.s32 %f73, %r1;
add.f32 %f74, %f73, 0f3F000000;
fma.rn.f32 %f75, %f69, %f74, 0fBF000000;
cvt.rn.f32.s32 %f76, %r2;
add.f32 %f77, %f76, 0f3F000000;
cvt.rmi.f32.f32 %f242, %f75;
sub.f32 %f79, %f75, %f242;
add.f32 %f80, %f79, 0f3F800000;
mul.f32 %f4, %f80, 0f40490FDB;
mul.f32 %f5, %f79, 0f40490FDB;
add.f32 %f81, %f79, 0fBF800000;
setp.eq.f32 %p4, %f4, 0f00000000;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f83, %f4, %f4;
mul.f32 %f9, %f83, 0f3F000000;
mov.f32 %f338, 0f3F800000;
mov.f32 %f323, %f338;
@%p4 bra $L__BB311_3;
sin.approx.f32 %f84, %f4;
sin.approx.f32 %f85, %f8;
mul.f32 %f86, %f84, %f85;
div.rn.f32 %f323, %f86, %f9;
$L__BB311_3:
fma.rn.f32 %f78, %f72, %f77, 0fBF000000;
add.f32 %f82, %f79, 0fC0000000;
mul.f32 %f6, %f81, 0f40490FDB;
setp.eq.f32 %p5, %f5, 0f00000000;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f88, %f5, %f5;
mul.f32 %f13, %f88, 0f3F000000;
mov.f32 %f324, %f338;
@%p5 bra $L__BB311_5;
sin.approx.f32 %f89, %f5;
sin.approx.f32 %f90, %f12;
mul.f32 %f91, %f89, %f90;
div.rn.f32 %f324, %f91, %f13;
$L__BB311_5:
cvt.rmi.f32.f32 %f249, %f78;
mul.f32 %f7, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f6, 0f00000000;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f93, %f6, %f6;
mul.f32 %f17, %f93, 0f3F000000;
mov.f32 %f325, %f338;
@%p6 bra $L__BB311_7;
sin.approx.f32 %f94, %f6;
sin.approx.f32 %f95, %f16;
mul.f32 %f96, %f94, %f95;
div.rn.f32 %f325, %f96, %f17;
$L__BB311_7:
sub.f32 %f3, %f78, %f249;
setp.eq.f32 %p7, %f7, 0f00000000;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f98, %f7, %f7;
mul.f32 %f21, %f98, 0f3F000000;
mov.f32 %f326, %f338;
@%p7 bra $L__BB311_9;
sin.approx.f32 %f99, %f7;
sin.approx.f32 %f100, %f20;
mul.f32 %f101, %f99, %f100;
div.rn.f32 %f326, %f101, %f21;
$L__BB311_9:
add.f32 %f103, %f3, 0f3F800000;
mul.f32 %f24, %f103, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f104, %f3, 0fBF800000;
setp.eq.f32 %p8, %f24, 0f00000000;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f106, %f24, %f24;
mul.f32 %f29, %f106, 0f3F000000;
mov.f32 %f327, %f338;
@%p8 bra $L__BB311_11;
sin.approx.f32 %f107, %f24;
sin.approx.f32 %f108, %f28;
mul.f32 %f109, %f107, %f108;
div.rn.f32 %f327, %f109, %f29;
$L__BB311_11:
add.f32 %f105, %f3, 0fC0000000;
mul.f32 %f26, %f104, 0f40490FDB;
setp.eq.f32 %p9, %f25, 0f00000000;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f111, %f25, %f25;
mul.f32 %f33, %f111, 0f3F000000;
mov.f32 %f328, %f338;
@%p9 bra $L__BB311_13;
sin.approx.f32 %f112, %f25;
sin.approx.f32 %f113, %f32;
mul.f32 %f114, %f112, %f113;
div.rn.f32 %f328, %f114, %f33;
$L__BB311_13:
ld.param.u64 %rd7, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_5];
mul.f32 %f27, %f105, 0f40490FDB;
setp.eq.f32 %p10, %f26, 0f00000000;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f116, %f26, %f26;
mul.f32 %f37, %f116, 0f3F000000;
mov.f32 %f329, %f338;
@%p10 bra $L__BB311_15;
sin.approx.f32 %f117, %f26;
sin.approx.f32 %f118, %f36;
mul.f32 %f119, %f117, %f118;
div.rn.f32 %f329, %f119, %f37;
$L__BB311_15:
ld.param.u32 %r5, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_10];
ld.param.u64 %rd8, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_1];
cvta.to.global.u64 %rd2, %rd7;
setp.eq.f32 %p11, %f27, 0f00000000;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f121, %f27, %f27;
mul.f32 %f41, %f121, 0f3F000000;
mov.f32 %f330, %f338;
@%p11 bra $L__BB311_17;
sin.approx.f32 %f122, %f27;
sin.approx.f32 %f123, %f40;
mul.f32 %f124, %f122, %f123;
div.rn.f32 %f330, %f124, %f41;
$L__BB311_17:
add.f32 %f158, %f323, %f324;
add.f32 %f159, %f158, %f325;
add.f32 %f160, %f159, %f326;
div.rn.f32 %f161, %f326, %f160;
div.rn.f32 %f162, %f325, %f160;
div.rn.f32 %f163, %f324, %f160;
div.rn.f32 %f164, %f323, %f160;
add.f32 %f165, %f327, %f328;
add.f32 %f166, %f165, %f329;
add.f32 %f167, %f166, %f330;
div.rn.f32 %f168, %f327, %f167;
div.rn.f32 %f169, %f328, %f167;
div.rn.f32 %f170, %f329, %f167;
div.rn.f32 %f171, %f330, %f167;
add.f32 %f240, %f242, 0fBF800000;
add.f32 %f241, %f249, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r17, %r18, %r19, %r20}, [%rd8, {%f240, %f241}];
// end inline asm
mov.b32 %f172, %r17;
// begin inline asm
tex.2d.v4.f32.f32 {%r21, %r22, %r23, %r24}, [%rd8, {%f242, %f241}];
// end inline asm
mov.b32 %f173, %r21;
add.f32 %f244, %f242, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r25, %r26, %r27, %r28}, [%rd8, {%f244, %f241}];
// end inline asm
mov.b32 %f174, %r25;
add.f32 %f246, %f242, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r29, %r30, %r31, %r32}, [%rd8, {%f246, %f241}];
// end inline asm
mov.b32 %f175, %r29;
mul.f32 %f176, %f163, %f173;
fma.rn.f32 %f177, %f164, %f172, %f176;
fma.rn.f32 %f178, %f162, %f174, %f177;
fma.rn.f32 %f179, %f161, %f175, %f178;
// begin inline asm
tex.2d.v4.f32.f32 {%r33, %r34, %r35, %r36}, [%rd8, {%f240, %f249}];
// end inline asm
mov.b32 %f180, %r33;
// begin inline asm
tex.2d.v4.f32.f32 {%r37, %r38, %r39, %r40}, [%rd8, {%f242, %f249}];
// end inline asm
mov.b32 %f181, %r37;
// begin inline asm
tex.2d.v4.f32.f32 {%r41, %r42, %r43, %r44}, [%rd8, {%f244, %f249}];
// end inline asm
mov.b32 %f182, %r41;
// begin inline asm
tex.2d.v4.f32.f32 {%r45, %r46, %r47, %r48}, [%rd8, {%f246, %f249}];
// end inline asm
mov.b32 %f183, %r45;
mul.f32 %f184, %f163, %f181;
fma.rn.f32 %f185, %f164, %f180, %f184;
fma.rn.f32 %f186, %f162, %f182, %f185;
fma.rn.f32 %f187, %f161, %f183, %f186;
add.f32 %f257, %f249, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r49, %r50, %r51, %r52}, [%rd8, {%f240, %f257}];
// end inline asm
mov.b32 %f188, %r49;
// begin inline asm
tex.2d.v4.f32.f32 {%r53, %r54, %r55, %r56}, [%rd8, {%f242, %f257}];
// end inline asm
mov.b32 %f189, %r53;
// begin inline asm
tex.2d.v4.f32.f32 {%r57, %r58, %r59, %r60}, [%rd8, {%f244, %f257}];
// end inline asm
mov.b32 %f190, %r57;
// begin inline asm
tex.2d.v4.f32.f32 {%r61, %r62, %r63, %r64}, [%rd8, {%f246, %f257}];
// end inline asm
mov.b32 %f191, %r61;
mul.f32 %f192, %f163, %f189;
fma.rn.f32 %f193, %f164, %f188, %f192;
fma.rn.f32 %f194, %f162, %f190, %f193;
fma.rn.f32 %f195, %f161, %f191, %f194;
add.f32 %f265, %f249, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r65, %r66, %r67, %r68}, [%rd8, {%f240, %f265}];
// end inline asm
mov.b32 %f196, %r65;
// begin inline asm
tex.2d.v4.f32.f32 {%r69, %r70, %r71, %r72}, [%rd8, {%f242, %f265}];
// end inline asm
mov.b32 %f197, %r69;
// begin inline asm
tex.2d.v4.f32.f32 {%r73, %r74, %r75, %r76}, [%rd8, {%f244, %f265}];
// end inline asm
mov.b32 %f198, %r73;
// begin inline asm
tex.2d.v4.f32.f32 {%r77, %r78, %r79, %r80}, [%rd8, {%f246, %f265}];
// end inline asm
mov.b32 %f199, %r77;
mul.f32 %f200, %f163, %f197;
fma.rn.f32 %f201, %f164, %f196, %f200;
fma.rn.f32 %f202, %f162, %f198, %f201;
fma.rn.f32 %f203, %f161, %f199, %f202;
mul.f32 %f204, %f169, %f187;
fma.rn.f32 %f205, %f168, %f179, %f204;
fma.rn.f32 %f206, %f170, %f195, %f205;
fma.rn.f32 %f207, %f171, %f203, %f206;
mul.f32 %f208, %f207, 0f477FFF00;
cvt.rzi.u16.f32 %rs1, %f208;
cvt.s64.s32 %rd24, %r2;
cvt.s64.s32 %rd25, %r5;
shr.u64 %rd26, %rd25, 1;
mul.lo.s64 %rd27, %rd26, %rd24;
cvt.s64.s32 %rd28, %r1;
add.s64 %rd3, %rd27, %rd28;
shl.b64 %rd29, %rd3, 1;
add.s64 %rd30, %rd2, %rd29;
st.global.u16 [%rd30], %rs1;
mov.f32 %f331, %f338;
@%p4 bra $L__BB311_19;
sin.approx.f32 %f209, %f4;
sin.approx.f32 %f210, %f8;
mul.f32 %f211, %f209, %f210;
div.rn.f32 %f331, %f211, %f9;
$L__BB311_19:
mov.f32 %f332, %f338;
@%p5 bra $L__BB311_21;
sin.approx.f32 %f213, %f5;
sin.approx.f32 %f214, %f12;
mul.f32 %f215, %f213, %f214;
div.rn.f32 %f332, %f215, %f13;
$L__BB311_21:
mov.f32 %f333, %f338;
@%p6 bra $L__BB311_23;
sin.approx.f32 %f217, %f6;
sin.approx.f32 %f218, %f16;
mul.f32 %f219, %f217, %f218;
div.rn.f32 %f333, %f219, %f17;
$L__BB311_23:
mov.f32 %f334, %f338;
@%p7 bra $L__BB311_25;
sin.approx.f32 %f221, %f7;
sin.approx.f32 %f222, %f20;
mul.f32 %f223, %f221, %f222;
div.rn.f32 %f334, %f223, %f21;
$L__BB311_25:
mov.f32 %f335, %f338;
@%p8 bra $L__BB311_27;
sin.approx.f32 %f225, %f24;
sin.approx.f32 %f226, %f28;
mul.f32 %f227, %f225, %f226;
div.rn.f32 %f335, %f227, %f29;
$L__BB311_27:
mov.f32 %f336, %f338;
@%p9 bra $L__BB311_29;
sin.approx.f32 %f229, %f25;
sin.approx.f32 %f230, %f32;
mul.f32 %f231, %f229, %f230;
div.rn.f32 %f336, %f231, %f33;
$L__BB311_29:
ld.param.u64 %rd6, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_6];
mov.f32 %f337, %f338;
@%p10 bra $L__BB311_31;
sin.approx.f32 %f233, %f26;
sin.approx.f32 %f234, %f36;
mul.f32 %f235, %f233, %f234;
div.rn.f32 %f337, %f235, %f37;
$L__BB311_31:
ld.param.u64 %rd31, [Subsample_Lanczos_yuv444p16le_yuv444p16le_uv_param_2];
cvta.to.global.u64 %rd1, %rd6;
@%p11 bra $L__BB311_33;
sin.approx.f32 %f237, %f27;
sin.approx.f32 %f238, %f40;
mul.f32 %f239, %f237, %f238;
div.rn.f32 %f338, %f239, %f41;
$L__BB311_33:
add.f32 %f272, %f331, %f332;
add.f32 %f273, %f272, %f333;
add.f32 %f274, %f273, %f334;
div.rn.f32 %f275, %f334, %f274;
div.rn.f32 %f276, %f333, %f274;
div.rn.f32 %f277, %f332, %f274;
div.rn.f32 %f278, %f331, %f274;
add.f32 %f279, %f335, %f336;
add.f32 %f280, %f279, %f337;
add.f32 %f281, %f280, %f338;
div.rn.f32 %f282, %f335, %f281;
div.rn.f32 %f283, %f336, %f281;
div.rn.f32 %f284, %f337, %f281;
div.rn.f32 %f285, %f338, %f281;
// begin inline asm
tex.2d.v4.f32.f32 {%r81, %r82, %r83, %r84}, [%rd31, {%f240, %f241}];
// end inline asm
mov.b32 %f286, %r81;
// begin inline asm
tex.2d.v4.f32.f32 {%r85, %r86, %r87, %r88}, [%rd31, {%f242, %f241}];
// end inline asm
mov.b32 %f287, %r85;
// begin inline asm
tex.2d.v4.f32.f32 {%r89, %r90, %r91, %r92}, [%rd31, {%f244, %f241}];
// end inline asm
mov.b32 %f288, %r89;
// begin inline asm
tex.2d.v4.f32.f32 {%r93, %r94, %r95, %r96}, [%rd31, {%f246, %f241}];
// end inline asm
mov.b32 %f289, %r93;
mul.f32 %f290, %f277, %f287;
fma.rn.f32 %f291, %f278, %f286, %f290;
fma.rn.f32 %f292, %f276, %f288, %f291;
fma.rn.f32 %f293, %f275, %f289, %f292;
// begin inline asm
tex.2d.v4.f32.f32 {%r97, %r98, %r99, %r100}, [%rd31, {%f240, %f249}];
// end inline asm
mov.b32 %f294, %r97;
// begin inline asm
tex.2d.v4.f32.f32 {%r101, %r102, %r103, %r104}, [%rd31, {%f242, %f249}];
// end inline asm
mov.b32 %f295, %r101;
// begin inline asm
tex.2d.v4.f32.f32 {%r105, %r106, %r107, %r108}, [%rd31, {%f244, %f249}];
// end inline asm
mov.b32 %f296, %r105;
// begin inline asm
tex.2d.v4.f32.f32 {%r109, %r110, %r111, %r112}, [%rd31, {%f246, %f249}];
// end inline asm
mov.b32 %f297, %r109;
mul.f32 %f298, %f277, %f295;
fma.rn.f32 %f299, %f278, %f294, %f298;
fma.rn.f32 %f300, %f276, %f296, %f299;
fma.rn.f32 %f301, %f275, %f297, %f300;
// begin inline asm
tex.2d.v4.f32.f32 {%r113, %r114, %r115, %r116}, [%rd31, {%f240, %f257}];
// end inline asm
mov.b32 %f302, %r113;
// begin inline asm
tex.2d.v4.f32.f32 {%r117, %r118, %r119, %r120}, [%rd31, {%f242, %f257}];
// end inline asm
mov.b32 %f303, %r117;
// begin inline asm
tex.2d.v4.f32.f32 {%r121, %r122, %r123, %r124}, [%rd31, {%f244, %f257}];
// end inline asm
mov.b32 %f304, %r121;
// begin inline asm
tex.2d.v4.f32.f32 {%r125, %r126, %r127, %r128}, [%rd31, {%f246, %f257}];
// end inline asm
mov.b32 %f305, %r125;
mul.f32 %f306, %f277, %f303;
fma.rn.f32 %f307, %f278, %f302, %f306;
fma.rn.f32 %f308, %f276, %f304, %f307;
fma.rn.f32 %f309, %f275, %f305, %f308;
// begin inline asm
tex.2d.v4.f32.f32 {%r129, %r130, %r131, %r132}, [%rd31, {%f240, %f265}];
// end inline asm
mov.b32 %f310, %r129;
// begin inline asm
tex.2d.v4.f32.f32 {%r133, %r134, %r135, %r136}, [%rd31, {%f242, %f265}];
// end inline asm
mov.b32 %f311, %r133;
// begin inline asm
tex.2d.v4.f32.f32 {%r137, %r138, %r139, %r140}, [%rd31, {%f244, %f265}];
// end inline asm
mov.b32 %f312, %r137;
// begin inline asm
tex.2d.v4.f32.f32 {%r141, %r142, %r143, %r144}, [%rd31, {%f246, %f265}];
// end inline asm
mov.b32 %f313, %r141;
mul.f32 %f314, %f277, %f311;
fma.rn.f32 %f315, %f278, %f310, %f314;
fma.rn.f32 %f316, %f276, %f312, %f315;
fma.rn.f32 %f317, %f275, %f313, %f316;
mul.f32 %f318, %f283, %f301;
fma.rn.f32 %f319, %f282, %f293, %f318;
fma.rn.f32 %f320, %f284, %f309, %f319;
fma.rn.f32 %f321, %f285, %f317, %f320;
mul.f32 %f322, %f321, 0f477FFF00;
cvt.rzi.u16.f32 %rs2, %f322;
add.s64 %rd48, %rd1, %rd29;
st.global.u16 [%rd48], %rs2;
$L__BB311_34:
ret;
}
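//
// NOTE: the _uv variant above runs the same Lanczos pipeline twice: first
// reading the U source texture (param_1) and storing into the plane from
// param_5, then reading the V texture (param_2) and storing into the plane
// from param_6 at the same element offset. The eight window weights are
// recomputed for the second plane (blocks $L__BB311_19..33) rather than
// kept live across the first set of texture fetches.
//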
// .globl Subsample_Lanczos_bgr0_bgr0
.visible .entry Subsample_Lanczos_bgr0_bgr0(
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_0,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_1,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_2,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_3,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_4,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_5,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_6,
.param .u64 Subsample_Lanczos_bgr0_bgr0_param_7,
.param .u32 Subsample_Lanczos_bgr0_bgr0_param_8,
.param .u32 Subsample_Lanczos_bgr0_bgr0_param_9,
.param .u32 Subsample_Lanczos_bgr0_bgr0_param_10,
.param .u32 Subsample_Lanczos_bgr0_bgr0_param_11,
.param .u32 Subsample_Lanczos_bgr0_bgr0_param_12,
.param .f32 Subsample_Lanczos_bgr0_bgr0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<17>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Lanczos_bgr0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_bgr0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB312_2;
bra.uni $L__BB312_1;
$L__BB312_1:
ld.param.u32 %r7, [Subsample_Lanczos_bgr0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_bgr0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Lanczos_bgr0_bgr0_param_10];
ld.param.u64 %rd2, [Subsample_Lanczos_bgr0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Lanczos_bgr0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
{ // callseq 0, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b32 param2;
st.param.b32 [param2+0], %r2;
.param .b32 param3;
st.param.b32 [param3+0], %r3;
.param .b32 param4;
st.param.b32 [param4+0], %r4;
.param .b32 param5;
st.param.b32 [param5+0], %r6;
.param .b32 param6;
st.param.b32 [param6+0], %r7;
.param .align 16 .b8 retval0[4];
call.uni (retval0),
_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6
);
ld.param.v4.b8 {%rs1, %rs2, %rs3, %rs4}, [retval0+0];
} // callseq 0
cvt.s64.s32 %rd4, %r2;
cvt.s64.s32 %rd5, %r5;
shr.u64 %rd6, %rd5, 2;
mul.lo.s64 %rd7, %rd6, %rd4;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
shl.b64 %rd10, %rd9, 2;
add.s64 %rd11, %rd1, %rd10;
st.global.v4.u8 [%rd11], {%rs1, %rs2, %rs3, %rs4};
$L__BB312_2:
ret;
}
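//
// NOTE: the packed-RGB kernels (the bgr0/rgb0 pairs) do not inline the
// filter. Each one bounds-checks the thread, then calls the internal device
// function _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif,
// which demangles to Subsample_Bicubic<uchar4, &lanczos_coeffs>(...): the
// generic bicubic sampler instantiated with the Lanczos coefficient function
// as a template argument. The returned uchar4 is stored with st.global.v4.u8
// at byte offset ((pitch >> 2) * y + x) * 4, one 32-bit pixel per thread.
// A hedged CUDA sketch of the wrapper shape (parameter names illustrative,
// and Subsample_Bicubic assumed declared elsewhere):
//
//     __global__ void Subsample_Lanczos_bgr0_bgr0(cudaTextureObject_t src,
//             uchar4 *dst, int dst_w, int dst_h, int pitch,
//             int src_w, int src_h)
//     {
//         int x = blockIdx.x * blockDim.x + threadIdx.x;
//         int y = blockIdx.y * blockDim.y + threadIdx.y;
//         if (x >= dst_w || y >= dst_h)
//             return;
//         dst[(pitch / 4) * y + x] =
//             Subsample_Bicubic<uchar4, lanczos_coeffs>(
//                     src, x, y, dst_w, dst_h, src_w, src_h);
//     }
//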
// .globl Subsample_Lanczos_bgr0_bgr0_uv
.visible .entry Subsample_Lanczos_bgr0_bgr0_uv(
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_0,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_1,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_2,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_3,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_4,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_5,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_6,
.param .u64 Subsample_Lanczos_bgr0_bgr0_uv_param_7,
.param .u32 Subsample_Lanczos_bgr0_bgr0_uv_param_8,
.param .u32 Subsample_Lanczos_bgr0_bgr0_uv_param_9,
.param .u32 Subsample_Lanczos_bgr0_bgr0_uv_param_10,
.param .u32 Subsample_Lanczos_bgr0_bgr0_uv_param_11,
.param .u32 Subsample_Lanczos_bgr0_bgr0_uv_param_12,
.param .f32 Subsample_Lanczos_bgr0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
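//
// NOTE: every *_uv entry for the packed RGB formats is an empty stub that
// returns immediately: packed bgr0/rgb0 frames carry no separate chroma
// plane, so the UV pass has nothing to do. The stubs presumably exist so
// the host side can launch the same kernel pair for every format pairing.
//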
// .globl Subsample_Lanczos_rgb0_rgb0
.visible .entry Subsample_Lanczos_rgb0_rgb0(
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_0,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_1,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_2,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_3,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_4,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_5,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_6,
.param .u64 Subsample_Lanczos_rgb0_rgb0_param_7,
.param .u32 Subsample_Lanczos_rgb0_rgb0_param_8,
.param .u32 Subsample_Lanczos_rgb0_rgb0_param_9,
.param .u32 Subsample_Lanczos_rgb0_rgb0_param_10,
.param .u32 Subsample_Lanczos_rgb0_rgb0_param_11,
.param .u32 Subsample_Lanczos_rgb0_rgb0_param_12,
.param .f32 Subsample_Lanczos_rgb0_rgb0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<17>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Lanczos_rgb0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_rgb0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB314_2;
bra.uni $L__BB314_1;
$L__BB314_1:
ld.param.u32 %r7, [Subsample_Lanczos_rgb0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_rgb0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Lanczos_rgb0_rgb0_param_10];
ld.param.u64 %rd2, [Subsample_Lanczos_rgb0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Lanczos_rgb0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
{ // callseq 1, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b32 param2;
st.param.b32 [param2+0], %r2;
.param .b32 param3;
st.param.b32 [param3+0], %r3;
.param .b32 param4;
st.param.b32 [param4+0], %r4;
.param .b32 param5;
st.param.b32 [param5+0], %r6;
.param .b32 param6;
st.param.b32 [param6+0], %r7;
.param .align 16 .b8 retval0[4];
call.uni (retval0),
_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6
);
ld.param.v4.b8 {%rs1, %rs2, %rs3, %rs4}, [retval0+0];
} // callseq 1
cvt.s64.s32 %rd4, %r2;
cvt.s64.s32 %rd5, %r5;
shr.u64 %rd6, %rd5, 2;
mul.lo.s64 %rd7, %rd6, %rd4;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
shl.b64 %rd10, %rd9, 2;
add.s64 %rd11, %rd1, %rd10;
st.global.v4.u8 [%rd11], {%rs1, %rs2, %rs3, %rs4};
$L__BB314_2:
ret;
}
// .globl Subsample_Lanczos_rgb0_rgb0_uv
.visible .entry Subsample_Lanczos_rgb0_rgb0_uv(
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_0,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_1,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_2,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_3,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_4,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_5,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_6,
.param .u64 Subsample_Lanczos_rgb0_rgb0_uv_param_7,
.param .u32 Subsample_Lanczos_rgb0_rgb0_uv_param_8,
.param .u32 Subsample_Lanczos_rgb0_rgb0_uv_param_9,
.param .u32 Subsample_Lanczos_rgb0_rgb0_uv_param_10,
.param .u32 Subsample_Lanczos_rgb0_rgb0_uv_param_11,
.param .u32 Subsample_Lanczos_rgb0_rgb0_uv_param_12,
.param .f32 Subsample_Lanczos_rgb0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Lanczos_bgr0_rgb0
.visible .entry Subsample_Lanczos_bgr0_rgb0(
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_0,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_1,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_2,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_3,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_4,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_5,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_6,
.param .u64 Subsample_Lanczos_bgr0_rgb0_param_7,
.param .u32 Subsample_Lanczos_bgr0_rgb0_param_8,
.param .u32 Subsample_Lanczos_bgr0_rgb0_param_9,
.param .u32 Subsample_Lanczos_bgr0_rgb0_param_10,
.param .u32 Subsample_Lanczos_bgr0_rgb0_param_11,
.param .u32 Subsample_Lanczos_bgr0_rgb0_param_12,
.param .f32 Subsample_Lanczos_bgr0_rgb0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<17>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Lanczos_bgr0_rgb0_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_bgr0_rgb0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB316_2;
bra.uni $L__BB316_1;
$L__BB316_1:
ld.param.u32 %r7, [Subsample_Lanczos_bgr0_rgb0_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_bgr0_rgb0_param_11];
ld.param.u32 %r5, [Subsample_Lanczos_bgr0_rgb0_param_10];
ld.param.u64 %rd2, [Subsample_Lanczos_bgr0_rgb0_param_0];
ld.param.u64 %rd3, [Subsample_Lanczos_bgr0_rgb0_param_4];
cvta.to.global.u64 %rd1, %rd3;
{ // callseq 2, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b32 param2;
st.param.b32 [param2+0], %r2;
.param .b32 param3;
st.param.b32 [param3+0], %r3;
.param .b32 param4;
st.param.b32 [param4+0], %r4;
.param .b32 param5;
st.param.b32 [param5+0], %r6;
.param .b32 param6;
st.param.b32 [param6+0], %r7;
.param .align 16 .b8 retval0[4];
call.uni (retval0),
_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6
);
ld.param.v4.b8 {%rs1, %rs2, %rs3, %rs4}, [retval0+0];
} // callseq 2
cvt.s64.s32 %rd4, %r2;
cvt.s64.s32 %rd5, %r5;
shr.u64 %rd6, %rd5, 2;
mul.lo.s64 %rd7, %rd6, %rd4;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
shl.b64 %rd10, %rd9, 2;
add.s64 %rd11, %rd1, %rd10;
st.global.v4.u8 [%rd11], {%rs3, %rs2, %rs1, %rs4};
$L__BB316_2:
ret;
}
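//
// NOTE: Subsample_Lanczos_bgr0_rgb0 above is identical to the bgr0_bgr0
// kernel except for the final store, which writes {%rs3, %rs2, %rs1, %rs4}
// instead of {%rs1, %rs2, %rs3, %rs4}: channels 0 and 2 are swapped to
// convert BGRX to RGBX, while the fourth (padding/alpha) byte passes
// through unchanged. Subsample_Lanczos_rgb0_bgr0 applies the same swizzle
// in the opposite direction.
//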
// .globl Subsample_Lanczos_bgr0_rgb0_uv
.visible .entry Subsample_Lanczos_bgr0_rgb0_uv(
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_0,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_1,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_2,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_3,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_4,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_5,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_6,
.param .u64 Subsample_Lanczos_bgr0_rgb0_uv_param_7,
.param .u32 Subsample_Lanczos_bgr0_rgb0_uv_param_8,
.param .u32 Subsample_Lanczos_bgr0_rgb0_uv_param_9,
.param .u32 Subsample_Lanczos_bgr0_rgb0_uv_param_10,
.param .u32 Subsample_Lanczos_bgr0_rgb0_uv_param_11,
.param .u32 Subsample_Lanczos_bgr0_rgb0_uv_param_12,
.param .f32 Subsample_Lanczos_bgr0_rgb0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
// .globl Subsample_Lanczos_rgb0_bgr0
.visible .entry Subsample_Lanczos_rgb0_bgr0(
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_0,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_1,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_2,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_3,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_4,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_5,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_6,
.param .u64 Subsample_Lanczos_rgb0_bgr0_param_7,
.param .u32 Subsample_Lanczos_rgb0_bgr0_param_8,
.param .u32 Subsample_Lanczos_rgb0_bgr0_param_9,
.param .u32 Subsample_Lanczos_rgb0_bgr0_param_10,
.param .u32 Subsample_Lanczos_rgb0_bgr0_param_11,
.param .u32 Subsample_Lanczos_rgb0_bgr0_param_12,
.param .f32 Subsample_Lanczos_rgb0_bgr0_param_13
)
{
.reg .pred %p<4>;
.reg .b16 %rs<9>;
.reg .b32 %r<17>;
.reg .b64 %rd<12>;
ld.param.u32 %r4, [Subsample_Lanczos_rgb0_bgr0_param_9];
ld.param.u32 %r3, [Subsample_Lanczos_rgb0_bgr0_param_8];
// begin inline asm
mov.u32 %r8, %ctaid.x;
// end inline asm
// begin inline asm
mov.u32 %r9, %ctaid.y;
// end inline asm
// begin inline asm
mov.u32 %r11, %ntid.x;
// end inline asm
// begin inline asm
mov.u32 %r12, %ntid.y;
// end inline asm
// begin inline asm
mov.u32 %r14, %tid.x;
// end inline asm
// begin inline asm
mov.u32 %r15, %tid.y;
// end inline asm
mad.lo.s32 %r1, %r11, %r8, %r14;
mad.lo.s32 %r2, %r12, %r9, %r15;
setp.lt.s32 %p1, %r2, %r4;
setp.lt.s32 %p2, %r1, %r3;
and.pred %p3, %p1, %p2;
@!%p3 bra $L__BB318_2;
bra.uni $L__BB318_1;
$L__BB318_1:
ld.param.u32 %r7, [Subsample_Lanczos_rgb0_bgr0_param_12];
ld.param.u32 %r6, [Subsample_Lanczos_rgb0_bgr0_param_11];
ld.param.u32 %r5, [Subsample_Lanczos_rgb0_bgr0_param_10];
ld.param.u64 %rd2, [Subsample_Lanczos_rgb0_bgr0_param_0];
ld.param.u64 %rd3, [Subsample_Lanczos_rgb0_bgr0_param_4];
cvta.to.global.u64 %rd1, %rd3;
{ // callseq 3, 0
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.b64 [param0+0], %rd2;
.param .b32 param1;
st.param.b32 [param1+0], %r1;
.param .b32 param2;
st.param.b32 [param2+0], %r2;
.param .b32 param3;
st.param.b32 [param3+0], %r3;
.param .b32 param4;
st.param.b32 [param4+0], %r4;
.param .b32 param5;
st.param.b32 [param5+0], %r6;
.param .b32 param6;
st.param.b32 [param6+0], %r7;
.param .align 16 .b8 retval0[4];
call.uni (retval0),
_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif,
(
param0,
param1,
param2,
param3,
param4,
param5,
param6
);
ld.param.v4.b8 {%rs1, %rs2, %rs3, %rs4}, [retval0+0];
} // callseq 3
cvt.s64.s32 %rd4, %r2;
cvt.s64.s32 %rd5, %r5;
shr.u64 %rd6, %rd5, 2;
mul.lo.s64 %rd7, %rd6, %rd4;
cvt.s64.s32 %rd8, %r1;
add.s64 %rd9, %rd7, %rd8;
shl.b64 %rd10, %rd9, 2;
add.s64 %rd11, %rd1, %rd10;
st.global.v4.u8 [%rd11], {%rs3, %rs2, %rs1, %rs4};
$L__BB318_2:
ret;
}
// .globl Subsample_Lanczos_rgb0_bgr0_uv
.visible .entry Subsample_Lanczos_rgb0_bgr0_uv(
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_0,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_1,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_2,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_3,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_4,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_5,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_6,
.param .u64 Subsample_Lanczos_rgb0_bgr0_uv_param_7,
.param .u32 Subsample_Lanczos_rgb0_bgr0_uv_param_8,
.param .u32 Subsample_Lanczos_rgb0_bgr0_uv_param_9,
.param .u32 Subsample_Lanczos_rgb0_bgr0_uv_param_10,
.param .u32 Subsample_Lanczos_rgb0_bgr0_uv_param_11,
.param .u32 Subsample_Lanczos_rgb0_bgr0_uv_param_12,
.param .f32 Subsample_Lanczos_rgb0_bgr0_uv_param_13
)
{
.reg .b32 %r<10>;
ret;
}
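//
// NOTE: the definition below is the internal device function invoked by all
// four packed-RGB kernels above (callseq 0..3); only declarations appeared
// before this point in the module.
//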
.func (.param .align 16 .b8 func_retval0[4]) _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif(
.param .b64 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_0,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_1,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_2,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_3,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_4,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_5,
.param .b32 _ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_6
)
{
.reg .pred %p<9>;
.reg .b16 %rs<5>;
.reg .b32 %r<71>;
.reg .f32 %f<305>;
.reg .b64 %rd<18>;
ld.param.u32 %r1, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_5];
cvt.rn.f32.s32 %f45, %r1;
ld.param.u32 %r2, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_1];
ld.param.u32 %r3, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_3];
cvt.rn.f32.s32 %f46, %r3;
ld.param.u32 %r4, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_2];
div.rn.f32 %f47, %f45, %f46;
ld.param.u32 %r5, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_6];
cvt.rn.f32.s32 %f48, %r5;
ld.param.u32 %r6, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_4];
cvt.rn.f32.s32 %f49, %r6;
div.rn.f32 %f50, %f48, %f49;
cvt.rn.f32.s32 %f51, %r2;
add.f32 %f52, %f51, 0f3F000000;
fma.rn.f32 %f53, %f52, %f47, 0fBF000000;
cvt.rn.f32.s32 %f54, %r4;
add.f32 %f55, %f54, 0f3F000000;
cvt.rmi.f32.f32 %f105, %f53;
sub.f32 %f57, %f53, %f105;
add.f32 %f58, %f57, 0f3F800000;
mul.f32 %f4, %f58, 0f40490FDB;
mul.f32 %f5, %f57, 0f40490FDB;
add.f32 %f59, %f57, 0fBF800000;
setp.eq.f32 %p1, %f4, 0f00000000;
mov.f32 %f304, 0f3F800000;
mov.f32 %f297, %f304;
@%p1 bra $L__BB320_2;
mul.f32 %f8, %f4, 0f3F000000;
mul.f32 %f61, %f4, %f4;
mul.f32 %f9, %f61, 0f3F000000;
sin.approx.f32 %f62, %f4;
sin.approx.f32 %f63, %f8;
mul.f32 %f64, %f62, %f63;
div.rn.f32 %f297, %f64, %f9;
$L__BB320_2:
fma.rn.f32 %f56, %f55, %f50, 0fBF000000;
add.f32 %f60, %f57, 0fC0000000;
mul.f32 %f6, %f59, 0f40490FDB;
setp.eq.f32 %p2, %f5, 0f00000000;
mov.f32 %f298, %f304;
@%p2 bra $L__BB320_4;
mul.f32 %f12, %f5, 0f3F000000;
mul.f32 %f66, %f5, %f5;
mul.f32 %f13, %f66, 0f3F000000;
sin.approx.f32 %f67, %f5;
sin.approx.f32 %f68, %f12;
mul.f32 %f69, %f67, %f68;
div.rn.f32 %f298, %f69, %f13;
$L__BB320_4:
cvt.rmi.f32.f32 %f112, %f56;
mul.f32 %f7, %f60, 0f40490FDB;
setp.eq.f32 %p3, %f6, 0f00000000;
mov.f32 %f299, %f304;
@%p3 bra $L__BB320_6;
mul.f32 %f16, %f6, 0f3F000000;
mul.f32 %f71, %f6, %f6;
mul.f32 %f17, %f71, 0f3F000000;
sin.approx.f32 %f72, %f6;
sin.approx.f32 %f73, %f16;
mul.f32 %f74, %f72, %f73;
div.rn.f32 %f299, %f74, %f17;
$L__BB320_6:
sub.f32 %f3, %f56, %f112;
setp.eq.f32 %p4, %f7, 0f00000000;
mov.f32 %f300, %f304;
@%p4 bra $L__BB320_8;
mul.f32 %f20, %f7, 0f3F000000;
mul.f32 %f76, %f7, %f7;
mul.f32 %f21, %f76, 0f3F000000;
sin.approx.f32 %f77, %f7;
sin.approx.f32 %f78, %f20;
mul.f32 %f79, %f77, %f78;
div.rn.f32 %f300, %f79, %f21;
$L__BB320_8:
add.f32 %f81, %f3, 0f3F800000;
mul.f32 %f24, %f81, 0f40490FDB;
mul.f32 %f25, %f3, 0f40490FDB;
add.f32 %f82, %f3, 0fBF800000;
setp.eq.f32 %p5, %f24, 0f00000000;
mov.f32 %f301, %f304;
@%p5 bra $L__BB320_10;
mul.f32 %f28, %f24, 0f3F000000;
mul.f32 %f84, %f24, %f24;
mul.f32 %f29, %f84, 0f3F000000;
sin.approx.f32 %f85, %f24;
sin.approx.f32 %f86, %f28;
mul.f32 %f87, %f85, %f86;
div.rn.f32 %f301, %f87, %f29;
$L__BB320_10:
add.f32 %f83, %f3, 0fC0000000;
mul.f32 %f26, %f82, 0f40490FDB;
setp.eq.f32 %p6, %f25, 0f00000000;
mov.f32 %f302, %f304;
@%p6 bra $L__BB320_12;
mul.f32 %f32, %f25, 0f3F000000;
mul.f32 %f89, %f25, %f25;
mul.f32 %f33, %f89, 0f3F000000;
sin.approx.f32 %f90, %f25;
sin.approx.f32 %f91, %f32;
mul.f32 %f92, %f90, %f91;
div.rn.f32 %f302, %f92, %f33;
$L__BB320_12:
mul.f32 %f27, %f83, 0f40490FDB;
setp.eq.f32 %p7, %f26, 0f00000000;
mov.f32 %f303, %f304;
@%p7 bra $L__BB320_14;
mul.f32 %f36, %f26, 0f3F000000;
mul.f32 %f94, %f26, %f26;
mul.f32 %f37, %f94, 0f3F000000;
sin.approx.f32 %f95, %f26;
sin.approx.f32 %f96, %f36;
mul.f32 %f97, %f95, %f96;
div.rn.f32 %f303, %f97, %f37;
$L__BB320_14:
ld.param.u64 %rd2, [_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif_param_0];
setp.eq.f32 %p8, %f27, 0f00000000;
@%p8 bra $L__BB320_16;
mul.f32 %f40, %f27, 0f3F000000;
mul.f32 %f99, %f27, %f27;
mul.f32 %f41, %f99, 0f3F000000;
sin.approx.f32 %f100, %f27;
sin.approx.f32 %f101, %f40;
mul.f32 %f102, %f100, %f101;
div.rn.f32 %f304, %f102, %f41;
$L__BB320_16:
add.f32 %f135, %f297, %f298;
add.f32 %f136, %f135, %f299;
add.f32 %f137, %f136, %f300;
div.rn.f32 %f138, %f300, %f137;
div.rn.f32 %f139, %f299, %f137;
div.rn.f32 %f140, %f298, %f137;
div.rn.f32 %f141, %f297, %f137;
add.f32 %f142, %f301, %f302;
add.f32 %f143, %f142, %f303;
add.f32 %f144, %f143, %f304;
div.rn.f32 %f145, %f301, %f144;
div.rn.f32 %f146, %f302, %f144;
div.rn.f32 %f147, %f303, %f144;
div.rn.f32 %f148, %f304, %f144;
add.f32 %f103, %f105, 0fBF800000;
add.f32 %f104, %f112, 0fBF800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r7, %r8, %r9, %r10}, [%rd2, {%f103, %f104}];
// end inline asm
mov.b32 %f149, %r10;
mov.b32 %f150, %r9;
mov.b32 %f151, %r8;
mov.b32 %f152, %r7;
// begin inline asm
tex.2d.v4.f32.f32 {%r11, %r12, %r13, %r14}, [%rd2, {%f105, %f104}];
// end inline asm
mov.b32 %f153, %r14;
mov.b32 %f154, %r13;
mov.b32 %f155, %r12;
mov.b32 %f156, %r11;
add.f32 %f107, %f105, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r15, %r16, %r17, %r18}, [%rd2, {%f107, %f104}];
// end inline asm
mov.b32 %f157, %r18;
mov.b32 %f158, %r17;
mov.b32 %f159, %r16;
mov.b32 %f160, %r15;
add.f32 %f109, %f105, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r19, %r20, %r21, %r22}, [%rd2, {%f109, %f104}];
// end inline asm
mov.b32 %f161, %r22;
mov.b32 %f162, %r21;
mov.b32 %f163, %r20;
mov.b32 %f164, %r19;
mul.f32 %f165, %f140, %f156;
mul.f32 %f166, %f140, %f155;
mul.f32 %f167, %f140, %f154;
mul.f32 %f168, %f140, %f153;
fma.rn.f32 %f169, %f141, %f152, %f165;
fma.rn.f32 %f170, %f141, %f151, %f166;
fma.rn.f32 %f171, %f141, %f150, %f167;
fma.rn.f32 %f172, %f141, %f149, %f168;
fma.rn.f32 %f173, %f139, %f160, %f169;
fma.rn.f32 %f174, %f139, %f159, %f170;
fma.rn.f32 %f175, %f139, %f158, %f171;
fma.rn.f32 %f176, %f139, %f157, %f172;
fma.rn.f32 %f177, %f138, %f164, %f173;
fma.rn.f32 %f178, %f138, %f163, %f174;
fma.rn.f32 %f179, %f138, %f162, %f175;
fma.rn.f32 %f180, %f138, %f161, %f176;
// begin inline asm
tex.2d.v4.f32.f32 {%r23, %r24, %r25, %r26}, [%rd2, {%f103, %f112}];
// end inline asm
mov.b32 %f181, %r26;
mov.b32 %f182, %r25;
mov.b32 %f183, %r24;
mov.b32 %f184, %r23;
// begin inline asm
tex.2d.v4.f32.f32 {%r27, %r28, %r29, %r30}, [%rd2, {%f105, %f112}];
// end inline asm
mov.b32 %f185, %r30;
mov.b32 %f186, %r29;
mov.b32 %f187, %r28;
mov.b32 %f188, %r27;
// begin inline asm
tex.2d.v4.f32.f32 {%r31, %r32, %r33, %r34}, [%rd2, {%f107, %f112}];
// end inline asm
mov.b32 %f189, %r34;
mov.b32 %f190, %r33;
mov.b32 %f191, %r32;
mov.b32 %f192, %r31;
// begin inline asm
tex.2d.v4.f32.f32 {%r35, %r36, %r37, %r38}, [%rd2, {%f109, %f112}];
// end inline asm
mov.b32 %f193, %r38;
mov.b32 %f194, %r37;
mov.b32 %f195, %r36;
mov.b32 %f196, %r35;
mul.f32 %f197, %f140, %f188;
mul.f32 %f198, %f140, %f187;
mul.f32 %f199, %f140, %f186;
mul.f32 %f200, %f140, %f185;
fma.rn.f32 %f201, %f141, %f184, %f197;
fma.rn.f32 %f202, %f141, %f183, %f198;
fma.rn.f32 %f203, %f141, %f182, %f199;
fma.rn.f32 %f204, %f141, %f181, %f200;
fma.rn.f32 %f205, %f139, %f192, %f201;
fma.rn.f32 %f206, %f139, %f191, %f202;
fma.rn.f32 %f207, %f139, %f190, %f203;
fma.rn.f32 %f208, %f139, %f189, %f204;
fma.rn.f32 %f209, %f138, %f196, %f205;
fma.rn.f32 %f210, %f138, %f195, %f206;
fma.rn.f32 %f211, %f138, %f194, %f207;
fma.rn.f32 %f212, %f138, %f193, %f208;
add.f32 %f120, %f112, 0f3F800000;
// begin inline asm
tex.2d.v4.f32.f32 {%r39, %r40, %r41, %r42}, [%rd2, {%f103, %f120}];
// end inline asm
mov.b32 %f213, %r42;
mov.b32 %f214, %r41;
mov.b32 %f215, %r40;
mov.b32 %f216, %r39;
// begin inline asm
tex.2d.v4.f32.f32 {%r43, %r44, %r45, %r46}, [%rd2, {%f105, %f120}];
// end inline asm
mov.b32 %f217, %r46;
mov.b32 %f218, %r45;
mov.b32 %f219, %r44;
mov.b32 %f220, %r43;
// begin inline asm
tex.2d.v4.f32.f32 {%r47, %r48, %r49, %r50}, [%rd2, {%f107, %f120}];
// end inline asm
mov.b32 %f221, %r50;
mov.b32 %f222, %r49;
mov.b32 %f223, %r48;
mov.b32 %f224, %r47;
// begin inline asm
tex.2d.v4.f32.f32 {%r51, %r52, %r53, %r54}, [%rd2, {%f109, %f120}];
// end inline asm
mov.b32 %f225, %r54;
mov.b32 %f226, %r53;
mov.b32 %f227, %r52;
mov.b32 %f228, %r51;
mul.f32 %f229, %f140, %f220;
mul.f32 %f230, %f140, %f219;
mul.f32 %f231, %f140, %f218;
mul.f32 %f232, %f140, %f217;
fma.rn.f32 %f233, %f141, %f216, %f229;
fma.rn.f32 %f234, %f141, %f215, %f230;
fma.rn.f32 %f235, %f141, %f214, %f231;
fma.rn.f32 %f236, %f141, %f213, %f232;
fma.rn.f32 %f237, %f139, %f224, %f233;
fma.rn.f32 %f238, %f139, %f223, %f234;
fma.rn.f32 %f239, %f139, %f222, %f235;
fma.rn.f32 %f240, %f139, %f221, %f236;
fma.rn.f32 %f241, %f138, %f228, %f237;
fma.rn.f32 %f242, %f138, %f227, %f238;
fma.rn.f32 %f243, %f138, %f226, %f239;
fma.rn.f32 %f244, %f138, %f225, %f240;
add.f32 %f128, %f112, 0f40000000;
// begin inline asm
tex.2d.v4.f32.f32 {%r55, %r56, %r57, %r58}, [%rd2, {%f103, %f128}];
// end inline asm
mov.b32 %f245, %r58;
mov.b32 %f246, %r57;
mov.b32 %f247, %r56;
mov.b32 %f248, %r55;
// begin inline asm
tex.2d.v4.f32.f32 {%r59, %r60, %r61, %r62}, [%rd2, {%f105, %f128}];
// end inline asm
mov.b32 %f249, %r62;
mov.b32 %f250, %r61;
mov.b32 %f251, %r60;
mov.b32 %f252, %r59;
// begin inline asm
tex.2d.v4.f32.f32 {%r63, %r64, %r65, %r66}, [%rd2, {%f107, %f128}];
// end inline asm
mov.b32 %f253, %r66;
mov.b32 %f254, %r65;
mov.b32 %f255, %r64;
mov.b32 %f256, %r63;
// begin inline asm
tex.2d.v4.f32.f32 {%r67, %r68, %r69, %r70}, [%rd2, {%f109, %f128}];
// end inline asm
mov.b32 %f257, %r70;
mov.b32 %f258, %r69;
mov.b32 %f259, %r68;
mov.b32 %f260, %r67;
mul.f32 %f261, %f140, %f252;
mul.f32 %f262, %f140, %f251;
mul.f32 %f263, %f140, %f250;
mul.f32 %f264, %f140, %f249;
fma.rn.f32 %f265, %f141, %f248, %f261;
fma.rn.f32 %f266, %f141, %f247, %f262;
fma.rn.f32 %f267, %f141, %f246, %f263;
fma.rn.f32 %f268, %f141, %f245, %f264;
fma.rn.f32 %f269, %f139, %f256, %f265;
fma.rn.f32 %f270, %f139, %f255, %f266;
fma.rn.f32 %f271, %f139, %f254, %f267;
fma.rn.f32 %f272, %f139, %f253, %f268;
fma.rn.f32 %f273, %f138, %f260, %f269;
fma.rn.f32 %f274, %f138, %f259, %f270;
fma.rn.f32 %f275, %f138, %f258, %f271;
fma.rn.f32 %f276, %f138, %f257, %f272;
mul.f32 %f277, %f146, %f209;
mul.f32 %f278, %f146, %f210;
mul.f32 %f279, %f146, %f211;
mul.f32 %f280, %f146, %f212;
fma.rn.f32 %f281, %f145, %f177, %f277;
fma.rn.f32 %f282, %f145, %f178, %f278;
fma.rn.f32 %f283, %f145, %f179, %f279;
fma.rn.f32 %f284, %f145, %f180, %f280;
fma.rn.f32 %f285, %f147, %f241, %f281;
fma.rn.f32 %f286, %f147, %f242, %f282;
fma.rn.f32 %f287, %f147, %f243, %f283;
fma.rn.f32 %f288, %f147, %f244, %f284;
fma.rn.f32 %f289, %f148, %f273, %f285;
fma.rn.f32 %f290, %f148, %f274, %f286;
fma.rn.f32 %f291, %f148, %f275, %f287;
fma.rn.f32 %f292, %f148, %f276, %f288;
mul.f32 %f293, %f289, 0f437F0000;
mul.f32 %f294, %f290, 0f437F0000;
mul.f32 %f295, %f291, 0f437F0000;
mul.f32 %f296, %f292, 0f437F0000;
cvt.rzi.u16.f32 %rs1, %f293;
cvt.rzi.u16.f32 %rs2, %f294;
cvt.rzi.u16.f32 %rs3, %f295;
cvt.rzi.u16.f32 %rs4, %f296;
st.param.v4.b8 [func_retval0+0], {%rs1, %rs2, %rs3, %rs4};
ret;
}
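//
// NOTE: the helper computes the same normalized 4-tap Lanczos weights as
// the planar kernels, gathers a 4x4 uchar4 neighbourhood, blends all four
// channels in parallel, scales by 0f437F0000 (255.0), truncates with
// cvt.rzi.u16.f32, and returns the packed bytes in func_retval0.
//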
.nv.shared.Subsample_Lanczos_yuv444p_yuv444p_uv .nv.constant2.Subsample_Lanczos_yuv444p_yuv444p_uv .nv.constant0.Subsample_Lanczos_yuv444p_yuv444p_uv .rel.nv.constant0.Subsample_Lanczos_yuv444p_yuv444p_uv .text.Subsample_Lanczos_yuv444p_yuv444p .nv.info.Subsample_Lanczos_yuv444p_yuv444p .nv.shared.Subsample_Lanczos_yuv444p_yuv444p .nv.constant2.Subsample_Lanczos_yuv444p_yuv444p .nv.constant0.Subsample_Lanczos_yuv444p_yuv444p .rel.nv.constant0.Subsample_Lanczos_yuv444p_yuv444p .text.Subsample_Lanczos_nv12_yuv444p_uv .nv.info.Subsample_Lanczos_nv12_yuv444p_uv .nv.shared.Subsample_Lanczos_nv12_yuv444p_uv .nv.constant2.Subsample_Lanczos_nv12_yuv444p_uv .nv.constant0.Subsample_Lanczos_nv12_yuv444p_uv .rel.nv.constant0.Subsample_Lanczos_nv12_yuv444p_uv .text.Subsample_Lanczos_nv12_yuv444p .nv.info.Subsample_Lanczos_nv12_yuv444p .nv.shared.Subsample_Lanczos_nv12_yuv444p .nv.constant2.Subsample_Lanczos_nv12_yuv444p .nv.constant0.Subsample_Lanczos_nv12_yuv444p .rel.nv.constant0.Subsample_Lanczos_nv12_yuv444p .text.Subsample_Lanczos_yuv420p_yuv444p_uv .nv.info.Subsample_Lanczos_yuv420p_yuv444p_uv .nv.shared.Subsample_Lanczos_yuv420p_yuv444p_uv .nv.constant2.Subsample_Lanczos_yuv420p_yuv444p_uv .nv.constant0.Subsample_Lanczos_yuv420p_yuv444p_uv .rel.nv.constant0.Subsample_Lanczos_yuv420p_yuv444p_uv .text.Subsample_Lanczos_yuv420p_yuv444p .nv.info.Subsample_Lanczos_yuv420p_yuv444p .nv.shared.Subsample_Lanczos_yuv420p_yuv444p .nv.constant2.Subsample_Lanczos_yuv420p_yuv444p .nv.constant0.Subsample_Lanczos_yuv420p_yuv444p .rel.nv.constant0.Subsample_Lanczos_yuv420p_yuv444p .text.Subsample_Lanczos_yuv444p16le_nv12_uv .nv.info.Subsample_Lanczos_yuv444p16le_nv12_uv .nv.shared.Subsample_Lanczos_yuv444p16le_nv12_uv .nv.constant2.Subsample_Lanczos_yuv444p16le_nv12_uv .nv.constant0.Subsample_Lanczos_yuv444p16le_nv12_uv .rel.nv.constant0.Subsample_Lanczos_yuv444p16le_nv12_uv .text.Subsample_Lanczos_yuv444p16le_nv12 .nv.info.Subsample_Lanczos_yuv444p16le_nv12 .nv.shared.Subsample_Lanczos_yuv444p16le_nv12 .nv.constant2.Subsample_Lanczos_yuv444p16le_nv12 .nv.constant0.Subsample_Lanczos_yuv444p16le_nv12 .rel.nv.constant0.Subsample_Lanczos_yuv444p16le_nv12 .text.Subsample_Lanczos_p016le_nv12_uv .nv.info.Subsample_Lanczos_p016le_nv12_uv .nv.shared.Subsample_Lanczos_p016le_nv12_uv .nv.constant2.Subsample_Lanczos_p016le_nv12_uv .nv.constant0.Subsample_Lanczos_p016le_nv12_uv .rel.nv.constant0.Subsample_Lanczos_p016le_nv12_uv .text.Subsample_Lanczos_p016le_nv12 .nv.info.Subsample_Lanczos_p016le_nv12 .nv.shared.Subsample_Lanczos_p016le_nv12 .nv.constant2.Subsample_Lanczos_p016le_nv12 .nv.constant0.Subsample_Lanczos_p016le_nv12 .rel.nv.constant0.Subsample_Lanczos_p016le_nv12 .text.Subsample_Lanczos_p010le_nv12_uv .nv.info.Subsample_Lanczos_p010le_nv12_uv .nv.shared.Subsample_Lanczos_p010le_nv12_uv .nv.constant2.Subsample_Lanczos_p010le_nv12_uv .nv.constant0.Subsample_Lanczos_p010le_nv12_uv .rel.nv.constant0.Subsample_Lanczos_p010le_nv12_uv .text.Subsample_Lanczos_p010le_nv12 .nv.info.Subsample_Lanczos_p010le_nv12 .nv.shared.Subsample_Lanczos_p010le_nv12 .nv.constant2.Subsample_Lanczos_p010le_nv12 .nv.constant0.Subsample_Lanczos_p010le_nv12 .rel.nv.constant0.Subsample_Lanczos_p010le_nv12 .text.Subsample_Lanczos_yuv444p_nv12_uv .nv.info.Subsample_Lanczos_yuv444p_nv12_uv .nv.shared.Subsample_Lanczos_yuv444p_nv12_uv .nv.constant2.Subsample_Lanczos_yuv444p_nv12_uv .nv.constant0.Subsample_Lanczos_yuv444p_nv12_uv .rel.nv.constant0.Subsample_Lanczos_yuv444p_nv12_uv .text.Subsample_Lanczos_yuv444p_nv12 
.nv.info.Subsample_Lanczos_yuv444p_nv12 .nv.shared.Subsample_Lanczos_yuv444p_nv12 .nv.constant2.Subsample_Lanczos_yuv444p_nv12 .nv.constant0.Subsample_Lanczos_yuv444p_nv12 .rel.nv.constant0.Subsample_Lanczos_yuv444p_nv12 .text.Subsample_Lanczos_nv12_nv12_uv .nv.info.Subsample_Lanczos_nv12_nv12_uv .nv.shared.Subsample_Lanczos_nv12_nv12_uv .nv.constant2.Subsample_Lanczos_nv12_nv12_uv .nv.constant0.Subsample_Lanczos_nv12_nv12_uv .rel.nv.constant0.Subsample_Lanczos_nv12_nv12_uv .text.Subsample_Lanczos_nv12_nv12 .nv.info.Subsample_Lanczos_nv12_nv12 .nv.shared.Subsample_Lanczos_nv12_nv12 .nv.constant2.Subsample_Lanczos_nv12_nv12 .nv.constant0.Subsample_Lanczos_nv12_nv12 .rel.nv.constant0.Subsample_Lanczos_nv12_nv12 .text.Subsample_Lanczos_yuv420p_nv12_uv .nv.info.Subsample_Lanczos_yuv420p_nv12_uv .nv.shared.Subsample_Lanczos_yuv420p_nv12_uv .nv.constant2.Subsample_Lanczos_yuv420p_nv12_uv .nv.constant0.Subsample_Lanczos_yuv420p_nv12_uv .rel.nv.constant0.Subsample_Lanczos_yuv420p_nv12_uv .text.Subsample_Lanczos_yuv420p_nv12 .nv.info.Subsample_Lanczos_yuv420p_nv12 .nv.shared.Subsample_Lanczos_yuv420p_nv12 .nv.constant2.Subsample_Lanczos_yuv420p_nv12 .nv.constant0.Subsample_Lanczos_yuv420p_nv12 .rel.nv.constant0.Subsample_Lanczos_yuv420p_nv12 .text.Subsample_Lanczos_yuv444p16le_yuv420p_uv .nv.info.Subsample_Lanczos_yuv444p16le_yuv420p_uv .nv.shared.Subsample_Lanczos_yuv444p16le_yuv420p_uv .nv.constant2.Subsample_Lanczos_yuv444p16le_yuv420p_uv .nv.constant0.Subsample_Lanczos_yuv444p16le_yuv420p_uv .rel.nv.constant0.Subsample_Lanczos_yuv444p16le_yuv420p_uv .text.Subsample_Lanczos_yuv444p16le_yuv420p .nv.info.Subsample_Lanczos_yuv444p16le_yuv420p .nv.shared.Subsample_Lanczos_yuv444p16le_yuv420p .nv.constant2.Subsample_Lanczos_yuv444p16le_yuv420p .nv.constant0.Subsample_Lanczos_yuv444p16le_yuv420p .rel.nv.constant0.Subsample_Lanczos_yuv444p16le_yuv420p .text.Subsample_Lanczos_p016le_yuv420p_uv .nv.info.Subsample_Lanczos_p016le_yuv420p_uv .nv.shared.Subsample_Lanczos_p016le_yuv420p_uv .nv.constant2.Subsample_Lanczos_p016le_yuv420p_uv .nv.constant0.Subsample_Lanczos_p016le_yuv420p_uv .rel.nv.constant0.Subsample_Lanczos_p016le_yuv420p_uv .text.Subsample_Lanczos_p016le_yuv420p .nv.info.Subsample_Lanczos_p016le_yuv420p .nv.shared.Subsample_Lanczos_p016le_yuv420p .nv.constant2.Subsample_Lanczos_p016le_yuv420p .nv.constant0.Subsample_Lanczos_p016le_yuv420p .rel.nv.constant0.Subsample_Lanczos_p016le_yuv420p .text.Subsample_Lanczos_p010le_yuv420p_uv .nv.info.Subsample_Lanczos_p010le_yuv420p_uv .nv.shared.Subsample_Lanczos_p010le_yuv420p_uv .nv.constant2.Subsample_Lanczos_p010le_yuv420p_uv .nv.constant0.Subsample_Lanczos_p010le_yuv420p_uv .rel.nv.constant0.Subsample_Lanczos_p010le_yuv420p_uv .text.Subsample_Lanczos_p010le_yuv420p .nv.info.Subsample_Lanczos_p010le_yuv420p .nv.shared.Subsample_Lanczos_p010le_yuv420p .nv.constant2.Subsample_Lanczos_p010le_yuv420p .nv.constant0.Subsample_Lanczos_p010le_yuv420p .rel.nv.constant0.Subsample_Lanczos_p010le_yuv420p .text.Subsample_Lanczos_yuv444p_yuv420p_uv .nv.info.Subsample_Lanczos_yuv444p_yuv420p_uv .nv.shared.Subsample_Lanczos_yuv444p_yuv420p_uv .nv.constant2.Subsample_Lanczos_yuv444p_yuv420p_uv .nv.constant0.Subsample_Lanczos_yuv444p_yuv420p_uv .rel.nv.constant0.Subsample_Lanczos_yuv444p_yuv420p_uv .text.Subsample_Lanczos_yuv444p_yuv420p .nv.info.Subsample_Lanczos_yuv444p_yuv420p .nv.shared.Subsample_Lanczos_yuv444p_yuv420p .nv.constant2.Subsample_Lanczos_yuv444p_yuv420p .nv.constant0.Subsample_Lanczos_yuv444p_yuv420p 
.rel.nv.constant0.Subsample_Lanczos_yuv444p_yuv420p .text.Subsample_Lanczos_nv12_yuv420p_uv .nv.info.Subsample_Lanczos_nv12_yuv420p_uv .nv.shared.Subsample_Lanczos_nv12_yuv420p_uv .nv.constant2.Subsample_Lanczos_nv12_yuv420p_uv .nv.constant0.Subsample_Lanczos_nv12_yuv420p_uv .rel.nv.constant0.Subsample_Lanczos_nv12_yuv420p_uv .text.Subsample_Lanczos_nv12_yuv420p .nv.info.Subsample_Lanczos_nv12_yuv420p .nv.shared.Subsample_Lanczos_nv12_yuv420p .nv.constant2.Subsample_Lanczos_nv12_yuv420p .nv.constant0.Subsample_Lanczos_nv12_yuv420p .rel.nv.constant0.Subsample_Lanczos_nv12_yuv420p .text.Subsample_Lanczos_yuv420p_yuv420p_uv .nv.info.Subsample_Lanczos_yuv420p_yuv420p_uv .nv.shared.Subsample_Lanczos_yuv420p_yuv420p_uv .nv.constant2.Subsample_Lanczos_yuv420p_yuv420p_uv .nv.constant0.Subsample_Lanczos_yuv420p_yuv420p_uv .rel.nv.constant0.Subsample_Lanczos_yuv420p_yuv420p_uv .text.Subsample_Lanczos_yuv420p_yuv420p .nv.info.Subsample_Lanczos_yuv420p_yuv420p .nv.shared.Subsample_Lanczos_yuv420p_yuv420p .nv.constant2.Subsample_Lanczos_yuv420p_yuv420p .nv.constant0.Subsample_Lanczos_yuv420p_yuv420p .rel.nv.constant0.Subsample_Lanczos_yuv420p_yuv420p .text.Subsample_Bicubic_rgb0_bgr0_uv .nv.info.Subsample_Bicubic_rgb0_bgr0_uv .nv.shared.Subsample_Bicubic_rgb0_bgr0_uv .nv.constant0.Subsample_Bicubic_rgb0_bgr0_uv .rel.nv.constant0.Subsample_Bicubic_rgb0_bgr0_uv .text.Subsample_Bicubic_rgb0_bgr0 .nv.info.Subsample_Bicubic_rgb0_bgr0 .nv.shared.Subsample_Bicubic_rgb0_bgr0 .nv.constant2.Subsample_Bicubic_rgb0_bgr0 .nv.constant0.Subsample_Bicubic_rgb0_bgr0 .rel.nv.constant0.Subsample_Bicubic_rgb0_bgr0 .text.Subsample_Bicubic_bgr0_rgb0_uv .nv.info.Subsample_Bicubic_bgr0_rgb0_uv .nv.shared.Subsample_Bicubic_bgr0_rgb0_uv .nv.constant0.Subsample_Bicubic_bgr0_rgb0_uv .rel.nv.constant0.Subsample_Bicubic_bgr0_rgb0_uv .text.Subsample_Bicubic_bgr0_rgb0 .nv.info.Subsample_Bicubic_bgr0_rgb0 .nv.shared.Subsample_Bicubic_bgr0_rgb0 .nv.constant2.Subsample_Bicubic_bgr0_rgb0 .nv.constant0.Subsample_Bicubic_bgr0_rgb0 .rel.nv.constant0.Subsample_Bicubic_bgr0_rgb0 .text.Subsample_Bicubic_rgb0_rgb0_uv .nv.info.Subsample_Bicubic_rgb0_rgb0_uv .nv.shared.Subsample_Bicubic_rgb0_rgb0_uv .nv.constant0.Subsample_Bicubic_rgb0_rgb0_uv .rel.nv.constant0.Subsample_Bicubic_rgb0_rgb0_uv .text.Subsample_Bicubic_rgb0_rgb0 .nv.info.Subsample_Bicubic_rgb0_rgb0 .nv.shared.Subsample_Bicubic_rgb0_rgb0 .nv.constant2.Subsample_Bicubic_rgb0_rgb0 .nv.constant0.Subsample_Bicubic_rgb0_rgb0 .rel.nv.constant0.Subsample_Bicubic_rgb0_rgb0 .text.Subsample_Bicubic_bgr0_bgr0_uv .nv.info.Subsample_Bicubic_bgr0_bgr0_uv .nv.shared.Subsample_Bicubic_bgr0_bgr0_uv .nv.constant0.Subsample_Bicubic_bgr0_bgr0_uv .rel.nv.constant0.Subsample_Bicubic_bgr0_bgr0_uv .text.Subsample_Bicubic_bgr0_bgr0 .nv.info.Subsample_Bicubic_bgr0_bgr0 .nv.shared.Subsample_Bicubic_bgr0_bgr0 .nv.constant2.Subsample_Bicubic_bgr0_bgr0 .nv.constant0.Subsample_Bicubic_bgr0_bgr0 .rel.nv.constant0.Subsample_Bicubic_bgr0_bgr0 .text.Subsample_Bicubic_yuv444p16le_yuv444p16le_uv .nv.info.Subsample_Bicubic_yuv444p16le_yuv444p16le_uv .nv.shared.Subsample_Bicubic_yuv444p16le_yuv444p16le_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv444p16le_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p16le_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p16le_uv .text.Subsample_Bicubic_yuv444p16le_yuv444p16le .nv.info.Subsample_Bicubic_yuv444p16le_yuv444p16le .nv.shared.Subsample_Bicubic_yuv444p16le_yuv444p16le .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv444p16le 
.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p16le .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p16le .text.Subsample_Bicubic_p016le_yuv444p16le_uv .nv.info.Subsample_Bicubic_p016le_yuv444p16le_uv .nv.shared.Subsample_Bicubic_p016le_yuv444p16le_uv .nv.constant2.Subsample_Bicubic_p016le_yuv444p16le_uv .nv.constant0.Subsample_Bicubic_p016le_yuv444p16le_uv .rel.nv.constant0.Subsample_Bicubic_p016le_yuv444p16le_uv .text.Subsample_Bicubic_p016le_yuv444p16le .nv.info.Subsample_Bicubic_p016le_yuv444p16le .nv.shared.Subsample_Bicubic_p016le_yuv444p16le .nv.constant2.Subsample_Bicubic_p016le_yuv444p16le .nv.constant0.Subsample_Bicubic_p016le_yuv444p16le .rel.nv.constant0.Subsample_Bicubic_p016le_yuv444p16le .text.Subsample_Bicubic_p010le_yuv444p16le_uv .nv.info.Subsample_Bicubic_p010le_yuv444p16le_uv .nv.shared.Subsample_Bicubic_p010le_yuv444p16le_uv .nv.constant2.Subsample_Bicubic_p010le_yuv444p16le_uv .nv.constant0.Subsample_Bicubic_p010le_yuv444p16le_uv .rel.nv.constant0.Subsample_Bicubic_p010le_yuv444p16le_uv .text.Subsample_Bicubic_p010le_yuv444p16le .nv.info.Subsample_Bicubic_p010le_yuv444p16le .nv.shared.Subsample_Bicubic_p010le_yuv444p16le .nv.constant2.Subsample_Bicubic_p010le_yuv444p16le .nv.constant0.Subsample_Bicubic_p010le_yuv444p16le .rel.nv.constant0.Subsample_Bicubic_p010le_yuv444p16le .text.Subsample_Bicubic_yuv444p_yuv444p16le_uv .nv.info.Subsample_Bicubic_yuv444p_yuv444p16le_uv .nv.shared.Subsample_Bicubic_yuv444p_yuv444p16le_uv .nv.constant2.Subsample_Bicubic_yuv444p_yuv444p16le_uv .nv.constant0.Subsample_Bicubic_yuv444p_yuv444p16le_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv444p16le_uv .text.Subsample_Bicubic_yuv444p_yuv444p16le .nv.info.Subsample_Bicubic_yuv444p_yuv444p16le .nv.shared.Subsample_Bicubic_yuv444p_yuv444p16le .nv.constant2.Subsample_Bicubic_yuv444p_yuv444p16le .nv.constant0.Subsample_Bicubic_yuv444p_yuv444p16le .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv444p16le .text.Subsample_Bicubic_nv12_yuv444p16le_uv .nv.info.Subsample_Bicubic_nv12_yuv444p16le_uv .nv.shared.Subsample_Bicubic_nv12_yuv444p16le_uv .nv.constant2.Subsample_Bicubic_nv12_yuv444p16le_uv .nv.constant0.Subsample_Bicubic_nv12_yuv444p16le_uv .rel.nv.constant0.Subsample_Bicubic_nv12_yuv444p16le_uv .text.Subsample_Bicubic_nv12_yuv444p16le .nv.info.Subsample_Bicubic_nv12_yuv444p16le .nv.shared.Subsample_Bicubic_nv12_yuv444p16le .nv.constant2.Subsample_Bicubic_nv12_yuv444p16le .nv.constant0.Subsample_Bicubic_nv12_yuv444p16le .rel.nv.constant0.Subsample_Bicubic_nv12_yuv444p16le .text.Subsample_Bicubic_yuv420p_yuv444p16le_uv .nv.info.Subsample_Bicubic_yuv420p_yuv444p16le_uv .nv.shared.Subsample_Bicubic_yuv420p_yuv444p16le_uv .nv.constant2.Subsample_Bicubic_yuv420p_yuv444p16le_uv .nv.constant0.Subsample_Bicubic_yuv420p_yuv444p16le_uv .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv444p16le_uv .text.Subsample_Bicubic_yuv420p_yuv444p16le .nv.info.Subsample_Bicubic_yuv420p_yuv444p16le .nv.shared.Subsample_Bicubic_yuv420p_yuv444p16le .nv.constant2.Subsample_Bicubic_yuv420p_yuv444p16le .nv.constant0.Subsample_Bicubic_yuv420p_yuv444p16le .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv444p16le .text.Subsample_Bicubic_yuv444p16le_p016le_uv .nv.info.Subsample_Bicubic_yuv444p16le_p016le_uv .nv.shared.Subsample_Bicubic_yuv444p16le_p016le_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_p016le_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_p016le_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_p016le_uv .text.Subsample_Bicubic_yuv444p16le_p016le .nv.info.Subsample_Bicubic_yuv444p16le_p016le 
.nv.shared.Subsample_Bicubic_yuv444p16le_p016le .nv.constant2.Subsample_Bicubic_yuv444p16le_p016le .nv.constant0.Subsample_Bicubic_yuv444p16le_p016le .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_p016le .text.Subsample_Bicubic_p016le_p016le_uv .nv.info.Subsample_Bicubic_p016le_p016le_uv .nv.shared.Subsample_Bicubic_p016le_p016le_uv .nv.constant2.Subsample_Bicubic_p016le_p016le_uv .nv.constant0.Subsample_Bicubic_p016le_p016le_uv .rel.nv.constant0.Subsample_Bicubic_p016le_p016le_uv .text.Subsample_Bicubic_p016le_p016le .nv.info.Subsample_Bicubic_p016le_p016le .nv.shared.Subsample_Bicubic_p016le_p016le .nv.constant2.Subsample_Bicubic_p016le_p016le .nv.constant0.Subsample_Bicubic_p016le_p016le .rel.nv.constant0.Subsample_Bicubic_p016le_p016le .text.Subsample_Bicubic_p010le_p016le_uv .nv.info.Subsample_Bicubic_p010le_p016le_uv .nv.shared.Subsample_Bicubic_p010le_p016le_uv .nv.constant2.Subsample_Bicubic_p010le_p016le_uv .nv.constant0.Subsample_Bicubic_p010le_p016le_uv .rel.nv.constant0.Subsample_Bicubic_p010le_p016le_uv .text.Subsample_Bicubic_p010le_p016le .nv.info.Subsample_Bicubic_p010le_p016le .nv.shared.Subsample_Bicubic_p010le_p016le .nv.constant2.Subsample_Bicubic_p010le_p016le .nv.constant0.Subsample_Bicubic_p010le_p016le .rel.nv.constant0.Subsample_Bicubic_p010le_p016le .text.Subsample_Bicubic_yuv444p_p016le_uv .nv.info.Subsample_Bicubic_yuv444p_p016le_uv .nv.shared.Subsample_Bicubic_yuv444p_p016le_uv .nv.constant2.Subsample_Bicubic_yuv444p_p016le_uv .nv.constant0.Subsample_Bicubic_yuv444p_p016le_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p_p016le_uv .text.Subsample_Bicubic_yuv444p_p016le .nv.info.Subsample_Bicubic_yuv444p_p016le .nv.shared.Subsample_Bicubic_yuv444p_p016le .nv.constant2.Subsample_Bicubic_yuv444p_p016le .nv.constant0.Subsample_Bicubic_yuv444p_p016le .rel.nv.constant0.Subsample_Bicubic_yuv444p_p016le .text.Subsample_Bicubic_nv12_p016le_uv .nv.info.Subsample_Bicubic_nv12_p016le_uv .nv.shared.Subsample_Bicubic_nv12_p016le_uv .nv.constant2.Subsample_Bicubic_nv12_p016le_uv .nv.constant0.Subsample_Bicubic_nv12_p016le_uv .rel.nv.constant0.Subsample_Bicubic_nv12_p016le_uv .text.Subsample_Bicubic_nv12_p016le .nv.info.Subsample_Bicubic_nv12_p016le .nv.shared.Subsample_Bicubic_nv12_p016le .nv.constant2.Subsample_Bicubic_nv12_p016le .nv.constant0.Subsample_Bicubic_nv12_p016le .rel.nv.constant0.Subsample_Bicubic_nv12_p016le .text.Subsample_Bicubic_yuv420p_p016le_uv .nv.info.Subsample_Bicubic_yuv420p_p016le_uv .nv.shared.Subsample_Bicubic_yuv420p_p016le_uv .nv.constant2.Subsample_Bicubic_yuv420p_p016le_uv .nv.constant0.Subsample_Bicubic_yuv420p_p016le_uv .rel.nv.constant0.Subsample_Bicubic_yuv420p_p016le_uv .text.Subsample_Bicubic_yuv420p_p016le .nv.info.Subsample_Bicubic_yuv420p_p016le .nv.shared.Subsample_Bicubic_yuv420p_p016le .nv.constant2.Subsample_Bicubic_yuv420p_p016le .nv.constant0.Subsample_Bicubic_yuv420p_p016le .rel.nv.constant0.Subsample_Bicubic_yuv420p_p016le .text.Subsample_Bicubic_yuv444p16le_p010le_uv .nv.info.Subsample_Bicubic_yuv444p16le_p010le_uv .nv.shared.Subsample_Bicubic_yuv444p16le_p010le_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_p010le_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_p010le_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_p010le_uv .text.Subsample_Bicubic_yuv444p16le_p010le .nv.info.Subsample_Bicubic_yuv444p16le_p010le .nv.shared.Subsample_Bicubic_yuv444p16le_p010le .nv.constant2.Subsample_Bicubic_yuv444p16le_p010le .nv.constant0.Subsample_Bicubic_yuv444p16le_p010le .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_p010le 
.text.Subsample_Bicubic_p016le_p010le_uv .nv.info.Subsample_Bicubic_p016le_p010le_uv .nv.shared.Subsample_Bicubic_p016le_p010le_uv .nv.constant2.Subsample_Bicubic_p016le_p010le_uv .nv.constant0.Subsample_Bicubic_p016le_p010le_uv .rel.nv.constant0.Subsample_Bicubic_p016le_p010le_uv .text.Subsample_Bicubic_p016le_p010le .nv.info.Subsample_Bicubic_p016le_p010le .nv.shared.Subsample_Bicubic_p016le_p010le .nv.constant2.Subsample_Bicubic_p016le_p010le .nv.constant0.Subsample_Bicubic_p016le_p010le .rel.nv.constant0.Subsample_Bicubic_p016le_p010le .text.Subsample_Bicubic_p010le_p010le_uv .nv.info.Subsample_Bicubic_p010le_p010le_uv .nv.shared.Subsample_Bicubic_p010le_p010le_uv .nv.constant2.Subsample_Bicubic_p010le_p010le_uv .nv.constant0.Subsample_Bicubic_p010le_p010le_uv .rel.nv.constant0.Subsample_Bicubic_p010le_p010le_uv .text.Subsample_Bicubic_p010le_p010le .nv.info.Subsample_Bicubic_p010le_p010le .nv.shared.Subsample_Bicubic_p010le_p010le .nv.constant2.Subsample_Bicubic_p010le_p010le .nv.constant0.Subsample_Bicubic_p010le_p010le .rel.nv.constant0.Subsample_Bicubic_p010le_p010le .text.Subsample_Bicubic_yuv444p_p010le_uv .nv.info.Subsample_Bicubic_yuv444p_p010le_uv .nv.shared.Subsample_Bicubic_yuv444p_p010le_uv .nv.constant2.Subsample_Bicubic_yuv444p_p010le_uv .nv.constant0.Subsample_Bicubic_yuv444p_p010le_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p_p010le_uv .text.Subsample_Bicubic_yuv444p_p010le .nv.info.Subsample_Bicubic_yuv444p_p010le .nv.shared.Subsample_Bicubic_yuv444p_p010le .nv.constant2.Subsample_Bicubic_yuv444p_p010le .nv.constant0.Subsample_Bicubic_yuv444p_p010le .rel.nv.constant0.Subsample_Bicubic_yuv444p_p010le .text.Subsample_Bicubic_nv12_p010le_uv .nv.info.Subsample_Bicubic_nv12_p010le_uv .nv.shared.Subsample_Bicubic_nv12_p010le_uv .nv.constant2.Subsample_Bicubic_nv12_p010le_uv .nv.constant0.Subsample_Bicubic_nv12_p010le_uv .rel.nv.constant0.Subsample_Bicubic_nv12_p010le_uv .text.Subsample_Bicubic_nv12_p010le .nv.info.Subsample_Bicubic_nv12_p010le .nv.shared.Subsample_Bicubic_nv12_p010le .nv.constant2.Subsample_Bicubic_nv12_p010le .nv.constant0.Subsample_Bicubic_nv12_p010le .rel.nv.constant0.Subsample_Bicubic_nv12_p010le .text.Subsample_Bicubic_yuv420p_p010le_uv .nv.info.Subsample_Bicubic_yuv420p_p010le_uv .nv.shared.Subsample_Bicubic_yuv420p_p010le_uv .nv.constant2.Subsample_Bicubic_yuv420p_p010le_uv .nv.constant0.Subsample_Bicubic_yuv420p_p010le_uv .rel.nv.constant0.Subsample_Bicubic_yuv420p_p010le_uv .text.Subsample_Bicubic_yuv420p_p010le .nv.info.Subsample_Bicubic_yuv420p_p010le .nv.shared.Subsample_Bicubic_yuv420p_p010le .nv.constant2.Subsample_Bicubic_yuv420p_p010le .nv.constant0.Subsample_Bicubic_yuv420p_p010le .rel.nv.constant0.Subsample_Bicubic_yuv420p_p010le .text.Subsample_Bicubic_yuv444p16le_yuv444p_uv .nv.info.Subsample_Bicubic_yuv444p16le_yuv444p_uv .nv.shared.Subsample_Bicubic_yuv444p16le_yuv444p_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv444p_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p_uv .text.Subsample_Bicubic_yuv444p16le_yuv444p .nv.info.Subsample_Bicubic_yuv444p16le_yuv444p .nv.shared.Subsample_Bicubic_yuv444p16le_yuv444p .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv444p .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv444p .text.Subsample_Bicubic_p016le_yuv444p_uv .nv.info.Subsample_Bicubic_p016le_yuv444p_uv .nv.shared.Subsample_Bicubic_p016le_yuv444p_uv .nv.constant2.Subsample_Bicubic_p016le_yuv444p_uv 
.nv.constant0.Subsample_Bicubic_p016le_yuv444p_uv .rel.nv.constant0.Subsample_Bicubic_p016le_yuv444p_uv .text.Subsample_Bicubic_p016le_yuv444p .nv.info.Subsample_Bicubic_p016le_yuv444p .nv.shared.Subsample_Bicubic_p016le_yuv444p .nv.constant2.Subsample_Bicubic_p016le_yuv444p .nv.constant0.Subsample_Bicubic_p016le_yuv444p .rel.nv.constant0.Subsample_Bicubic_p016le_yuv444p .text.Subsample_Bicubic_p010le_yuv444p_uv .nv.info.Subsample_Bicubic_p010le_yuv444p_uv .nv.shared.Subsample_Bicubic_p010le_yuv444p_uv .nv.constant2.Subsample_Bicubic_p010le_yuv444p_uv .nv.constant0.Subsample_Bicubic_p010le_yuv444p_uv .rel.nv.constant0.Subsample_Bicubic_p010le_yuv444p_uv .text.Subsample_Bicubic_p010le_yuv444p .nv.info.Subsample_Bicubic_p010le_yuv444p .nv.shared.Subsample_Bicubic_p010le_yuv444p .nv.constant2.Subsample_Bicubic_p010le_yuv444p .nv.constant0.Subsample_Bicubic_p010le_yuv444p .rel.nv.constant0.Subsample_Bicubic_p010le_yuv444p .text.Subsample_Bicubic_yuv444p_yuv444p_uv .nv.info.Subsample_Bicubic_yuv444p_yuv444p_uv .nv.shared.Subsample_Bicubic_yuv444p_yuv444p_uv .nv.constant2.Subsample_Bicubic_yuv444p_yuv444p_uv .nv.constant0.Subsample_Bicubic_yuv444p_yuv444p_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv444p_uv .text.Subsample_Bicubic_yuv444p_yuv444p .nv.info.Subsample_Bicubic_yuv444p_yuv444p .nv.shared.Subsample_Bicubic_yuv444p_yuv444p .nv.constant2.Subsample_Bicubic_yuv444p_yuv444p .nv.constant0.Subsample_Bicubic_yuv444p_yuv444p .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv444p .text.Subsample_Bicubic_nv12_yuv444p_uv .nv.info.Subsample_Bicubic_nv12_yuv444p_uv .nv.shared.Subsample_Bicubic_nv12_yuv444p_uv .nv.constant2.Subsample_Bicubic_nv12_yuv444p_uv .nv.constant0.Subsample_Bicubic_nv12_yuv444p_uv .rel.nv.constant0.Subsample_Bicubic_nv12_yuv444p_uv .text.Subsample_Bicubic_nv12_yuv444p .nv.info.Subsample_Bicubic_nv12_yuv444p .nv.shared.Subsample_Bicubic_nv12_yuv444p .nv.constant2.Subsample_Bicubic_nv12_yuv444p .nv.constant0.Subsample_Bicubic_nv12_yuv444p .rel.nv.constant0.Subsample_Bicubic_nv12_yuv444p .text.Subsample_Bicubic_yuv420p_yuv444p_uv .nv.info.Subsample_Bicubic_yuv420p_yuv444p_uv .nv.shared.Subsample_Bicubic_yuv420p_yuv444p_uv .nv.constant2.Subsample_Bicubic_yuv420p_yuv444p_uv .nv.constant0.Subsample_Bicubic_yuv420p_yuv444p_uv .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv444p_uv .text.Subsample_Bicubic_yuv420p_yuv444p .nv.info.Subsample_Bicubic_yuv420p_yuv444p .nv.shared.Subsample_Bicubic_yuv420p_yuv444p .nv.constant2.Subsample_Bicubic_yuv420p_yuv444p .nv.constant0.Subsample_Bicubic_yuv420p_yuv444p .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv444p .text.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.info.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.shared.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_nv12_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_nv12_uv .text.Subsample_Bicubic_yuv444p16le_nv12 .nv.info.Subsample_Bicubic_yuv444p16le_nv12 .nv.shared.Subsample_Bicubic_yuv444p16le_nv12 .nv.constant2.Subsample_Bicubic_yuv444p16le_nv12 .nv.constant0.Subsample_Bicubic_yuv444p16le_nv12 .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_nv12 .text.Subsample_Bicubic_p016le_nv12_uv .nv.info.Subsample_Bicubic_p016le_nv12_uv .nv.shared.Subsample_Bicubic_p016le_nv12_uv .nv.constant2.Subsample_Bicubic_p016le_nv12_uv .nv.constant0.Subsample_Bicubic_p016le_nv12_uv .rel.nv.constant0.Subsample_Bicubic_p016le_nv12_uv .text.Subsample_Bicubic_p016le_nv12 .nv.info.Subsample_Bicubic_p016le_nv12 
.nv.shared.Subsample_Bicubic_p016le_nv12 .nv.constant2.Subsample_Bicubic_p016le_nv12 .nv.constant0.Subsample_Bicubic_p016le_nv12 .rel.nv.constant0.Subsample_Bicubic_p016le_nv12 .text.Subsample_Bicubic_p010le_nv12_uv .nv.info.Subsample_Bicubic_p010le_nv12_uv .nv.shared.Subsample_Bicubic_p010le_nv12_uv .nv.constant2.Subsample_Bicubic_p010le_nv12_uv .nv.constant0.Subsample_Bicubic_p010le_nv12_uv .rel.nv.constant0.Subsample_Bicubic_p010le_nv12_uv .text.Subsample_Bicubic_p010le_nv12 .nv.info.Subsample_Bicubic_p010le_nv12 .nv.shared.Subsample_Bicubic_p010le_nv12 .nv.constant2.Subsample_Bicubic_p010le_nv12 .nv.constant0.Subsample_Bicubic_p010le_nv12 .rel.nv.constant0.Subsample_Bicubic_p010le_nv12 .text.Subsample_Bicubic_yuv444p_nv12_uv .nv.info.Subsample_Bicubic_yuv444p_nv12_uv .nv.shared.Subsample_Bicubic_yuv444p_nv12_uv .nv.constant2.Subsample_Bicubic_yuv444p_nv12_uv .nv.constant0.Subsample_Bicubic_yuv444p_nv12_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p_nv12_uv .text.Subsample_Bicubic_yuv444p_nv12 .nv.info.Subsample_Bicubic_yuv444p_nv12 .nv.shared.Subsample_Bicubic_yuv444p_nv12 .nv.constant2.Subsample_Bicubic_yuv444p_nv12 .nv.constant0.Subsample_Bicubic_yuv444p_nv12 .rel.nv.constant0.Subsample_Bicubic_yuv444p_nv12 .text.Subsample_Bicubic_nv12_nv12_uv .nv.info.Subsample_Bicubic_nv12_nv12_uv .nv.shared.Subsample_Bicubic_nv12_nv12_uv .nv.constant2.Subsample_Bicubic_nv12_nv12_uv .nv.constant0.Subsample_Bicubic_nv12_nv12_uv .rel.nv.constant0.Subsample_Bicubic_nv12_nv12_uv .text.Subsample_Bicubic_nv12_nv12 .nv.info.Subsample_Bicubic_nv12_nv12 .nv.shared.Subsample_Bicubic_nv12_nv12 .nv.constant2.Subsample_Bicubic_nv12_nv12 .nv.constant0.Subsample_Bicubic_nv12_nv12 .rel.nv.constant0.Subsample_Bicubic_nv12_nv12 .text.Subsample_Bicubic_yuv420p_nv12_uv .nv.info.Subsample_Bicubic_yuv420p_nv12_uv .nv.shared.Subsample_Bicubic_yuv420p_nv12_uv .nv.constant2.Subsample_Bicubic_yuv420p_nv12_uv .nv.constant0.Subsample_Bicubic_yuv420p_nv12_uv .rel.nv.constant0.Subsample_Bicubic_yuv420p_nv12_uv .text.Subsample_Bicubic_yuv420p_nv12 .nv.info.Subsample_Bicubic_yuv420p_nv12 .nv.shared.Subsample_Bicubic_yuv420p_nv12 .nv.constant2.Subsample_Bicubic_yuv420p_nv12 .nv.constant0.Subsample_Bicubic_yuv420p_nv12 .rel.nv.constant0.Subsample_Bicubic_yuv420p_nv12 .text.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.info.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.shared.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p_uv .text.Subsample_Bicubic_yuv444p16le_yuv420p .nv.info.Subsample_Bicubic_yuv444p16le_yuv420p .nv.shared.Subsample_Bicubic_yuv444p16le_yuv420p .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv420p .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p .text.Subsample_Bicubic_p016le_yuv420p_uv .nv.info.Subsample_Bicubic_p016le_yuv420p_uv .nv.shared.Subsample_Bicubic_p016le_yuv420p_uv .nv.constant2.Subsample_Bicubic_p016le_yuv420p_uv .nv.constant0.Subsample_Bicubic_p016le_yuv420p_uv .rel.nv.constant0.Subsample_Bicubic_p016le_yuv420p_uv .text.Subsample_Bicubic_p016le_yuv420p .nv.info.Subsample_Bicubic_p016le_yuv420p .nv.shared.Subsample_Bicubic_p016le_yuv420p .nv.constant2.Subsample_Bicubic_p016le_yuv420p .nv.constant0.Subsample_Bicubic_p016le_yuv420p .rel.nv.constant0.Subsample_Bicubic_p016le_yuv420p .text.Subsample_Bicubic_p010le_yuv420p_uv .nv.info.Subsample_Bicubic_p010le_yuv420p_uv 
.nv.shared.Subsample_Bicubic_p010le_yuv420p_uv .nv.constant2.Subsample_Bicubic_p010le_yuv420p_uv .nv.constant0.Subsample_Bicubic_p010le_yuv420p_uv .rel.nv.constant0.Subsample_Bicubic_p010le_yuv420p_uv .text.Subsample_Bicubic_p010le_yuv420p .nv.info.Subsample_Bicubic_p010le_yuv420p .nv.shared.Subsample_Bicubic_p010le_yuv420p .nv.constant2.Subsample_Bicubic_p010le_yuv420p .nv.constant0.Subsample_Bicubic_p010le_yuv420p .rel.nv.constant0.Subsample_Bicubic_p010le_yuv420p .text.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.info.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.shared.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.constant2.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.constant0.Subsample_Bicubic_yuv444p_yuv420p_uv .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv420p_uv .text.Subsample_Bicubic_yuv444p_yuv420p .nv.info.Subsample_Bicubic_yuv444p_yuv420p .nv.shared.Subsample_Bicubic_yuv444p_yuv420p .nv.constant2.Subsample_Bicubic_yuv444p_yuv420p .nv.constant0.Subsample_Bicubic_yuv444p_yuv420p .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv420p .text.Subsample_Bicubic_nv12_yuv420p_uv .nv.info.Subsample_Bicubic_nv12_yuv420p_uv .nv.shared.Subsample_Bicubic_nv12_yuv420p_uv .nv.constant2.Subsample_Bicubic_nv12_yuv420p_uv .nv.constant0.Subsample_Bicubic_nv12_yuv420p_uv .rel.nv.constant0.Subsample_Bicubic_nv12_yuv420p_uv .text.Subsample_Bicubic_nv12_yuv420p .nv.info.Subsample_Bicubic_nv12_yuv420p .nv.shared.Subsample_Bicubic_nv12_yuv420p .nv.constant2.Subsample_Bicubic_nv12_yuv420p .nv.constant0.Subsample_Bicubic_nv12_yuv420p .rel.nv.constant0.Subsample_Bicubic_nv12_yuv420p .text.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.info.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.shared.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.constant2.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.constant0.Subsample_Bicubic_yuv420p_yuv420p_uv .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv420p_uv .text.Subsample_Bicubic_yuv420p_yuv420p .nv.info.Subsample_Bicubic_yuv420p_yuv420p .nv.shared.Subsample_Bicubic_yuv420p_yuv420p .nv.constant2.Subsample_Bicubic_yuv420p_yuv420p .nv.constant0.Subsample_Bicubic_yuv420p_yuv420p .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv420p .text.Subsample_Bilinear_rgb0_bgr0_uv .nv.info.Subsample_Bilinear_rgb0_bgr0_uv .nv.shared.Subsample_Bilinear_rgb0_bgr0_uv .nv.constant0.Subsample_Bilinear_rgb0_bgr0_uv .rel.nv.constant0.Subsample_Bilinear_rgb0_bgr0_uv .text.Subsample_Bilinear_rgb0_bgr0 .nv.info.Subsample_Bilinear_rgb0_bgr0 .nv.shared.Subsample_Bilinear_rgb0_bgr0 .nv.constant2.Subsample_Bilinear_rgb0_bgr0 .nv.constant0.Subsample_Bilinear_rgb0_bgr0 .rel.nv.constant0.Subsample_Bilinear_rgb0_bgr0 .text.Subsample_Bilinear_bgr0_rgb0_uv .nv.info.Subsample_Bilinear_bgr0_rgb0_uv .nv.shared.Subsample_Bilinear_bgr0_rgb0_uv .nv.constant0.Subsample_Bilinear_bgr0_rgb0_uv .rel.nv.constant0.Subsample_Bilinear_bgr0_rgb0_uv .text.Subsample_Bilinear_bgr0_rgb0 .nv.info.Subsample_Bilinear_bgr0_rgb0 .nv.shared.Subsample_Bilinear_bgr0_rgb0 .nv.constant2.Subsample_Bilinear_bgr0_rgb0 .nv.constant0.Subsample_Bilinear_bgr0_rgb0 .rel.nv.constant0.Subsample_Bilinear_bgr0_rgb0 .text.Subsample_Bilinear_rgb0_rgb0_uv .nv.info.Subsample_Bilinear_rgb0_rgb0_uv .nv.shared.Subsample_Bilinear_rgb0_rgb0_uv .nv.constant0.Subsample_Bilinear_rgb0_rgb0_uv .rel.nv.constant0.Subsample_Bilinear_rgb0_rgb0_uv .text.Subsample_Bilinear_rgb0_rgb0 .nv.info.Subsample_Bilinear_rgb0_rgb0 .nv.shared.Subsample_Bilinear_rgb0_rgb0 .nv.constant2.Subsample_Bilinear_rgb0_rgb0 .nv.constant0.Subsample_Bilinear_rgb0_rgb0 .rel.nv.constant0.Subsample_Bilinear_rgb0_rgb0 
.text.Subsample_Bilinear_bgr0_bgr0_uv .nv.info.Subsample_Bilinear_bgr0_bgr0_uv .nv.shared.Subsample_Bilinear_bgr0_bgr0_uv .nv.constant0.Subsample_Bilinear_bgr0_bgr0_uv .rel.nv.constant0.Subsample_Bilinear_bgr0_bgr0_uv .text.Subsample_Bilinear_bgr0_bgr0 .nv.info.Subsample_Bilinear_bgr0_bgr0 .nv.shared.Subsample_Bilinear_bgr0_bgr0 .nv.constant2.Subsample_Bilinear_bgr0_bgr0 .nv.constant0.Subsample_Bilinear_bgr0_bgr0 .rel.nv.constant0.Subsample_Bilinear_bgr0_bgr0 .text.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .text.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le .text.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.info.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.shared.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_p016le_yuv444p16le_uv .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p16le_uv .text.Subsample_Bilinear_p016le_yuv444p16le .nv.info.Subsample_Bilinear_p016le_yuv444p16le .nv.shared.Subsample_Bilinear_p016le_yuv444p16le .nv.constant2.Subsample_Bilinear_p016le_yuv444p16le .nv.constant0.Subsample_Bilinear_p016le_yuv444p16le .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p16le .text.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.info.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.shared.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_p010le_yuv444p16le_uv .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p16le_uv .text.Subsample_Bilinear_p010le_yuv444p16le .nv.info.Subsample_Bilinear_p010le_yuv444p16le .nv.shared.Subsample_Bilinear_p010le_yuv444p16le .nv.constant2.Subsample_Bilinear_p010le_yuv444p16le .nv.constant0.Subsample_Bilinear_p010le_yuv444p16le .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p16le .text.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.info.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.shared.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le_uv .text.Subsample_Bilinear_yuv444p_yuv444p16le .nv.info.Subsample_Bilinear_yuv444p_yuv444p16le .nv.shared.Subsample_Bilinear_yuv444p_yuv444p16le .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p16le .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le .text.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.info.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.shared.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_nv12_yuv444p16le_uv .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p16le_uv .text.Subsample_Bilinear_nv12_yuv444p16le .nv.info.Subsample_Bilinear_nv12_yuv444p16le .nv.shared.Subsample_Bilinear_nv12_yuv444p16le .nv.constant2.Subsample_Bilinear_nv12_yuv444p16le .nv.constant0.Subsample_Bilinear_nv12_yuv444p16le 
.rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p16le .text.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.info.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.shared.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le_uv .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le_uv .text.Subsample_Bilinear_yuv420p_yuv444p16le .nv.info.Subsample_Bilinear_yuv420p_yuv444p16le .nv.shared.Subsample_Bilinear_yuv420p_yuv444p16le .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p16le .nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le .text.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.info.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.shared.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_p016le_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p016le_uv .text.Subsample_Bilinear_yuv444p16le_p016le .nv.info.Subsample_Bilinear_yuv444p16le_p016le .nv.shared.Subsample_Bilinear_yuv444p16le_p016le .nv.constant2.Subsample_Bilinear_yuv444p16le_p016le .nv.constant0.Subsample_Bilinear_yuv444p16le_p016le .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p016le .text.Subsample_Bilinear_p016le_p016le_uv .nv.info.Subsample_Bilinear_p016le_p016le_uv .nv.shared.Subsample_Bilinear_p016le_p016le_uv .nv.constant2.Subsample_Bilinear_p016le_p016le_uv .nv.constant0.Subsample_Bilinear_p016le_p016le_uv .rel.nv.constant0.Subsample_Bilinear_p016le_p016le_uv .text.Subsample_Bilinear_p016le_p016le .nv.info.Subsample_Bilinear_p016le_p016le .nv.shared.Subsample_Bilinear_p016le_p016le .nv.constant2.Subsample_Bilinear_p016le_p016le .nv.constant0.Subsample_Bilinear_p016le_p016le .rel.nv.constant0.Subsample_Bilinear_p016le_p016le .text.Subsample_Bilinear_p010le_p016le_uv .nv.info.Subsample_Bilinear_p010le_p016le_uv .nv.shared.Subsample_Bilinear_p010le_p016le_uv .nv.constant2.Subsample_Bilinear_p010le_p016le_uv .nv.constant0.Subsample_Bilinear_p010le_p016le_uv .rel.nv.constant0.Subsample_Bilinear_p010le_p016le_uv .text.Subsample_Bilinear_p010le_p016le .nv.info.Subsample_Bilinear_p010le_p016le .nv.shared.Subsample_Bilinear_p010le_p016le .nv.constant2.Subsample_Bilinear_p010le_p016le .nv.constant0.Subsample_Bilinear_p010le_p016le .rel.nv.constant0.Subsample_Bilinear_p010le_p016le .text.Subsample_Bilinear_yuv444p_p016le_uv .nv.info.Subsample_Bilinear_yuv444p_p016le_uv .nv.shared.Subsample_Bilinear_yuv444p_p016le_uv .nv.constant2.Subsample_Bilinear_yuv444p_p016le_uv .nv.constant0.Subsample_Bilinear_yuv444p_p016le_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p_p016le_uv .text.Subsample_Bilinear_yuv444p_p016le .nv.info.Subsample_Bilinear_yuv444p_p016le .nv.shared.Subsample_Bilinear_yuv444p_p016le .nv.constant2.Subsample_Bilinear_yuv444p_p016le .nv.constant0.Subsample_Bilinear_yuv444p_p016le .rel.nv.constant0.Subsample_Bilinear_yuv444p_p016le .text.Subsample_Bilinear_nv12_p016le_uv .nv.info.Subsample_Bilinear_nv12_p016le_uv .nv.shared.Subsample_Bilinear_nv12_p016le_uv .nv.constant2.Subsample_Bilinear_nv12_p016le_uv .nv.constant0.Subsample_Bilinear_nv12_p016le_uv .rel.nv.constant0.Subsample_Bilinear_nv12_p016le_uv .text.Subsample_Bilinear_nv12_p016le .nv.info.Subsample_Bilinear_nv12_p016le .nv.shared.Subsample_Bilinear_nv12_p016le .nv.constant2.Subsample_Bilinear_nv12_p016le .nv.constant0.Subsample_Bilinear_nv12_p016le .rel.nv.constant0.Subsample_Bilinear_nv12_p016le 
.text.Subsample_Bilinear_yuv420p_p016le_uv .nv.info.Subsample_Bilinear_yuv420p_p016le_uv .nv.shared.Subsample_Bilinear_yuv420p_p016le_uv .nv.constant2.Subsample_Bilinear_yuv420p_p016le_uv .nv.constant0.Subsample_Bilinear_yuv420p_p016le_uv .rel.nv.constant0.Subsample_Bilinear_yuv420p_p016le_uv .text.Subsample_Bilinear_yuv420p_p016le .nv.info.Subsample_Bilinear_yuv420p_p016le .nv.shared.Subsample_Bilinear_yuv420p_p016le .nv.constant2.Subsample_Bilinear_yuv420p_p016le .nv.constant0.Subsample_Bilinear_yuv420p_p016le .rel.nv.constant0.Subsample_Bilinear_yuv420p_p016le .text.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.info.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.shared.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_p010le_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p010le_uv .text.Subsample_Bilinear_yuv444p16le_p010le .nv.info.Subsample_Bilinear_yuv444p16le_p010le .nv.shared.Subsample_Bilinear_yuv444p16le_p010le .nv.constant2.Subsample_Bilinear_yuv444p16le_p010le .nv.constant0.Subsample_Bilinear_yuv444p16le_p010le .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p010le .text.Subsample_Bilinear_p016le_p010le_uv .nv.info.Subsample_Bilinear_p016le_p010le_uv .nv.shared.Subsample_Bilinear_p016le_p010le_uv .nv.constant2.Subsample_Bilinear_p016le_p010le_uv .nv.constant0.Subsample_Bilinear_p016le_p010le_uv .rel.nv.constant0.Subsample_Bilinear_p016le_p010le_uv .text.Subsample_Bilinear_p016le_p010le .nv.info.Subsample_Bilinear_p016le_p010le .nv.shared.Subsample_Bilinear_p016le_p010le .nv.constant2.Subsample_Bilinear_p016le_p010le .nv.constant0.Subsample_Bilinear_p016le_p010le .rel.nv.constant0.Subsample_Bilinear_p016le_p010le .text.Subsample_Bilinear_p010le_p010le_uv .nv.info.Subsample_Bilinear_p010le_p010le_uv .nv.shared.Subsample_Bilinear_p010le_p010le_uv .nv.constant2.Subsample_Bilinear_p010le_p010le_uv .nv.constant0.Subsample_Bilinear_p010le_p010le_uv .rel.nv.constant0.Subsample_Bilinear_p010le_p010le_uv .text.Subsample_Bilinear_p010le_p010le .nv.info.Subsample_Bilinear_p010le_p010le .nv.shared.Subsample_Bilinear_p010le_p010le .nv.constant2.Subsample_Bilinear_p010le_p010le .nv.constant0.Subsample_Bilinear_p010le_p010le .rel.nv.constant0.Subsample_Bilinear_p010le_p010le .text.Subsample_Bilinear_yuv444p_p010le_uv .nv.info.Subsample_Bilinear_yuv444p_p010le_uv .nv.shared.Subsample_Bilinear_yuv444p_p010le_uv .nv.constant2.Subsample_Bilinear_yuv444p_p010le_uv .nv.constant0.Subsample_Bilinear_yuv444p_p010le_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p_p010le_uv .text.Subsample_Bilinear_yuv444p_p010le .nv.info.Subsample_Bilinear_yuv444p_p010le .nv.shared.Subsample_Bilinear_yuv444p_p010le .nv.constant2.Subsample_Bilinear_yuv444p_p010le .nv.constant0.Subsample_Bilinear_yuv444p_p010le .rel.nv.constant0.Subsample_Bilinear_yuv444p_p010le .text.Subsample_Bilinear_nv12_p010le_uv .nv.info.Subsample_Bilinear_nv12_p010le_uv .nv.shared.Subsample_Bilinear_nv12_p010le_uv .nv.constant2.Subsample_Bilinear_nv12_p010le_uv .nv.constant0.Subsample_Bilinear_nv12_p010le_uv .rel.nv.constant0.Subsample_Bilinear_nv12_p010le_uv .text.Subsample_Bilinear_nv12_p010le .nv.info.Subsample_Bilinear_nv12_p010le .nv.shared.Subsample_Bilinear_nv12_p010le .nv.constant2.Subsample_Bilinear_nv12_p010le .nv.constant0.Subsample_Bilinear_nv12_p010le .rel.nv.constant0.Subsample_Bilinear_nv12_p010le .text.Subsample_Bilinear_yuv420p_p010le_uv .nv.info.Subsample_Bilinear_yuv420p_p010le_uv .nv.shared.Subsample_Bilinear_yuv420p_p010le_uv 
.nv.constant2.Subsample_Bilinear_yuv420p_p010le_uv .nv.constant0.Subsample_Bilinear_yuv420p_p010le_uv .rel.nv.constant0.Subsample_Bilinear_yuv420p_p010le_uv .text.Subsample_Bilinear_yuv420p_p010le .nv.info.Subsample_Bilinear_yuv420p_p010le .nv.shared.Subsample_Bilinear_yuv420p_p010le .nv.constant2.Subsample_Bilinear_yuv420p_p010le .nv.constant0.Subsample_Bilinear_yuv420p_p010le .rel.nv.constant0.Subsample_Bilinear_yuv420p_p010le .text.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p_uv .text.Subsample_Bilinear_yuv444p16le_yuv444p .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p .text.Subsample_Bilinear_p016le_yuv444p_uv .nv.info.Subsample_Bilinear_p016le_yuv444p_uv .nv.shared.Subsample_Bilinear_p016le_yuv444p_uv .nv.constant2.Subsample_Bilinear_p016le_yuv444p_uv .nv.constant0.Subsample_Bilinear_p016le_yuv444p_uv .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p_uv .text.Subsample_Bilinear_p016le_yuv444p .nv.info.Subsample_Bilinear_p016le_yuv444p .nv.shared.Subsample_Bilinear_p016le_yuv444p .nv.constant2.Subsample_Bilinear_p016le_yuv444p .nv.constant0.Subsample_Bilinear_p016le_yuv444p .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p .text.Subsample_Bilinear_p010le_yuv444p_uv .nv.info.Subsample_Bilinear_p010le_yuv444p_uv .nv.shared.Subsample_Bilinear_p010le_yuv444p_uv .nv.constant2.Subsample_Bilinear_p010le_yuv444p_uv .nv.constant0.Subsample_Bilinear_p010le_yuv444p_uv .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p_uv .text.Subsample_Bilinear_p010le_yuv444p .nv.info.Subsample_Bilinear_p010le_yuv444p .nv.shared.Subsample_Bilinear_p010le_yuv444p .nv.constant2.Subsample_Bilinear_p010le_yuv444p .nv.constant0.Subsample_Bilinear_p010le_yuv444p .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p .text.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.info.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.shared.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p_uv .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p_uv .text.Subsample_Bilinear_yuv444p_yuv444p .nv.info.Subsample_Bilinear_yuv444p_yuv444p .nv.shared.Subsample_Bilinear_yuv444p_yuv444p .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p .text.Subsample_Bilinear_nv12_yuv444p_uv .nv.info.Subsample_Bilinear_nv12_yuv444p_uv .nv.shared.Subsample_Bilinear_nv12_yuv444p_uv .nv.constant2.Subsample_Bilinear_nv12_yuv444p_uv .nv.constant0.Subsample_Bilinear_nv12_yuv444p_uv .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p_uv .text.Subsample_Bilinear_nv12_yuv444p .nv.info.Subsample_Bilinear_nv12_yuv444p .nv.shared.Subsample_Bilinear_nv12_yuv444p .nv.constant2.Subsample_Bilinear_nv12_yuv444p .nv.constant0.Subsample_Bilinear_nv12_yuv444p .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p .text.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.info.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.shared.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p_uv 
[String-table dump from the cubin (.shstrtab/.strtab). For every scaling kernel Subsample_{Nearest,Bilinear,Bicubic,Lanczos}_<srcfmt>_<dstfmt> and its _uv chroma variant, the ELF carries a matching set of sections: .text.*, .nv.info.*, .nv.shared.*, .nv.constant0.*, .nv.constant2.* and .rel.nv.constant0.*. Source and destination formats span yuv420p, nv12, yuv444p, p010le, p016le and yuv444p16le, plus rgb0/bgr0 swizzle variants. Interleaved with these are the generic table sections (.shstrtab, .strtab, .symtab, .symtab_shndx, .nv.info, .nv.callgraph, .nv.prototype, .nv.rel.action) and per-kernel clone symbols: $Subsample_Lanczos_<fmt>_<fmt>$_ZL17Subsample_BicubicI6uchar4XadL_ZL14lanczos_coeffsffEEET_yiiiiiiif (the Lanczos entry points instantiate the Subsample_Bicubic template with lanczos_coeffs, per the mangled name) together with numbered division helpers $__internal_0_ through at least $__internal_126_$__cuda_sm3x_div_rn_noftz_f32_slowpath.]
.rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv444p_uv .nv.constant0.Subsample_Bicubic_yuv420p_yuv444p_uv .text.Subsample_Bicubic_yuv420p_yuv444p .nv.info.Subsample_Bicubic_yuv420p_yuv444p .nv.shared.Subsample_Bicubic_yuv420p_yuv444p .nv.constant2.Subsample_Bicubic_yuv420p_yuv444p $__internal_127_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv444p .nv.constant0.Subsample_Bicubic_yuv420p_yuv444p .text.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.info.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.shared.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_nv12_uv $__internal_128_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_nv12_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_nv12_uv .text.Subsample_Bicubic_yuv444p16le_nv12 .nv.info.Subsample_Bicubic_yuv444p16le_nv12 .nv.shared.Subsample_Bicubic_yuv444p16le_nv12 .nv.constant2.Subsample_Bicubic_yuv444p16le_nv12 $__internal_129_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_nv12 .nv.constant0.Subsample_Bicubic_yuv444p16le_nv12 .text.Subsample_Bicubic_p016le_nv12_uv .nv.info.Subsample_Bicubic_p016le_nv12_uv .nv.shared.Subsample_Bicubic_p016le_nv12_uv .nv.constant2.Subsample_Bicubic_p016le_nv12_uv $__internal_130_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p016le_nv12_uv .nv.constant0.Subsample_Bicubic_p016le_nv12_uv .text.Subsample_Bicubic_p016le_nv12 .nv.info.Subsample_Bicubic_p016le_nv12 .nv.shared.Subsample_Bicubic_p016le_nv12 .nv.constant2.Subsample_Bicubic_p016le_nv12 $__internal_131_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p016le_nv12 .nv.constant0.Subsample_Bicubic_p016le_nv12 .text.Subsample_Bicubic_p010le_nv12_uv .nv.info.Subsample_Bicubic_p010le_nv12_uv .nv.shared.Subsample_Bicubic_p010le_nv12_uv .nv.constant2.Subsample_Bicubic_p010le_nv12_uv $__internal_132_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p010le_nv12_uv .nv.constant0.Subsample_Bicubic_p010le_nv12_uv .text.Subsample_Bicubic_p010le_nv12 .nv.info.Subsample_Bicubic_p010le_nv12 .nv.shared.Subsample_Bicubic_p010le_nv12 .nv.constant2.Subsample_Bicubic_p010le_nv12 $__internal_133_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p010le_nv12 .nv.constant0.Subsample_Bicubic_p010le_nv12 .text.Subsample_Bicubic_yuv444p_nv12_uv .nv.info.Subsample_Bicubic_yuv444p_nv12_uv .nv.shared.Subsample_Bicubic_yuv444p_nv12_uv .nv.constant2.Subsample_Bicubic_yuv444p_nv12_uv $__internal_134_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p_nv12_uv .nv.constant0.Subsample_Bicubic_yuv444p_nv12_uv .text.Subsample_Bicubic_yuv444p_nv12 .nv.info.Subsample_Bicubic_yuv444p_nv12 .nv.shared.Subsample_Bicubic_yuv444p_nv12 .nv.constant2.Subsample_Bicubic_yuv444p_nv12 $__internal_135_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p_nv12 .nv.constant0.Subsample_Bicubic_yuv444p_nv12 .text.Subsample_Bicubic_nv12_nv12_uv .nv.info.Subsample_Bicubic_nv12_nv12_uv .nv.shared.Subsample_Bicubic_nv12_nv12_uv .nv.constant2.Subsample_Bicubic_nv12_nv12_uv $__internal_136_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_nv12_nv12_uv .nv.constant0.Subsample_Bicubic_nv12_nv12_uv .text.Subsample_Bicubic_nv12_nv12 .nv.info.Subsample_Bicubic_nv12_nv12 .nv.shared.Subsample_Bicubic_nv12_nv12 .nv.constant2.Subsample_Bicubic_nv12_nv12 
$__internal_137_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_nv12_nv12 .nv.constant0.Subsample_Bicubic_nv12_nv12 .text.Subsample_Bicubic_yuv420p_nv12_uv .nv.info.Subsample_Bicubic_yuv420p_nv12_uv .nv.shared.Subsample_Bicubic_yuv420p_nv12_uv .nv.constant2.Subsample_Bicubic_yuv420p_nv12_uv $__internal_138_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv420p_nv12_uv .nv.constant0.Subsample_Bicubic_yuv420p_nv12_uv .text.Subsample_Bicubic_yuv420p_nv12 .nv.info.Subsample_Bicubic_yuv420p_nv12 .nv.shared.Subsample_Bicubic_yuv420p_nv12 .nv.constant2.Subsample_Bicubic_yuv420p_nv12 $__internal_139_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv420p_nv12 .nv.constant0.Subsample_Bicubic_yuv420p_nv12 .text.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.info.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.shared.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv420p_uv $__internal_140_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p_uv .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p_uv .text.Subsample_Bicubic_yuv444p16le_yuv420p .nv.info.Subsample_Bicubic_yuv444p16le_yuv420p .nv.shared.Subsample_Bicubic_yuv444p16le_yuv420p .nv.constant2.Subsample_Bicubic_yuv444p16le_yuv420p $__internal_141_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p .nv.constant0.Subsample_Bicubic_yuv444p16le_yuv420p .text.Subsample_Bicubic_p016le_yuv420p_uv .nv.info.Subsample_Bicubic_p016le_yuv420p_uv .nv.shared.Subsample_Bicubic_p016le_yuv420p_uv .nv.constant2.Subsample_Bicubic_p016le_yuv420p_uv $__internal_142_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p016le_yuv420p_uv .nv.constant0.Subsample_Bicubic_p016le_yuv420p_uv .text.Subsample_Bicubic_p016le_yuv420p .nv.info.Subsample_Bicubic_p016le_yuv420p .nv.shared.Subsample_Bicubic_p016le_yuv420p .nv.constant2.Subsample_Bicubic_p016le_yuv420p $__internal_143_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p016le_yuv420p .nv.constant0.Subsample_Bicubic_p016le_yuv420p .text.Subsample_Bicubic_p010le_yuv420p_uv .nv.info.Subsample_Bicubic_p010le_yuv420p_uv .nv.shared.Subsample_Bicubic_p010le_yuv420p_uv .nv.constant2.Subsample_Bicubic_p010le_yuv420p_uv $__internal_144_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p010le_yuv420p_uv .nv.constant0.Subsample_Bicubic_p010le_yuv420p_uv .text.Subsample_Bicubic_p010le_yuv420p .nv.info.Subsample_Bicubic_p010le_yuv420p .nv.shared.Subsample_Bicubic_p010le_yuv420p .nv.constant2.Subsample_Bicubic_p010le_yuv420p $__internal_145_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_p010le_yuv420p .nv.constant0.Subsample_Bicubic_p010le_yuv420p .text.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.info.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.shared.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.constant2.Subsample_Bicubic_yuv444p_yuv420p_uv $__internal_146_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv420p_uv .nv.constant0.Subsample_Bicubic_yuv444p_yuv420p_uv .text.Subsample_Bicubic_yuv444p_yuv420p .nv.info.Subsample_Bicubic_yuv444p_yuv420p .nv.shared.Subsample_Bicubic_yuv444p_yuv420p .nv.constant2.Subsample_Bicubic_yuv444p_yuv420p $__internal_147_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv444p_yuv420p .nv.constant0.Subsample_Bicubic_yuv444p_yuv420p 
.text.Subsample_Bicubic_nv12_yuv420p_uv .nv.info.Subsample_Bicubic_nv12_yuv420p_uv .nv.shared.Subsample_Bicubic_nv12_yuv420p_uv .nv.constant2.Subsample_Bicubic_nv12_yuv420p_uv $__internal_148_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_nv12_yuv420p_uv .nv.constant0.Subsample_Bicubic_nv12_yuv420p_uv .text.Subsample_Bicubic_nv12_yuv420p .nv.info.Subsample_Bicubic_nv12_yuv420p .nv.shared.Subsample_Bicubic_nv12_yuv420p .nv.constant2.Subsample_Bicubic_nv12_yuv420p $__internal_149_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_nv12_yuv420p .nv.constant0.Subsample_Bicubic_nv12_yuv420p .text.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.info.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.shared.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.constant2.Subsample_Bicubic_yuv420p_yuv420p_uv $__internal_150_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv420p_uv .nv.constant0.Subsample_Bicubic_yuv420p_yuv420p_uv .text.Subsample_Bicubic_yuv420p_yuv420p .nv.info.Subsample_Bicubic_yuv420p_yuv420p .nv.shared.Subsample_Bicubic_yuv420p_yuv420p .nv.constant2.Subsample_Bicubic_yuv420p_yuv420p $__internal_151_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bicubic_yuv420p_yuv420p .nv.constant0.Subsample_Bicubic_yuv420p_yuv420p .text.Subsample_Bilinear_rgb0_bgr0_uv .nv.info.Subsample_Bilinear_rgb0_bgr0_uv .nv.shared.Subsample_Bilinear_rgb0_bgr0_uv .rel.nv.constant0.Subsample_Bilinear_rgb0_bgr0_uv .nv.constant0.Subsample_Bilinear_rgb0_bgr0_uv .text.Subsample_Bilinear_rgb0_bgr0 .nv.info.Subsample_Bilinear_rgb0_bgr0 .nv.shared.Subsample_Bilinear_rgb0_bgr0 .nv.constant2.Subsample_Bilinear_rgb0_bgr0 $__internal_152_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_rgb0_bgr0 .nv.constant0.Subsample_Bilinear_rgb0_bgr0 .text.Subsample_Bilinear_bgr0_rgb0_uv .nv.info.Subsample_Bilinear_bgr0_rgb0_uv .nv.shared.Subsample_Bilinear_bgr0_rgb0_uv .rel.nv.constant0.Subsample_Bilinear_bgr0_rgb0_uv .nv.constant0.Subsample_Bilinear_bgr0_rgb0_uv .text.Subsample_Bilinear_bgr0_rgb0 .nv.info.Subsample_Bilinear_bgr0_rgb0 .nv.shared.Subsample_Bilinear_bgr0_rgb0 .nv.constant2.Subsample_Bilinear_bgr0_rgb0 $__internal_153_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_bgr0_rgb0 .nv.constant0.Subsample_Bilinear_bgr0_rgb0 .text.Subsample_Bilinear_rgb0_rgb0_uv .nv.info.Subsample_Bilinear_rgb0_rgb0_uv .nv.shared.Subsample_Bilinear_rgb0_rgb0_uv .rel.nv.constant0.Subsample_Bilinear_rgb0_rgb0_uv .nv.constant0.Subsample_Bilinear_rgb0_rgb0_uv .text.Subsample_Bilinear_rgb0_rgb0 .nv.info.Subsample_Bilinear_rgb0_rgb0 .nv.shared.Subsample_Bilinear_rgb0_rgb0 .nv.constant2.Subsample_Bilinear_rgb0_rgb0 $__internal_154_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_rgb0_rgb0 .nv.constant0.Subsample_Bilinear_rgb0_rgb0 .text.Subsample_Bilinear_bgr0_bgr0_uv .nv.info.Subsample_Bilinear_bgr0_bgr0_uv .nv.shared.Subsample_Bilinear_bgr0_bgr0_uv .rel.nv.constant0.Subsample_Bilinear_bgr0_bgr0_uv .nv.constant0.Subsample_Bilinear_bgr0_bgr0_uv .text.Subsample_Bilinear_bgr0_bgr0 .nv.info.Subsample_Bilinear_bgr0_bgr0 .nv.shared.Subsample_Bilinear_bgr0_bgr0 .nv.constant2.Subsample_Bilinear_bgr0_bgr0 $__internal_155_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_bgr0_bgr0 .nv.constant0.Subsample_Bilinear_bgr0_bgr0 .text.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv 
.nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv $__internal_156_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le_uv .text.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p16le $__internal_157_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p16le .text.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.info.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.shared.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_p016le_yuv444p16le_uv $__internal_158_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_p016le_yuv444p16le_uv .text.Subsample_Bilinear_p016le_yuv444p16le .nv.info.Subsample_Bilinear_p016le_yuv444p16le .nv.shared.Subsample_Bilinear_p016le_yuv444p16le .nv.constant2.Subsample_Bilinear_p016le_yuv444p16le $__internal_159_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p16le .nv.constant0.Subsample_Bilinear_p016le_yuv444p16le .text.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.info.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.shared.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_p010le_yuv444p16le_uv $__internal_160_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_p010le_yuv444p16le_uv .text.Subsample_Bilinear_p010le_yuv444p16le .nv.info.Subsample_Bilinear_p010le_yuv444p16le .nv.shared.Subsample_Bilinear_p010le_yuv444p16le .nv.constant2.Subsample_Bilinear_p010le_yuv444p16le $__internal_161_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p16le .nv.constant0.Subsample_Bilinear_p010le_yuv444p16le .text.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.info.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.shared.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p16le_uv $__internal_162_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le_uv .text.Subsample_Bilinear_yuv444p_yuv444p16le .nv.info.Subsample_Bilinear_yuv444p_yuv444p16le .nv.shared.Subsample_Bilinear_yuv444p_yuv444p16le .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p16le $__internal_163_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p16le .text.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.info.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.shared.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_nv12_yuv444p16le_uv $__internal_164_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_nv12_yuv444p16le_uv .text.Subsample_Bilinear_nv12_yuv444p16le .nv.info.Subsample_Bilinear_nv12_yuv444p16le .nv.shared.Subsample_Bilinear_nv12_yuv444p16le .nv.constant2.Subsample_Bilinear_nv12_yuv444p16le $__internal_165_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p16le 
.nv.constant0.Subsample_Bilinear_nv12_yuv444p16le .text.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.info.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.shared.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p16le_uv $__internal_166_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le_uv .nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le_uv .text.Subsample_Bilinear_yuv420p_yuv444p16le .nv.info.Subsample_Bilinear_yuv420p_yuv444p16le .nv.shared.Subsample_Bilinear_yuv420p_yuv444p16le .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p16le $__internal_167_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le .nv.constant0.Subsample_Bilinear_yuv420p_yuv444p16le .text.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.info.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.shared.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_p016le_uv $__internal_168_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p016le_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_p016le_uv .text.Subsample_Bilinear_yuv444p16le_p016le .nv.info.Subsample_Bilinear_yuv444p16le_p016le .nv.shared.Subsample_Bilinear_yuv444p16le_p016le .nv.constant2.Subsample_Bilinear_yuv444p16le_p016le $__internal_169_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p016le .nv.constant0.Subsample_Bilinear_yuv444p16le_p016le .text.Subsample_Bilinear_p016le_p016le_uv .nv.info.Subsample_Bilinear_p016le_p016le_uv .nv.shared.Subsample_Bilinear_p016le_p016le_uv .nv.constant2.Subsample_Bilinear_p016le_p016le_uv $__internal_170_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_p016le_uv .nv.constant0.Subsample_Bilinear_p016le_p016le_uv .text.Subsample_Bilinear_p016le_p016le .nv.info.Subsample_Bilinear_p016le_p016le .nv.shared.Subsample_Bilinear_p016le_p016le .nv.constant2.Subsample_Bilinear_p016le_p016le $__internal_171_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_p016le .nv.constant0.Subsample_Bilinear_p016le_p016le .text.Subsample_Bilinear_p010le_p016le_uv .nv.info.Subsample_Bilinear_p010le_p016le_uv .nv.shared.Subsample_Bilinear_p010le_p016le_uv .nv.constant2.Subsample_Bilinear_p010le_p016le_uv $__internal_172_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_p016le_uv .nv.constant0.Subsample_Bilinear_p010le_p016le_uv .text.Subsample_Bilinear_p010le_p016le .nv.info.Subsample_Bilinear_p010le_p016le .nv.shared.Subsample_Bilinear_p010le_p016le .nv.constant2.Subsample_Bilinear_p010le_p016le $__internal_173_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_p016le .nv.constant0.Subsample_Bilinear_p010le_p016le .text.Subsample_Bilinear_yuv444p_p016le_uv .nv.info.Subsample_Bilinear_yuv444p_p016le_uv .nv.shared.Subsample_Bilinear_yuv444p_p016le_uv .nv.constant2.Subsample_Bilinear_yuv444p_p016le_uv $__internal_174_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_p016le_uv .nv.constant0.Subsample_Bilinear_yuv444p_p016le_uv .text.Subsample_Bilinear_yuv444p_p016le .nv.info.Subsample_Bilinear_yuv444p_p016le .nv.shared.Subsample_Bilinear_yuv444p_p016le .nv.constant2.Subsample_Bilinear_yuv444p_p016le $__internal_175_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_p016le .nv.constant0.Subsample_Bilinear_yuv444p_p016le 
.text.Subsample_Bilinear_nv12_p016le_uv .nv.info.Subsample_Bilinear_nv12_p016le_uv .nv.shared.Subsample_Bilinear_nv12_p016le_uv .nv.constant2.Subsample_Bilinear_nv12_p016le_uv $__internal_176_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_p016le_uv .nv.constant0.Subsample_Bilinear_nv12_p016le_uv .text.Subsample_Bilinear_nv12_p016le .nv.info.Subsample_Bilinear_nv12_p016le .nv.shared.Subsample_Bilinear_nv12_p016le .nv.constant2.Subsample_Bilinear_nv12_p016le $__internal_177_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_p016le .nv.constant0.Subsample_Bilinear_nv12_p016le .text.Subsample_Bilinear_yuv420p_p016le_uv .nv.info.Subsample_Bilinear_yuv420p_p016le_uv .nv.shared.Subsample_Bilinear_yuv420p_p016le_uv .nv.constant2.Subsample_Bilinear_yuv420p_p016le_uv $__internal_178_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_p016le_uv .nv.constant0.Subsample_Bilinear_yuv420p_p016le_uv .text.Subsample_Bilinear_yuv420p_p016le .nv.info.Subsample_Bilinear_yuv420p_p016le .nv.shared.Subsample_Bilinear_yuv420p_p016le .nv.constant2.Subsample_Bilinear_yuv420p_p016le $__internal_179_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_p016le .nv.constant0.Subsample_Bilinear_yuv420p_p016le .text.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.info.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.shared.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_p010le_uv $__internal_180_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p010le_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_p010le_uv .text.Subsample_Bilinear_yuv444p16le_p010le .nv.info.Subsample_Bilinear_yuv444p16le_p010le .nv.shared.Subsample_Bilinear_yuv444p16le_p010le .nv.constant2.Subsample_Bilinear_yuv444p16le_p010le $__internal_181_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_p010le .nv.constant0.Subsample_Bilinear_yuv444p16le_p010le .text.Subsample_Bilinear_p016le_p010le_uv .nv.info.Subsample_Bilinear_p016le_p010le_uv .nv.shared.Subsample_Bilinear_p016le_p010le_uv .nv.constant2.Subsample_Bilinear_p016le_p010le_uv $__internal_182_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_p010le_uv .nv.constant0.Subsample_Bilinear_p016le_p010le_uv .text.Subsample_Bilinear_p016le_p010le .nv.info.Subsample_Bilinear_p016le_p010le .nv.shared.Subsample_Bilinear_p016le_p010le .nv.constant2.Subsample_Bilinear_p016le_p010le $__internal_183_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_p010le .nv.constant0.Subsample_Bilinear_p016le_p010le .text.Subsample_Bilinear_p010le_p010le_uv .nv.info.Subsample_Bilinear_p010le_p010le_uv .nv.shared.Subsample_Bilinear_p010le_p010le_uv .nv.constant2.Subsample_Bilinear_p010le_p010le_uv $__internal_184_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_p010le_uv .nv.constant0.Subsample_Bilinear_p010le_p010le_uv .text.Subsample_Bilinear_p010le_p010le .nv.info.Subsample_Bilinear_p010le_p010le .nv.shared.Subsample_Bilinear_p010le_p010le .nv.constant2.Subsample_Bilinear_p010le_p010le $__internal_185_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_p010le .nv.constant0.Subsample_Bilinear_p010le_p010le .text.Subsample_Bilinear_yuv444p_p010le_uv .nv.info.Subsample_Bilinear_yuv444p_p010le_uv .nv.shared.Subsample_Bilinear_yuv444p_p010le_uv 
.nv.constant2.Subsample_Bilinear_yuv444p_p010le_uv $__internal_186_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_p010le_uv .nv.constant0.Subsample_Bilinear_yuv444p_p010le_uv .text.Subsample_Bilinear_yuv444p_p010le .nv.info.Subsample_Bilinear_yuv444p_p010le .nv.shared.Subsample_Bilinear_yuv444p_p010le .nv.constant2.Subsample_Bilinear_yuv444p_p010le $__internal_187_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_p010le .nv.constant0.Subsample_Bilinear_yuv444p_p010le .text.Subsample_Bilinear_nv12_p010le_uv .nv.info.Subsample_Bilinear_nv12_p010le_uv .nv.shared.Subsample_Bilinear_nv12_p010le_uv .nv.constant2.Subsample_Bilinear_nv12_p010le_uv $__internal_188_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_p010le_uv .nv.constant0.Subsample_Bilinear_nv12_p010le_uv .text.Subsample_Bilinear_nv12_p010le .nv.info.Subsample_Bilinear_nv12_p010le .nv.shared.Subsample_Bilinear_nv12_p010le .nv.constant2.Subsample_Bilinear_nv12_p010le $__internal_189_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_p010le .nv.constant0.Subsample_Bilinear_nv12_p010le .text.Subsample_Bilinear_yuv420p_p010le_uv .nv.info.Subsample_Bilinear_yuv420p_p010le_uv .nv.shared.Subsample_Bilinear_yuv420p_p010le_uv .nv.constant2.Subsample_Bilinear_yuv420p_p010le_uv $__internal_190_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_p010le_uv .nv.constant0.Subsample_Bilinear_yuv420p_p010le_uv .text.Subsample_Bilinear_yuv420p_p010le .nv.info.Subsample_Bilinear_yuv420p_p010le .nv.shared.Subsample_Bilinear_yuv420p_p010le .nv.constant2.Subsample_Bilinear_yuv420p_p010le $__internal_191_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_p010le .nv.constant0.Subsample_Bilinear_yuv420p_p010le .text.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p_uv $__internal_192_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p_uv .text.Subsample_Bilinear_yuv444p16le_yuv444p .nv.info.Subsample_Bilinear_yuv444p16le_yuv444p .nv.shared.Subsample_Bilinear_yuv444p16le_yuv444p .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv444p $__internal_193_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv444p .text.Subsample_Bilinear_p016le_yuv444p_uv .nv.info.Subsample_Bilinear_p016le_yuv444p_uv .nv.shared.Subsample_Bilinear_p016le_yuv444p_uv .nv.constant2.Subsample_Bilinear_p016le_yuv444p_uv $__internal_194_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p_uv .nv.constant0.Subsample_Bilinear_p016le_yuv444p_uv .text.Subsample_Bilinear_p016le_yuv444p .nv.info.Subsample_Bilinear_p016le_yuv444p .nv.shared.Subsample_Bilinear_p016le_yuv444p .nv.constant2.Subsample_Bilinear_p016le_yuv444p $__internal_195_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_yuv444p .nv.constant0.Subsample_Bilinear_p016le_yuv444p .text.Subsample_Bilinear_p010le_yuv444p_uv .nv.info.Subsample_Bilinear_p010le_yuv444p_uv .nv.shared.Subsample_Bilinear_p010le_yuv444p_uv .nv.constant2.Subsample_Bilinear_p010le_yuv444p_uv $__internal_196_$__cuda_sm3x_div_rn_noftz_f32_slowpath 
.rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p_uv .nv.constant0.Subsample_Bilinear_p010le_yuv444p_uv .text.Subsample_Bilinear_p010le_yuv444p .nv.info.Subsample_Bilinear_p010le_yuv444p .nv.shared.Subsample_Bilinear_p010le_yuv444p .nv.constant2.Subsample_Bilinear_p010le_yuv444p $__internal_197_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_yuv444p .nv.constant0.Subsample_Bilinear_p010le_yuv444p .text.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.info.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.shared.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p_uv $__internal_198_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p_uv .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p_uv .text.Subsample_Bilinear_yuv444p_yuv444p .nv.info.Subsample_Bilinear_yuv444p_yuv444p .nv.shared.Subsample_Bilinear_yuv444p_yuv444p .nv.constant2.Subsample_Bilinear_yuv444p_yuv444p $__internal_199_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv444p .nv.constant0.Subsample_Bilinear_yuv444p_yuv444p .text.Subsample_Bilinear_nv12_yuv444p_uv .nv.info.Subsample_Bilinear_nv12_yuv444p_uv .nv.shared.Subsample_Bilinear_nv12_yuv444p_uv .nv.constant2.Subsample_Bilinear_nv12_yuv444p_uv $__internal_200_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p_uv .nv.constant0.Subsample_Bilinear_nv12_yuv444p_uv .text.Subsample_Bilinear_nv12_yuv444p .nv.info.Subsample_Bilinear_nv12_yuv444p .nv.shared.Subsample_Bilinear_nv12_yuv444p .nv.constant2.Subsample_Bilinear_nv12_yuv444p $__internal_201_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_yuv444p .nv.constant0.Subsample_Bilinear_nv12_yuv444p .text.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.info.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.shared.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p_uv $__internal_202_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv444p_uv .nv.constant0.Subsample_Bilinear_yuv420p_yuv444p_uv .text.Subsample_Bilinear_yuv420p_yuv444p .nv.info.Subsample_Bilinear_yuv420p_yuv444p .nv.shared.Subsample_Bilinear_yuv420p_yuv444p .nv.constant2.Subsample_Bilinear_yuv420p_yuv444p $__internal_203_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv444p .nv.constant0.Subsample_Bilinear_yuv420p_yuv444p .text.Subsample_Bilinear_yuv444p16le_nv12_uv .nv.info.Subsample_Bilinear_yuv444p16le_nv12_uv .nv.shared.Subsample_Bilinear_yuv444p16le_nv12_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_nv12_uv $__internal_204_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_nv12_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_nv12_uv .text.Subsample_Bilinear_yuv444p16le_nv12 .nv.info.Subsample_Bilinear_yuv444p16le_nv12 .nv.shared.Subsample_Bilinear_yuv444p16le_nv12 .nv.constant2.Subsample_Bilinear_yuv444p16le_nv12 $__internal_205_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_nv12 .nv.constant0.Subsample_Bilinear_yuv444p16le_nv12 .text.Subsample_Bilinear_p016le_nv12_uv .nv.info.Subsample_Bilinear_p016le_nv12_uv .nv.shared.Subsample_Bilinear_p016le_nv12_uv .nv.constant2.Subsample_Bilinear_p016le_nv12_uv $__internal_206_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_nv12_uv .nv.constant0.Subsample_Bilinear_p016le_nv12_uv .text.Subsample_Bilinear_p016le_nv12 
.nv.info.Subsample_Bilinear_p016le_nv12 .nv.shared.Subsample_Bilinear_p016le_nv12 .nv.constant2.Subsample_Bilinear_p016le_nv12 $__internal_207_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_nv12 .nv.constant0.Subsample_Bilinear_p016le_nv12 .text.Subsample_Bilinear_p010le_nv12_uv .nv.info.Subsample_Bilinear_p010le_nv12_uv .nv.shared.Subsample_Bilinear_p010le_nv12_uv .nv.constant2.Subsample_Bilinear_p010le_nv12_uv $__internal_208_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_nv12_uv .nv.constant0.Subsample_Bilinear_p010le_nv12_uv .text.Subsample_Bilinear_p010le_nv12 .nv.info.Subsample_Bilinear_p010le_nv12 .nv.shared.Subsample_Bilinear_p010le_nv12 .nv.constant2.Subsample_Bilinear_p010le_nv12 $__internal_209_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_nv12 .nv.constant0.Subsample_Bilinear_p010le_nv12 .text.Subsample_Bilinear_yuv444p_nv12_uv .nv.info.Subsample_Bilinear_yuv444p_nv12_uv .nv.shared.Subsample_Bilinear_yuv444p_nv12_uv .nv.constant2.Subsample_Bilinear_yuv444p_nv12_uv $__internal_210_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_nv12_uv .nv.constant0.Subsample_Bilinear_yuv444p_nv12_uv .text.Subsample_Bilinear_yuv444p_nv12 .nv.info.Subsample_Bilinear_yuv444p_nv12 .nv.shared.Subsample_Bilinear_yuv444p_nv12 .nv.constant2.Subsample_Bilinear_yuv444p_nv12 $__internal_211_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_nv12 .nv.constant0.Subsample_Bilinear_yuv444p_nv12 .text.Subsample_Bilinear_nv12_nv12_uv .nv.info.Subsample_Bilinear_nv12_nv12_uv .nv.shared.Subsample_Bilinear_nv12_nv12_uv .nv.constant2.Subsample_Bilinear_nv12_nv12_uv $__internal_212_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_nv12_uv .nv.constant0.Subsample_Bilinear_nv12_nv12_uv .text.Subsample_Bilinear_nv12_nv12 .nv.info.Subsample_Bilinear_nv12_nv12 .nv.shared.Subsample_Bilinear_nv12_nv12 .nv.constant2.Subsample_Bilinear_nv12_nv12 $__internal_213_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_nv12 .nv.constant0.Subsample_Bilinear_nv12_nv12 .text.Subsample_Bilinear_yuv420p_nv12_uv .nv.info.Subsample_Bilinear_yuv420p_nv12_uv .nv.shared.Subsample_Bilinear_yuv420p_nv12_uv .nv.constant2.Subsample_Bilinear_yuv420p_nv12_uv $__internal_214_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_nv12_uv .nv.constant0.Subsample_Bilinear_yuv420p_nv12_uv .text.Subsample_Bilinear_yuv420p_nv12 .nv.info.Subsample_Bilinear_yuv420p_nv12 .nv.shared.Subsample_Bilinear_yuv420p_nv12 .nv.constant2.Subsample_Bilinear_yuv420p_nv12 $__internal_215_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_nv12 .nv.constant0.Subsample_Bilinear_yuv420p_nv12 .text.Subsample_Bilinear_yuv444p16le_yuv420p_uv .nv.info.Subsample_Bilinear_yuv444p16le_yuv420p_uv .nv.shared.Subsample_Bilinear_yuv444p16le_yuv420p_uv .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv420p_uv $__internal_216_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv420p_uv .nv.constant0.Subsample_Bilinear_yuv444p16le_yuv420p_uv .text.Subsample_Bilinear_yuv444p16le_yuv420p .nv.info.Subsample_Bilinear_yuv444p16le_yuv420p .nv.shared.Subsample_Bilinear_yuv444p16le_yuv420p .nv.constant2.Subsample_Bilinear_yuv444p16le_yuv420p $__internal_217_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv420p 
.nv.constant0.Subsample_Bilinear_yuv444p16le_yuv420p .text.Subsample_Bilinear_p016le_yuv420p_uv .nv.info.Subsample_Bilinear_p016le_yuv420p_uv .nv.shared.Subsample_Bilinear_p016le_yuv420p_uv .nv.constant2.Subsample_Bilinear_p016le_yuv420p_uv $__internal_218_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_yuv420p_uv .nv.constant0.Subsample_Bilinear_p016le_yuv420p_uv .text.Subsample_Bilinear_p016le_yuv420p .nv.info.Subsample_Bilinear_p016le_yuv420p .nv.shared.Subsample_Bilinear_p016le_yuv420p .nv.constant2.Subsample_Bilinear_p016le_yuv420p $__internal_219_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p016le_yuv420p .nv.constant0.Subsample_Bilinear_p016le_yuv420p .text.Subsample_Bilinear_p010le_yuv420p_uv .nv.info.Subsample_Bilinear_p010le_yuv420p_uv .nv.shared.Subsample_Bilinear_p010le_yuv420p_uv .nv.constant2.Subsample_Bilinear_p010le_yuv420p_uv $__internal_220_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_yuv420p_uv .nv.constant0.Subsample_Bilinear_p010le_yuv420p_uv .text.Subsample_Bilinear_p010le_yuv420p .nv.info.Subsample_Bilinear_p010le_yuv420p .nv.shared.Subsample_Bilinear_p010le_yuv420p .nv.constant2.Subsample_Bilinear_p010le_yuv420p $__internal_221_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_p010le_yuv420p .nv.constant0.Subsample_Bilinear_p010le_yuv420p .text.Subsample_Bilinear_yuv444p_yuv420p_uv .nv.info.Subsample_Bilinear_yuv444p_yuv420p_uv .nv.shared.Subsample_Bilinear_yuv444p_yuv420p_uv .nv.constant2.Subsample_Bilinear_yuv444p_yuv420p_uv $__internal_222_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv420p_uv .nv.constant0.Subsample_Bilinear_yuv444p_yuv420p_uv .text.Subsample_Bilinear_yuv444p_yuv420p .nv.info.Subsample_Bilinear_yuv444p_yuv420p .nv.shared.Subsample_Bilinear_yuv444p_yuv420p .nv.constant2.Subsample_Bilinear_yuv444p_yuv420p $__internal_223_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv444p_yuv420p .nv.constant0.Subsample_Bilinear_yuv444p_yuv420p .text.Subsample_Bilinear_nv12_yuv420p_uv .nv.info.Subsample_Bilinear_nv12_yuv420p_uv .nv.shared.Subsample_Bilinear_nv12_yuv420p_uv .nv.constant2.Subsample_Bilinear_nv12_yuv420p_uv $__internal_224_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_yuv420p_uv .nv.constant0.Subsample_Bilinear_nv12_yuv420p_uv .text.Subsample_Bilinear_nv12_yuv420p .nv.info.Subsample_Bilinear_nv12_yuv420p .nv.shared.Subsample_Bilinear_nv12_yuv420p .nv.constant2.Subsample_Bilinear_nv12_yuv420p $__internal_225_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_nv12_yuv420p .nv.constant0.Subsample_Bilinear_nv12_yuv420p .text.Subsample_Bilinear_yuv420p_yuv420p_uv .nv.info.Subsample_Bilinear_yuv420p_yuv420p_uv .nv.shared.Subsample_Bilinear_yuv420p_yuv420p_uv .nv.constant2.Subsample_Bilinear_yuv420p_yuv420p_uv $__internal_226_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv420p_uv .nv.constant0.Subsample_Bilinear_yuv420p_yuv420p_uv .text.Subsample_Bilinear_yuv420p_yuv420p .nv.info.Subsample_Bilinear_yuv420p_yuv420p .nv.shared.Subsample_Bilinear_yuv420p_yuv420p .nv.constant2.Subsample_Bilinear_yuv420p_yuv420p $__internal_227_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Bilinear_yuv420p_yuv420p .nv.constant0.Subsample_Bilinear_yuv420p_yuv420p .text.Subsample_Nearest_rgb0_bgr0_uv .nv.info.Subsample_Nearest_rgb0_bgr0_uv 
.nv.shared.Subsample_Nearest_rgb0_bgr0_uv .rel.nv.constant0.Subsample_Nearest_rgb0_bgr0_uv .nv.constant0.Subsample_Nearest_rgb0_bgr0_uv .text.Subsample_Nearest_rgb0_bgr0 .nv.info.Subsample_Nearest_rgb0_bgr0 .nv.shared.Subsample_Nearest_rgb0_bgr0 .nv.constant2.Subsample_Nearest_rgb0_bgr0 $__internal_228_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_rgb0_bgr0 .nv.constant0.Subsample_Nearest_rgb0_bgr0 .text.Subsample_Nearest_bgr0_rgb0_uv .nv.info.Subsample_Nearest_bgr0_rgb0_uv .nv.shared.Subsample_Nearest_bgr0_rgb0_uv .rel.nv.constant0.Subsample_Nearest_bgr0_rgb0_uv .nv.constant0.Subsample_Nearest_bgr0_rgb0_uv .text.Subsample_Nearest_bgr0_rgb0 .nv.info.Subsample_Nearest_bgr0_rgb0 .nv.shared.Subsample_Nearest_bgr0_rgb0 .nv.constant2.Subsample_Nearest_bgr0_rgb0 $__internal_229_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_bgr0_rgb0 .nv.constant0.Subsample_Nearest_bgr0_rgb0 .text.Subsample_Nearest_rgb0_rgb0_uv .nv.info.Subsample_Nearest_rgb0_rgb0_uv .nv.shared.Subsample_Nearest_rgb0_rgb0_uv .rel.nv.constant0.Subsample_Nearest_rgb0_rgb0_uv .nv.constant0.Subsample_Nearest_rgb0_rgb0_uv .text.Subsample_Nearest_rgb0_rgb0 .nv.info.Subsample_Nearest_rgb0_rgb0 .nv.shared.Subsample_Nearest_rgb0_rgb0 .nv.constant2.Subsample_Nearest_rgb0_rgb0 $__internal_230_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_rgb0_rgb0 .nv.constant0.Subsample_Nearest_rgb0_rgb0 .text.Subsample_Nearest_bgr0_bgr0_uv .nv.info.Subsample_Nearest_bgr0_bgr0_uv .nv.shared.Subsample_Nearest_bgr0_bgr0_uv .rel.nv.constant0.Subsample_Nearest_bgr0_bgr0_uv .nv.constant0.Subsample_Nearest_bgr0_bgr0_uv .text.Subsample_Nearest_bgr0_bgr0 .nv.info.Subsample_Nearest_bgr0_bgr0 .nv.shared.Subsample_Nearest_bgr0_bgr0 .nv.constant2.Subsample_Nearest_bgr0_bgr0 $__internal_231_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_bgr0_bgr0 .nv.constant0.Subsample_Nearest_bgr0_bgr0 .text.Subsample_Nearest_yuv444p16le_yuv444p16le_uv .nv.info.Subsample_Nearest_yuv444p16le_yuv444p16le_uv .nv.shared.Subsample_Nearest_yuv444p16le_yuv444p16le_uv .nv.constant2.Subsample_Nearest_yuv444p16le_yuv444p16le_uv $__internal_232_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p16le_uv .nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p16le_uv .text.Subsample_Nearest_yuv444p16le_yuv444p16le .nv.info.Subsample_Nearest_yuv444p16le_yuv444p16le .nv.shared.Subsample_Nearest_yuv444p16le_yuv444p16le .nv.constant2.Subsample_Nearest_yuv444p16le_yuv444p16le $__internal_233_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p16le .nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p16le .text.Subsample_Nearest_p016le_yuv444p16le_uv .nv.info.Subsample_Nearest_p016le_yuv444p16le_uv .nv.shared.Subsample_Nearest_p016le_yuv444p16le_uv .nv.constant2.Subsample_Nearest_p016le_yuv444p16le_uv $__internal_234_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_yuv444p16le_uv .nv.constant0.Subsample_Nearest_p016le_yuv444p16le_uv .text.Subsample_Nearest_p016le_yuv444p16le .nv.info.Subsample_Nearest_p016le_yuv444p16le .nv.shared.Subsample_Nearest_p016le_yuv444p16le .nv.constant2.Subsample_Nearest_p016le_yuv444p16le $__internal_235_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_yuv444p16le .nv.constant0.Subsample_Nearest_p016le_yuv444p16le .text.Subsample_Nearest_p010le_yuv444p16le_uv .nv.info.Subsample_Nearest_p010le_yuv444p16le_uv 
.nv.shared.Subsample_Nearest_p010le_yuv444p16le_uv .nv.constant2.Subsample_Nearest_p010le_yuv444p16le_uv $__internal_236_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_yuv444p16le_uv .nv.constant0.Subsample_Nearest_p010le_yuv444p16le_uv .text.Subsample_Nearest_p010le_yuv444p16le .nv.info.Subsample_Nearest_p010le_yuv444p16le .nv.shared.Subsample_Nearest_p010le_yuv444p16le .nv.constant2.Subsample_Nearest_p010le_yuv444p16le $__internal_237_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_yuv444p16le .nv.constant0.Subsample_Nearest_p010le_yuv444p16le .text.Subsample_Nearest_yuv444p_yuv444p16le_uv .nv.info.Subsample_Nearest_yuv444p_yuv444p16le_uv .nv.shared.Subsample_Nearest_yuv444p_yuv444p16le_uv .nv.constant2.Subsample_Nearest_yuv444p_yuv444p16le_uv $__internal_238_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_yuv444p16le_uv .nv.constant0.Subsample_Nearest_yuv444p_yuv444p16le_uv .text.Subsample_Nearest_yuv444p_yuv444p16le .nv.info.Subsample_Nearest_yuv444p_yuv444p16le .nv.shared.Subsample_Nearest_yuv444p_yuv444p16le .nv.constant2.Subsample_Nearest_yuv444p_yuv444p16le $__internal_239_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_yuv444p16le .nv.constant0.Subsample_Nearest_yuv444p_yuv444p16le .text.Subsample_Nearest_nv12_yuv444p16le_uv .nv.info.Subsample_Nearest_nv12_yuv444p16le_uv .nv.shared.Subsample_Nearest_nv12_yuv444p16le_uv .nv.constant2.Subsample_Nearest_nv12_yuv444p16le_uv $__internal_240_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_yuv444p16le_uv .nv.constant0.Subsample_Nearest_nv12_yuv444p16le_uv .text.Subsample_Nearest_nv12_yuv444p16le .nv.info.Subsample_Nearest_nv12_yuv444p16le .nv.shared.Subsample_Nearest_nv12_yuv444p16le .nv.constant2.Subsample_Nearest_nv12_yuv444p16le $__internal_241_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_yuv444p16le .nv.constant0.Subsample_Nearest_nv12_yuv444p16le .text.Subsample_Nearest_yuv420p_yuv444p16le_uv .nv.info.Subsample_Nearest_yuv420p_yuv444p16le_uv .nv.shared.Subsample_Nearest_yuv420p_yuv444p16le_uv .nv.constant2.Subsample_Nearest_yuv420p_yuv444p16le_uv $__internal_242_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_yuv444p16le_uv .nv.constant0.Subsample_Nearest_yuv420p_yuv444p16le_uv .text.Subsample_Nearest_yuv420p_yuv444p16le .nv.info.Subsample_Nearest_yuv420p_yuv444p16le .nv.shared.Subsample_Nearest_yuv420p_yuv444p16le .nv.constant2.Subsample_Nearest_yuv420p_yuv444p16le $__internal_243_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_yuv444p16le .nv.constant0.Subsample_Nearest_yuv420p_yuv444p16le .text.Subsample_Nearest_yuv444p16le_p016le_uv .nv.info.Subsample_Nearest_yuv444p16le_p016le_uv .nv.shared.Subsample_Nearest_yuv444p16le_p016le_uv .nv.constant2.Subsample_Nearest_yuv444p16le_p016le_uv $__internal_244_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_p016le_uv .nv.constant0.Subsample_Nearest_yuv444p16le_p016le_uv .text.Subsample_Nearest_yuv444p16le_p016le .nv.info.Subsample_Nearest_yuv444p16le_p016le .nv.shared.Subsample_Nearest_yuv444p16le_p016le .nv.constant2.Subsample_Nearest_yuv444p16le_p016le $__internal_245_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_p016le .nv.constant0.Subsample_Nearest_yuv444p16le_p016le .text.Subsample_Nearest_p016le_p016le_uv 
.nv.info.Subsample_Nearest_p016le_p016le_uv .nv.shared.Subsample_Nearest_p016le_p016le_uv .nv.constant2.Subsample_Nearest_p016le_p016le_uv $__internal_246_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_p016le_uv .nv.constant0.Subsample_Nearest_p016le_p016le_uv .text.Subsample_Nearest_p016le_p016le .nv.info.Subsample_Nearest_p016le_p016le .nv.shared.Subsample_Nearest_p016le_p016le .nv.constant2.Subsample_Nearest_p016le_p016le $__internal_247_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_p016le .nv.constant0.Subsample_Nearest_p016le_p016le .text.Subsample_Nearest_p010le_p016le_uv .nv.info.Subsample_Nearest_p010le_p016le_uv .nv.shared.Subsample_Nearest_p010le_p016le_uv .nv.constant2.Subsample_Nearest_p010le_p016le_uv $__internal_248_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_p016le_uv .nv.constant0.Subsample_Nearest_p010le_p016le_uv .text.Subsample_Nearest_p010le_p016le .nv.info.Subsample_Nearest_p010le_p016le .nv.shared.Subsample_Nearest_p010le_p016le .nv.constant2.Subsample_Nearest_p010le_p016le $__internal_249_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_p016le .nv.constant0.Subsample_Nearest_p010le_p016le .text.Subsample_Nearest_yuv444p_p016le_uv .nv.info.Subsample_Nearest_yuv444p_p016le_uv .nv.shared.Subsample_Nearest_yuv444p_p016le_uv .nv.constant2.Subsample_Nearest_yuv444p_p016le_uv $__internal_250_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_p016le_uv .nv.constant0.Subsample_Nearest_yuv444p_p016le_uv .text.Subsample_Nearest_yuv444p_p016le .nv.info.Subsample_Nearest_yuv444p_p016le .nv.shared.Subsample_Nearest_yuv444p_p016le .nv.constant2.Subsample_Nearest_yuv444p_p016le $__internal_251_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_p016le .nv.constant0.Subsample_Nearest_yuv444p_p016le .text.Subsample_Nearest_nv12_p016le_uv .nv.info.Subsample_Nearest_nv12_p016le_uv .nv.shared.Subsample_Nearest_nv12_p016le_uv .nv.constant2.Subsample_Nearest_nv12_p016le_uv $__internal_252_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_p016le_uv .nv.constant0.Subsample_Nearest_nv12_p016le_uv .text.Subsample_Nearest_nv12_p016le .nv.info.Subsample_Nearest_nv12_p016le .nv.shared.Subsample_Nearest_nv12_p016le .nv.constant2.Subsample_Nearest_nv12_p016le $__internal_253_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_p016le .nv.constant0.Subsample_Nearest_nv12_p016le .text.Subsample_Nearest_yuv420p_p016le_uv .nv.info.Subsample_Nearest_yuv420p_p016le_uv .nv.shared.Subsample_Nearest_yuv420p_p016le_uv .nv.constant2.Subsample_Nearest_yuv420p_p016le_uv $__internal_254_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_p016le_uv .nv.constant0.Subsample_Nearest_yuv420p_p016le_uv .text.Subsample_Nearest_yuv420p_p016le .nv.info.Subsample_Nearest_yuv420p_p016le .nv.shared.Subsample_Nearest_yuv420p_p016le .nv.constant2.Subsample_Nearest_yuv420p_p016le $__internal_255_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_p016le .nv.constant0.Subsample_Nearest_yuv420p_p016le .text.Subsample_Nearest_yuv444p16le_p010le_uv .nv.info.Subsample_Nearest_yuv444p16le_p010le_uv .nv.shared.Subsample_Nearest_yuv444p16le_p010le_uv .nv.constant2.Subsample_Nearest_yuv444p16le_p010le_uv $__internal_256_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_p010le_uv 
.nv.constant0.Subsample_Nearest_yuv444p16le_p010le_uv .text.Subsample_Nearest_yuv444p16le_p010le .nv.info.Subsample_Nearest_yuv444p16le_p010le .nv.shared.Subsample_Nearest_yuv444p16le_p010le .nv.constant2.Subsample_Nearest_yuv444p16le_p010le $__internal_257_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_p010le .nv.constant0.Subsample_Nearest_yuv444p16le_p010le .text.Subsample_Nearest_p016le_p010le_uv .nv.info.Subsample_Nearest_p016le_p010le_uv .nv.shared.Subsample_Nearest_p016le_p010le_uv .nv.constant2.Subsample_Nearest_p016le_p010le_uv $__internal_258_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_p010le_uv .nv.constant0.Subsample_Nearest_p016le_p010le_uv .text.Subsample_Nearest_p016le_p010le .nv.info.Subsample_Nearest_p016le_p010le .nv.shared.Subsample_Nearest_p016le_p010le .nv.constant2.Subsample_Nearest_p016le_p010le $__internal_259_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_p010le .nv.constant0.Subsample_Nearest_p016le_p010le .text.Subsample_Nearest_p010le_p010le_uv .nv.info.Subsample_Nearest_p010le_p010le_uv .nv.shared.Subsample_Nearest_p010le_p010le_uv .nv.constant2.Subsample_Nearest_p010le_p010le_uv $__internal_260_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_p010le_uv .nv.constant0.Subsample_Nearest_p010le_p010le_uv .text.Subsample_Nearest_p010le_p010le .nv.info.Subsample_Nearest_p010le_p010le .nv.shared.Subsample_Nearest_p010le_p010le .nv.constant2.Subsample_Nearest_p010le_p010le $__internal_261_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_p010le .nv.constant0.Subsample_Nearest_p010le_p010le .text.Subsample_Nearest_yuv444p_p010le_uv .nv.info.Subsample_Nearest_yuv444p_p010le_uv .nv.shared.Subsample_Nearest_yuv444p_p010le_uv .nv.constant2.Subsample_Nearest_yuv444p_p010le_uv $__internal_262_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_p010le_uv .nv.constant0.Subsample_Nearest_yuv444p_p010le_uv .text.Subsample_Nearest_yuv444p_p010le .nv.info.Subsample_Nearest_yuv444p_p010le .nv.shared.Subsample_Nearest_yuv444p_p010le .nv.constant2.Subsample_Nearest_yuv444p_p010le $__internal_263_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_p010le .nv.constant0.Subsample_Nearest_yuv444p_p010le .text.Subsample_Nearest_nv12_p010le_uv .nv.info.Subsample_Nearest_nv12_p010le_uv .nv.shared.Subsample_Nearest_nv12_p010le_uv .nv.constant2.Subsample_Nearest_nv12_p010le_uv $__internal_264_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_p010le_uv .nv.constant0.Subsample_Nearest_nv12_p010le_uv .text.Subsample_Nearest_nv12_p010le .nv.info.Subsample_Nearest_nv12_p010le .nv.shared.Subsample_Nearest_nv12_p010le .nv.constant2.Subsample_Nearest_nv12_p010le $__internal_265_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_p010le .nv.constant0.Subsample_Nearest_nv12_p010le .text.Subsample_Nearest_yuv420p_p010le_uv .nv.info.Subsample_Nearest_yuv420p_p010le_uv .nv.shared.Subsample_Nearest_yuv420p_p010le_uv .nv.constant2.Subsample_Nearest_yuv420p_p010le_uv $__internal_266_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_p010le_uv .nv.constant0.Subsample_Nearest_yuv420p_p010le_uv .text.Subsample_Nearest_yuv420p_p010le .nv.info.Subsample_Nearest_yuv420p_p010le .nv.shared.Subsample_Nearest_yuv420p_p010le .nv.constant2.Subsample_Nearest_yuv420p_p010le 
$__internal_267_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_p010le .nv.constant0.Subsample_Nearest_yuv420p_p010le .text.Subsample_Nearest_yuv444p16le_yuv444p_uv .nv.info.Subsample_Nearest_yuv444p16le_yuv444p_uv .nv.shared.Subsample_Nearest_yuv444p16le_yuv444p_uv .nv.constant2.Subsample_Nearest_yuv444p16le_yuv444p_uv $__internal_268_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p_uv .nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p_uv .text.Subsample_Nearest_yuv444p16le_yuv444p .nv.info.Subsample_Nearest_yuv444p16le_yuv444p .nv.shared.Subsample_Nearest_yuv444p16le_yuv444p .nv.constant2.Subsample_Nearest_yuv444p16le_yuv444p $__internal_269_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p .nv.constant0.Subsample_Nearest_yuv444p16le_yuv444p .text.Subsample_Nearest_p016le_yuv444p_uv .nv.info.Subsample_Nearest_p016le_yuv444p_uv .nv.shared.Subsample_Nearest_p016le_yuv444p_uv .nv.constant2.Subsample_Nearest_p016le_yuv444p_uv $__internal_270_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_yuv444p_uv .nv.constant0.Subsample_Nearest_p016le_yuv444p_uv .text.Subsample_Nearest_p016le_yuv444p .nv.info.Subsample_Nearest_p016le_yuv444p .nv.shared.Subsample_Nearest_p016le_yuv444p .nv.constant2.Subsample_Nearest_p016le_yuv444p $__internal_271_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_yuv444p .nv.constant0.Subsample_Nearest_p016le_yuv444p .text.Subsample_Nearest_p010le_yuv444p_uv .nv.info.Subsample_Nearest_p010le_yuv444p_uv .nv.shared.Subsample_Nearest_p010le_yuv444p_uv .nv.constant2.Subsample_Nearest_p010le_yuv444p_uv $__internal_272_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_yuv444p_uv .nv.constant0.Subsample_Nearest_p010le_yuv444p_uv .text.Subsample_Nearest_p010le_yuv444p .nv.info.Subsample_Nearest_p010le_yuv444p .nv.shared.Subsample_Nearest_p010le_yuv444p .nv.constant2.Subsample_Nearest_p010le_yuv444p $__internal_273_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_yuv444p .nv.constant0.Subsample_Nearest_p010le_yuv444p .text.Subsample_Nearest_yuv444p_yuv444p_uv .nv.info.Subsample_Nearest_yuv444p_yuv444p_uv .nv.shared.Subsample_Nearest_yuv444p_yuv444p_uv .nv.constant2.Subsample_Nearest_yuv444p_yuv444p_uv $__internal_274_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_yuv444p_uv .nv.constant0.Subsample_Nearest_yuv444p_yuv444p_uv .text.Subsample_Nearest_yuv444p_yuv444p .nv.info.Subsample_Nearest_yuv444p_yuv444p .nv.shared.Subsample_Nearest_yuv444p_yuv444p .nv.constant2.Subsample_Nearest_yuv444p_yuv444p $__internal_275_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_yuv444p .nv.constant0.Subsample_Nearest_yuv444p_yuv444p .text.Subsample_Nearest_nv12_yuv444p_uv .nv.info.Subsample_Nearest_nv12_yuv444p_uv .nv.shared.Subsample_Nearest_nv12_yuv444p_uv .nv.constant2.Subsample_Nearest_nv12_yuv444p_uv $__internal_276_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_yuv444p_uv .nv.constant0.Subsample_Nearest_nv12_yuv444p_uv .text.Subsample_Nearest_nv12_yuv444p .nv.info.Subsample_Nearest_nv12_yuv444p .nv.shared.Subsample_Nearest_nv12_yuv444p .nv.constant2.Subsample_Nearest_nv12_yuv444p $__internal_277_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_yuv444p .nv.constant0.Subsample_Nearest_nv12_yuv444p 
.text.Subsample_Nearest_yuv420p_yuv444p_uv .nv.info.Subsample_Nearest_yuv420p_yuv444p_uv .nv.shared.Subsample_Nearest_yuv420p_yuv444p_uv .nv.constant2.Subsample_Nearest_yuv420p_yuv444p_uv $__internal_278_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_yuv444p_uv .nv.constant0.Subsample_Nearest_yuv420p_yuv444p_uv .text.Subsample_Nearest_yuv420p_yuv444p .nv.info.Subsample_Nearest_yuv420p_yuv444p .nv.shared.Subsample_Nearest_yuv420p_yuv444p .nv.constant2.Subsample_Nearest_yuv420p_yuv444p $__internal_279_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_yuv444p .nv.constant0.Subsample_Nearest_yuv420p_yuv444p .text.Subsample_Nearest_yuv444p16le_nv12_uv .nv.info.Subsample_Nearest_yuv444p16le_nv12_uv .nv.shared.Subsample_Nearest_yuv444p16le_nv12_uv .nv.constant2.Subsample_Nearest_yuv444p16le_nv12_uv $__internal_280_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_nv12_uv .nv.constant0.Subsample_Nearest_yuv444p16le_nv12_uv .text.Subsample_Nearest_yuv444p16le_nv12 .nv.info.Subsample_Nearest_yuv444p16le_nv12 .nv.shared.Subsample_Nearest_yuv444p16le_nv12 .nv.constant2.Subsample_Nearest_yuv444p16le_nv12 $__internal_281_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_nv12 .nv.constant0.Subsample_Nearest_yuv444p16le_nv12 .text.Subsample_Nearest_p016le_nv12_uv .nv.info.Subsample_Nearest_p016le_nv12_uv .nv.shared.Subsample_Nearest_p016le_nv12_uv .nv.constant2.Subsample_Nearest_p016le_nv12_uv $__internal_282_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_nv12_uv .nv.constant0.Subsample_Nearest_p016le_nv12_uv .text.Subsample_Nearest_p016le_nv12 .nv.info.Subsample_Nearest_p016le_nv12 .nv.shared.Subsample_Nearest_p016le_nv12 .nv.constant2.Subsample_Nearest_p016le_nv12 $__internal_283_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_nv12 .nv.constant0.Subsample_Nearest_p016le_nv12 .text.Subsample_Nearest_p010le_nv12_uv .nv.info.Subsample_Nearest_p010le_nv12_uv .nv.shared.Subsample_Nearest_p010le_nv12_uv .nv.constant2.Subsample_Nearest_p010le_nv12_uv $__internal_284_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_nv12_uv .nv.constant0.Subsample_Nearest_p010le_nv12_uv .text.Subsample_Nearest_p010le_nv12 .nv.info.Subsample_Nearest_p010le_nv12 .nv.shared.Subsample_Nearest_p010le_nv12 .nv.constant2.Subsample_Nearest_p010le_nv12 $__internal_285_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_nv12 .nv.constant0.Subsample_Nearest_p010le_nv12 .text.Subsample_Nearest_yuv444p_nv12_uv .nv.info.Subsample_Nearest_yuv444p_nv12_uv .nv.shared.Subsample_Nearest_yuv444p_nv12_uv .nv.constant2.Subsample_Nearest_yuv444p_nv12_uv $__internal_286_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_nv12_uv .nv.constant0.Subsample_Nearest_yuv444p_nv12_uv .text.Subsample_Nearest_yuv444p_nv12 .nv.info.Subsample_Nearest_yuv444p_nv12 .nv.shared.Subsample_Nearest_yuv444p_nv12 .nv.constant2.Subsample_Nearest_yuv444p_nv12 $__internal_287_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_nv12 .nv.constant0.Subsample_Nearest_yuv444p_nv12 .text.Subsample_Nearest_nv12_nv12_uv .nv.info.Subsample_Nearest_nv12_nv12_uv .nv.shared.Subsample_Nearest_nv12_nv12_uv .nv.constant2.Subsample_Nearest_nv12_nv12_uv $__internal_288_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_nv12_uv 
.nv.constant0.Subsample_Nearest_nv12_nv12_uv .text.Subsample_Nearest_nv12_nv12 .nv.info.Subsample_Nearest_nv12_nv12 .nv.shared.Subsample_Nearest_nv12_nv12 .nv.constant2.Subsample_Nearest_nv12_nv12 $__internal_289_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_nv12 .nv.constant0.Subsample_Nearest_nv12_nv12 .text.Subsample_Nearest_yuv420p_nv12_uv .nv.info.Subsample_Nearest_yuv420p_nv12_uv .nv.shared.Subsample_Nearest_yuv420p_nv12_uv .nv.constant2.Subsample_Nearest_yuv420p_nv12_uv $__internal_290_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_nv12_uv .nv.constant0.Subsample_Nearest_yuv420p_nv12_uv .text.Subsample_Nearest_yuv420p_nv12 .nv.info.Subsample_Nearest_yuv420p_nv12 .nv.shared.Subsample_Nearest_yuv420p_nv12 .nv.constant2.Subsample_Nearest_yuv420p_nv12 $__internal_291_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_nv12 .nv.constant0.Subsample_Nearest_yuv420p_nv12 .text.Subsample_Nearest_yuv444p16le_yuv420p_uv .nv.info.Subsample_Nearest_yuv444p16le_yuv420p_uv .nv.shared.Subsample_Nearest_yuv444p16le_yuv420p_uv .nv.constant2.Subsample_Nearest_yuv444p16le_yuv420p_uv $__internal_292_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_yuv420p_uv .nv.constant0.Subsample_Nearest_yuv444p16le_yuv420p_uv .text.Subsample_Nearest_yuv444p16le_yuv420p .nv.info.Subsample_Nearest_yuv444p16le_yuv420p .nv.shared.Subsample_Nearest_yuv444p16le_yuv420p .nv.constant2.Subsample_Nearest_yuv444p16le_yuv420p $__internal_293_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p16le_yuv420p .nv.constant0.Subsample_Nearest_yuv444p16le_yuv420p .text.Subsample_Nearest_p016le_yuv420p_uv .nv.info.Subsample_Nearest_p016le_yuv420p_uv .nv.shared.Subsample_Nearest_p016le_yuv420p_uv .nv.constant2.Subsample_Nearest_p016le_yuv420p_uv $__internal_294_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_yuv420p_uv .nv.constant0.Subsample_Nearest_p016le_yuv420p_uv .text.Subsample_Nearest_p016le_yuv420p .nv.info.Subsample_Nearest_p016le_yuv420p .nv.shared.Subsample_Nearest_p016le_yuv420p .nv.constant2.Subsample_Nearest_p016le_yuv420p $__internal_295_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p016le_yuv420p .nv.constant0.Subsample_Nearest_p016le_yuv420p .text.Subsample_Nearest_p010le_yuv420p_uv .nv.info.Subsample_Nearest_p010le_yuv420p_uv .nv.shared.Subsample_Nearest_p010le_yuv420p_uv .nv.constant2.Subsample_Nearest_p010le_yuv420p_uv $__internal_296_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_yuv420p_uv .nv.constant0.Subsample_Nearest_p010le_yuv420p_uv .text.Subsample_Nearest_p010le_yuv420p .nv.info.Subsample_Nearest_p010le_yuv420p .nv.shared.Subsample_Nearest_p010le_yuv420p .nv.constant2.Subsample_Nearest_p010le_yuv420p $__internal_297_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_p010le_yuv420p .nv.constant0.Subsample_Nearest_p010le_yuv420p .text.Subsample_Nearest_yuv444p_yuv420p_uv .nv.info.Subsample_Nearest_yuv444p_yuv420p_uv .nv.shared.Subsample_Nearest_yuv444p_yuv420p_uv .nv.constant2.Subsample_Nearest_yuv444p_yuv420p_uv $__internal_298_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_yuv420p_uv .nv.constant0.Subsample_Nearest_yuv444p_yuv420p_uv .text.Subsample_Nearest_yuv444p_yuv420p .nv.info.Subsample_Nearest_yuv444p_yuv420p .nv.shared.Subsample_Nearest_yuv444p_yuv420p 
.nv.constant2.Subsample_Nearest_yuv444p_yuv420p $__internal_299_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv444p_yuv420p .nv.constant0.Subsample_Nearest_yuv444p_yuv420p .text.Subsample_Nearest_nv12_yuv420p_uv .nv.info.Subsample_Nearest_nv12_yuv420p_uv .nv.shared.Subsample_Nearest_nv12_yuv420p_uv .nv.constant2.Subsample_Nearest_nv12_yuv420p_uv $__internal_300_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_yuv420p_uv .nv.constant0.Subsample_Nearest_nv12_yuv420p_uv .text.Subsample_Nearest_nv12_yuv420p .nv.info.Subsample_Nearest_nv12_yuv420p .nv.shared.Subsample_Nearest_nv12_yuv420p .nv.constant2.Subsample_Nearest_nv12_yuv420p $__internal_301_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_nv12_yuv420p .nv.constant0.Subsample_Nearest_nv12_yuv420p .text.Subsample_Nearest_yuv420p_yuv420p_uv .nv.info.Subsample_Nearest_yuv420p_yuv420p_uv .nv.shared.Subsample_Nearest_yuv420p_yuv420p_uv .nv.constant2.Subsample_Nearest_yuv420p_yuv420p_uv $__internal_302_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_yuv420p_uv .nv.constant0.Subsample_Nearest_yuv420p_yuv420p_uv .text.Subsample_Nearest_yuv420p_yuv420p .nv.info.Subsample_Nearest_yuv420p_yuv420p .nv.shared.Subsample_Nearest_yuv420p_yuv420p .nv.constant2.Subsample_Nearest_yuv420p_yuv420p $__internal_303_$__cuda_sm3x_div_rn_noftz_f32_slowpath .rel.nv.constant0.Subsample_Nearest_yuv420p_yuv420p .nv.constant0.Subsample_Nearest_yuv420p_yuv420p .nv.callgraph .nv.prototype .nv.rel.action Subsample_Lanczos_rgb0_bgr0_uv Subsample_Lanczos_rgb0_bgr0 Subsample_Lanczos_bgr0_rgb0_uv Subsample_Lanczos_bgr0_rgb0 Subsample_Lanczos_rgb0_rgb0_uv Subsample_Lanczos_rgb0_rgb0 Subsample_Lanczos_bgr0_bgr0_uv Subsample_Lanczos_bgr0_bgr0 Subsample_Lanczos_yuv444p16le_yuv444p16le_uv Subsample_Lanczos_yuv444p16le_yuv444p16le Subsample_Lanczos_p016le_yuv444p16le_uv Subsample_Lanczos_p016le_yuv444p16le Subsample_Lanczos_p010le_yuv444p16le_uv Subsample_Lanczos_p010le_yuv444p16le Subsample_Lanczos_yuv444p_yuv444p16le_uv Subsample_Lanczos_yuv444p_yuv444p16le Subsample_Lanczos_nv12_yuv444p16le_uv Subsample_Lanczos_nv12_yuv444p16le Subsample_Lanczos_yuv420p_yuv444p16le_uv Subsample_Lanczos_yuv420p_yuv444p16le Subsample_Lanczos_yuv444p16le_p016le_uv Subsample_Lanczos_yuv444p16le_p016le Subsample_Lanczos_p016le_p016le_uv Subsample_Lanczos_p016le_p016le Subsample_Lanczos_p010le_p016le_uv Subsample_Lanczos_p010le_p016le Subsample_Lanczos_yuv444p_p016le_uv Subsample_Lanczos_yuv444p_p016le Subsample_Lanczos_nv12_p016le_uv Subsample_Lanczos_nv12_p016le Subsample_Lanczos_yuv420p_p016le_uv Subsample_Lanczos_yuv420p_p016le Subsample_Lanczos_yuv444p16le_p010le_uv Subsample_Lanczos_yuv444p16le_p010le Subsample_Lanczos_p016le_p010le_uv Subsample_Lanczos_p016le_p010le Subsample_Lanczos_p010le_p010le_uv Subsample_Lanczos_p010le_p010le Subsample_Lanczos_yuv444p_p010le_uv Subsample_Lanczos_yuv444p_p010le Subsample_Lanczos_nv12_p010le_uv Subsample_Lanczos_nv12_p010le Subsample_Lanczos_yuv420p_p010le_uv Subsample_Lanczos_yuv420p_p010le Subsample_Lanczos_yuv444p16le_yuv444p_uv Subsample_Lanczos_yuv444p16le_yuv444p Subsample_Lanczos_p016le_yuv444p_uv Subsample_Lanczos_p016le_yuv444p Subsample_Lanczos_p010le_yuv444p_uv Subsample_Lanczos_p010le_yuv444p Subsample_Lanczos_yuv444p_yuv444p_uv Subsample_Lanczos_yuv444p_yuv444p Subsample_Lanczos_nv12_yuv444p_uv Subsample_Lanczos_nv12_yuv444p Subsample_Lanczos_yuv420p_yuv444p_uv Subsample_Lanczos_yuv420p_yuv444p 
Subsample_Lanczos_yuv444p16le_nv12_uv Subsample_Lanczos_yuv444p16le_nv12 Subsample_Lanczos_p016le_nv12_uv Subsample_Lanczos_p016le_nv12 Subsample_Lanczos_p010le_nv12_uv Subsample_Lanczos_p010le_nv12 Subsample_Lanczos_yuv444p_nv12_uv Subsample_Lanczos_yuv444p_nv12 Subsample_Lanczos_nv12_nv12_uv Subsample_Lanczos_nv12_nv12 Subsample_Lanczos_yuv420p_nv12_uv Subsample_Lanczos_yuv420p_nv12 Subsample_Lanczos_yuv444p16le_yuv420p_uv Subsample_Lanczos_yuv444p16le_yuv420p Subsample_Lanczos_p016le_yuv420p_uv Subsample_Lanczos_p016le_yuv420p Subsample_Lanczos_p010le_yuv420p_uv Subsample_Lanczos_p010le_yuv420p Subsample_Lanczos_yuv444p_yuv420p_uv Subsample_Lanczos_yuv444p_yuv420p Subsample_Lanczos_nv12_yuv420p_uv Subsample_Lanczos_nv12_yuv420p Subsample_Lanczos_yuv420p_yuv420p_uv Subsample_Lanczos_yuv420p_yuv420p Subsample_Bicubic_rgb0_bgr0_uv Subsample_Bicubic_rgb0_bgr0 Subsample_Bicubic_bgr0_rgb0_uv Subsample_Bicubic_bgr0_rgb0 Subsample_Bicubic_rgb0_rgb0_uv Subsample_Bicubic_rgb0_rgb0 Subsample_Bicubic_bgr0_bgr0_uv Subsample_Bicubic_bgr0_bgr0 Subsample_Bicubic_yuv444p16le_yuv444p16le_uv Subsample_Bicubic_yuv444p16le_yuv444p16le Subsample_Bicubic_p016le_yuv444p16le_uv Subsample_Bicubic_p016le_yuv444p16le Subsample_Bicubic_p010le_yuv444p16le_uv Subsample_Bicubic_p010le_yuv444p16le Subsample_Bicubic_yuv444p_yuv444p16le_uv Subsample_Bicubic_yuv444p_yuv444p16le Subsample_Bicubic_nv12_yuv444p16le_uv Subsample_Bicubic_nv12_yuv444p16le Subsample_Bicubic_yuv420p_yuv444p16le_uv Subsample_Bicubic_yuv420p_yuv444p16le Subsample_Bicubic_yuv444p16le_p016le_uv Subsample_Bicubic_yuv444p16le_p016le Subsample_Bicubic_p016le_p016le_uv Subsample_Bicubic_p016le_p016le Subsample_Bicubic_p010le_p016le_uv Subsample_Bicubic_p010le_p016le Subsample_Bicubic_yuv444p_p016le_uv Subsample_Bicubic_yuv444p_p016le Subsample_Bicubic_nv12_p016le_uv Subsample_Bicubic_nv12_p016le Subsample_Bicubic_yuv420p_p016le_uv Subsample_Bicubic_yuv420p_p016le Subsample_Bicubic_yuv444p16le_p010le_uv Subsample_Bicubic_yuv444p16le_p010le Subsample_Bicubic_p016le_p010le_uv Subsample_Bicubic_p016le_p010le Subsample_Bicubic_p010le_p010le_uv Subsample_Bicubic_p010le_p010le Subsample_Bicubic_yuv444p_p010le_uv Subsample_Bicubic_yuv444p_p010le Subsample_Bicubic_nv12_p010le_uv Subsample_Bicubic_nv12_p010le Subsample_Bicubic_yuv420p_p010le_uv Subsample_Bicubic_yuv420p_p010le Subsample_Bicubic_yuv444p16le_yuv444p_uv Subsample_Bicubic_yuv444p16le_yuv444p Subsample_Bicubic_p016le_yuv444p_uv Subsample_Bicubic_p016le_yuv444p Subsample_Bicubic_p010le_yuv444p_uv Subsample_Bicubic_p010le_yuv444p Subsample_Bicubic_yuv444p_yuv444p_uv Subsample_Bicubic_yuv444p_yuv444p Subsample_Bicubic_nv12_yuv444p_uv Subsample_Bicubic_nv12_yuv444p Subsample_Bicubic_yuv420p_yuv444p_uv Subsample_Bicubic_yuv420p_yuv444p Subsample_Bicubic_yuv444p16le_nv12_uv Subsample_Bicubic_yuv444p16le_nv12 Subsample_Bicubic_p016le_nv12_uv Subsample_Bicubic_p016le_nv12 Subsample_Bicubic_p010le_nv12_uv Subsample_Bicubic_p010le_nv12 Subsample_Bicubic_yuv444p_nv12_uv Subsample_Bicubic_yuv444p_nv12 Subsample_Bicubic_nv12_nv12_uv Subsample_Bicubic_nv12_nv12 Subsample_Bicubic_yuv420p_nv12_uv Subsample_Bicubic_yuv420p_nv12 Subsample_Bicubic_yuv444p16le_yuv420p_uv Subsample_Bicubic_yuv444p16le_yuv420p Subsample_Bicubic_p016le_yuv420p_uv Subsample_Bicubic_p016le_yuv420p Subsample_Bicubic_p010le_yuv420p_uv Subsample_Bicubic_p010le_yuv420p Subsample_Bicubic_yuv444p_yuv420p_uv Subsample_Bicubic_yuv444p_yuv420p Subsample_Bicubic_nv12_yuv420p_uv Subsample_Bicubic_nv12_yuv420p Subsample_Bicubic_yuv420p_yuv420p_uv 
Subsample_Bicubic_yuv420p_yuv420p Subsample_Bilinear_rgb0_bgr0_uv Subsample_Bilinear_rgb0_bgr0 Subsample_Bilinear_bgr0_rgb0_uv Subsample_Bilinear_bgr0_rgb0 Subsample_Bilinear_rgb0_rgb0_uv Subsample_Bilinear_rgb0_rgb0 Subsample_Bilinear_bgr0_bgr0_uv Subsample_Bilinear_bgr0_bgr0 Subsample_Bilinear_yuv444p16le_yuv444p16le_uv Subsample_Bilinear_yuv444p16le_yuv444p16le Subsample_Bilinear_p016le_yuv444p16le_uv Subsample_Bilinear_p016le_yuv444p16le Subsample_Bilinear_p010le_yuv444p16le_uv Subsample_Bilinear_p010le_yuv444p16le Subsample_Bilinear_yuv444p_yuv444p16le_uv Subsample_Bilinear_yuv444p_yuv444p16le Subsample_Bilinear_nv12_yuv444p16le_uv Subsample_Bilinear_nv12_yuv444p16le Subsample_Bilinear_yuv420p_yuv444p16le_uv Subsample_Bilinear_yuv420p_yuv444p16le Subsample_Bilinear_yuv444p16le_p016le_uv Subsample_Bilinear_yuv444p16le_p016le Subsample_Bilinear_p016le_p016le_uv Subsample_Bilinear_p016le_p016le Subsample_Bilinear_p010le_p016le_uv Subsample_Bilinear_p010le_p016le Subsample_Bilinear_yuv444p_p016le_uv Subsample_Bilinear_yuv444p_p016le Subsample_Bilinear_nv12_p016le_uv Subsample_Bilinear_nv12_p016le Subsample_Bilinear_yuv420p_p016le_uv Subsample_Bilinear_yuv420p_p016le Subsample_Bilinear_yuv444p16le_p010le_uv Subsample_Bilinear_yuv444p16le_p010le Subsample_Bilinear_p016le_p010le_uv Subsample_Bilinear_p016le_p010le Subsample_Bilinear_p010le_p010le_uv Subsample_Bilinear_p010le_p010le Subsample_Bilinear_yuv444p_p010le_uv Subsample_Bilinear_yuv444p_p010le Subsample_Bilinear_nv12_p010le_uv Subsample_Bilinear_nv12_p010le Subsample_Bilinear_yuv420p_p010le_uv Subsample_Bilinear_yuv420p_p010le Subsample_Bilinear_yuv444p16le_yuv444p_uv Subsample_Bilinear_yuv444p16le_yuv444p Subsample_Bilinear_p016le_yuv444p_uv Subsample_Bilinear_p016le_yuv444p Subsample_Bilinear_p010le_yuv444p_uv Subsample_Bilinear_p010le_yuv444p Subsample_Bilinear_yuv444p_yuv444p_uv Subsample_Bilinear_yuv444p_yuv444p Subsample_Bilinear_nv12_yuv444p_uv Subsample_Bilinear_nv12_yuv444p Subsample_Bilinear_yuv420p_yuv444p_uv Subsample_Bilinear_yuv420p_yuv444p Subsample_Bilinear_yuv444p16le_nv12_uv Subsample_Bilinear_yuv444p16le_nv12 Subsample_Bilinear_p016le_nv12_uv Subsample_Bilinear_p016le_nv12 Subsample_Bilinear_p010le_nv12_uv Subsample_Bilinear_p010le_nv12 Subsample_Bilinear_yuv444p_nv12_uv Subsample_Bilinear_yuv444p_nv12 Subsample_Bilinear_nv12_nv12_uv Subsample_Bilinear_nv12_nv12 Subsample_Bilinear_yuv420p_nv12_uv Subsample_Bilinear_yuv420p_nv12 Subsample_Bilinear_yuv444p16le_yuv420p_uv Subsample_Bilinear_yuv444p16le_yuv420p Subsample_Bilinear_p016le_yuv420p_uv Subsample_Bilinear_p016le_yuv420p Subsample_Bilinear_p010le_yuv420p_uv Subsample_Bilinear_p010le_yuv420p Subsample_Bilinear_yuv444p_yuv420p_uv Subsample_Bilinear_yuv444p_yuv420p Subsample_Bilinear_nv12_yuv420p_uv Subsample_Bilinear_nv12_yuv420p Subsample_Bilinear_yuv420p_yuv420p_uv Subsample_Bilinear_yuv420p_yuv420p Subsample_Nearest_rgb0_bgr0_uv Subsample_Nearest_rgb0_bgr0 Subsample_Nearest_bgr0_rgb0_uv Subsample_Nearest_bgr0_rgb0 Subsample_Nearest_rgb0_rgb0_uv Subsample_Nearest_rgb0_rgb0 Subsample_Nearest_bgr0_bgr0_uv Subsample_Nearest_bgr0_bgr0 Subsample_Nearest_yuv444p16le_yuv444p16le_uv Subsample_Nearest_yuv444p16le_yuv444p16le Subsample_Nearest_p016le_yuv444p16le_uv Subsample_Nearest_p016le_yuv444p16le Subsample_Nearest_p010le_yuv444p16le_uv Subsample_Nearest_p010le_yuv444p16le Subsample_Nearest_yuv444p_yuv444p16le_uv Subsample_Nearest_yuv444p_yuv444p16le Subsample_Nearest_nv12_yuv444p16le_uv Subsample_Nearest_nv12_yuv444p16le Subsample_Nearest_yuv420p_yuv444p16le_uv 
Subsample_Nearest_yuv420p_yuv444p16le Subsample_Nearest_yuv444p16le_p016le_uv Subsample_Nearest_yuv444p16le_p016le Subsample_Nearest_p016le_p016le_uv Subsample_Nearest_p016le_p016le Subsample_Nearest_p010le_p016le_uv Subsample_Nearest_p010le_p016le Subsample_Nearest_yuv444p_p016le_uv Subsample_Nearest_yuv444p_p016le Subsample_Nearest_nv12_p016le_uv Subsample_Nearest_nv12_p016le Subsample_Nearest_yuv420p_p016le_uv Subsample_Nearest_yuv420p_p016le Subsample_Nearest_yuv444p16le_p010le_uv Subsample_Nearest_yuv444p16le_p010le Subsample_Nearest_p016le_p010le_uv Subsample_Nearest_p016le_p010le Subsample_Nearest_p010le_p010le_uv Subsample_Nearest_p010le_p010le Subsample_Nearest_yuv444p_p010le_uv Subsample_Nearest_yuv444p_p010le Subsample_Nearest_nv12_p010le_uv Subsample_Nearest_nv12_p010le Subsample_Nearest_yuv420p_p010le_uv Subsample_Nearest_yuv420p_p010le Subsample_Nearest_yuv444p16le_yuv444p_uv Subsample_Nearest_yuv444p16le_yuv444p Subsample_Nearest_p016le_yuv444p_uv Subsample_Nearest_p016le_yuv444p Subsample_Nearest_p010le_yuv444p_uv Subsample_Nearest_p010le_yuv444p Subsample_Nearest_yuv444p_yuv444p_uv Subsample_Nearest_yuv444p_yuv444p Subsample_Nearest_nv12_yuv444p_uv Subsample_Nearest_nv12_yuv444p Subsample_Nearest_yuv420p_yuv444p_uv Subsample_Nearest_yuv420p_yuv444p Subsample_Nearest_yuv444p16le_nv12_uv Subsample_Nearest_yuv444p16le_nv12 Subsample_Nearest_p016le_nv12_uv Subsample_Nearest_p016le_nv12 Subsample_Nearest_p010le_nv12_uv Subsample_Nearest_p010le_nv12 Subsample_Nearest_yuv444p_nv12_uv Subsample_Nearest_yuv444p_nv12 Subsample_Nearest_nv12_nv12_uv Subsample_Nearest_nv12_nv12 Subsample_Nearest_yuv420p_nv12_uv Subsample_Nearest_yuv420p_nv12 Subsample_Nearest_yuv444p16le_yuv420p_uv Subsample_Nearest_yuv444p16le_yuv420p Subsample_Nearest_p016le_yuv420p_uv Subsample_Nearest_p016le_yuv420p Subsample_Nearest_p010le_yuv420p_uv Subsample_Nearest_p010le_yuv420p Subsample_Nearest_yuv444p_yuv420p_uv Subsample_Nearest_yuv444p_yuv420p Subsample_Nearest_nv12_yuv420p_uv Subsample_Nearest_nv12_yuv420p Subsample_Nearest_yuv420p_yuv420p_uv Subsample_Nearest_yuv420p_yuv420p
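// The list above is the cubin's symbol string table: every scaler kernel is
// named Subsample_<Method>_<in_format>_<out_format>, with a _uv variant that
// presumably handles the subsampled chroma planes. Below is a minimal sketch
// of how one of these kernels could be resolved by name with the CUDA driver
// API. The file path "scale.cubin" and the reduced error handling are
// illustrative assumptions; only the kernel name is taken verbatim from the
// symbol table above.
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

#define CHECK(call) do {                                   \
    CUresult err_ = (call);                                \
    if (err_ != CUDA_SUCCESS) {                            \
        const char *msg_ = "unknown error";                \
        cuGetErrorString(err_, &msg_);                     \
        fprintf(stderr, "%s failed: %s\n", #call, msg_);   \
        exit(1);                                           \
    }                                                      \
} while (0)

int main(void)
{
    CUdevice   dev;
    CUcontext  ctx;
    CUmodule   mod;
    CUfunction fn;

    CHECK(cuInit(0));
    CHECK(cuDeviceGet(&dev, 0));
    CHECK(cuCtxCreate(&ctx, 0, dev));

    // Load the compiled module from disk (hypothetical path).
    CHECK(cuModuleLoad(&mod, "scale.cubin"));

    // Resolve a kernel by its unmangled entry name; this exact symbol
    // appears in the string table dumped above.
    CHECK(cuModuleGetFunction(&fn, mod,
                              "Subsample_Nearest_yuv420p_yuv420p"));
    printf("resolved kernel handle: %p\n", (void *)fn);

    // A real caller would now build a kernelParams[] array matching the
    // kernel's compiled signature and launch it with cuLaunchKernel over
    // a grid covering the destination image; that layout is omitted here
    // because it must match the module exactly.

    CHECK(cuModuleUnload(mod));
    CHECK(cuCtxDestroy(ctx));
    return 0;
}
// Note: the _uv variants would be launched the same way, over the chroma
// plane's (typically halved) grid dimensions rather than the luma grid.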