1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- ;*****************************************************************************
- ;* x86-optimized functions for threshold filter
- ;*
- ;* Copyright (C) 2017 Paul B Mahol
- ;*
- ;* This file is part of FFmpeg.
- ;*
- ;* FFmpeg is free software; you can redistribute it and/or
- ;* modify it under the terms of the GNU Lesser General Public
- ;* License as published by the Free Software Foundation; either
- ;* version 2.1 of the License, or (at your option) any later version.
- ;*
- ;* FFmpeg is distributed in the hope that it will be useful,
- ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;* Lesser General Public License for more details.
- ;*
- ;* You should have received a copy of the GNU Lesser General Public
- ;* License along with FFmpeg; if not, write to the Free Software
- ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ;*****************************************************************************
- %include "libavutil/x86/x86util.asm"
SECTION_RODATA

; Sign-bit masks handed to THRESHOLD as its third argument. Each is exactly
; 16 bytes so it can be loaded/broadcast as one 128-bit lane.
pb_128:   times 16 db 0x80          ; 0x80 in every byte (8-bit samples)
pb_128_0: times  8 db 0x00, 0x80    ; bytes 00,80 = 0x8000 per little-endian word (16-bit samples)

SECTION .text
;------------------------------------------------------------------------------
; THRESHOLD depth, suffix, bias
;   %1  sample depth in bits (8 or 16); also the suffix of the emitted
;       function name (threshold%1)
;   %2  element-size suffix appended to pcmpgt: b (bytes) or w (words)
;   %3  label of a 16-byte constant that is XORed into both compare operands
;
; Arguments, in cglobal order:
;   in, threshold, min, max, out                               plane pointers
;   ilinesize, tlinesize, flinesize, slinesize, olinesize      per-row strides,
;                                                              added to the
;                                                              pointers above
;   w, h                                                       row width in
;                                                              samples, row count
;
; Per sample: out = (threshold > in) ? min : max
; The XOR with %3 flips the sign bit of every element, so the signed pcmpgt
; orders the original values as unsigned.
;
; Both loops test their condition at the bottom, so at least one vector of
; at least one row is always processed. A row is handled in whole vectors of
; mmsize bytes: when the row's byte length is not a multiple of mmsize, the
; last load/store of the row reaches past it.
;------------------------------------------------------------------------------
%macro THRESHOLD 3
%if ARCH_X86_64
cglobal threshold%1, 10, 13, 5, in, threshold, min, max, out, ilinesize, tlinesize, flinesize, slinesize, olinesize, w, h, x
    ; Only the first 10 arguments are auto-loaded; fetch w and h (32-bit)
    ; from their argument slots.
    mov             hd, dword hm
    mov             wd, dword wm
%else
cglobal threshold%1, 5, 7, 5, in, threshold, min, max, out, w, x
    ; Only the five plane pointers are auto-loaded here. The strides and the
    ; row counter are used directly from their argument slots.
%define     hd          r11mp
%define     olinesizeq  r9mp
%define     slinesizeq  r8mp
%define     flinesizeq  r7mp
%define     tlinesizeq  r6mp
%define     ilinesizeq  r5mp
    mov             wd, r10m
%endif

    VBROADCASTI128  m4, [%3]                ; m4 = sign-bit mask in every lane

%if %1 == 16
    add             wq, wq                  ; samples -> bytes (2 bytes per sample)
%endif

    ; Point every plane at the END of its row and index with a negative
    ; offset counting up to zero: the add that advances the offset also sets
    ; the flags for the loop branch, so no separate compare is needed.
    add           outq, wq
    add           maxq, wq
    add           minq, wq
    add     thresholdq, wq
    add            inq, wq
    neg             wq                      ; wq = -(row length in bytes)

.next_row:
    mov             xq, wq                  ; restart at the beginning of the row

.next_vec:
    movu            m0, [thresholdq + xq]
    movu            m1, [inq + xq]
    movu            m3, [maxq + xq]
    movu            m2, [minq + xq]
    pxor            m1, m4                  ; bias both operands so that the
    pxor            m0, m4                  ; signed compare acts as unsigned
    pcmpgt%2        m0, m1                  ; m0 = all-ones where threshold > in
    ; The mask is deliberately in m0: the non-VEX pblendvb encoding takes
    ; its mask implicitly from xmm0.
    PBLENDVB        m3, m2, m0              ; m3 = mask ? min : max
    movu   [outq + xq], m3
    add             xq, mmsize
    jl              .next_vec               ; continue while the offset is negative

    ; Step every plane down one row. The row counter is updated last
    ; because jg consumes the flags it produces.
    add           outq, olinesizeq
    add           maxq, slinesizeq
    add           minq, flinesizeq
    add     thresholdq, tlinesizeq
    add            inq, ilinesizeq
    sub             hd, 1
    jg              .next_row
    RET
%endmacro
; Emit the 8-bit and the 16-bit variant for whichever instruction set is
; currently selected by INIT_XMM / INIT_YMM.
%macro THRESHOLD_FUNCS 0
THRESHOLD  8, b, pb_128
THRESHOLD 16, w, pb_128_0
%endmacro

; 128-bit SSE4 versions
INIT_XMM sse4
THRESHOLD_FUNCS

; 256-bit AVX2 versions, only when the build can assemble AVX2
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
THRESHOLD_FUNCS
%endif
|