/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
- #include "vpx_config.h"
- #include "vp8_rtcd.h"
- #include "vpx_ports/mem.h"
/* Six-tap filter coefficient table: 8 rows of 6 * 8 shorts. The predictors
 * in this file select a row with their xoffset / yoffset argument. */
extern const short vp8_six_tap_x86[8][6 * 8];

/*
 * Filter kernels used by the MMX and SSE2 predictors below. They are only
 * declared here; their definitions live outside this file.
 *
 * Naming as used by the callers in this file:
 *   *_h6_*      first (horizontal) pass, writes unsigned short intermediates
 *   *_v6_*      second (vertical) pass, reads the intermediates and writes
 *               unsigned char pixels
 *   *_only_*    single-pass variants that read and write unsigned char
 *   unpack      fills the unsigned short intermediate buffer from unsigned
 *               char input without taking a filter argument
 */

/* MMX kernels. */
extern void vp8_filter_block1d_h6_mmx(unsigned char *src_ptr,
                                      unsigned short *output_ptr,
                                      unsigned int src_pixels_per_line,
                                      unsigned int pixel_step,
                                      unsigned int output_height,
                                      unsigned int output_width,
                                      const short *vp8_filter);
extern void vp8_filter_block1dc_v6_mmx(
    unsigned short *src_ptr, unsigned char *output_ptr, int output_pitch,
    unsigned int pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const short *vp8_filter);

/* SSE2 two-pass kernels (first pass into an intermediate buffer). */
extern void vp8_filter_block1d8_h6_sse2(unsigned char *src_ptr,
                                        unsigned short *output_ptr,
                                        unsigned int src_pixels_per_line,
                                        unsigned int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
                                        const short *vp8_filter);
extern void vp8_filter_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int pixel_step,
                                         unsigned int output_height,
                                         unsigned int output_width,
                                         const short *vp8_filter);

/* SSE2 two-pass kernels (second pass out of the intermediate buffer). */
extern void vp8_filter_block1d8_v6_sse2(
    unsigned short *src_ptr, unsigned char *output_ptr, int dst_pitch,
    unsigned int pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const short *vp8_filter);
extern void vp8_filter_block1d16_v6_sse2(
    unsigned short *src_ptr, unsigned char *output_ptr, int dst_pitch,
    unsigned int pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const short *vp8_filter);

/* Fills the intermediate buffer without filtering coefficients; used when
 * only the vertical pass is needed for a 16-wide block. */
extern void vp8_unpack_block1d16_h6_sse2(unsigned char *src_ptr,
                                         unsigned short *output_ptr,
                                         unsigned int src_pixels_per_line,
                                         unsigned int output_height,
                                         unsigned int output_width);

/* SSE2 single-pass kernels that write straight to the destination. */
extern void vp8_filter_block1d8_h6_only_sse2(unsigned char *src_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int dst_pitch,
                                             unsigned int output_height,
                                             const short *vp8_filter);
extern void vp8_filter_block1d16_h6_only_sse2(unsigned char *src_ptr,
                                              unsigned int src_pixels_per_line,
                                              unsigned char *output_ptr,
                                              int dst_pitch,
                                              unsigned int output_height,
                                              const short *vp8_filter);
extern void vp8_filter_block1d8_v6_only_sse2(unsigned char *src_ptr,
                                             unsigned int src_pixels_per_line,
                                             unsigned char *output_ptr,
                                             int dst_pitch,
                                             unsigned int output_height,
                                             const short *vp8_filter);
- #if HAVE_MMX
- void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line,
- int xoffset, int yoffset, unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned short,
- FData2[16 * 16]); /* Temp data bufffer used in filtering */
- const short *HFilter, *VFilter;
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2,
- src_pixels_per_line, 1, 9, 8, HFilter);
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4, 4, 4,
- VFilter);
- }
- #endif
- #if HAVE_SSE2
- void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr,
- int src_pixels_per_line, int xoffset,
- int yoffset, unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned short,
- FData2[24 * 24]); /* Temp data bufffer used in filtering */
- const short *HFilter, *VFilter;
- if (xoffset) {
- if (yoffset) {
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,
- src_pixels_per_line, 1, 21, 32, HFilter);
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16,
- dst_pitch, VFilter);
- } else {
- /* First-pass only */
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 16, HFilter);
- }
- } else {
- /* Second-pass only */
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,
- src_pixels_per_line, 21, 32);
- vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16,
- dst_pitch, VFilter);
- }
- }
- void vp8_sixtap_predict8x8_sse2(unsigned char *src_ptr, int src_pixels_per_line,
- int xoffset, int yoffset,
- unsigned char *dst_ptr, int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned short,
- FData2[256]); /* Temp data bufffer used in filtering */
- const short *HFilter, *VFilter;
- if (xoffset) {
- if (yoffset) {
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,
- src_pixels_per_line, 1, 13, 16, HFilter);
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, 8,
- dst_pitch, VFilter);
- } else {
- /* First-pass only */
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 8, HFilter);
- }
- } else {
- /* Second-pass only */
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, dst_ptr, dst_pitch, 8,
- VFilter);
- }
- }
- void vp8_sixtap_predict8x4_sse2(unsigned char *src_ptr, int src_pixels_per_line,
- int xoffset, int yoffset,
- unsigned char *dst_ptr, int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned short,
- FData2[256]); /* Temp data bufffer used in filtering */
- const short *HFilter, *VFilter;
- if (xoffset) {
- if (yoffset) {
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,
- src_pixels_per_line, 1, 9, 16, HFilter);
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, 4,
- dst_pitch, VFilter);
- } else {
- /* First-pass only */
- HFilter = vp8_six_tap_x86[xoffset];
- vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 4, HFilter);
- }
- } else {
- /* Second-pass only */
- VFilter = vp8_six_tap_x86[yoffset];
- vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, dst_ptr, dst_pitch, 4,
- VFilter);
- }
- }
- #endif
- #if HAVE_SSSE3
/*
 * SSSE3 filter kernels used by the predictors below; declared here, defined
 * outside this file. All of them share one parameter layout: source pointer
 * and pitch, destination pointer and pitch, number of output rows, and a
 * filter index (the callers pass xoffset or yoffset).
 */

/* Horizontal (h6) kernels for 4-, 8- and 16-wide blocks. */
void vp8_filter_block1d4_h6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned char *output_ptr,
                                  unsigned int output_pitch,
                                  unsigned int output_height,
                                  unsigned int vp8_filter_index);
void vp8_filter_block1d8_h6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pixels_per_line,
                                  unsigned char *output_ptr,
                                  unsigned int output_pitch,
                                  unsigned int output_height,
                                  unsigned int vp8_filter_index);
void vp8_filter_block1d16_h6_ssse3(unsigned char *src_ptr,
                                   unsigned int src_pixels_per_line,
                                   unsigned char *output_ptr,
                                   unsigned int output_pitch,
                                   unsigned int output_height,
                                   unsigned int vp8_filter_index);

/* Vertical (v6) kernels for 4-, 8- and 16-wide blocks. */
void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pitch,
                                  unsigned char *output_ptr,
                                  unsigned int out_pitch,
                                  unsigned int output_height,
                                  unsigned int vp8_filter_index);
void vp8_filter_block1d8_v6_ssse3(unsigned char *src_ptr,
                                  unsigned int src_pitch,
                                  unsigned char *output_ptr,
                                  unsigned int out_pitch,
                                  unsigned int output_height,
                                  unsigned int vp8_filter_index);
void vp8_filter_block1d16_v6_ssse3(unsigned char *src_ptr,
                                   unsigned int src_pitch,
                                   unsigned char *output_ptr,
                                   unsigned int out_pitch,
                                   unsigned int output_height,
                                   unsigned int vp8_filter_index);
- void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line, int xoffset,
- int yoffset, unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]);
- if (xoffset) {
- if (yoffset) {
- vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, FData2, 16, 21,
- xoffset);
- vp8_filter_block1d16_v6_ssse3(FData2, 16, dst_ptr, dst_pitch, 16,
- yoffset);
- } else {
- /* First-pass only */
- vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 16, xoffset);
- }
- } else {
- if (yoffset) {
- /* Second-pass only */
- vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, dst_ptr, dst_pitch, 16,
- yoffset);
- } else {
- /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
- * yoffset==0) case correctly. Add copy function here to guarantee
- * six-tap function handles all possible offsets. */
- vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
- }
- }
- }
- void vp8_sixtap_predict8x8_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line, int xoffset,
- int yoffset, unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned char, FData2[256]);
- if (xoffset) {
- if (yoffset) {
- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, FData2, 8, 13, xoffset);
- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
- } else {
- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 8, xoffset);
- }
- } else {
- if (yoffset) {
- /* Second-pass only */
- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, dst_ptr, dst_pitch, 8,
- yoffset);
- } else {
- /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
- * yoffset==0) case correctly. Add copy function here to guarantee
- * six-tap function handles all possible offsets. */
- vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
- }
- }
- }
- void vp8_sixtap_predict8x4_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line, int xoffset,
- int yoffset, unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned char, FData2[256]);
- if (xoffset) {
- if (yoffset) {
- vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, FData2, 8, 9, xoffset);
- vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
- } else {
- /* First-pass only */
- vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 4, xoffset);
- }
- } else {
- if (yoffset) {
- /* Second-pass only */
- vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, dst_ptr, dst_pitch, 4,
- yoffset);
- } else {
- /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
- * yoffset==0) case correctly. Add copy function here to guarantee
- * six-tap function handles all possible offsets. */
- vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch);
- }
- }
- }
- void vp8_sixtap_predict4x4_ssse3(unsigned char *src_ptr,
- int src_pixels_per_line, int xoffset,
- int yoffset, unsigned char *dst_ptr,
- int dst_pitch) {
- DECLARE_ALIGNED(16, unsigned char, FData2[4 * 9]);
- if (xoffset) {
- if (yoffset) {
- vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, FData2, 4, 9, xoffset);
- vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
- } else {
- vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr,
- dst_pitch, 4, xoffset);
- }
- } else {
- if (yoffset) {
- vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line),
- src_pixels_per_line, dst_ptr, dst_pitch, 4,
- yoffset);
- } else {
- /* ssse3 second-pass only function couldn't handle (xoffset==0 &&
- * yoffset==0) case correctly. Add copy function here to guarantee
- * six-tap function handles all possible offsets. */
- int r;
- for (r = 0; r < 4; ++r) {
- dst_ptr[0] = src_ptr[0];
- dst_ptr[1] = src_ptr[1];
- dst_ptr[2] = src_ptr[2];
- dst_ptr[3] = src_ptr[3];
- dst_ptr += dst_pitch;
- src_ptr += src_pixels_per_line;
- }
- }
- }
- }
- #endif
|