aacdec.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /*
  2. * AAC decoder
  3. * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
  4. * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
  5. * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
  6. *
  7. * AAC LATM decoder
  8. * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
  9. * Copyright (c) 2010 Janne Grunau <janne-libav@jannau.net>
  10. *
  11. * This file is part of FFmpeg.
  12. *
  13. * FFmpeg is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * FFmpeg is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with FFmpeg; if not, write to the Free Software
  25. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  26. */
  27. /**
  28. * @file
  29. * AAC decoder
  30. * @author Oded Shimon ( ods15 ods15 dyndns org )
  31. * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  32. */
  33. #define FFT_FLOAT 1
  34. #define FFT_FIXED_32 0
  35. #define USE_FIXED 0
  36. #include "libavutil/float_dsp.h"
  37. #include "libavutil/opt.h"
  38. #include "avcodec.h"
  39. #include "internal.h"
  40. #include "get_bits.h"
  41. #include "fft.h"
  42. #include "mdct15.h"
  43. #include "lpc.h"
  44. #include "kbdwin.h"
  45. #include "sinewin.h"
  46. #include "aac.h"
  47. #include "aactab.h"
  48. #include "aacdectab.h"
  49. #include "adts_header.h"
  50. #include "cbrt_data.h"
  51. #include "sbr.h"
  52. #include "aacsbr.h"
  53. #include "mpeg4audio.h"
  54. #include "profiles.h"
  55. #include "libavutil/intfloat.h"
  56. #include <errno.h>
  57. #include <math.h>
  58. #include <stdint.h>
  59. #include <string.h>
  60. #if ARCH_ARM
  61. # include "arm/aac.h"
  62. #elif ARCH_MIPS
  63. # include "mips/aacdec_mips.h"
  64. #endif
  65. static av_always_inline void reset_predict_state(PredictorState *ps)
  66. {
  67. ps->r0 = 0.0f;
  68. ps->r1 = 0.0f;
  69. ps->cor0 = 0.0f;
  70. ps->cor1 = 0.0f;
  71. ps->var0 = 1.0f;
  72. ps->var1 = 1.0f;
  73. }
  74. #ifndef VMUL2
  75. static inline float *VMUL2(float *dst, const float *v, unsigned idx,
  76. const float *scale)
  77. {
  78. float s = *scale;
  79. *dst++ = v[idx & 15] * s;
  80. *dst++ = v[idx>>4 & 15] * s;
  81. return dst;
  82. }
  83. #endif
  84. #ifndef VMUL4
  85. static inline float *VMUL4(float *dst, const float *v, unsigned idx,
  86. const float *scale)
  87. {
  88. float s = *scale;
  89. *dst++ = v[idx & 3] * s;
  90. *dst++ = v[idx>>2 & 3] * s;
  91. *dst++ = v[idx>>4 & 3] * s;
  92. *dst++ = v[idx>>6 & 3] * s;
  93. return dst;
  94. }
  95. #endif
  96. #ifndef VMUL2S
  97. static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
  98. unsigned sign, const float *scale)
  99. {
  100. union av_intfloat32 s0, s1;
  101. s0.f = s1.f = *scale;
  102. s0.i ^= sign >> 1 << 31;
  103. s1.i ^= sign << 31;
  104. *dst++ = v[idx & 15] * s0.f;
  105. *dst++ = v[idx>>4 & 15] * s1.f;
  106. return dst;
  107. }
  108. #endif
  109. #ifndef VMUL4S
  110. static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
  111. unsigned sign, const float *scale)
  112. {
  113. unsigned nz = idx >> 12;
  114. union av_intfloat32 s = { .f = *scale };
  115. union av_intfloat32 t;
  116. t.i = s.i ^ (sign & 1U<<31);
  117. *dst++ = v[idx & 3] * t.f;
  118. sign <<= nz & 1; nz >>= 1;
  119. t.i = s.i ^ (sign & 1U<<31);
  120. *dst++ = v[idx>>2 & 3] * t.f;
  121. sign <<= nz & 1; nz >>= 1;
  122. t.i = s.i ^ (sign & 1U<<31);
  123. *dst++ = v[idx>>4 & 3] * t.f;
  124. sign <<= nz & 1;
  125. t.i = s.i ^ (sign & 1U<<31);
  126. *dst++ = v[idx>>6 & 3] * t.f;
  127. return dst;
  128. }
  129. #endif
  130. static av_always_inline float flt16_round(float pf)
  131. {
  132. union av_intfloat32 tmp;
  133. tmp.f = pf;
  134. tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
  135. return tmp.f;
  136. }
  137. static av_always_inline float flt16_even(float pf)
  138. {
  139. union av_intfloat32 tmp;
  140. tmp.f = pf;
  141. tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
  142. return tmp.f;
  143. }
  144. static av_always_inline float flt16_trunc(float pf)
  145. {
  146. union av_intfloat32 pun;
  147. pun.f = pf;
  148. pun.i &= 0xFFFF0000U;
  149. return pun.f;
  150. }
  151. static av_always_inline void predict(PredictorState *ps, float *coef,
  152. int output_enable)
  153. {
  154. const float a = 0.953125; // 61.0 / 64
  155. const float alpha = 0.90625; // 29.0 / 32
  156. float e0, e1;
  157. float pv;
  158. float k1, k2;
  159. float r0 = ps->r0, r1 = ps->r1;
  160. float cor0 = ps->cor0, cor1 = ps->cor1;
  161. float var0 = ps->var0, var1 = ps->var1;
  162. k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
  163. k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
  164. pv = flt16_round(k1 * r0 + k2 * r1);
  165. if (output_enable)
  166. *coef += pv;
  167. e0 = *coef;
  168. e1 = e0 - k1 * r0;
  169. ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
  170. ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
  171. ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
  172. ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
  173. ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
  174. ps->r0 = flt16_trunc(a * e0);
  175. }
  176. /**
  177. * Apply dependent channel coupling (applied before IMDCT).
  178. *
  179. * @param index index into coupling gain array
  180. */
  181. static void apply_dependent_coupling(AACContext *ac,
  182. SingleChannelElement *target,
  183. ChannelElement *cce, int index)
  184. {
  185. IndividualChannelStream *ics = &cce->ch[0].ics;
  186. const uint16_t *offsets = ics->swb_offset;
  187. float *dest = target->coeffs;
  188. const float *src = cce->ch[0].coeffs;
  189. int g, i, group, k, idx = 0;
  190. if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
  191. av_log(ac->avctx, AV_LOG_ERROR,
  192. "Dependent coupling is not supported together with LTP\n");
  193. return;
  194. }
  195. for (g = 0; g < ics->num_window_groups; g++) {
  196. for (i = 0; i < ics->max_sfb; i++, idx++) {
  197. if (cce->ch[0].band_type[idx] != ZERO_BT) {
  198. const float gain = cce->coup.gain[index][idx];
  199. for (group = 0; group < ics->group_len[g]; group++) {
  200. for (k = offsets[i]; k < offsets[i + 1]; k++) {
  201. // FIXME: SIMDify
  202. dest[group * 128 + k] += gain * src[group * 128 + k];
  203. }
  204. }
  205. }
  206. }
  207. dest += ics->group_len[g] * 128;
  208. src += ics->group_len[g] * 128;
  209. }
  210. }
  211. /**
  212. * Apply independent channel coupling (applied after IMDCT).
  213. *
  214. * @param index index into coupling gain array
  215. */
  216. static void apply_independent_coupling(AACContext *ac,
  217. SingleChannelElement *target,
  218. ChannelElement *cce, int index)
  219. {
  220. int i;
  221. const float gain = cce->coup.gain[index][0];
  222. const float *src = cce->ch[0].ret;
  223. float *dest = target->ret;
  224. const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
  225. for (i = 0; i < len; i++)
  226. dest[i] += gain * src[i];
  227. }
  228. #include "aacdec_template.c"
  229. #define LOAS_SYNC_WORD 0x2b7 ///< 11 bits LOAS sync word
  230. struct LATMContext {
  231. AACContext aac_ctx; ///< containing AACContext
  232. int initialized; ///< initialized after a valid extradata was seen
  233. // parser data
  234. int audio_mux_version_A; ///< LATM syntax version
  235. int frame_length_type; ///< 0/1 variable/fixed frame length
  236. int frame_length; ///< frame length for fixed frame length
  237. };
  238. static inline uint32_t latm_get_value(GetBitContext *b)
  239. {
  240. int length = get_bits(b, 2);
  241. return get_bits_long(b, (length+1)*8);
  242. }
  243. static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
  244. GetBitContext *gb, int asclen)
  245. {
  246. AACContext *ac = &latmctx->aac_ctx;
  247. AVCodecContext *avctx = ac->avctx;
  248. MPEG4AudioConfig m4ac = { 0 };
  249. GetBitContext gbc;
  250. int config_start_bit = get_bits_count(gb);
  251. int sync_extension = 0;
  252. int bits_consumed, esize, i;
  253. if (asclen > 0) {
  254. sync_extension = 1;
  255. asclen = FFMIN(asclen, get_bits_left(gb));
  256. init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
  257. skip_bits_long(&gbc, config_start_bit);
  258. } else if (asclen == 0) {
  259. gbc = *gb;
  260. } else {
  261. return AVERROR_INVALIDDATA;
  262. }
  263. if (get_bits_left(gb) <= 0)
  264. return AVERROR_INVALIDDATA;
  265. bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
  266. &gbc, config_start_bit,
  267. sync_extension);
  268. if (bits_consumed < config_start_bit)
  269. return AVERROR_INVALIDDATA;
  270. bits_consumed -= config_start_bit;
  271. if (asclen == 0)
  272. asclen = bits_consumed;
  273. if (!latmctx->initialized ||
  274. ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
  275. ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
  276. if (latmctx->initialized) {
  277. av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
  278. } else {
  279. av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
  280. }
  281. latmctx->initialized = 0;
  282. esize = (asclen + 7) / 8;
  283. if (avctx->extradata_size < esize) {
  284. av_free(avctx->extradata);
  285. avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
  286. if (!avctx->extradata)
  287. return AVERROR(ENOMEM);
  288. }
  289. avctx->extradata_size = esize;
  290. gbc = *gb;
  291. for (i = 0; i < esize; i++) {
  292. avctx->extradata[i] = get_bits(&gbc, 8);
  293. }
  294. memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
  295. }
  296. skip_bits_long(gb, asclen);
  297. return 0;
  298. }
  299. static int read_stream_mux_config(struct LATMContext *latmctx,
  300. GetBitContext *gb)
  301. {
  302. int ret, audio_mux_version = get_bits(gb, 1);
  303. latmctx->audio_mux_version_A = 0;
  304. if (audio_mux_version)
  305. latmctx->audio_mux_version_A = get_bits(gb, 1);
  306. if (!latmctx->audio_mux_version_A) {
  307. if (audio_mux_version)
  308. latm_get_value(gb); // taraFullness
  309. skip_bits(gb, 1); // allStreamSameTimeFraming
  310. skip_bits(gb, 6); // numSubFrames
  311. // numPrograms
  312. if (get_bits(gb, 4)) { // numPrograms
  313. avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
  314. return AVERROR_PATCHWELCOME;
  315. }
  316. // for each program (which there is only one in DVB)
  317. // for each layer (which there is only one in DVB)
  318. if (get_bits(gb, 3)) { // numLayer
  319. avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
  320. return AVERROR_PATCHWELCOME;
  321. }
  322. // for all but first stream: use_same_config = get_bits(gb, 1);
  323. if (!audio_mux_version) {
  324. if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
  325. return ret;
  326. } else {
  327. int ascLen = latm_get_value(gb);
  328. if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
  329. return ret;
  330. }
  331. latmctx->frame_length_type = get_bits(gb, 3);
  332. switch (latmctx->frame_length_type) {
  333. case 0:
  334. skip_bits(gb, 8); // latmBufferFullness
  335. break;
  336. case 1:
  337. latmctx->frame_length = get_bits(gb, 9);
  338. break;
  339. case 3:
  340. case 4:
  341. case 5:
  342. skip_bits(gb, 6); // CELP frame length table index
  343. break;
  344. case 6:
  345. case 7:
  346. skip_bits(gb, 1); // HVXC frame length table index
  347. break;
  348. }
  349. if (get_bits(gb, 1)) { // other data
  350. if (audio_mux_version) {
  351. latm_get_value(gb); // other_data_bits
  352. } else {
  353. int esc;
  354. do {
  355. if (get_bits_left(gb) < 9)
  356. return AVERROR_INVALIDDATA;
  357. esc = get_bits(gb, 1);
  358. skip_bits(gb, 8);
  359. } while (esc);
  360. }
  361. }
  362. if (get_bits(gb, 1)) // crc present
  363. skip_bits(gb, 8); // config_crc
  364. }
  365. return 0;
  366. }
  367. static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
  368. {
  369. uint8_t tmp;
  370. if (ctx->frame_length_type == 0) {
  371. int mux_slot_length = 0;
  372. do {
  373. if (get_bits_left(gb) < 8)
  374. return AVERROR_INVALIDDATA;
  375. tmp = get_bits(gb, 8);
  376. mux_slot_length += tmp;
  377. } while (tmp == 255);
  378. return mux_slot_length;
  379. } else if (ctx->frame_length_type == 1) {
  380. return ctx->frame_length;
  381. } else if (ctx->frame_length_type == 3 ||
  382. ctx->frame_length_type == 5 ||
  383. ctx->frame_length_type == 7) {
  384. skip_bits(gb, 2); // mux_slot_length_coded
  385. }
  386. return 0;
  387. }
  388. static int read_audio_mux_element(struct LATMContext *latmctx,
  389. GetBitContext *gb)
  390. {
  391. int err;
  392. uint8_t use_same_mux = get_bits(gb, 1);
  393. if (!use_same_mux) {
  394. if ((err = read_stream_mux_config(latmctx, gb)) < 0)
  395. return err;
  396. } else if (!latmctx->aac_ctx.avctx->extradata) {
  397. av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
  398. "no decoder config found\n");
  399. return 1;
  400. }
  401. if (latmctx->audio_mux_version_A == 0) {
  402. int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
  403. if (mux_slot_length_bytes < 0 || mux_slot_length_bytes * 8LL > get_bits_left(gb)) {
  404. av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
  405. return AVERROR_INVALIDDATA;
  406. } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
  407. av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
  408. "frame length mismatch %d << %d\n",
  409. mux_slot_length_bytes * 8, get_bits_left(gb));
  410. return AVERROR_INVALIDDATA;
  411. }
  412. }
  413. return 0;
  414. }
  415. static int latm_decode_frame(AVCodecContext *avctx, void *out,
  416. int *got_frame_ptr, AVPacket *avpkt)
  417. {
  418. struct LATMContext *latmctx = avctx->priv_data;
  419. int muxlength, err;
  420. GetBitContext gb;
  421. if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
  422. return err;
  423. // check for LOAS sync word
  424. if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
  425. return AVERROR_INVALIDDATA;
  426. muxlength = get_bits(&gb, 13) + 3;
  427. // not enough data, the parser should have sorted this out
  428. if (muxlength > avpkt->size)
  429. return AVERROR_INVALIDDATA;
  430. if ((err = read_audio_mux_element(latmctx, &gb)))
  431. return (err < 0) ? err : avpkt->size;
  432. if (!latmctx->initialized) {
  433. if (!avctx->extradata) {
  434. *got_frame_ptr = 0;
  435. return avpkt->size;
  436. } else {
  437. push_output_configuration(&latmctx->aac_ctx);
  438. if ((err = decode_audio_specific_config(
  439. &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
  440. avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
  441. pop_output_configuration(&latmctx->aac_ctx);
  442. return err;
  443. }
  444. latmctx->initialized = 1;
  445. }
  446. }
  447. if (show_bits(&gb, 12) == 0xfff) {
  448. av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
  449. "ADTS header detected, probably as result of configuration "
  450. "misparsing\n");
  451. return AVERROR_INVALIDDATA;
  452. }
  453. switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
  454. case AOT_ER_AAC_LC:
  455. case AOT_ER_AAC_LTP:
  456. case AOT_ER_AAC_LD:
  457. case AOT_ER_AAC_ELD:
  458. err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
  459. break;
  460. default:
  461. err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
  462. }
  463. if (err < 0)
  464. return err;
  465. return muxlength;
  466. }
  467. static av_cold int latm_decode_init(AVCodecContext *avctx)
  468. {
  469. struct LATMContext *latmctx = avctx->priv_data;
  470. int ret = aac_decode_init(avctx);
  471. if (avctx->extradata_size > 0)
  472. latmctx->initialized = !ret;
  473. return ret;
  474. }
  475. AVCodec ff_aac_decoder = {
  476. .name = "aac",
  477. .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
  478. .type = AVMEDIA_TYPE_AUDIO,
  479. .id = AV_CODEC_ID_AAC,
  480. .priv_data_size = sizeof(AACContext),
  481. .init = aac_decode_init,
  482. .close = aac_decode_close,
  483. .decode = aac_decode_frame,
  484. .sample_fmts = (const enum AVSampleFormat[]) {
  485. AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
  486. },
  487. .capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
  488. .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
  489. .channel_layouts = aac_channel_layout,
  490. .flush = flush,
  491. .priv_class = &aac_decoder_class,
  492. .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
  493. };
  494. /*
  495. Note: This decoder filter is intended to decode LATM streams transferred
  496. in MPEG transport streams which only contain one program.
  497. To do a more complex LATM demuxing a separate LATM demuxer should be used.
  498. */
  499. AVCodec ff_aac_latm_decoder = {
  500. .name = "aac_latm",
  501. .long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
  502. .type = AVMEDIA_TYPE_AUDIO,
  503. .id = AV_CODEC_ID_AAC_LATM,
  504. .priv_data_size = sizeof(struct LATMContext),
  505. .init = latm_decode_init,
  506. .close = aac_decode_close,
  507. .decode = latm_decode_frame,
  508. .sample_fmts = (const enum AVSampleFormat[]) {
  509. AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
  510. },
  511. .capabilities = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
  512. .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
  513. .channel_layouts = aac_channel_layout,
  514. .flush = flush,
  515. .profiles = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
  516. };