aacenc_pred.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. /*
  2. * AAC encoder main-type prediction
  3. * Copyright (C) 2015 Rostislav Pehlivanov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder main-type prediction
  24. * @author Rostislav Pehlivanov ( atomnuker gmail com )
  25. */
  26. #include "aactab.h"
  27. #include "aacenc_pred.h"
  28. #include "aacenc_utils.h"
  29. #include "aacenc_is.h" /* <- Needed for common window distortions */
  30. #include "aacenc_quantization.h"
  /**
   * Undo the prediction decision for one scalefactor band: when prediction is
   * flagged for band sfb of sce, clear the flag and put back the band type
   * that was saved in band_alt.
   *
   * The body is wrapped in do { } while (0) so that the macro expands to a
   * single statement and can be used as the body of an unbraced if/else or
   * loop. Both arguments are parenthesized, but they are still evaluated more
   * than once, so do not pass expressions with side effects.
   */
  #define RESTORE_PRED(sce, sfb)                                    \
      do {                                                          \
          if ((sce)->ics.prediction_used[(sfb)]) {                  \
              (sce)->ics.prediction_used[(sfb)] = 0;                \
              (sce)->band_type[(sfb)] = (sce)->band_alt[(sfb)];     \
          }                                                         \
      } while (0)
  36. static inline float flt16_round(float pf)
  37. {
  38. union av_intfloat32 tmp;
  39. tmp.f = pf;
  40. tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
  41. return tmp.f;
  42. }
  43. static inline float flt16_even(float pf)
  44. {
  45. union av_intfloat32 tmp;
  46. tmp.f = pf;
  47. tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
  48. return tmp.f;
  49. }
  50. static inline float flt16_trunc(float pf)
  51. {
  52. union av_intfloat32 pun;
  53. pun.f = pf;
  54. pun.i &= 0xFFFF0000U;
  55. return pun.f;
  56. }
  57. static inline void predict(PredictorState *ps, float *coef, float *rcoef, int set)
  58. {
  59. float k2;
  60. const float a = 0.953125; // 61.0 / 64
  61. const float alpha = 0.90625; // 29.0 / 32
  62. const float k1 = ps->k1;
  63. const float r0 = ps->r0, r1 = ps->r1;
  64. const float cor0 = ps->cor0, cor1 = ps->cor1;
  65. const float var0 = ps->var0, var1 = ps->var1;
  66. const float e0 = *coef - ps->x_est;
  67. const float e1 = e0 - k1 * r0;
  68. if (set)
  69. *coef = e0;
  70. ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
  71. ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
  72. ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
  73. ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
  74. ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
  75. ps->r0 = flt16_trunc(a * e0);
  76. /* Prediction for next frame */
  77. ps->k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
  78. k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
  79. *rcoef = ps->x_est = flt16_round(ps->k1*ps->r0 + k2*ps->r1);
  80. }
  81. static inline void reset_predict_state(PredictorState *ps)
  82. {
  83. ps->r0 = 0.0f;
  84. ps->r1 = 0.0f;
  85. ps->k1 = 0.0f;
  86. ps->cor0 = 0.0f;
  87. ps->cor1 = 0.0f;
  88. ps->var0 = 1.0f;
  89. ps->var1 = 1.0f;
  90. ps->x_est = 0.0f;
  91. }
  92. static inline void reset_all_predictors(PredictorState *ps)
  93. {
  94. int i;
  95. for (i = 0; i < MAX_PREDICTORS; i++)
  96. reset_predict_state(&ps[i]);
  97. }
  98. static inline void reset_predictor_group(SingleChannelElement *sce, int group_num)
  99. {
  100. int i;
  101. PredictorState *ps = sce->predictor_state;
  102. for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
  103. reset_predict_state(&ps[i]);
  104. }
  105. void ff_aac_apply_main_pred(AACEncContext *s, SingleChannelElement *sce)
  106. {
  107. int sfb, k;
  108. const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
  109. if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
  110. for (sfb = 0; sfb < pmax; sfb++) {
  111. for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
  112. predict(&sce->predictor_state[k], &sce->coeffs[k], &sce->prcoeffs[k],
  113. sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
  114. }
  115. }
  116. if (sce->ics.predictor_reset_group) {
  117. reset_predictor_group(sce, sce->ics.predictor_reset_group);
  118. }
  119. } else {
  120. reset_all_predictors(sce->predictor_state);
  121. }
  122. }
  123. /* If inc = 0 you can check if this returns 0 to see if you can reset freely */
  124. static inline int update_counters(IndividualChannelStream *ics, int inc)
  125. {
  126. int i;
  127. for (i = 1; i < 31; i++) {
  128. ics->predictor_reset_count[i] += inc;
  129. if (ics->predictor_reset_count[i] > PRED_RESET_FRAME_MIN)
  130. return i; /* Reset this immediately */
  131. }
  132. return 0;
  133. }
  134. void ff_aac_adjust_common_pred(AACEncContext *s, ChannelElement *cpe)
  135. {
  136. int start, w, w2, g, i, count = 0;
  137. SingleChannelElement *sce0 = &cpe->ch[0];
  138. SingleChannelElement *sce1 = &cpe->ch[1];
  139. const int pmax0 = FFMIN(sce0->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
  140. const int pmax1 = FFMIN(sce1->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
  141. const int pmax = FFMIN(pmax0, pmax1);
  142. if (!cpe->common_window ||
  143. sce0->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE ||
  144. sce1->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
  145. return;
  146. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  147. start = 0;
  148. for (g = 0; g < sce0->ics.num_swb; g++) {
  149. int sfb = w*16+g;
  150. int sum = sce0->ics.prediction_used[sfb] + sce1->ics.prediction_used[sfb];
  151. float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f;
  152. struct AACISError ph_err1, ph_err2, *erf;
  153. if (sfb < PRED_SFB_START || sfb > pmax || sum != 2) {
  154. RESTORE_PRED(sce0, sfb);
  155. RESTORE_PRED(sce1, sfb);
  156. start += sce0->ics.swb_sizes[g];
  157. continue;
  158. }
  159. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  160. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  161. float coef0 = sce0->pcoeffs[start+(w+w2)*128+i];
  162. float coef1 = sce1->pcoeffs[start+(w+w2)*128+i];
  163. ener0 += coef0*coef0;
  164. ener1 += coef1*coef1;
  165. ener01 += (coef0 + coef1)*(coef0 + coef1);
  166. }
  167. }
  168. ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g,
  169. ener0, ener1, ener01, 1, -1);
  170. ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g,
  171. ener0, ener1, ener01, 1, +1);
  172. erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2;
  173. if (erf->pass) {
  174. sce0->ics.prediction_used[sfb] = 1;
  175. sce1->ics.prediction_used[sfb] = 1;
  176. count++;
  177. } else {
  178. RESTORE_PRED(sce0, sfb);
  179. RESTORE_PRED(sce1, sfb);
  180. }
  181. start += sce0->ics.swb_sizes[g];
  182. }
  183. }
  184. sce1->ics.predictor_present = sce0->ics.predictor_present = !!count;
  185. }
  186. static void update_pred_resets(SingleChannelElement *sce)
  187. {
  188. int i, max_group_id_c, max_frame = 0;
  189. float avg_frame = 0.0f;
  190. IndividualChannelStream *ics = &sce->ics;
  191. /* Update the counters and immediately update any frame behind schedule */
  192. if ((ics->predictor_reset_group = update_counters(&sce->ics, 1)))
  193. return;
  194. for (i = 1; i < 31; i++) {
  195. /* Count-based */
  196. if (ics->predictor_reset_count[i] > max_frame) {
  197. max_group_id_c = i;
  198. max_frame = ics->predictor_reset_count[i];
  199. }
  200. avg_frame = (ics->predictor_reset_count[i] + avg_frame)/2;
  201. }
  202. if (max_frame > PRED_RESET_MIN) {
  203. ics->predictor_reset_group = max_group_id_c;
  204. } else {
  205. ics->predictor_reset_group = 0;
  206. }
  207. }
  208. void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce)
  209. {
  210. int sfb, i, count = 0, cost_coeffs = 0, cost_pred = 0;
  211. const int pmax = FFMIN(sce->ics.max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
  212. float *O34 = &s->scoefs[128*0], *P34 = &s->scoefs[128*1];
  213. float *SENT = &s->scoefs[128*2], *S34 = &s->scoefs[128*3];
  214. float *QERR = &s->scoefs[128*4];
  215. if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
  216. sce->ics.predictor_present = 0;
  217. return;
  218. }
  219. if (!sce->ics.predictor_initialized) {
  220. reset_all_predictors(sce->predictor_state);
  221. sce->ics.predictor_initialized = 1;
  222. memcpy(sce->prcoeffs, sce->coeffs, 1024*sizeof(float));
  223. for (i = 1; i < 31; i++)
  224. sce->ics.predictor_reset_count[i] = i;
  225. }
  226. update_pred_resets(sce);
  227. memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
  228. for (sfb = PRED_SFB_START; sfb < pmax; sfb++) {
  229. int cost1, cost2, cb_p;
  230. float dist1, dist2, dist_spec_err = 0.0f;
  231. const int cb_n = sce->zeroes[sfb] ? 0 : sce->band_type[sfb];
  232. const int cb_min = sce->zeroes[sfb] ? 0 : 1;
  233. const int cb_max = sce->zeroes[sfb] ? 0 : RESERVED_BT;
  234. const int start_coef = sce->ics.swb_offset[sfb];
  235. const int num_coeffs = sce->ics.swb_offset[sfb + 1] - start_coef;
  236. const FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[sfb];
  237. if (start_coef + num_coeffs > MAX_PREDICTORS ||
  238. (s->cur_channel && sce->band_type[sfb] >= INTENSITY_BT2) ||
  239. sce->band_type[sfb] == NOISE_BT)
  240. continue;
  241. /* Normal coefficients */
  242. s->abs_pow34(O34, &sce->coeffs[start_coef], num_coeffs);
  243. dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL,
  244. O34, num_coeffs, sce->sf_idx[sfb],
  245. cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0);
  246. cost_coeffs += cost1;
  247. /* Encoded coefficients - needed for #bits, band type and quant. error */
  248. for (i = 0; i < num_coeffs; i++)
  249. SENT[i] = sce->coeffs[start_coef + i] - sce->prcoeffs[start_coef + i];
  250. s->abs_pow34(S34, SENT, num_coeffs);
  251. if (cb_n < RESERVED_BT)
  252. cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, S34), sce->sf_idx[sfb]), cb_min, cb_max);
  253. else
  254. cb_p = cb_n;
  255. quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs,
  256. sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY,
  257. &cost2, NULL, 0);
  258. /* Reconstructed coefficients - needed for distortion measurements */
  259. for (i = 0; i < num_coeffs; i++)
  260. sce->prcoeffs[start_coef + i] += QERR[i] != 0.0f ? (sce->prcoeffs[start_coef + i] - QERR[i]) : 0.0f;
  261. s->abs_pow34(P34, &sce->prcoeffs[start_coef], num_coeffs);
  262. if (cb_n < RESERVED_BT)
  263. cb_p = av_clip(find_min_book(find_max_val(1, num_coeffs, P34), sce->sf_idx[sfb]), cb_min, cb_max);
  264. else
  265. cb_p = cb_n;
  266. dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL,
  267. P34, num_coeffs, sce->sf_idx[sfb],
  268. cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0);
  269. for (i = 0; i < num_coeffs; i++)
  270. dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]);
  271. dist_spec_err *= s->lambda / band->threshold;
  272. dist2 += dist_spec_err;
  273. if (dist2 <= dist1 && cb_p <= cb_n) {
  274. cost_pred += cost2;
  275. sce->ics.prediction_used[sfb] = 1;
  276. sce->band_alt[sfb] = cb_n;
  277. sce->band_type[sfb] = cb_p;
  278. count++;
  279. } else {
  280. cost_pred += cost1;
  281. sce->band_alt[sfb] = cb_p;
  282. }
  283. }
  284. if (count && cost_coeffs < cost_pred) {
  285. count = 0;
  286. for (sfb = PRED_SFB_START; sfb < pmax; sfb++)
  287. RESTORE_PRED(sce, sfb);
  288. memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
  289. }
  290. sce->ics.predictor_present = !!count;
  291. }
  292. /**
  293. * Encoder predictors data.
  294. */
  295. void ff_aac_encode_main_pred(AACEncContext *s, SingleChannelElement *sce)
  296. {
  297. int sfb;
  298. IndividualChannelStream *ics = &sce->ics;
  299. const int pmax = FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[s->samplerate_index]);
  300. if (s->profile != FF_PROFILE_AAC_MAIN ||
  301. !ics->predictor_present)
  302. return;
  303. put_bits(&s->pb, 1, !!ics->predictor_reset_group);
  304. if (ics->predictor_reset_group)
  305. put_bits(&s->pb, 5, ics->predictor_reset_group);
  306. for (sfb = 0; sfb < pmax; sfb++)
  307. put_bits(&s->pb, 1, ics->prediction_used[sfb]);
  308. }