aaccoder.c 40 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964
  1. /*
  2. * AAC coefficients encoder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC coefficients encoder
  24. */
  25. /***********************************
  26. * TODOs:
  27. * speedup quantizer selection
  28. * add sane pulse detection
  29. ***********************************/
  30. #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
  31. #include <float.h>
  32. #include "libavutil/mathematics.h"
  33. #include "mathops.h"
  34. #include "avcodec.h"
  35. #include "put_bits.h"
  36. #include "aac.h"
  37. #include "aacenc.h"
  38. #include "aactab.h"
  39. #include "aacenctab.h"
  40. #include "aacenc_utils.h"
  41. #include "aacenc_quantization.h"
  42. #include "aacenc_is.h"
  43. #include "aacenc_tns.h"
  44. #include "aacenc_ltp.h"
  45. #include "aacenc_pred.h"
  46. #include "libavcodec/aaccoder_twoloop.h"
  47. /* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
  48. * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
  49. #define NOISE_SPREAD_THRESHOLD 0.9f
  50. /* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
  51. * replace low energy non zero bands */
  52. #define NOISE_LAMBDA_REPLACE 1.948f
  53. #include "libavcodec/aaccoder_trellis.h"
  54. /**
  55. * structure used in optimal codebook search
  56. */
  57. typedef struct BandCodingPath {
  58. int prev_idx; ///< pointer to the previous path point
  59. float cost; ///< path cost
  60. int run;
  61. } BandCodingPath;
  62. /**
  63. * Encode band info for single window group bands.
  64. */
  65. static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
  66. int win, int group_len, const float lambda)
  67. {
  68. BandCodingPath path[120][CB_TOT_ALL];
  69. int w, swb, cb, start, size;
  70. int i, j;
  71. const int max_sfb = sce->ics.max_sfb;
  72. const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  73. const int run_esc = (1 << run_bits) - 1;
  74. int idx, ppos, count;
  75. int stackrun[120], stackcb[120], stack_len;
  76. float next_minrd = INFINITY;
  77. int next_mincb = 0;
  78. s->abs_pow34(s->scoefs, sce->coeffs, 1024);
  79. start = win*128;
  80. for (cb = 0; cb < CB_TOT_ALL; cb++) {
  81. path[0][cb].cost = 0.0f;
  82. path[0][cb].prev_idx = -1;
  83. path[0][cb].run = 0;
  84. }
  85. for (swb = 0; swb < max_sfb; swb++) {
  86. size = sce->ics.swb_sizes[swb];
  87. if (sce->zeroes[win*16 + swb]) {
  88. for (cb = 0; cb < CB_TOT_ALL; cb++) {
  89. path[swb+1][cb].prev_idx = cb;
  90. path[swb+1][cb].cost = path[swb][cb].cost;
  91. path[swb+1][cb].run = path[swb][cb].run + 1;
  92. }
  93. } else {
  94. float minrd = next_minrd;
  95. int mincb = next_mincb;
  96. next_minrd = INFINITY;
  97. next_mincb = 0;
  98. for (cb = 0; cb < CB_TOT_ALL; cb++) {
  99. float cost_stay_here, cost_get_here;
  100. float rd = 0.0f;
  101. if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
  102. cb < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
  103. path[swb+1][cb].prev_idx = -1;
  104. path[swb+1][cb].cost = INFINITY;
  105. path[swb+1][cb].run = path[swb][cb].run + 1;
  106. continue;
  107. }
  108. for (w = 0; w < group_len; w++) {
  109. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
  110. rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
  111. &s->scoefs[start + w*128], size,
  112. sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
  113. lambda / band->threshold, INFINITY, NULL, NULL, 0);
  114. }
  115. cost_stay_here = path[swb][cb].cost + rd;
  116. cost_get_here = minrd + rd + run_bits + 4;
  117. if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  118. != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  119. cost_stay_here += run_bits;
  120. if (cost_get_here < cost_stay_here) {
  121. path[swb+1][cb].prev_idx = mincb;
  122. path[swb+1][cb].cost = cost_get_here;
  123. path[swb+1][cb].run = 1;
  124. } else {
  125. path[swb+1][cb].prev_idx = cb;
  126. path[swb+1][cb].cost = cost_stay_here;
  127. path[swb+1][cb].run = path[swb][cb].run + 1;
  128. }
  129. if (path[swb+1][cb].cost < next_minrd) {
  130. next_minrd = path[swb+1][cb].cost;
  131. next_mincb = cb;
  132. }
  133. }
  134. }
  135. start += sce->ics.swb_sizes[swb];
  136. }
  137. //convert resulting path from backward-linked list
  138. stack_len = 0;
  139. idx = 0;
  140. for (cb = 1; cb < CB_TOT_ALL; cb++)
  141. if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  142. idx = cb;
  143. ppos = max_sfb;
  144. while (ppos > 0) {
  145. av_assert1(idx >= 0);
  146. cb = idx;
  147. stackrun[stack_len] = path[ppos][cb].run;
  148. stackcb [stack_len] = cb;
  149. idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  150. ppos -= path[ppos][cb].run;
  151. stack_len++;
  152. }
  153. //perform actual band info encoding
  154. start = 0;
  155. for (i = stack_len - 1; i >= 0; i--) {
  156. cb = aac_cb_out_map[stackcb[i]];
  157. put_bits(&s->pb, 4, cb);
  158. count = stackrun[i];
  159. memset(sce->zeroes + win*16 + start, !cb, count);
  160. //XXX: memset when band_type is also uint8_t
  161. for (j = 0; j < count; j++) {
  162. sce->band_type[win*16 + start] = cb;
  163. start++;
  164. }
  165. while (count >= run_esc) {
  166. put_bits(&s->pb, run_bits, run_esc);
  167. count -= run_esc;
  168. }
  169. put_bits(&s->pb, run_bits, count);
  170. }
  171. }
  172. typedef struct TrellisPath {
  173. float cost;
  174. int prev;
  175. } TrellisPath;
  176. #define TRELLIS_STAGES 121
  177. #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
  178. static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
  179. {
  180. int w, g;
  181. int prevscaler_n = -255, prevscaler_i = 0;
  182. int bands = 0;
  183. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  184. for (g = 0; g < sce->ics.num_swb; g++) {
  185. if (sce->zeroes[w*16+g])
  186. continue;
  187. if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
  188. sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
  189. bands++;
  190. } else if (sce->band_type[w*16+g] == NOISE_BT) {
  191. sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
  192. if (prevscaler_n == -255)
  193. prevscaler_n = sce->sf_idx[w*16+g];
  194. bands++;
  195. }
  196. }
  197. }
  198. if (!bands)
  199. return;
  200. /* Clip the scalefactor indices */
  201. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  202. for (g = 0; g < sce->ics.num_swb; g++) {
  203. if (sce->zeroes[w*16+g])
  204. continue;
  205. if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
  206. sce->sf_idx[w*16+g] = prevscaler_i = av_clip(sce->sf_idx[w*16+g], prevscaler_i - SCALE_MAX_DIFF, prevscaler_i + SCALE_MAX_DIFF);
  207. } else if (sce->band_type[w*16+g] == NOISE_BT) {
  208. sce->sf_idx[w*16+g] = prevscaler_n = av_clip(sce->sf_idx[w*16+g], prevscaler_n - SCALE_MAX_DIFF, prevscaler_n + SCALE_MAX_DIFF);
  209. }
  210. }
  211. }
  212. }
  213. static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
  214. SingleChannelElement *sce,
  215. const float lambda)
  216. {
  217. int q, w, w2, g, start = 0;
  218. int i, j;
  219. int idx;
  220. TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
  221. int bandaddr[TRELLIS_STAGES];
  222. int minq;
  223. float mincost;
  224. float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
  225. int q0, q1, qcnt = 0;
  226. for (i = 0; i < 1024; i++) {
  227. float t = fabsf(sce->coeffs[i]);
  228. if (t > 0.0f) {
  229. q0f = FFMIN(q0f, t);
  230. q1f = FFMAX(q1f, t);
  231. qnrgf += t*t;
  232. qcnt++;
  233. }
  234. }
  235. if (!qcnt) {
  236. memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
  237. memset(sce->zeroes, 1, sizeof(sce->zeroes));
  238. return;
  239. }
  240. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  241. q0 = av_clip(coef2minsf(q0f), 0, SCALE_MAX_POS-1);
  242. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  243. q1 = av_clip(coef2maxsf(q1f), 1, SCALE_MAX_POS);
  244. if (q1 - q0 > 60) {
  245. int q0low = q0;
  246. int q1high = q1;
  247. //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
  248. int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
  249. q1 = qnrg + 30;
  250. q0 = qnrg - 30;
  251. if (q0 < q0low) {
  252. q1 += q0low - q0;
  253. q0 = q0low;
  254. } else if (q1 > q1high) {
  255. q0 -= q1 - q1high;
  256. q1 = q1high;
  257. }
  258. }
  259. // q0 == q1 isn't really a legal situation
  260. if (q0 == q1) {
  261. // the following is indirect but guarantees q1 != q0 && q1 near q0
  262. q1 = av_clip(q0+1, 1, SCALE_MAX_POS);
  263. q0 = av_clip(q1-1, 0, SCALE_MAX_POS - 1);
  264. }
  265. for (i = 0; i < TRELLIS_STATES; i++) {
  266. paths[0][i].cost = 0.0f;
  267. paths[0][i].prev = -1;
  268. }
  269. for (j = 1; j < TRELLIS_STAGES; j++) {
  270. for (i = 0; i < TRELLIS_STATES; i++) {
  271. paths[j][i].cost = INFINITY;
  272. paths[j][i].prev = -2;
  273. }
  274. }
  275. idx = 1;
  276. s->abs_pow34(s->scoefs, sce->coeffs, 1024);
  277. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  278. start = w*128;
  279. for (g = 0; g < sce->ics.num_swb; g++) {
  280. const float *coefs = &sce->coeffs[start];
  281. float qmin, qmax;
  282. int nz = 0;
  283. bandaddr[idx] = w * 16 + g;
  284. qmin = INT_MAX;
  285. qmax = 0.0f;
  286. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  287. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  288. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  289. sce->zeroes[(w+w2)*16+g] = 1;
  290. continue;
  291. }
  292. sce->zeroes[(w+w2)*16+g] = 0;
  293. nz = 1;
  294. for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
  295. float t = fabsf(coefs[w2*128+i]);
  296. if (t > 0.0f)
  297. qmin = FFMIN(qmin, t);
  298. qmax = FFMAX(qmax, t);
  299. }
  300. }
  301. if (nz) {
  302. int minscale, maxscale;
  303. float minrd = INFINITY;
  304. float maxval;
  305. //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
  306. minscale = coef2minsf(qmin);
  307. //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
  308. maxscale = coef2maxsf(qmax);
  309. minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
  310. maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
  311. if (minscale == maxscale) {
  312. maxscale = av_clip(minscale+1, 1, TRELLIS_STATES);
  313. minscale = av_clip(maxscale-1, 0, TRELLIS_STATES - 1);
  314. }
  315. maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
  316. for (q = minscale; q < maxscale; q++) {
  317. float dist = 0;
  318. int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
  319. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  320. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  321. dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
  322. q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0);
  323. }
  324. minrd = FFMIN(minrd, dist);
  325. for (i = 0; i < q1 - q0; i++) {
  326. float cost;
  327. cost = paths[idx - 1][i].cost + dist
  328. + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
  329. if (cost < paths[idx][q].cost) {
  330. paths[idx][q].cost = cost;
  331. paths[idx][q].prev = i;
  332. }
  333. }
  334. }
  335. } else {
  336. for (q = 0; q < q1 - q0; q++) {
  337. paths[idx][q].cost = paths[idx - 1][q].cost + 1;
  338. paths[idx][q].prev = q;
  339. }
  340. }
  341. sce->zeroes[w*16+g] = !nz;
  342. start += sce->ics.swb_sizes[g];
  343. idx++;
  344. }
  345. }
  346. idx--;
  347. mincost = paths[idx][0].cost;
  348. minq = 0;
  349. for (i = 1; i < TRELLIS_STATES; i++) {
  350. if (paths[idx][i].cost < mincost) {
  351. mincost = paths[idx][i].cost;
  352. minq = i;
  353. }
  354. }
  355. while (idx) {
  356. sce->sf_idx[bandaddr[idx]] = minq + q0;
  357. minq = FFMAX(paths[idx][minq].prev, 0);
  358. idx--;
  359. }
  360. //set the same quantizers inside window groups
  361. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  362. for (g = 0; g < sce->ics.num_swb; g++)
  363. for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
  364. sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
  365. }
  366. static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
  367. SingleChannelElement *sce,
  368. const float lambda)
  369. {
  370. int start = 0, i, w, w2, g;
  371. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
  372. float dists[128] = { 0 }, uplims[128] = { 0 };
  373. float maxvals[128];
  374. int fflag, minscaler;
  375. int its = 0;
  376. int allz = 0;
  377. float minthr = INFINITY;
  378. // for values above this the decoder might end up in an endless loop
  379. // due to always having more bits than what can be encoded.
  380. destbits = FFMIN(destbits, 5800);
  381. //some heuristic to determine initial quantizers will reduce search time
  382. //determine zero bands and upper limits
  383. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  384. start = 0;
  385. for (g = 0; g < sce->ics.num_swb; g++) {
  386. int nz = 0;
  387. float uplim = 0.0f, energy = 0.0f;
  388. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  389. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  390. uplim += band->threshold;
  391. energy += band->energy;
  392. if (band->energy <= band->threshold || band->threshold == 0.0f) {
  393. sce->zeroes[(w+w2)*16+g] = 1;
  394. continue;
  395. }
  396. nz = 1;
  397. }
  398. uplims[w*16+g] = uplim *512;
  399. sce->band_type[w*16+g] = 0;
  400. sce->zeroes[w*16+g] = !nz;
  401. if (nz)
  402. minthr = FFMIN(minthr, uplim);
  403. allz |= nz;
  404. start += sce->ics.swb_sizes[g];
  405. }
  406. }
  407. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  408. for (g = 0; g < sce->ics.num_swb; g++) {
  409. if (sce->zeroes[w*16+g]) {
  410. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  411. continue;
  412. }
  413. sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
  414. }
  415. }
  416. if (!allz)
  417. return;
  418. s->abs_pow34(s->scoefs, sce->coeffs, 1024);
  419. ff_quantize_band_cost_cache_init(s);
  420. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  421. start = w*128;
  422. for (g = 0; g < sce->ics.num_swb; g++) {
  423. const float *scaled = s->scoefs + start;
  424. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  425. start += sce->ics.swb_sizes[g];
  426. }
  427. }
  428. //perform two-loop search
  429. //outer loop - improve quality
  430. do {
  431. int tbits, qstep;
  432. minscaler = sce->sf_idx[0];
  433. //inner loop - quantize spectrum to fit into given number of bits
  434. qstep = its ? 1 : 32;
  435. do {
  436. int prev = -1;
  437. tbits = 0;
  438. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  439. start = w*128;
  440. for (g = 0; g < sce->ics.num_swb; g++) {
  441. const float *coefs = sce->coeffs + start;
  442. const float *scaled = s->scoefs + start;
  443. int bits = 0;
  444. int cb;
  445. float dist = 0.0f;
  446. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  447. start += sce->ics.swb_sizes[g];
  448. continue;
  449. }
  450. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  451. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  452. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  453. int b;
  454. dist += quantize_band_cost_cached(s, w + w2, g,
  455. coefs + w2*128,
  456. scaled + w2*128,
  457. sce->ics.swb_sizes[g],
  458. sce->sf_idx[w*16+g],
  459. cb, 1.0f, INFINITY,
  460. &b, NULL, 0);
  461. bits += b;
  462. }
  463. dists[w*16+g] = dist - bits;
  464. if (prev != -1) {
  465. bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  466. }
  467. tbits += bits;
  468. start += sce->ics.swb_sizes[g];
  469. prev = sce->sf_idx[w*16+g];
  470. }
  471. }
  472. if (tbits > destbits) {
  473. for (i = 0; i < 128; i++)
  474. if (sce->sf_idx[i] < 218 - qstep)
  475. sce->sf_idx[i] += qstep;
  476. } else {
  477. for (i = 0; i < 128; i++)
  478. if (sce->sf_idx[i] > 60 - qstep)
  479. sce->sf_idx[i] -= qstep;
  480. }
  481. qstep >>= 1;
  482. if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
  483. qstep = 1;
  484. } while (qstep);
  485. fflag = 0;
  486. minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
  487. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  488. for (g = 0; g < sce->ics.num_swb; g++) {
  489. int prevsc = sce->sf_idx[w*16+g];
  490. if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
  491. if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
  492. sce->sf_idx[w*16+g]--;
  493. else //Try to make sure there is some energy in every band
  494. sce->sf_idx[w*16+g]-=2;
  495. }
  496. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  497. sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
  498. if (sce->sf_idx[w*16+g] != prevsc)
  499. fflag = 1;
  500. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  501. }
  502. }
  503. its++;
  504. } while (fflag && its < 10);
  505. }
  506. static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
  507. {
  508. FFPsyBand *band;
  509. int w, g, w2, i;
  510. int wlen = 1024 / sce->ics.num_windows;
  511. int bandwidth, cutoff;
  512. float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
  513. float *NOR34 = &s->scoefs[3*128];
  514. uint8_t nextband[128];
  515. const float lambda = s->lambda;
  516. const float freq_mult = avctx->sample_rate*0.5f/wlen;
  517. const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
  518. const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
  519. const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f);
  520. const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
  521. int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  522. / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  523. * (lambda / 120.f);
  524. /** Keep this in sync with twoloop's cutoff selection */
  525. float rate_bandwidth_multiplier = 1.5f;
  526. int prev = -1000, prev_sf = -1;
  527. int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
  528. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  529. : (avctx->bit_rate / avctx->channels);
  530. frame_bit_rate *= 1.15f;
  531. if (avctx->cutoff > 0) {
  532. bandwidth = avctx->cutoff;
  533. } else {
  534. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  535. }
  536. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  537. memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
  538. ff_init_nextband_map(sce, nextband);
  539. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  540. int wstart = w*128;
  541. for (g = 0; g < sce->ics.num_swb; g++) {
  542. int noise_sfi;
  543. float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
  544. float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh;
  545. float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
  546. float min_energy = -1.0f, max_energy = 0.0f;
  547. const int start = wstart+sce->ics.swb_offset[g];
  548. const float freq = (start-wstart)*freq_mult;
  549. const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
  550. if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) {
  551. if (!sce->zeroes[w*16+g])
  552. prev_sf = sce->sf_idx[w*16+g];
  553. continue;
  554. }
  555. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  556. band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  557. sfb_energy += band->energy;
  558. spread = FFMIN(spread, band->spread);
  559. threshold += band->threshold;
  560. if (!w2) {
  561. min_energy = max_energy = band->energy;
  562. } else {
  563. min_energy = FFMIN(min_energy, band->energy);
  564. max_energy = FFMAX(max_energy, band->energy);
  565. }
  566. }
  567. /* Ramps down at ~8000Hz and loosens the dist threshold */
  568. dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias;
  569. /* PNS is acceptable when all of these are true:
  570. * 1. high spread energy (noise-like band)
  571. * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
  572. * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
  573. *
  574. * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important)
  575. */
  576. if ((!sce->zeroes[w*16+g] && !ff_sfdelta_can_remove_band(sce, nextband, prev_sf, w*16+g)) ||
  577. ((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.0f/freq_boost)) || spread < spread_threshold ||
  578. (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) ||
  579. min_energy < pns_transient_energy_r * max_energy ) {
  580. sce->pns_ener[w*16+g] = sfb_energy;
  581. if (!sce->zeroes[w*16+g])
  582. prev_sf = sce->sf_idx[w*16+g];
  583. continue;
  584. }
  585. pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread);
  586. noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */
  587. noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */
  588. if (prev != -1000) {
  589. int noise_sfdiff = noise_sfi - prev + SCALE_DIFF_ZERO;
  590. if (noise_sfdiff < 0 || noise_sfdiff > 2*SCALE_MAX_DIFF) {
  591. if (!sce->zeroes[w*16+g])
  592. prev_sf = sce->sf_idx[w*16+g];
  593. continue;
  594. }
  595. }
  596. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  597. float band_energy, scale, pns_senergy;
  598. const int start_c = (w+w2)*128+sce->ics.swb_offset[g];
  599. band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  600. for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
  601. s->random_state = lcg_random(s->random_state);
  602. PNS[i] = s->random_state;
  603. }
  604. band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
  605. scale = noise_amp/sqrtf(band_energy);
  606. s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
  607. pns_senergy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
  608. pns_energy += pns_senergy;
  609. s->abs_pow34(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
  610. s->abs_pow34(PNS34, PNS, sce->ics.swb_sizes[g]);
  611. dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
  612. NOR34,
  613. sce->ics.swb_sizes[g],
  614. sce->sf_idx[(w+w2)*16+g],
  615. sce->band_alt[(w+w2)*16+g],
  616. lambda/band->threshold, INFINITY, NULL, NULL, 0);
  617. /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */
  618. dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold;
  619. }
  620. if (g && sce->band_type[w*16+g-1] == NOISE_BT) {
  621. dist2 += 5;
  622. } else {
  623. dist2 += 9;
  624. }
  625. energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */
  626. sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy;
  627. if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) {
  628. sce->band_type[w*16+g] = NOISE_BT;
  629. sce->zeroes[w*16+g] = 0;
  630. prev = noise_sfi;
  631. } else {
  632. if (!sce->zeroes[w*16+g])
  633. prev_sf = sce->sf_idx[w*16+g];
  634. }
  635. }
  636. }
  637. }
  638. static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
  639. {
  640. FFPsyBand *band;
  641. int w, g, w2;
  642. int wlen = 1024 / sce->ics.num_windows;
  643. int bandwidth, cutoff;
  644. const float lambda = s->lambda;
  645. const float freq_mult = avctx->sample_rate*0.5f/wlen;
  646. const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f));
  647. const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f);
  648. int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  649. / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  650. * (lambda / 120.f);
  651. /** Keep this in sync with twoloop's cutoff selection */
  652. float rate_bandwidth_multiplier = 1.5f;
  653. int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
  654. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  655. : (avctx->bit_rate / avctx->channels);
  656. frame_bit_rate *= 1.15f;
  657. if (avctx->cutoff > 0) {
  658. bandwidth = avctx->cutoff;
  659. } else {
  660. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  661. }
  662. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  663. memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type));
  664. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  665. for (g = 0; g < sce->ics.num_swb; g++) {
  666. float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f;
  667. float min_energy = -1.0f, max_energy = 0.0f;
  668. const int start = sce->ics.swb_offset[g];
  669. const float freq = start*freq_mult;
  670. const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
  671. if (freq < NOISE_LOW_LIMIT || start >= cutoff) {
  672. sce->can_pns[w*16+g] = 0;
  673. continue;
  674. }
  675. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  676. band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  677. sfb_energy += band->energy;
  678. spread = FFMIN(spread, band->spread);
  679. threshold += band->threshold;
  680. if (!w2) {
  681. min_energy = max_energy = band->energy;
  682. } else {
  683. min_energy = FFMIN(min_energy, band->energy);
  684. max_energy = FFMAX(max_energy, band->energy);
  685. }
  686. }
  687. /* PNS is acceptable when all of these are true:
  688. * 1. high spread energy (noise-like band)
  689. * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed)
  690. * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS)
  691. */
  692. sce->pns_ener[w*16+g] = sfb_energy;
  693. if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) {
  694. sce->can_pns[w*16+g] = 0;
  695. } else {
  696. sce->can_pns[w*16+g] = 1;
  697. }
  698. }
  699. }
  700. }
  701. static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
  702. {
  703. int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
  704. uint8_t nextband0[128], nextband1[128];
  705. float *M = s->scoefs + 128*0, *S = s->scoefs + 128*1;
  706. float *L34 = s->scoefs + 128*2, *R34 = s->scoefs + 128*3;
  707. float *M34 = s->scoefs + 128*4, *S34 = s->scoefs + 128*5;
  708. const float lambda = s->lambda;
  709. const float mslambda = FFMIN(1.0f, lambda / 120.f);
  710. SingleChannelElement *sce0 = &cpe->ch[0];
  711. SingleChannelElement *sce1 = &cpe->ch[1];
  712. if (!cpe->common_window)
  713. return;
  714. /** Scout out next nonzero bands */
  715. ff_init_nextband_map(sce0, nextband0);
  716. ff_init_nextband_map(sce1, nextband1);
  717. prev_mid = sce0->sf_idx[0];
  718. prev_side = sce1->sf_idx[0];
  719. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  720. start = 0;
  721. for (g = 0; g < sce0->ics.num_swb; g++) {
  722. float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
  723. if (!cpe->is_mask[w*16+g])
  724. cpe->ms_mask[w*16+g] = 0;
  725. if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
  726. float Mmax = 0.0f, Smax = 0.0f;
  727. /* Must compute mid/side SF and book for the whole window group */
  728. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  729. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  730. M[i] = (sce0->coeffs[start+(w+w2)*128+i]
  731. + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
  732. S[i] = M[i]
  733. - sce1->coeffs[start+(w+w2)*128+i];
  734. }
  735. s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]);
  736. s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]);
  737. for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
  738. Mmax = FFMAX(Mmax, M34[i]);
  739. Smax = FFMAX(Smax, S34[i]);
  740. }
  741. }
  742. for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
  743. float dist1 = 0.0f, dist2 = 0.0f;
  744. int B0 = 0, B1 = 0;
  745. int minidx;
  746. int mididx, sididx;
  747. int midcb, sidcb;
  748. minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
  749. mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
  750. sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
  751. if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
  752. && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
  753. || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
  754. /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
  755. continue;
  756. }
  757. midcb = find_min_book(Mmax, mididx);
  758. sidcb = find_min_book(Smax, sididx);
  759. /* No CB can be zero */
  760. midcb = FFMAX(1,midcb);
  761. sidcb = FFMAX(1,sidcb);
  762. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  763. FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
  764. FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
  765. float minthr = FFMIN(band0->threshold, band1->threshold);
  766. int b1,b2,b3,b4;
  767. for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  768. M[i] = (sce0->coeffs[start+(w+w2)*128+i]
  769. + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
  770. S[i] = M[i]
  771. - sce1->coeffs[start+(w+w2)*128+i];
  772. }
  773. s->abs_pow34(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
  774. s->abs_pow34(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
  775. s->abs_pow34(M34, M, sce0->ics.swb_sizes[g]);
  776. s->abs_pow34(S34, S, sce0->ics.swb_sizes[g]);
  777. dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
  778. L34,
  779. sce0->ics.swb_sizes[g],
  780. sce0->sf_idx[w*16+g],
  781. sce0->band_type[w*16+g],
  782. lambda / band0->threshold, INFINITY, &b1, NULL, 0);
  783. dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
  784. R34,
  785. sce1->ics.swb_sizes[g],
  786. sce1->sf_idx[w*16+g],
  787. sce1->band_type[w*16+g],
  788. lambda / band1->threshold, INFINITY, &b2, NULL, 0);
  789. dist2 += quantize_band_cost(s, M,
  790. M34,
  791. sce0->ics.swb_sizes[g],
  792. mididx,
  793. midcb,
  794. lambda / minthr, INFINITY, &b3, NULL, 0);
  795. dist2 += quantize_band_cost(s, S,
  796. S34,
  797. sce1->ics.swb_sizes[g],
  798. sididx,
  799. sidcb,
  800. mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
  801. B0 += b1+b2;
  802. B1 += b3+b4;
  803. dist1 -= b1+b2;
  804. dist2 -= b3+b4;
  805. }
  806. cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
  807. if (cpe->ms_mask[w*16+g]) {
  808. if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
  809. sce0->sf_idx[w*16+g] = mididx;
  810. sce1->sf_idx[w*16+g] = sididx;
  811. sce0->band_type[w*16+g] = midcb;
  812. sce1->band_type[w*16+g] = sidcb;
  813. } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
  814. /* ms_mask unneeded, and it confuses some decoders */
  815. cpe->ms_mask[w*16+g] = 0;
  816. }
  817. break;
  818. } else if (B1 > B0) {
  819. /* More boost won't fix this */
  820. break;
  821. }
  822. }
  823. }
  824. if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
  825. prev_mid = sce0->sf_idx[w*16+g];
  826. if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
  827. prev_side = sce1->sf_idx[w*16+g];
  828. start += sce0->ics.swb_sizes[g];
  829. }
  830. }
  831. }
  832. const AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
  833. [AAC_CODER_ANMR] = {
  834. search_for_quantizers_anmr,
  835. encode_window_bands_info,
  836. quantize_and_encode_band,
  837. ff_aac_encode_tns_info,
  838. ff_aac_encode_ltp_info,
  839. ff_aac_encode_main_pred,
  840. ff_aac_adjust_common_pred,
  841. ff_aac_adjust_common_ltp,
  842. ff_aac_apply_main_pred,
  843. ff_aac_apply_tns,
  844. ff_aac_update_ltp,
  845. ff_aac_ltp_insert_new_frame,
  846. set_special_band_scalefactors,
  847. search_for_pns,
  848. mark_pns,
  849. ff_aac_search_for_tns,
  850. ff_aac_search_for_ltp,
  851. search_for_ms,
  852. ff_aac_search_for_is,
  853. ff_aac_search_for_pred,
  854. },
  855. [AAC_CODER_TWOLOOP] = {
  856. search_for_quantizers_twoloop,
  857. codebook_trellis_rate,
  858. quantize_and_encode_band,
  859. ff_aac_encode_tns_info,
  860. ff_aac_encode_ltp_info,
  861. ff_aac_encode_main_pred,
  862. ff_aac_adjust_common_pred,
  863. ff_aac_adjust_common_ltp,
  864. ff_aac_apply_main_pred,
  865. ff_aac_apply_tns,
  866. ff_aac_update_ltp,
  867. ff_aac_ltp_insert_new_frame,
  868. set_special_band_scalefactors,
  869. search_for_pns,
  870. mark_pns,
  871. ff_aac_search_for_tns,
  872. ff_aac_search_for_ltp,
  873. search_for_ms,
  874. ff_aac_search_for_is,
  875. ff_aac_search_for_pred,
  876. },
  877. [AAC_CODER_FAST] = {
  878. search_for_quantizers_fast,
  879. codebook_trellis_rate,
  880. quantize_and_encode_band,
  881. ff_aac_encode_tns_info,
  882. ff_aac_encode_ltp_info,
  883. ff_aac_encode_main_pred,
  884. ff_aac_adjust_common_pred,
  885. ff_aac_adjust_common_ltp,
  886. ff_aac_apply_main_pred,
  887. ff_aac_apply_tns,
  888. ff_aac_update_ltp,
  889. ff_aac_ltp_insert_new_frame,
  890. set_special_band_scalefactors,
  891. search_for_pns,
  892. mark_pns,
  893. ff_aac_search_for_tns,
  894. ff_aac_search_for_ltp,
  895. search_for_ms,
  896. ff_aac_search_for_is,
  897. ff_aac_search_for_pred,
  898. },
  899. };