opusenc.c 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738
  1. /*
  2. * Opus encoder
  3. * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "opusenc.h"
  22. #include "opus_pvq.h"
  23. #include "opusenc_psy.h"
  24. #include "opustab.h"
  25. #include "libavutil/float_dsp.h"
  26. #include "libavutil/opt.h"
  27. #include "internal.h"
  28. #include "bytestream.h"
  29. #include "audio_frame_queue.h"
  30. typedef struct OpusEncContext {
  31. AVClass *av_class;
  32. OpusEncOptions options;
  33. OpusPsyContext psyctx;
  34. AVCodecContext *avctx;
  35. AudioFrameQueue afq;
  36. AVFloatDSPContext *dsp;
  37. MDCT15Context *mdct[CELT_BLOCK_NB];
  38. CeltPVQ *pvq;
  39. struct FFBufQueue bufqueue;
  40. uint8_t enc_id[64];
  41. int enc_id_bits;
  42. OpusPacketInfo packet;
  43. int channels;
  44. CeltFrame *frame;
  45. OpusRangeCoder *rc;
  46. /* Actual energy the decoder will have */
  47. float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];
  48. DECLARE_ALIGNED(32, float, scratch)[2048];
  49. } OpusEncContext;
  50. static void opus_write_extradata(AVCodecContext *avctx)
  51. {
  52. uint8_t *bs = avctx->extradata;
  53. bytestream_put_buffer(&bs, "OpusHead", 8);
  54. bytestream_put_byte (&bs, 0x1);
  55. bytestream_put_byte (&bs, avctx->channels);
  56. bytestream_put_le16 (&bs, avctx->initial_padding);
  57. bytestream_put_le32 (&bs, avctx->sample_rate);
  58. bytestream_put_le16 (&bs, 0x0);
  59. bytestream_put_byte (&bs, 0x0); /* Default layout */
  60. }
  61. static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
  62. {
  63. int tmp = 0x0, extended_toc = 0;
  64. static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
  65. /* Silk Hybrid Celt Layer */
  66. /* NB MB WB SWB FB NB MB WB SWB FB NB MB WB SWB FB Bandwidth */
  67. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 17, 0, 21, 25, 29 } }, /* 2.5 ms */
  68. { { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 }, { 18, 0, 22, 26, 30 } }, /* 5 ms */
  69. { { 1, 5, 9, 0, 0 }, { 0, 0, 0, 13, 15 }, { 19, 0, 23, 27, 31 } }, /* 10 ms */
  70. { { 2, 6, 10, 0, 0 }, { 0, 0, 0, 14, 16 }, { 20, 0, 24, 28, 32 } }, /* 20 ms */
  71. { { 3, 7, 11, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 40 ms */
  72. { { 4, 8, 12, 0, 0 }, { 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0 } }, /* 60 ms */
  73. };
  74. int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
  75. *fsize_needed = 0;
  76. if (!cfg)
  77. return 1;
  78. if (s->packet.frames == 2) { /* 2 packets */
  79. if (s->frame[0].framebits == s->frame[1].framebits) { /* same size */
  80. tmp = 0x1;
  81. } else { /* different size */
  82. tmp = 0x2;
  83. *fsize_needed = 1; /* put frame sizes in the packet */
  84. }
  85. } else if (s->packet.frames > 2) {
  86. tmp = 0x3;
  87. extended_toc = 1;
  88. }
  89. tmp |= (s->channels > 1) << 2; /* Stereo or mono */
  90. tmp |= (cfg - 1) << 3; /* codec configuration */
  91. *toc++ = tmp;
  92. if (extended_toc) {
  93. for (int i = 0; i < (s->packet.frames - 1); i++)
  94. *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
  95. tmp = (*fsize_needed) << 7; /* vbr flag */
  96. tmp |= (0) << 6; /* padding flag */
  97. tmp |= s->packet.frames;
  98. *toc++ = tmp;
  99. }
  100. *size = 1 + extended_toc;
  101. return 0;
  102. }
  103. static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
  104. {
  105. AVFrame *cur = NULL;
  106. const int subframesize = s->avctx->frame_size;
  107. int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
  108. cur = ff_bufqueue_get(&s->bufqueue);
  109. for (int ch = 0; ch < f->channels; ch++) {
  110. CeltBlock *b = &f->block[ch];
  111. const void *input = cur->extended_data[ch];
  112. size_t bps = av_get_bytes_per_sample(cur->format);
  113. memcpy(b->overlap, input, bps*cur->nb_samples);
  114. }
  115. av_frame_free(&cur);
  116. for (int sf = 0; sf < subframes; sf++) {
  117. if (sf != (subframes - 1))
  118. cur = ff_bufqueue_get(&s->bufqueue);
  119. else
  120. cur = ff_bufqueue_peek(&s->bufqueue, 0);
  121. for (int ch = 0; ch < f->channels; ch++) {
  122. CeltBlock *b = &f->block[ch];
  123. const void *input = cur->extended_data[ch];
  124. const size_t bps = av_get_bytes_per_sample(cur->format);
  125. const size_t left = (subframesize - cur->nb_samples)*bps;
  126. const size_t len = FFMIN(subframesize, cur->nb_samples)*bps;
  127. memcpy(&b->samples[sf*subframesize], input, len);
  128. memset(&b->samples[cur->nb_samples], 0, left);
  129. }
  130. /* Last frame isn't popped off and freed yet - we need it for overlap */
  131. if (sf != (subframes - 1))
  132. av_frame_free(&cur);
  133. }
  134. }
  135. /* Apply the pre emphasis filter */
  136. static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
  137. {
  138. const int subframesize = s->avctx->frame_size;
  139. const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
  140. /* Filter overlap */
  141. for (int ch = 0; ch < f->channels; ch++) {
  142. CeltBlock *b = &f->block[ch];
  143. float m = b->emph_coeff;
  144. for (int i = 0; i < CELT_OVERLAP; i++) {
  145. float sample = b->overlap[i];
  146. b->overlap[i] = sample - m;
  147. m = sample * CELT_EMPH_COEFF;
  148. }
  149. b->emph_coeff = m;
  150. }
  151. /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
  152. for (int sf = 0; sf < subframes; sf++) {
  153. for (int ch = 0; ch < f->channels; ch++) {
  154. CeltBlock *b = &f->block[ch];
  155. float m = b->emph_coeff;
  156. for (int i = 0; i < subframesize; i++) {
  157. float sample = b->samples[sf*subframesize + i];
  158. b->samples[sf*subframesize + i] = sample - m;
  159. m = sample * CELT_EMPH_COEFF;
  160. }
  161. if (sf != (subframes - 1))
  162. b->emph_coeff = m;
  163. }
  164. }
  165. }
  166. /* Create the window and do the mdct */
  167. static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
  168. {
  169. float *win = s->scratch, *temp = s->scratch + 1920;
  170. if (f->transient) {
  171. for (int ch = 0; ch < f->channels; ch++) {
  172. CeltBlock *b = &f->block[ch];
  173. float *src1 = b->overlap;
  174. for (int t = 0; t < f->blocks; t++) {
  175. float *src2 = &b->samples[CELT_OVERLAP*t];
  176. s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
  177. s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
  178. ff_celt_window - 8, 128);
  179. src1 = src2;
  180. s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
  181. }
  182. }
  183. } else {
  184. int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
  185. int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
  186. memset(win, 0, wlen*sizeof(float));
  187. for (int ch = 0; ch < f->channels; ch++) {
  188. CeltBlock *b = &f->block[ch];
  189. /* Overlap */
  190. s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
  191. memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
  192. /* Samples, flat top window */
  193. memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
  194. /* Samples, windowed */
  195. s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
  196. ff_celt_window - 8, 128);
  197. memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
  198. s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
  199. }
  200. }
  201. for (int ch = 0; ch < f->channels; ch++) {
  202. CeltBlock *block = &f->block[ch];
  203. for (int i = 0; i < CELT_MAX_BANDS; i++) {
  204. float ener = 0.0f;
  205. int band_offset = ff_celt_freq_bands[i] << f->size;
  206. int band_size = ff_celt_freq_range[i] << f->size;
  207. float *coeffs = &block->coeffs[band_offset];
  208. for (int j = 0; j < band_size; j++)
  209. ener += coeffs[j]*coeffs[j];
  210. block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
  211. ener = 1.0f/block->lin_energy[i];
  212. for (int j = 0; j < band_size; j++)
  213. coeffs[j] *= ener;
  214. block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
  215. /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */
  216. block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE);
  217. }
  218. }
  219. }
  220. static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc)
  221. {
  222. int tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
  223. int bits = f->transient ? 2 : 4;
  224. tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
  225. for (int i = f->start_band; i < f->end_band; i++) {
  226. if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
  227. const int tbit = (diff ^ 1) == f->tf_change[i];
  228. ff_opus_rc_enc_log(rc, tbit, bits);
  229. diff ^= tbit;
  230. tf_changed |= diff;
  231. }
  232. bits = f->transient ? 4 : 5;
  233. }
  234. if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
  235. ff_celt_tf_select[f->size][f->transient][1][tf_changed]) {
  236. ff_opus_rc_enc_log(rc, f->tf_select, 1);
  237. tf_select = f->tf_select;
  238. }
  239. for (int i = f->start_band; i < f->end_band; i++)
  240. f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
  241. }
  242. static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
  243. {
  244. float gain = f->pf_gain;
  245. int txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
  246. ff_opus_rc_enc_log(rc, f->pfilter, 1);
  247. if (!f->pfilter)
  248. return;
  249. /* Octave */
  250. txval = FFMIN(octave, 6);
  251. ff_opus_rc_enc_uint(rc, txval, 6);
  252. octave = txval;
  253. /* Period */
  254. txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
  255. ff_opus_rc_put_raw(rc, period, 4 + octave);
  256. period = txval + (16 << octave) - 1;
  257. /* Gain */
  258. txval = FFMIN(((int)(gain / 0.09375f)) - 1, 7);
  259. ff_opus_rc_put_raw(rc, txval, 3);
  260. gain = 0.09375f * (txval + 1);
  261. /* Tapset */
  262. if ((opus_rc_tell(rc) + 2) <= f->framebits)
  263. ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset);
  264. else
  265. tapset = 0;
  266. /* Finally create the coeffs */
  267. for (int i = 0; i < 2; i++) {
  268. CeltBlock *block = &f->block[i];
  269. block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
  270. block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
  271. block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
  272. block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
  273. }
  274. }
  275. static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
  276. float last_energy[][CELT_MAX_BANDS], int intra)
  277. {
  278. float alpha, beta, prev[2] = { 0, 0 };
  279. const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
  280. /* Inter is really just differential coding */
  281. if (opus_rc_tell(rc) + 3 <= f->framebits)
  282. ff_opus_rc_enc_log(rc, intra, 3);
  283. else
  284. intra = 0;
  285. if (intra) {
  286. alpha = 0.0f;
  287. beta = 1.0f - (4915.0f/32768.0f);
  288. } else {
  289. alpha = ff_celt_alpha_coef[f->size];
  290. beta = ff_celt_beta_coef[f->size];
  291. }
  292. for (int i = f->start_band; i < f->end_band; i++) {
  293. for (int ch = 0; ch < f->channels; ch++) {
  294. CeltBlock *block = &f->block[ch];
  295. const int left = f->framebits - opus_rc_tell(rc);
  296. const float last = FFMAX(-9.0f, last_energy[ch][i]);
  297. float diff = block->energy[i] - prev[ch] - last*alpha;
  298. int q_en = lrintf(diff);
  299. if (left >= 15) {
  300. ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6);
  301. } else if (left >= 2) {
  302. q_en = av_clip(q_en, -1, 1);
  303. ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small);
  304. } else if (left >= 1) {
  305. q_en = av_clip(q_en, -1, 0);
  306. ff_opus_rc_enc_log(rc, (q_en & 1), 1);
  307. } else q_en = -1;
  308. block->error_energy[i] = q_en - diff;
  309. prev[ch] += beta * q_en;
  310. }
  311. }
  312. }
  313. static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc,
  314. float last_energy[][CELT_MAX_BANDS])
  315. {
  316. uint32_t inter, intra;
  317. OPUS_RC_CHECKPOINT_SPAWN(rc);
  318. exp_quant_coarse(rc, f, last_energy, 1);
  319. intra = OPUS_RC_CHECKPOINT_BITS(rc);
  320. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  321. exp_quant_coarse(rc, f, last_energy, 0);
  322. inter = OPUS_RC_CHECKPOINT_BITS(rc);
  323. if (inter > intra) { /* Unlikely */
  324. OPUS_RC_CHECKPOINT_ROLLBACK(rc);
  325. exp_quant_coarse(rc, f, last_energy, 1);
  326. }
  327. }
  328. static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc)
  329. {
  330. for (int i = f->start_band; i < f->end_band; i++) {
  331. if (!f->fine_bits[i])
  332. continue;
  333. for (int ch = 0; ch < f->channels; ch++) {
  334. CeltBlock *block = &f->block[ch];
  335. int quant, lim = (1 << f->fine_bits[i]);
  336. float offset, diff = 0.5f - block->error_energy[i];
  337. quant = av_clip(floor(diff*lim), 0, lim - 1);
  338. ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]);
  339. offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f);
  340. block->error_energy[i] -= offset;
  341. }
  342. }
  343. }
  344. static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
  345. {
  346. for (int priority = 0; priority < 2; priority++) {
  347. for (int i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
  348. if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
  349. continue;
  350. for (int ch = 0; ch < f->channels; ch++) {
  351. CeltBlock *block = &f->block[ch];
  352. const float err = block->error_energy[i];
  353. const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
  354. const int sign = FFABS(err + offset) < FFABS(err - offset);
  355. ff_opus_rc_put_raw(rc, sign, 1);
  356. block->error_energy[i] -= offset*(1 - 2*sign);
  357. }
  358. }
  359. }
  360. }
  361. static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
  362. CeltFrame *f, int index)
  363. {
  364. ff_opus_rc_enc_init(rc);
  365. ff_opus_psy_celt_frame_init(&s->psyctx, f, index);
  366. celt_frame_setup_input(s, f);
  367. if (f->silence) {
  368. if (f->framebits >= 16)
  369. ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit singalling) */
  370. for (int ch = 0; ch < s->channels; ch++)
  371. memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
  372. return;
  373. }
  374. /* Filters */
  375. celt_apply_preemph_filter(s, f);
  376. if (f->pfilter) {
  377. ff_opus_rc_enc_log(rc, 0, 15);
  378. celt_enc_quant_pfilter(rc, f);
  379. }
  380. /* Transform */
  381. celt_frame_mdct(s, f);
  382. /* Need to handle transient/non-transient switches at any point during analysis */
  383. while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
  384. celt_frame_mdct(s, f);
  385. ff_opus_rc_enc_init(rc);
  386. /* Silence */
  387. ff_opus_rc_enc_log(rc, 0, 15);
  388. /* Pitch filter */
  389. if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
  390. celt_enc_quant_pfilter(rc, f);
  391. /* Transient flag */
  392. if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
  393. ff_opus_rc_enc_log(rc, f->transient, 3);
  394. /* Main encoding */
  395. celt_quant_coarse (f, rc, s->last_quantized_energy);
  396. celt_enc_tf (f, rc);
  397. ff_celt_bitalloc (f, rc, 1);
  398. celt_quant_fine (f, rc);
  399. ff_celt_quant_bands(f, rc);
  400. /* Anticollapse bit */
  401. if (f->anticollapse_needed)
  402. ff_opus_rc_put_raw(rc, f->anticollapse, 1);
  403. /* Final per-band energy adjustments from leftover bits */
  404. celt_quant_final(s, rc, f);
  405. for (int ch = 0; ch < f->channels; ch++) {
  406. CeltBlock *block = &f->block[ch];
  407. for (int i = 0; i < CELT_MAX_BANDS; i++)
  408. s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
  409. }
  410. }
  411. static inline int write_opuslacing(uint8_t *dst, int v)
  412. {
  413. dst[0] = FFMIN(v - FFALIGN(v - 255, 4), v);
  414. dst[1] = v - dst[0] >> 2;
  415. return 1 + (v >= 252);
  416. }
  417. static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
  418. {
  419. int offset, fsize_needed;
  420. /* Write toc */
  421. opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
  422. /* Frame sizes if needed */
  423. if (fsize_needed) {
  424. for (int i = 0; i < s->packet.frames - 1; i++) {
  425. offset += write_opuslacing(avpkt->data + offset,
  426. s->frame[i].framebits >> 3);
  427. }
  428. }
  429. /* Packets */
  430. for (int i = 0; i < s->packet.frames; i++) {
  431. ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
  432. s->frame[i].framebits >> 3);
  433. offset += s->frame[i].framebits >> 3;
  434. }
  435. avpkt->size = offset;
  436. }
  437. /* Used as overlap for the first frame and padding for the last encoded packet */
  438. static AVFrame *spawn_empty_frame(OpusEncContext *s)
  439. {
  440. AVFrame *f = av_frame_alloc();
  441. if (!f)
  442. return NULL;
  443. f->format = s->avctx->sample_fmt;
  444. f->nb_samples = s->avctx->frame_size;
  445. f->channel_layout = s->avctx->channel_layout;
  446. if (av_frame_get_buffer(f, 4)) {
  447. av_frame_free(&f);
  448. return NULL;
  449. }
  450. for (int i = 0; i < s->channels; i++) {
  451. size_t bps = av_get_bytes_per_sample(f->format);
  452. memset(f->extended_data[i], 0, bps*f->nb_samples);
  453. }
  454. return f;
  455. }
  456. static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
  457. const AVFrame *frame, int *got_packet_ptr)
  458. {
  459. OpusEncContext *s = avctx->priv_data;
  460. int ret, frame_size, alloc_size = 0;
  461. if (frame) { /* Add new frame to queue */
  462. if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
  463. return ret;
  464. ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
  465. } else {
  466. ff_opus_psy_signal_eof(&s->psyctx);
  467. if (!s->afq.remaining_samples)
  468. return 0; /* We've been flushed and there's nothing left to encode */
  469. }
  470. /* Run the psychoacoustic system */
  471. if (ff_opus_psy_process(&s->psyctx, &s->packet))
  472. return 0;
  473. frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
  474. if (!frame) {
  475. /* This can go negative, that's not a problem, we only pad if positive */
  476. int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
  477. /* Pad with empty 2.5 ms frames to whatever framesize was decided,
  478. * this should only happen at the very last flush frame. The frames
  479. * allocated here will be freed (because they have no other references)
  480. * after they get used by celt_frame_setup_input() */
  481. for (int i = 0; i < pad_empty; i++) {
  482. AVFrame *empty = spawn_empty_frame(s);
  483. if (!empty)
  484. return AVERROR(ENOMEM);
  485. ff_bufqueue_add(avctx, &s->bufqueue, empty);
  486. }
  487. }
  488. for (int i = 0; i < s->packet.frames; i++) {
  489. celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
  490. alloc_size += s->frame[i].framebits >> 3;
  491. }
  492. /* Worst case toc + the frame lengths if needed */
  493. alloc_size += 2 + s->packet.frames*2;
  494. if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
  495. return ret;
  496. /* Assemble packet */
  497. opus_packet_assembler(s, avpkt);
  498. /* Update the psychoacoustic system */
  499. ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
  500. /* Remove samples from queue and skip if needed */
  501. ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
  502. if (s->packet.frames*frame_size > avpkt->duration) {
  503. uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
  504. if (!side)
  505. return AVERROR(ENOMEM);
  506. AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
  507. }
  508. *got_packet_ptr = 1;
  509. return 0;
  510. }
  511. static av_cold int opus_encode_end(AVCodecContext *avctx)
  512. {
  513. OpusEncContext *s = avctx->priv_data;
  514. for (int i = 0; i < CELT_BLOCK_NB; i++)
  515. ff_mdct15_uninit(&s->mdct[i]);
  516. ff_celt_pvq_uninit(&s->pvq);
  517. av_freep(&s->dsp);
  518. av_freep(&s->frame);
  519. av_freep(&s->rc);
  520. ff_af_queue_close(&s->afq);
  521. ff_opus_psy_end(&s->psyctx);
  522. ff_bufqueue_discard_all(&s->bufqueue);
  523. av_freep(&avctx->extradata);
  524. return 0;
  525. }
  526. static av_cold int opus_encode_init(AVCodecContext *avctx)
  527. {
  528. int ret, max_frames;
  529. OpusEncContext *s = avctx->priv_data;
  530. s->avctx = avctx;
  531. s->channels = avctx->channels;
  532. /* Opus allows us to change the framesize on each packet (and each packet may
  533. * have multiple frames in it) but we can't change the codec's frame size on
  534. * runtime, so fix it to the lowest possible number of samples and use a queue
  535. * to accumulate AVFrames until we have enough to encode whatever the encoder
  536. * decides is the best */
  537. avctx->frame_size = 120;
  538. /* Initial padding will change if SILK is ever supported */
  539. avctx->initial_padding = 120;
  540. if (!avctx->bit_rate) {
  541. int coupled = ff_opus_default_coupled_streams[s->channels - 1];
  542. avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
  543. } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) {
  544. int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels);
  545. av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n",
  546. avctx->bit_rate/1000, clipped_rate/1000);
  547. avctx->bit_rate = clipped_rate;
  548. }
  549. /* Extradata */
  550. avctx->extradata_size = 19;
  551. avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
  552. if (!avctx->extradata)
  553. return AVERROR(ENOMEM);
  554. opus_write_extradata(avctx);
  555. ff_af_queue_init(avctx, &s->afq);
  556. if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0)
  557. return ret;
  558. if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
  559. return AVERROR(ENOMEM);
  560. /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
  561. for (int i = 0; i < CELT_BLOCK_NB; i++)
  562. if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
  563. return AVERROR(ENOMEM);
  564. /* Zero out previous energy (matters for inter first frame) */
  565. for (int ch = 0; ch < s->channels; ch++)
  566. memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
  567. /* Allocate an empty frame to use as overlap for the first frame of audio */
  568. ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
  569. if (!ff_bufqueue_peek(&s->bufqueue, 0))
  570. return AVERROR(ENOMEM);
  571. if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
  572. return ret;
  573. /* Frame structs and range coder buffers */
  574. max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
  575. s->frame = av_malloc(max_frames*sizeof(CeltFrame));
  576. if (!s->frame)
  577. return AVERROR(ENOMEM);
  578. s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
  579. if (!s->rc)
  580. return AVERROR(ENOMEM);
  581. for (int i = 0; i < max_frames; i++) {
  582. s->frame[i].dsp = s->dsp;
  583. s->frame[i].avctx = s->avctx;
  584. s->frame[i].seed = 0;
  585. s->frame[i].pvq = s->pvq;
  586. s->frame[i].apply_phase_inv = 1;
  587. s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
  588. }
  589. return 0;
  590. }
  591. #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
  592. static const AVOption opusenc_options[] = {
  593. { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
  594. { NULL },
  595. };
  596. static const AVClass opusenc_class = {
  597. .class_name = "Opus encoder",
  598. .item_name = av_default_item_name,
  599. .option = opusenc_options,
  600. .version = LIBAVUTIL_VERSION_INT,
  601. };
  602. static const AVCodecDefault opusenc_defaults[] = {
  603. { "b", "0" },
  604. { "compression_level", "10" },
  605. { NULL },
  606. };
  607. AVCodec ff_opus_encoder = {
  608. .name = "opus",
  609. .long_name = NULL_IF_CONFIG_SMALL("Opus"),
  610. .type = AVMEDIA_TYPE_AUDIO,
  611. .id = AV_CODEC_ID_OPUS,
  612. .defaults = opusenc_defaults,
  613. .priv_class = &opusenc_class,
  614. .priv_data_size = sizeof(OpusEncContext),
  615. .init = opus_encode_init,
  616. .encode2 = opus_encode_frame,
  617. .close = opus_encode_end,
  618. .caps_internal = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
  619. .capabilities = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
  620. .supported_samplerates = (const int []){ 48000, 0 },
  621. .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO,
  622. AV_CH_LAYOUT_STEREO, 0 },
  623. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
  624. AV_SAMPLE_FMT_NONE },
  625. };