h264.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. package codec
  2. import "encoding/binary"
  3. // nal_unit( NumBytesInNALunit ) {
  4. // forbidden_zero_bit All f(1)
  5. // nal_ref_idc All u(2)
  6. // nal_unit_type u(5)
  7. // }
  8. type H264NaluHdr struct {
  9. Forbidden_zero_bit uint8
  10. Nal_ref_idc uint8
  11. Nal_unit_type uint8
  12. }
  13. func (hdr *H264NaluHdr) Decode(bs *BitStream) {
  14. hdr.Forbidden_zero_bit = bs.GetBit()
  15. hdr.Nal_ref_idc = bs.Uint8(2)
  16. hdr.Nal_unit_type = bs.Uint8(5)
  17. }
  18. type SliceHeader struct {
  19. First_mb_in_slice uint64
  20. Slice_type uint64
  21. Pic_parameter_set_id uint64
  22. Frame_num uint64
  23. }
  24. //调用方根据sps中的log2_max_frame_num_minus4的值来解析Frame_num
  25. func (sh *SliceHeader) Decode(bs *BitStream) {
  26. sh.First_mb_in_slice = bs.ReadUE()
  27. sh.Slice_type = bs.ReadUE()
  28. sh.Pic_parameter_set_id = bs.ReadUE()
  29. }
  30. type SPS struct {
  31. Profile_idc uint8
  32. Constraint_set0_flag uint8
  33. Constraint_set1_flag uint8
  34. Constraint_set2_flag uint8
  35. Constraint_set3_flag uint8
  36. Constraint_set4_flag uint8
  37. Constraint_set5_flag uint8
  38. Reserved_zero_2bits uint8
  39. Level_idc uint8
  40. Seq_parameter_set_id uint64
  41. Chroma_format_idc uint64
  42. Separate_colour_plane_flag uint8
  43. Bit_depth_luma_minus8 uint64
  44. Bit_depth_chroma_minus8 uint64
  45. Log2_max_frame_num_minus4 uint64
  46. Pic_order_cnt_type uint64
  47. Max_num_ref_frames uint64
  48. Gaps_in_frame_num_value_allowed_flag uint8
  49. Pic_width_in_mbs_minus1 uint64
  50. Pic_height_in_map_units_minus1 uint64
  51. Frame_mbs_only_flag uint8
  52. Direct_8x8_inference_flag uint8
  53. Frame_cropping_flag uint8
  54. Frame_crop_left_offset uint64
  55. Frame_crop_right_offset uint64
  56. Frame_crop_top_offset uint64
  57. Frame_crop_bottom_offset uint64
  58. Vui_parameters_present_flag uint8
  59. }
  60. func (sps *SPS) Decode(bs *BitStream) {
  61. sps.Profile_idc = bs.Uint8(8)
  62. sps.Constraint_set0_flag = bs.GetBit()
  63. sps.Constraint_set1_flag = bs.GetBit()
  64. sps.Constraint_set2_flag = bs.GetBit()
  65. sps.Constraint_set3_flag = bs.GetBit()
  66. sps.Constraint_set4_flag = bs.GetBit()
  67. sps.Constraint_set5_flag = bs.GetBit()
  68. sps.Reserved_zero_2bits = bs.Uint8(2)
  69. sps.Level_idc = bs.Uint8(8)
  70. sps.Seq_parameter_set_id = bs.ReadUE()
  71. if sps.Profile_idc == 100 || sps.Profile_idc == 110 ||
  72. sps.Profile_idc == 122 || sps.Profile_idc == 244 ||
  73. sps.Profile_idc == 44 || sps.Profile_idc == 83 ||
  74. sps.Profile_idc == 86 || sps.Profile_idc == 118 || sps.Profile_idc == 128 {
  75. sps.Chroma_format_idc = bs.ReadUE()
  76. if sps.Chroma_format_idc == 3 {
  77. sps.Separate_colour_plane_flag = bs.Uint8(1) //separate_colour_plane_flag
  78. }
  79. sps.Bit_depth_luma_minus8 = bs.ReadUE() //bit_depth_luma_minus8
  80. sps.Bit_depth_chroma_minus8 = bs.ReadUE() //bit_depth_chroma_minus8
  81. bs.SkipBits(1) //qpprime_y_zero_transform_bypass_flag
  82. seq_scaling_matrix_present_flag := bs.GetBit()
  83. if seq_scaling_matrix_present_flag == 1 {
  84. //seq_scaling_list_present_flag[i]
  85. if sps.Chroma_format_idc == 3 {
  86. bs.SkipBits(12)
  87. } else {
  88. bs.SkipBits(8)
  89. }
  90. }
  91. }
  92. sps.Log2_max_frame_num_minus4 = bs.ReadUE()
  93. sps.Pic_order_cnt_type = bs.ReadUE()
  94. if sps.Pic_order_cnt_type == 0 {
  95. bs.ReadUE() // log2_max_pic_order_cnt_lsb_minus4
  96. } else if sps.Pic_order_cnt_type == 1 {
  97. bs.SkipBits(1) //delta_pic_order_always_zero_flag
  98. bs.ReadSE() //offset_for_non_ref_pic
  99. bs.ReadSE() //offset_for_top_to_bottom_field
  100. num_ref_frames_in_pic_order_cnt_cycle := bs.ReadUE()
  101. for i := 0; i < int(num_ref_frames_in_pic_order_cnt_cycle); i++ {
  102. bs.ReadSE() //offset_for_ref_frame
  103. }
  104. }
  105. sps.Max_num_ref_frames = bs.ReadUE()
  106. sps.Gaps_in_frame_num_value_allowed_flag = bs.GetBit()
  107. sps.Pic_width_in_mbs_minus1 = bs.ReadUE()
  108. sps.Pic_height_in_map_units_minus1 = bs.ReadUE()
  109. sps.Frame_mbs_only_flag = bs.GetBit()
  110. if sps.Frame_mbs_only_flag == 0 {
  111. bs.SkipBits(1) // mb_adaptive_frame_field_flag
  112. }
  113. sps.Direct_8x8_inference_flag = bs.GetBit()
  114. sps.Frame_cropping_flag = bs.GetBit()
  115. if sps.Frame_cropping_flag == 1 {
  116. sps.Frame_crop_left_offset = bs.ReadUE() //frame_crop_left_offset
  117. sps.Frame_crop_right_offset = bs.ReadUE() //frame_crop_right_offset
  118. sps.Frame_crop_top_offset = bs.ReadUE() //frame_crop_top_offset
  119. sps.Frame_crop_bottom_offset = bs.ReadUE() //frame_crop_bottom_offset
  120. }
  121. sps.Vui_parameters_present_flag = bs.GetBit()
  122. }
  123. type PPS struct {
  124. Pic_parameter_set_id uint64
  125. Seq_parameter_set_id uint64
  126. Entropy_coding_mode_flag uint8
  127. Bottom_field_pic_order_in_frame_present_flag uint8
  128. Num_slice_groups_minus1 uint64
  129. }
  130. func (pps *PPS) Decode(bs *BitStream) {
  131. pps.Pic_parameter_set_id = bs.ReadUE()
  132. pps.Seq_parameter_set_id = bs.ReadUE()
  133. pps.Entropy_coding_mode_flag = bs.GetBit()
  134. pps.Bottom_field_pic_order_in_frame_present_flag = bs.GetBit()
  135. pps.Num_slice_groups_minus1 = bs.ReadUE()
  136. }
  137. type SEIReaderWriter interface {
  138. Read(size uint16, bs *BitStream)
  139. Write(bsw *BitStreamWriter)
  140. }
  141. type UserDataUnregistered struct {
  142. UUID []byte
  143. UserData []byte
  144. }
  145. func (udu *UserDataUnregistered) Read(size uint16, bs *BitStream) {
  146. udu.UUID = bs.GetBytes(16)
  147. udu.UserData = bs.GetBytes(int(size - 16))
  148. }
  149. func (udu *UserDataUnregistered) Write(bsw *BitStreamWriter) {
  150. bsw.PutBytes(udu.UUID)
  151. bsw.PutBytes(udu.UserData)
  152. }
  153. type SEI struct {
  154. PayloadType uint16
  155. PayloadSize uint16
  156. Sei_payload SEIReaderWriter
  157. }
  158. func (sei *SEI) Decode(bs *BitStream) {
  159. for bs.NextBits(8) == 0xFF {
  160. sei.PayloadType += 255
  161. }
  162. sei.PayloadType += uint16(bs.Uint8(8))
  163. for bs.NextBits(8) == 0xFF {
  164. sei.PayloadSize += 255
  165. }
  166. sei.PayloadSize += uint16(bs.Uint8(8))
  167. if sei.PayloadType == 5 {
  168. sei.Sei_payload = new(UserDataUnregistered)
  169. sei.Sei_payload.Read(sei.PayloadSize, bs)
  170. }
  171. }
  172. func (sei *SEI) Encode(bsw *BitStreamWriter) []byte {
  173. payloadType := sei.PayloadType
  174. payloadSize := sei.PayloadSize
  175. for payloadType >= 0xFF {
  176. bsw.PutByte(0xFF)
  177. payloadType -= 255
  178. }
  179. bsw.PutByte(uint8(payloadType))
  180. for payloadSize >= 0xFF {
  181. bsw.PutByte(0xFF)
  182. payloadSize -= 255
  183. }
  184. bsw.PutByte(uint8(payloadSize))
  185. sei.Sei_payload.Write(bsw)
  186. return bsw.Bits()
  187. }
  188. func GetSPSIdWithStartCode(sps []byte) uint64 {
  189. start, sc := FindStartCode(sps, 0)
  190. return GetSPSId(sps[start+int(sc):])
  191. }
  192. func GetSPSId(sps []byte) uint64 {
  193. sps = sps[1:]
  194. bs := NewBitStream(sps)
  195. bs.SkipBits(24)
  196. return bs.ReadUE()
  197. }
  198. func GetPPSIdWithStartCode(pps []byte) uint64 {
  199. start, sc := FindStartCode(pps, 0)
  200. return GetPPSId(pps[start+int(sc):])
  201. }
  202. func GetPPSId(pps []byte) uint64 {
  203. pps = pps[1:]
  204. bs := NewBitStream(pps)
  205. return bs.ReadUE()
  206. }
  207. //https://stackoverflow.com/questions/12018535/get-the-width-height-of-the-video-from-h-264-nalu
  208. //int Width = ((pic_width_in_mbs_minus1 +1)*16) - frame_crop_right_offset *2 - frame_crop_left_offset *2;
  209. //int Height = ((2 - frame_mbs_only_flag)* (pic_height_in_map_units_minus1 +1) * 16) - (frame_crop_bottom_offset* 2) - (frame_crop_top_offset* 2);
  210. func GetH264Resolution(sps []byte) (width uint32, height uint32) {
  211. start, sc := FindStartCode(sps, 0)
  212. bs := NewBitStream(sps[start+int(sc)+1:])
  213. var s SPS
  214. s.Decode(bs)
  215. widthInSample := (uint32(s.Pic_width_in_mbs_minus1) + 1) * 16
  216. widthCrop := uint32(s.Frame_crop_left_offset)*2 - uint32(s.Frame_crop_right_offset)*2
  217. width = widthInSample - widthCrop
  218. heightInSample := ((2 - uint32(s.Frame_mbs_only_flag)) * (uint32(s.Pic_height_in_map_units_minus1) + 1) * 16)
  219. heightCrop := uint32(s.Frame_crop_bottom_offset)*2 - uint32(s.Frame_crop_top_offset)*2
  220. height = heightInSample - heightCrop
  221. return
  222. }
  223. // aligned(8) class AVCDecoderConfigurationRecord {
  224. // unsigned int(8) configurationVersion = 1;
  225. // unsigned int(8) AVCProfileIndication;
  226. // unsigned int(8) profile_compatibility;
  227. // unsigned int(8) AVCLevelIndication;
  228. // bit(6) reserved = ‘111111’b;
  229. // unsigned int(2) lengthSizeMinusOne;
  230. // bit(3) reserved = ‘111’b;
  231. // unsigned int(5) numOfSequenceParameterSets;
  232. // for (i=0; i< numOfSequenceParameterSets; i++) {
  233. // unsigned int(16) sequenceParameterSetLength ;
  234. // bit(8*sequenceParameterSetLength) sequenceParameterSetNALUnit;
  235. // }
  236. // unsigned int(8) numOfPictureParameterSets;
  237. // for (i=0; i< numOfPictureParameterSets; i++) {
  238. // unsigned int(16) pictureParameterSetLength;
  239. // bit(8*pictureParameterSetLength) pictureParameterSetNALUnit;
  240. // }
  241. // if( profile_idc == 100 || profile_idc == 110 ||
  242. // profile_idc == 122 || profile_idc == 144 )
  243. // {
  244. // bit(6) reserved = ‘111111’b;
  245. // unsigned int(2) chroma_format;
  246. // bit(5) reserved = ‘11111’b;
  247. // unsigned int(3) bit_depth_luma_minus8;
  248. // bit(5) reserved = ‘11111’b;
  249. // unsigned int(3) bit_depth_chroma_minus8;
  250. // unsigned int(8) numOfSequenceParameterSetExt;
  251. // for (i=0; i< numOfSequenceParameterSetExt; i++) {
  252. // unsigned int(16) sequenceParameterSetExtLength;
  253. // bit(8*sequenceParameterSetExtLength) sequenceParameterSetExtNALUnit;
  254. // }
  255. // }
  256. // }
  257. // }
  258. // bits
  259. // 8 version ( always 0x01 )
  260. // 8 avc profile ( sps[0][1] )
  261. // 8 avc compatibility ( sps[0][2] )
  262. // 8 avc level ( sps[0][3] )
  263. // 6 reserved ( all bits on )
  264. // 2 NALULengthSizeMinusOne
  265. // 3 reserved ( all bits on )
  266. // 5 number of SPS NALUs (usually 1)
  267. // repeated once per SPS:
  268. // 16 SPS size
  269. // variable SPS NALU data
  270. // 8 number of PPS NALUs (usually 1)
  271. // repeated once per PPS:
  272. // 16 PPS size
  273. // variable PPS NALU data
  274. func CreateH264AVCCExtradata(spss [][]byte, ppss [][]byte) []byte {
  275. extradata := make([]byte, 6, 256)
  276. for i, sps := range spss {
  277. start, sc := FindStartCode(sps, 0)
  278. spss[i] = sps[start+int(sc):]
  279. }
  280. for i, pps := range ppss {
  281. start, sc := FindStartCode(pps, 0)
  282. ppss[i] = pps[start+int(sc):]
  283. }
  284. extradata[0] = 0x01
  285. extradata[1] = spss[0][1]
  286. extradata[2] = spss[0][2]
  287. extradata[3] = spss[0][3]
  288. extradata[4] = 0xFF
  289. extradata[5] = 0xE0 | uint8(len(spss))
  290. for _, sps := range spss {
  291. spssize := make([]byte, 2)
  292. binary.BigEndian.PutUint16(spssize, uint16(len(sps)))
  293. extradata = append(extradata, spssize...)
  294. extradata = append(extradata, sps...)
  295. }
  296. extradata = append(extradata, uint8(len(ppss)))
  297. for _, pps := range ppss {
  298. ppssize := make([]byte, 2)
  299. binary.BigEndian.PutUint16(ppssize, uint16(len(pps)))
  300. extradata = append(extradata, ppssize...)
  301. extradata = append(extradata, pps...)
  302. }
  303. var h264sps SPS
  304. h264sps.Decode(NewBitStream(spss[0][1:]))
  305. if h264sps.Profile_idc == 100 ||
  306. h264sps.Profile_idc == 110 ||
  307. h264sps.Profile_idc == 122 ||
  308. h264sps.Profile_idc == 144 {
  309. tmp := make([]byte, 4)
  310. tmp[0] = 0xFC | uint8(h264sps.Chroma_format_idc&0x03)
  311. tmp[1] = 0xF8 | uint8(h264sps.Bit_depth_luma_minus8&0x07)
  312. tmp[2] = 0xF8 | uint8(h264sps.Bit_depth_chroma_minus8&0x07)
  313. tmp[3] = 0
  314. extradata = append(extradata, tmp...)
  315. }
  316. return extradata
  317. }
  318. func CovertExtradata(extraData []byte) ([][]byte, [][]byte) {
  319. spsnum := extraData[5] & 0x1F
  320. spss := make([][]byte, spsnum)
  321. offset := 6
  322. for i := 0; i < int(spsnum); i++ {
  323. spssize := binary.BigEndian.Uint16(extraData[offset:])
  324. sps := make([]byte, spssize+4)
  325. copy(sps, []byte{0x00, 0x00, 0x00, 0x01})
  326. copy(sps[4:], extraData[offset+2:offset+2+int(spssize)])
  327. offset += 2 + int(spssize)
  328. spss[i] = sps
  329. }
  330. ppsnum := extraData[offset]
  331. ppss := make([][]byte, ppsnum)
  332. offset++
  333. for i := 0; i < int(ppsnum); i++ {
  334. ppssize := binary.BigEndian.Uint16(extraData[offset:])
  335. pps := make([]byte, ppssize+4)
  336. copy(pps, []byte{0x00, 0x00, 0x00, 0x01})
  337. copy(pps[4:], extraData[offset+2:offset+2+int(ppssize)])
  338. offset += 2 + int(ppssize)
  339. ppss[i] = pps
  340. }
  341. return spss, ppss
  342. }
  343. func ConvertAnnexBToAVCC(annexb []byte) []byte {
  344. start, sc := FindStartCode(annexb, 0)
  345. if sc == START_CODE_4 {
  346. binary.BigEndian.PutUint32(annexb[start:], uint32(len(annexb)-4))
  347. return annexb
  348. } else {
  349. avcc := make([]byte, 1+len(annexb))
  350. binary.BigEndian.PutUint32(avcc, uint32(len(annexb)-3))
  351. copy(avcc[4:], annexb[start+3:])
  352. return avcc
  353. }
  354. }
  355. func CovertAVCCToAnnexB(avcc []byte) {
  356. avcc[0] = 0x00
  357. avcc[1] = 0x00
  358. avcc[2] = 0x00
  359. avcc[3] = 0x01
  360. }