最近刚好有空闲时间,重新实现了下rtmp推流h264和aac,也解决了推流时音视频不同步的问题,可以参考最新的实现方式。博客链接:RTMP推流H264和AAC-CSDN博客。
利用librtmp库实现,rtmp服务器使用Nginx搭建,拉流端使用VLC。由于流是从文件中读取的,时间戳设置得不够精确,播放过程中会出现一小段音视频不同步的现象。
封装部分代码参考了:flvmuxer
h264 aac文件解析,环形队列的相关代码在我其他博客中已经贴出来了:
rtmp_pushStream.h:
#ifndef _RTMP_PUSHSTREAM_H_
#define _RTMP_PUSHSTREAM_H_
#include <cstdint>
#include <string>
#include <mutex>
#include "librtmp/rtmp.h"
// AAC stream parameters as carried in the ADTS header; consumed when building
// the FLV AudioSpecificConfig and the audio tag header byte.
typedef struct
{
uint8_t profile; // ADTS profile field (object type - 1; see pack_audioSpecificConfig)
uint8_t sample_frequency_index; // ADTS sampling_frequency_index (0..12)
uint8_t channel_configuration; // channel count/configuration (1 = mono, 2 = stereo)
} aac_config_t;
// Publishes H.264 and AAC elementary streams to an RTMP server via librtmp.
// Frames are packed into FLV tags in an internal scratch buffer and written
// with RTMP_Write. Public entry points serialize on m_mutex.
class RtmpPushStream
{
public:
    // bufsize: capacity of the internal FLV-tag scratch buffer in bytes.
    explicit RtmpPushStream(uint32_t bufsize);
    ~RtmpPushStream();
    // Non-copyable: the object owns m_rtmp and m_buffer, and std::mutex is not
    // copyable anyway — spell it out so the intent is explicit.
    RtmpPushStream(const RtmpPushStream &) = delete;
    RtmpPushStream &operator=(const RtmpPushStream &) = delete;
    // Connect and create the publishing stream. Returns 0 on success, -1 on error.
    int connect_server(std::string url);
    // Send the AVC sequence header (AVCDecoderConfigurationRecord) built from
    // SPS/PPS NAL units (Annex-B start codes accepted). Returns 0 / -1.
    int send_avcSequenceHeader(const uint8_t *sps, uint32_t sps_len,
                               const uint8_t *pps, uint32_t pps_len,
                               uint32_t dts_ms);
    // Push one Annex-B H.264 access unit (slice or IDR NAL). Returns 0 / -1.
    int push_h264(const uint8_t *buf, uint32_t len, uint32_t dts_ms);
    // Send the AAC sequence header (AudioSpecificConfig). Returns 0 / -1.
    int send_aacSequenceHeader(const aac_config_t *aac_config, uint32_t dts_ms);
    // Push one ADTS-framed AAC frame (header is stripped). Returns 0 / -1.
    int push_aac(const uint8_t *buf, uint32_t len, uint32_t dts_ms);

private:
    RTMP *m_rtmp = nullptr;            // librtmp session, owned
    uint32_t m_bufsize = 0;            // capacity of m_buffer
    uint8_t *m_buffer = nullptr;       // FLV-tag scratch buffer, owned
    std::mutex m_mutex;                // serializes the public push/send calls
    uint8_t m_audioParameter = 0;      // cached FLV AudioTagHeader first byte

    // Returns 3 or 4 for an Annex-B start code at *pnalu, -1 if none.
    int check_startCode_len(const uint8_t *pnalu);
    // Tag packers: fill m_buffer and return the tag length, or -1 on error.
    int packVideoTag_spsAndpps(const uint8_t *sps, uint32_t sps_len,
                               const uint8_t *pps, uint32_t pps_len,
                               uint32_t dts_ms);
    int packVideoTag_keyFrame(const uint8_t *nal, uint32_t nal_len, uint32_t dts_ms);
    int packVideoTag_notkeyFrame(const uint8_t *nal, uint32_t nal_len, uint32_t dts_ms);
    void set_audioParameter(const aac_config_t *config);
    int pack_audioSpecificConfig(const aac_config_t *aac_config, uint32_t dts_ms);
    int pack_audioTag(const uint8_t *frame, uint32_t len, uint32_t dts_ms);
};
#endif // _RTMP_PUSHSTREAM_H_
rtmp_pushStream.cpp:
#include <iostream>
#include <string.h>
#include <unistd.h>
#include "rtmp_pushStream.h"
#define AAC_ADTS_HEADER_SIZE 7
#define FLV_TAG_HEAD_LEN 11
#define FLV_PRE_TAG_LEN 4
// Allocate the zero-initialized FLV-tag scratch buffer.
// Note: operator new[] throws std::bad_alloc on failure, so the original
// null-check after it was dead code and has been removed.
RtmpPushStream::RtmpPushStream(uint32_t bufsize)
{
    m_buffer = new uint8_t[bufsize](); // () -> value-initialized (zero-filled)
    m_bufsize = bufsize;
}
// Release the scratch buffer and tear down the librtmp session, if any.
RtmpPushStream::~RtmpPushStream()
{
    // delete[] on a null pointer is a no-op, so no guard is required.
    delete[] m_buffer;
    if (m_rtmp != NULL)
    {
        RTMP_Close(m_rtmp);
        RTMP_Free(m_rtmp);
    }
}
// Connect to the RTMP server at `url` (e.g. rtmp://host/app/stream) in
// publish mode and create the stream. Returns 0 on success, -1 on failure.
int RtmpPushStream::connect_server(std::string url)
{
    m_rtmp = RTMP_Alloc();
    if (!m_rtmp)
    {
        std::cout << "RTMP_Alloc failed" << std::endl;
        return -1;
    }
    RTMP_Init(m_rtmp);
    m_rtmp->Link.timeout = 10; // connection/read timeout in seconds
    m_rtmp->Link.lFlags |= RTMP_LF_LIVE;
    if (!RTMP_SetupURL(m_rtmp, (char *)url.c_str()))
    {
        std::cout << "RTMP_SetupURL failed" << std::endl;
        goto err;
    }
    // Must be enabled before RTMP_Connect so the session is set up for publishing.
    RTMP_EnableWrite(m_rtmp);
    if (!RTMP_Connect(m_rtmp, NULL))
    {
        std::cout << "RTMP_Connect failed" << std::endl;
        goto err;
    }
    if (!RTMP_ConnectStream(m_rtmp, 0))
    {
        std::cout << "RTMP_ConnectStream failed" << std::endl;
        goto err;
    }
    return 0;
err:
    RTMP_Close(m_rtmp);
    RTMP_Free(m_rtmp);
    // BUG FIX: the freed handle was left dangling, so the destructor would
    // call RTMP_Close/RTMP_Free again (double free). Reset it.
    m_rtmp = NULL;
    return -1;
}
// 封装sps和pps, sps和pps数据封装在一个Tag中
int RtmpPushStream::packVideoTag_spsAndpps(const uint8_t *sps, uint32_t sps_len,
const uint8_t *pps, uint32_t pps_len,
uint32_t dts_ms)
{
uint32_t body_len;
uint32_t offset = 0;
uint32_t output_len = 0;
body_len = sps_len + pps_len + 16;
output_len = body_len + FLV_TAG_HEAD_LEN + FLV_PRE_TAG_LEN;
if (output_len > m_bufsize)
{
std::cout << "buffer(m_bufsize) is too small" << std::endl;
return -1;
}
// flv tag header
m_buffer[offset++] = 0x09; // tagtype video
m_buffer[offset++] = (uint8_t)(body_len >> 16); // data len
m_buffer[offset++] = (uint8_t)(body_len >> 8); // data len
m_buffer[offset++] = (uint8_t)(body_len); // data len
m_buffer[offset++] = (uint8_t)(dts_ms >> 16); // time stamp
m_buffer[offset++] = (uint8_t)(dts_ms >> 8); // time stamp
m_buffer[offset++] = (uint8_t)(dts_ms); // time stamp
m_buffer[offset++] = (uint8_t)(dts_ms >> 24); // time stamp
m_buffer[offset++] = 0x00; // stream id 0
m_buffer[offset++] = 0x00; // stream id 0
m_buffer[offset++] = 0x00; // stream id 0
// flv VideoTagHeader
m_buffer[offset++] = 0x17; // key frame, AVC
m_buffer[offset++] = 0x00; // avc sequence header
m_buffer[offset++] = 0x00; // composit time ??????????
m_buffer[offset++] = 0x00; // composit time
m_buffer[offset++] = 0x00; // composit time
// flv VideoTagBody --AVCDecoderCOnfigurationRecord
m_buffer[offset++] = 0x01; // configurationversion
m_buffer[offset++] = pps[1]; // avcprofileindication
m_buffer[offset++] = pps[2]; // profilecompatibilty
m_buffer[offset++] = pps[3]; // avclevelindication
m_buffer[offset++] = 0xff; // reserved + lengthsizeminusone
m_buffer[offset++] = 0xe1; // numofsequenceset
m_buffer[offset++] = (uint8_t)(sps_len >> 8); // sequence parameter set length high 8 bits
m_buffer[offset++] = (uint8_t)(sps_len); // sequence parameter set length low 8 bits
memcpy(m_buffer + offset, sps, sps_len); // H264 sequence parameter set
offset += sps_len;
m_buffer[offset++] = 0x01; // numofpictureset
m_buffer[offset++] = (uint8_t)(pps_len >> 8); // picture parameter set length high 8 bits
m_buffer[offset++] = (uint8_t)(pps_len); // picture parameter set length low 8 bits
memcpy(m_buffer + offset, pps, pps_len); // H264 picture parameter set
return output_len;
}
// Pack one key-frame (IDR) NAL unit into an AVC NALU video tag in m_buffer.
// `nal` must not include the Annex-B start code.
// Returns the total tag length, or -1 if the scratch buffer is too small.
int RtmpPushStream::packVideoTag_keyFrame(const uint8_t *nal, uint32_t nal_len, uint32_t dts_ms)
{
    // body = VideoTagHeader (5) + 4-byte NALU length prefix + payload
    const uint32_t body_len = nal_len + 5 + 4;
    const uint32_t total_len = body_len + FLV_TAG_HEAD_LEN + FLV_PRE_TAG_LEN;
    if (total_len > m_bufsize)
    {
        std::cout << "buffer(m_bufsize) is too small" << std::endl;
        return -1;
    }
    uint8_t *p = m_buffer;
    // FLV tag header (11 bytes)
    *p++ = 0x09;                      // tag type: video
    *p++ = (uint8_t)(body_len >> 16); // data size, 24-bit big-endian
    *p++ = (uint8_t)(body_len >> 8);
    *p++ = (uint8_t)(body_len);
    *p++ = (uint8_t)(dts_ms >> 16);   // timestamp, low 24 bits
    *p++ = (uint8_t)(dts_ms >> 8);
    *p++ = (uint8_t)(dts_ms);
    *p++ = (uint8_t)(dts_ms >> 24);   // timestamp extension byte
    *p++ = 0x00;                      // stream id (always 0)
    *p++ = 0x00;
    *p++ = 0x00;
    // FLV VideoTagHeader (5 bytes)
    *p++ = 0x17;                      // frame type 1 (key) | codec id 7 (AVC)
    *p++ = 0x01;                      // AVCPacketType: NALU
    *p++ = 0x00;                      // composition time offset = 0
    *p++ = 0x00;
    *p++ = 0x00;
    // 4-byte big-endian NALU length followed by the payload
    *p++ = (uint8_t)(nal_len >> 24);
    *p++ = (uint8_t)(nal_len >> 16);
    *p++ = (uint8_t)(nal_len >> 8);
    *p++ = (uint8_t)(nal_len);
    memcpy(p, nal, nal_len);
    return total_len;
}
// Pack one non-key-frame (non-IDR slice) NAL unit into an AVC NALU video tag.
// Identical layout to packVideoTag_keyFrame except for the frame-type nibble.
// Returns the total tag length, or -1 if the scratch buffer is too small.
int RtmpPushStream::packVideoTag_notkeyFrame(const uint8_t *nal, uint32_t nal_len, uint32_t dts_ms)
{
    // body = VideoTagHeader (5) + 4-byte NALU length prefix + payload
    const uint32_t body_len = nal_len + 5 + 4;
    const uint32_t total_len = body_len + FLV_TAG_HEAD_LEN + FLV_PRE_TAG_LEN;
    if (total_len > m_bufsize)
    {
        std::cout << "buffer(m_bufsize) is too small" << std::endl;
        return -1;
    }
    uint8_t *p = m_buffer;
    // FLV tag header (11 bytes)
    *p++ = 0x09;                      // tag type: video
    *p++ = (uint8_t)(body_len >> 16); // data size, 24-bit big-endian
    *p++ = (uint8_t)(body_len >> 8);
    *p++ = (uint8_t)(body_len);
    *p++ = (uint8_t)(dts_ms >> 16);   // timestamp, low 24 bits
    *p++ = (uint8_t)(dts_ms >> 8);
    *p++ = (uint8_t)(dts_ms);
    *p++ = (uint8_t)(dts_ms >> 24);   // timestamp extension byte
    *p++ = 0x00;                      // stream id (always 0)
    *p++ = 0x00;
    *p++ = 0x00;
    // FLV VideoTagHeader (5 bytes)
    *p++ = 0x27;                      // frame type 2 (inter) | codec id 7 (AVC)
    *p++ = 0x01;                      // AVCPacketType: NALU
    *p++ = 0x00;                      // composition time offset = 0
    *p++ = 0x00;
    *p++ = 0x00;
    // 4-byte big-endian NALU length followed by the payload
    *p++ = (uint8_t)(nal_len >> 24);
    *p++ = (uint8_t)(nal_len >> 16);
    *p++ = (uint8_t)(nal_len >> 8);
    *p++ = (uint8_t)(nal_len);
    memcpy(p, nal, nal_len);
    return total_len;
}
// Report the length of the Annex-B start code at *pnalu:
// returns 3 for 00 00 01, 4 for 00 00 00 01, -1 if neither is present.
// Note: always reads up to pnalu[3]; the caller must supply >= 4 bytes.
int RtmpPushStream::check_startCode_len(const uint8_t *pnalu)
{
    if (pnalu[0] != 0x00 || pnalu[1] != 0x00)
    {
        return -1;
    }
    if (pnalu[2] == 0x01)
    {
        return 3;
    }
    if (pnalu[2] == 0x00 && pnalu[3] == 0x01)
    {
        return 4;
    }
    return -1;
}
int RtmpPushStream::send_avcSequenceHeader(const uint8_t *sps, uint32_t sps_len,
const uint8_t *pps, uint32_t pps_len,
uint32_t dts_ms)
{
std::lock_guard<std::mutex> lock(m_mutex);
uint32_t packLen = 0;
int startCode_len;
startCode_len = check_startCode_len(sps);
if (startCode_len < 0)
{
std::cout << "sps start code is not exist" << std::endl;
return -1;
}
startCode_len = check_startCode_len(pps);
if (startCode_len < 0)
{
std::cout << "pps start code is not exist" << std::endl;
return -1;
}
if ((sps[startCode_len] & 0x1f) == 0x07 && (pps[startCode_len] & 0x1f) == 0x08) // sps pps
{
packLen = packVideoTag_spsAndpps(sps, sps_len, pps, pps_len, dts_ms);
if (packLen > 0)
{
RTMP_Write(m_rtmp, (char *)m_buffer, packLen);
}
}
else
{
std::cout << "sps or pps is not exist" << std::endl;
return -1;
}
return 0;
}
// Push one Annex-B H.264 NAL unit. Only slice (1) and IDR (5) NAL types are
// forwarded; everything else is dropped with a warning.
// Returns 0 on success, -1 on error/drop.
int RtmpPushStream::push_h264(const uint8_t *buf, uint32_t len, uint32_t dts_ms)
{
    std::lock_guard<std::mutex> lock(m_mutex);
    int startCode_len = check_startCode_len(buf);
    if (startCode_len < 0)
    {
        std::cout << "start code is not exist" << std::endl;
        return -1;
    }
    // Guard against nal_len underflowing when len <= start code length.
    if (len <= (uint32_t)startCode_len)
    {
        std::cout << "nal too short" << std::endl;
        return -1;
    }
    const uint8_t *nal = buf + startCode_len;
    uint8_t nal_type = nal[0] & 0x1f;
    uint32_t nal_len = len - startCode_len;
    // BUG FIX: the packers return int -1 on failure; the original stored that
    // in a uint32_t, so "packLen > 0" passed and RTMP_Write was called with a
    // bogus huge length. Keep the length signed.
    int packLen;
    if (nal_type == 0x01) // non-IDR slice
    {
        packLen = packVideoTag_notkeyFrame(nal, nal_len, dts_ms);
    }
    else if (nal_type == 0x05) // IDR slice (key frame)
    {
        packLen = packVideoTag_keyFrame(nal, nal_len, dts_ms);
    }
    else
    {
        std::cout << "warn: lose, nal type:" << static_cast<uint16_t>(nal_type) << std::endl;
        return -1;
    }
    if (packLen <= 0)
    {
        return -1;
    }
    RTMP_Write(m_rtmp, (char *)m_buffer, packLen);
    return 0;
}
// Build and cache the first byte of the FLV AudioTagHeader:
// SoundFormat(4) | SoundRate(2) | SoundSize(1) | SoundType(1).
void RtmpPushStream::set_audioParameter(const aac_config_t *config)
{
    uint8_t soundType = 0, soundRate = 0;
    // Map the ADTS sampling_frequency_index to the 2-bit FLV SoundRate field.
    if (config->sample_frequency_index <= 4)
    {
        soundRate = 3; // 44.1 kHz and above
    }
    else if (config->sample_frequency_index <= 7)
    {
        soundRate = 2; // 22.05 kHz - 32 kHz
    }
    else if (config->sample_frequency_index <= 11)
    {
        soundRate = 1; // 8 kHz - 16 kHz
    }
    else
    {
        soundRate = 0; // 7.35 kHz
    }
    // BUG FIX: SoundType is a single bit (0 mono, 1 stereo). The original
    // "channel_configuration - 1" underflows to 255 when the field is 0 and
    // spills into the other bit fields for multichannel AAC; clamp instead.
    soundType = (config->channel_configuration > 1) ? 1 : 0;
    // 0xA0 = SoundFormat 10 (AAC) << 4; 0x02 = SoundSize 16-bit samples
    m_audioParameter = 0xA0 | (soundRate << 2) | 0x02 | soundType;
}
// Pack the AAC sequence header tag: AudioTagHeader (2 bytes) followed by the
// 2-byte AudioSpecificConfig. Also refreshes m_audioParameter from the config.
// Returns the total tag length, or -1 if the scratch buffer is too small.
int RtmpPushStream::pack_audioSpecificConfig(const aac_config_t *aac_config, uint32_t dts_ms)
{
    set_audioParameter(aac_config);
    const uint32_t body_len = 2 + 2; // AudioTagHeader + AudioSpecificConfig
    const uint32_t total_len = body_len + FLV_TAG_HEAD_LEN + FLV_PRE_TAG_LEN;
    if (total_len > m_bufsize)
    {
        std::cout << "buffer(m_bufsize) is too small" << std::endl;
        return -1;
    }
    uint8_t *p = m_buffer;
    // FLV tag header (11 bytes)
    *p++ = 0x08;                      // tag type: audio
    *p++ = (uint8_t)(body_len >> 16); // data size, 24-bit big-endian
    *p++ = (uint8_t)(body_len >> 8);
    *p++ = (uint8_t)(body_len);
    *p++ = (uint8_t)(dts_ms >> 16);   // timestamp, low 24 bits
    *p++ = (uint8_t)(dts_ms >> 8);
    *p++ = (uint8_t)(dts_ms);
    *p++ = (uint8_t)(dts_ms >> 24);   // timestamp extension byte
    *p++ = 0x00;                      // stream id (always 0)
    *p++ = 0x00;
    *p++ = 0x00;
    // AudioTagHeader
    *p++ = m_audioParameter;          // SoundFormat AAC + rate/size/type bits
    *p++ = 0x00;                      // AACPacketType 0: sequence header
    // AudioSpecificConfig: objectType(5) | freqIndex(4) | channelConfig(4)
    const uint8_t aac_object_type = aac_config->profile + 1; // ADTS profile -> AOT
    *p++ = (aac_object_type << 3) | (aac_config->sample_frequency_index >> 1);
    *p++ = ((aac_config->sample_frequency_index & 0x01) << 7) | (aac_config->channel_configuration << 3);
    return total_len;
}
// Pack one ADTS-framed AAC frame as a raw-AAC audio tag: the 7-byte ADTS
// header is stripped and only the payload is sent.
// Returns the total tag length, or -1 on error.
int RtmpPushStream::pack_audioTag(const uint8_t *frame, uint32_t len, uint32_t dts_ms)
{
    // BUG FIX: guard the subtraction explicitly. With len <= 7 the unsigned
    // "len - AAC_ADTS_HEADER_SIZE" wrapped around to a huge value (only caught
    // by accident by the buffer-size check below).
    if (len <= AAC_ADTS_HEADER_SIZE)
    {
        std::cout << "aac frame too short" << std::endl;
        return -1;
    }
    uint32_t offset = 0;
    uint32_t body_len = 2 + len - AAC_ADTS_HEADER_SIZE; // AudioTagHeader + raw AAC
    uint32_t output_len = body_len + FLV_TAG_HEAD_LEN + FLV_PRE_TAG_LEN;
    if (output_len > m_bufsize)
    {
        std::cout << "buffer(m_bufsize) is too small" << std::endl;
        return -1;
    }
    // FLV tag header
    m_buffer[offset++] = 0x08;                      // tag type: audio
    m_buffer[offset++] = (uint8_t)(body_len >> 16); // data size (24-bit BE)
    m_buffer[offset++] = (uint8_t)(body_len >> 8);
    m_buffer[offset++] = (uint8_t)(body_len);
    m_buffer[offset++] = (uint8_t)(dts_ms >> 16);   // timestamp low 24 bits
    m_buffer[offset++] = (uint8_t)(dts_ms >> 8);
    m_buffer[offset++] = (uint8_t)(dts_ms);
    m_buffer[offset++] = (uint8_t)(dts_ms >> 24);   // timestamp extension
    m_buffer[offset++] = 0x00;                      // stream id, always 0
    m_buffer[offset++] = 0x00;
    m_buffer[offset++] = 0x00;
    // AudioTagHeader
    m_buffer[offset++] = m_audioParameter;          // cached by set_audioParameter
    m_buffer[offset++] = 0x01;                      // AACPacketType 1: raw AAC
    // Raw AAC payload (ADTS header removed)
    memcpy(m_buffer + offset, frame + AAC_ADTS_HEADER_SIZE, (len - AAC_ADTS_HEADER_SIZE));
    return output_len;
}
// Send the AAC sequence header (AudioSpecificConfig) tag.
// Returns 0 on success, -1 on error.
int RtmpPushStream::send_aacSequenceHeader(const aac_config_t *aac_config, uint32_t dts_ms)
{
    std::lock_guard<std::mutex> lock(m_mutex);
    // BUG FIX: keep the length signed — the packer returns -1 on failure, and
    // the original uint32_t made that look like a huge valid length.
    int packLen = pack_audioSpecificConfig(aac_config, dts_ms);
    if (packLen <= 0)
    {
        return -1;
    }
    RTMP_Write(m_rtmp, (char *)m_buffer, packLen);
    return 0;
}
// Push one ADTS-framed AAC frame. Returns 0 on success, -1 on error.
int RtmpPushStream::push_aac(const uint8_t *buf, uint32_t len, uint32_t dts_ms)
{
    std::lock_guard<std::mutex> lock(m_mutex);
    // BUG FIX: keep the length signed — pack_audioTag returns -1 on failure,
    // and the original uint32_t made that look like a huge valid length.
    int packLen = pack_audioTag(buf, len, dts_ms);
    if (packLen <= 0)
    {
        return -1;
    }
    RTMP_Write(m_rtmp, (char *)m_buffer, packLen);
    return 0;
}
main.cpp:
#include <atomic>
#include <iostream>
#include <thread>
#include <time.h>
#include <unistd.h>
#include "rtmp_pushStream.h"
#include "h264aac_parse.h"
#include "ringQueue.h"
using namespace std;
// Shared state between the reader threads (producers) and sender threads
// (consumers); created/destroyed in main().
ringQueue *rQueue_a = NULL; // audio (ADTS frame) queue
ringQueue *rQueue_v = NULL; // video (NALU) queue
AACParse aac;
H264Parse h264;
aac_config_t aac_config;
// BUG FIX: these flags are written by the reader threads and polled by the
// sender threads; plain bool across threads is a data race with no visibility
// guarantee, so make them atomic (requires <atomic>).
std::atomic<bool> readAAC_thread_exit{false};
std::atomic<bool> readH264_thread_exit{false};
// Reader thread: pull ADTS frames from the AAC file and enqueue them.
// Sets readAAC_thread_exit when the file is exhausted or on error.
void readAAC_thread(void)
{
    uint8_t buf[1024 * 2] = {0};
    uint32_t len = 0;
    rQueue_data e;
    e.buffer = buf;
    while (1)
    {
        int ret = aac.get_adts(buf, sizeof(buf), &len);
        if (ret == -1)
        {
            cout << "get_adts failed" << std::endl;
            break;
        }
        // Both the normal case (ret == 1) and the final frame are enqueued the
        // same way; the original duplicated this in two branches.
        e.len = len;
        rQueue_en(rQueue_a, &e);
        if (ret != 1)
        {
            break; // last frame of the file
        }
        usleep(1000 * 21); // pace roughly one AAC frame (~21.3 ms at 48 kHz)
    }
    cout << "readAAC_thread exit" << std::endl;
    readAAC_thread_exit = true;
}
// Reader thread: pull NAL units from the H.264 file and enqueue slice/IDR
// NALUs. Sets readH264_thread_exit when the file is exhausted or on error.
void readH264_thread(void)
{
    uint8_t buf[1024 * 1024] = {0};
    uint32_t len = 0;
    rQueue_data e;
    e.buffer = buf;
    while (1)
    {
        const int ret = h264.get_nalu(buf, sizeof(buf), &len, 1024 * 10);
        if (ret == -1)
        {
            cout << "get_nalu failed" << std::endl;
            break;
        }
        if (ret != 1)
        {
            // Final NALU of the file: enqueue it as-is and stop.
            e.len = len;
            rQueue_en(rQueue_v, &e);
            break;
        }
        const int startCode_len = h264.check_startCode_len(buf);
        if (startCode_len == -1)
        {
            cout << "check_startCode_len failed" << std::endl;
            break;
        }
        const uint8_t nalu_type = buf[startCode_len] & 0x1f;
        // Only queue slice NALUs (1 = non-IDR, 5 = IDR); skip SPS/PPS/SEI etc.
        // without pausing, exactly like the original `continue`.
        if (nalu_type == 0x01 || nalu_type == 0x05)
        {
            e.len = len;
            rQueue_en(rQueue_v, &e);
            usleep(1000 * 40); // pace at ~25 fps
        }
    }
    cout << "readH264_thread exit" << std::endl;
    readH264_thread_exit = true;
}
// Sender thread: dequeue AAC frames and push them with monotonically
// increasing timestamps. One AAC frame is 1024 samples; at 48 kHz that is
// 21.333 ms, approximated by a repeating 21/21/22 ms pattern (64 ms per 3
// frames) so the audio clock does not drift.
void sendAAC_thread(RtmpPushStream *rtmp)
{
    uint8_t buf[1024 * 2] = {0};
    rQueue_data e;
    uint32_t adts = 0, count = 0;
    uint32_t add = 21;
    e.buffer = buf;
    while (1)
    {
        if (rQueue_isEmpty(rQueue_a))
        {
            usleep(1000 * 2);
            if (readAAC_thread_exit)
            {
                break;
            }
            continue;
        }
        e.len = sizeof(buf);
        rQueue_de(rQueue_a, &e);
        rtmp->push_aac((uint8_t *)e.buffer, e.len, adts);
        adts += add;
        // BUG FIX: the original reset count to 0 and then still incremented it,
        // yielding a 21/22 alternation (21.5 ms average) instead of the
        // intended 21/21/22 cycle (21.333 ms average) — a steady A/V drift.
        if (count == 2)
        {
            add = 22;
            count = 0;
        }
        else
        {
            add = 21;
            count++;
        }
    }
}
// Sender thread: dequeue NALUs and push them at a fixed 25 fps timestamp
// cadence (40 ms per frame). Exits once the reader is done and the queue
// drains empty.
void sendH264_thread(RtmpPushStream *rtmp)
{
    uint8_t buf[1024 * 1024] = {0};
    rQueue_data e;
    uint32_t vdts = 0; // video dts in ms
    e.buffer = buf;
    while (1)
    {
        if (!rQueue_isEmpty(rQueue_v))
        {
            e.len = sizeof(buf);
            rQueue_de(rQueue_v, &e);
            rtmp->push_h264((uint8_t *)e.buffer, e.len, vdts);
            vdts += 40; // 1000 ms / 25 fps
            continue;
        }
        usleep(1000 * 2); // queue empty: wait a little, then re-check
        if (readH264_thread_exit)
        {
            break;
        }
    }
}
void main(void)
{
uint8_t sps[64] = {0}, pps[64] = {0};
uint32_t sps_len = 0, pps_len = 0;
RtmpPushStream rtmp(1024 * 1024);
rQueue_a = rQueue_init(10, 1024 * 2);
rQueue_v = rQueue_init(10, 1024 * 1024);
aac.open_aacFile("res/test.aac");
aac.get_configInfo(&aac_config.profile, &aac_config.sample_frequency_index,
&aac_config.channel_configuration);
h264.open_h264File("res/test.h264");
h264.get_nalu(sps, sizeof(sps), &sps_len, sizeof(sps) - 4);
h264.get_nalu(pps, sizeof(pps), &pps_len, sizeof(pps) - 4);
rtmp.connect_server("rtmp://127.0.0.1/live/test");
rtmp.send_aacSequenceHeader(&aac_config, 0);
rtmp.send_avcSequenceHeader(sps, sps_len, pps, pps_len, 0);
std::thread t1(readAAC_thread);
std::thread t2(readH264_thread);
std::thread t3(sendAAC_thread, &rtmp);
std::thread t4(sendH264_thread, &rtmp);
t1.join();
t2.join();
t3.join();
t4.join();
aac.close_aacFile();
h264.close_h264File();
rQueue_destroy(&rQueue_a);
rQueue_destroy(&rQueue_v);
}