一、系统架构设计
1.1 整体架构
1.2 模块职责
| 模块 | 职责 | 关键指标 |
|---|---|---|
| 采集 | 音频采集、格式转换 | 采样率、延迟 |
| 3A处理 | 回声消除(AEC)、自动增益(AGC)、降噪(NS) | ERLE、SNR改善 |
| 编码 | 压缩、抗丢包 | 码率、质量 |
| 网络传输 | 发送、接收、反馈 | 丢包率、RTT |
| 抖动缓冲 | 吸收抖动、平滑播放 | 延迟、连续性 |
| 解码播放 | 解码、音量控制 | 质量、音量 |
1.3 数据流
发送方向:
麦克风 → 音频帧(10ms) → 3A处理 → 编码 → RTP包 → UDP发送
接收方向:
UDP接收 → RTP包 → 抖动缓冲 → 解码 → 音频帧 → 播放
二、核心模块实现
2.1 音频采集模块
/// Reads audio from an input device in a loop and hands every frame to a
/// user-supplied callback.
class AudioCapture {
public:
    /// Capture parameters.
    struct Config {
        int sample_rate = 48000;
        int channels = 1;
        // Together with sample_rate this determines how many samples
        // CaptureLoop() requests from the device per frame.
        int frame_duration_ms = 10;
    };

    AudioCapture(const Config& config);
    ~AudioCapture();

    /// Starts capturing; returns a bool status (implementation not shown in
    /// this listing).
    bool Start();

    /// Stops capturing.
    void Stop();

    /// Registers the function that CaptureLoop() invokes with each frame.
    void SetCallback(std::function<void(const AudioFrame&)> callback);

private:
    Config config_;
    std::unique_ptr<AudioDevice> device_;
    std::thread capture_thread_;
    std::function<void(const AudioFrame&)> callback_;

    // Loop flag polled by CaptureLoop(). The original listing used this
    // member without declaring it. It is atomic (requires <atomic>) because
    // the loop presumably runs on capture_thread_ while Start()/Stop() flip
    // the flag from another thread -- confirm against their implementation.
    std::atomic<bool> running_{false};

    /// Captures frames until running_ becomes false.
    void CaptureLoop() {
        // The configuration does not change while the loop runs, so the
        // per-frame sample count is computed once.
        const int samples_per_frame =
            config_.sample_rate * config_.frame_duration_ms / 1000;

        while (running_) {
            AudioFrame frame;
            frame.sample_rate = config_.sample_rate;
            frame.samples.resize(samples_per_frame);

            // Read one frame worth of samples from the device.
            device_->Read(frame.samples.data(), frame.samples.size());

            // Deliver the frame if a callback is registered.
            if (callback_) {
                callback_(frame);
            }
        }
    }
};
2.2 音频处理模块
/// Applies the capture-side processing chain (echo cancellation, gain
/// control, noise suppression, voice detection) to audio frames.
class AudioProcessing {
public:
    /// Switches and tuning values for the individual processing stages.
    struct Config {
        bool aec_enabled = true;
        bool agc_enabled = true;
        bool ns_enabled = true;
        bool vad_enabled = true;
        int aec_mode = 2;          // aggressive mode
        int ns_level = 2;          // moderate suppression
        int agc_target_level = 3;  // -3 dBFS
    };

    AudioProcessing(const Config& config);
    ~AudioProcessing();

    /// Processes a captured (send-direction) frame in place.
    ///
    /// The original listing declared this member in the public section and
    /// then defined it a second time further down in the same class body,
    /// which is an ill-formed redeclaration; the single definition now lives
    /// here.
    ///
    /// @param frame Frame whose samples are overwritten with the processed
    ///              signal. A null pointer is ignored.
    void ProcessCapture(AudioFrame* frame) {
        if (frame == nullptr) {
            return;
        }

        // 1. Echo cancellation.
        if (config_.aec_enabled) {
            WebRtcAec_Process(aec_.get(), frame->samples.data(), nullptr,
                              frame->samples.data(), frame->samples.size(), 1, 0);
        }

        // 2. Automatic gain control.
        if (config_.agc_enabled) {
            int mic_level = 128;
            int mic_level_out = 0;  // written by the call; not used afterwards
            WebRtcAgc_Process(agc_.get(), frame->samples.data(), nullptr,
                              frame->samples.size(), frame->samples.data(),
                              mic_level, &mic_level_out, 0);
        }

        // 3. Noise suppression.
        if (config_.ns_enabled) {
            WebRtcNs_Process(ns_.get(), frame->samples.data(), nullptr,
                             frame->samples.data());
        }

        // 4. Voice activity detection; a result of 1 is recorded as "voice".
        if (config_.vad_enabled) {
            const int vad_result = WebRtcVad_Process(vad_.get(), frame->sample_rate,
                                                     frame->samples.data(),
                                                     frame->samples.size());
            stats_.voice_detected = (vad_result == 1);
        }
    }

    /// Processes received audio (the far-end reference signal).
    void ProcessRender(const AudioFrame& frame);

    /// Returns the collected statistics.
    Stats GetStats() const;

private:
    Config config_;

    // WebRTC processing modules.
    std::unique_ptr<AecInst> aec_;
    std::unique_ptr<AgcInst> agc_;
    std::unique_ptr<NsHandle> ns_;
    std::unique_ptr<VadInst> vad_;

    Stats stats_;
};
2.3 音频编码模块
/// Wraps an Opus encoder handle and turns audio frames into encoded payloads.
class AudioEncoder {
public:
    /// Encoder parameters.
    struct Config {
        int sample_rate = 48000;
        int channels = 1;
        int bitrate = 32000;
        int complexity = 5;
        bool fec_enabled = true;
        int packet_loss_perc = 10;
    };

    AudioEncoder(const Config& config);
    ~AudioEncoder();

    // encoder_ is a raw handle; a copied object would alias the same handle.
    AudioEncoder(const AudioEncoder&) = delete;
    AudioEncoder& operator=(const AudioEncoder&) = delete;

    /// Encodes one frame.
    ///
    /// The original listing declared this member and then defined it again in
    /// the same class body (ill-formed), and passed the return value of
    /// opus_encode() straight to resize(): a negative error code would be
    /// converted to a huge unsigned size.
    ///
    /// @param frame Frame to encode.
    /// @return The encoded bytes, or an empty vector when there is no encoder,
    ///         the frame is empty, or opus_encode() reports an error.
    std::vector<uint8_t> Encode(const AudioFrame& frame) {
        if (encoder_ == nullptr || frame.samples.empty()) {
            return {};
        }

        // opus_encode() takes the frame size in samples per channel.
        const int channels = config_.channels > 0 ? config_.channels : 1;
        const int samples_per_channel =
            static_cast<int>(frame.samples.size()) / channels;

        std::vector<uint8_t> output(4000);  // maximum output size
        const int encoded_size =
            opus_encode(encoder_, frame.samples.data(), samples_per_channel,
                        output.data(), static_cast<int>(output.size()));
        if (encoded_size < 0) {
            return {};  // negative values are Opus error codes
        }

        output.resize(static_cast<size_t>(encoded_size));
        return output;
    }

    /// Sets the target bitrate.
    void SetBitrate(int bitrate);

private:
    Config config_;
    OpusEncoder* encoder_ = nullptr;
};
2.4 网络传输模块
/// Sends RTP/RTCP packets over UDP sockets and exposes receive callbacks.
class NetworkTransport {
public:
    /// Address and port settings.
    struct Config {
        std::string local_ip = "0.0.0.0";
        int local_port = 0;
        int target_port = 5000;
        int rtp_port = 5002;
        int rtcp_port = 5003;
    };

    NetworkTransport(const Config& config);
    ~NetworkTransport();

    /// Wraps a payload in an RTP header and sends it on the RTP socket.
    ///
    /// The original listing declared this member and then defined it again in
    /// the same class body (ill-formed); the single definition now lives here.
    ///
    /// @param payload   Encoded media bytes to carry.
    /// @param timestamp Value written to the RTP timestamp field.
    void SendRtp(const std::vector<uint8_t>& payload, uint32_t timestamp) {
        if (!rtp_socket_) {
            return;  // nothing to send on
        }

        // Value-initialize so header fields that are not assigned below do
        // not carry indeterminate values into the serialized packet.
        RtpHeader header{};
        header.version = 2;
        header.payload_type = 111;  // Opus
        header.sequence_number = sequence_number_++;
        header.timestamp = timestamp;
        header.ssrc = ssrc_;

        // Serialize header and payload into one packet.
        auto packet = SerializeRtp(header, payload);

        // NOTE(review): target_addr_ is used here but not declared anywhere in
        // this listing; presumably a socket-address member holding the remote
        // endpoint -- confirm its type and add the declaration.
        rtp_socket_->SendTo(packet.data(), packet.size(), target_addr_);
    }

    /// Sends an RTCP packet.
    void SendRtcp(const std::vector<uint8_t>& packet);

    /// Registers the receive callbacks.
    void SetRtpCallback(std::function<void(const RtpPacket&)> callback);
    void SetRtcpCallback(std::function<void(const RtcpPacket&)> callback);

private:
    Config config_;
    std::unique_ptr<UdpSocket> rtp_socket_;
    std::unique_ptr<UdpSocket> rtcp_socket_;
    uint16_t sequence_number_ = 0;
    // Zero until the constructor (not shown) assigns the stream identifier.
    uint32_t ssrc_ = 0;
};
2.5 抖动缓冲模块
/// Buffers received RTP packets in a NetEQ instance and produces frames for
/// playout.
class JitterBuffer {
public:
    /// Delay bounds.
    struct Config {
        int min_delay_ms = 20;
        int max_delay_ms = 200;
        int target_delay_ms = 60;
    };

    JitterBuffer(const Config& config);
    ~JitterBuffer();

    /// Hands a received packet to NetEQ.
    ///
    /// The original listing declared InsertPacket() and GetFrame() and then
    /// defined both again in the same class body (ill-formed); the single
    /// definitions now live here.
    void InsertPacket(const RtpPacket& packet) {
        WebRtcNetEQ_RecIn(neteq_.get(), packet.payload.data(),
                          packet.payload.size(), packet.sequence_number,
                          packet.timestamp, 0);
    }

    /// Pulls one frame from NetEQ.
    ///
    /// @param play_time_ms Not used by this implementation.
    /// @return A 480-sample frame whose first num_samples entries come from
    ///         NetEQ, or std::nullopt when NetEQ reports an error or an
    ///         implausible sample count.
    std::optional<AudioFrame> GetFrame(int64_t play_time_ms) {
        (void)play_time_ms;

        constexpr int kFrameSamples = 480;  // 10 ms @ 48 kHz

        int16_t output[kFrameSamples] = {};
        int num_samples = 0;
        int last_packet_type = 0;
        const int result = WebRtcNetEQ_RecOut(neteq_.get(), output, &num_samples,
                                              &last_packet_type);
        if (result != 0) {
            return std::nullopt;
        }

        // The original copied num_samples entries unchecked; a count outside
        // [0, 480] would index past both buffers.
        if (num_samples < 0 || num_samples > kFrameSamples) {
            return std::nullopt;
        }

        AudioFrame frame;
        frame.samples.resize(kFrameSamples);
        for (int i = 0; i < num_samples; ++i) {
            frame.samples[i] = output[i];
        }
        return frame;
    }

    /// Returns the collected statistics.
    Stats GetStats() const;

private:
    Config config_;

    // NetEQ instance.
    std::unique_ptr<NetEq> neteq_;

    // Statistics.
    Stats stats_;
};
2.6 音频解码模块
/// Wraps an Opus decoder handle and turns encoded payloads into audio frames.
class AudioDecoder {
public:
    AudioDecoder(int sample_rate = 48000);
    ~AudioDecoder();

    // decoder_ is a raw handle; a copied object would alias the same handle.
    AudioDecoder(const AudioDecoder&) = delete;
    AudioDecoder& operator=(const AudioDecoder&) = delete;

    /// Decodes one payload.
    ///
    /// The original listing declared this member and then defined it again in
    /// the same class body (ill-formed); the single definition now lives here.
    ///
    /// @param payload Encoded bytes.
    /// @return The decoded frame, or std::nullopt when there is no decoder or
    ///         opus_decode() yields no samples (zero or an error code).
    std::optional<AudioFrame> Decode(const std::vector<uint8_t>& payload) {
        if (decoder_ == nullptr) {
            return std::nullopt;
        }

        constexpr int kMaxFrameSamples = 5760;  // maximum frame size

        AudioFrame frame;
        frame.samples.resize(kMaxFrameSamples);

        const int decoded_samples =
            opus_decode(decoder_, payload.data(),
                        static_cast<int>(payload.size()), frame.samples.data(),
                        kMaxFrameSamples, 0);
        if (decoded_samples <= 0) {
            return std::nullopt;
        }

        // Shrink the buffer to what was actually decoded.
        frame.samples.resize(static_cast<size_t>(decoded_samples));
        return frame;
    }

    /// Packet-loss concealment (PLC) entry point, as labelled in the original
    /// listing; the implementation is not shown.
    AudioFrame DecodeFec();

private:
    OpusDecoder* decoder_ = nullptr;
};
三、系统集成
3.1 音频引擎
/// Ties capture, processing, encoding, transport, jitter buffering, decoding
/// and playout together into one send/receive pipeline.
class AudioEngine {
public:
    /// Aggregated configuration of all sub-modules.
    struct Config {
        AudioCapture::Config capture;
        AudioProcessing::Config processing;
        AudioEncoder::Config encoder;
        NetworkTransport::Config transport;
        JitterBuffer::Config jitter;
    };

    AudioEngine(const Config& config);
    ~AudioEngine();

    /// Starts the engine; returns a bool status (implementation not shown).
    bool Start();

    /// Stops the engine.
    void Stop();

    /// Sets the remote endpoint.
    void SetRemote(const std::string& ip, int port);

    /// Returns the collected statistics.
    Stats GetStats() const;

    // Runtime tuning hooks. AdaptToQuality() in this chapter calls them, but
    // the original class listing did not declare them.
    void SetJitterDelay(int delay_ms);  // presumably milliseconds, like
                                        // JitterBuffer::Config -- confirm
    void SetBitrate(int bitrate);
    void SetFecEnabled(bool enabled);

private:
    Config config_;

    // Modules.
    std::unique_ptr<AudioCapture> capture_;
    std::unique_ptr<AudioProcessing> processing_;
    std::unique_ptr<AudioEncoder> encoder_;
    std::unique_ptr<NetworkTransport> transport_;
    std::unique_ptr<JitterBuffer> jitter_;
    std::unique_ptr<AudioDecoder> decoder_;

    // Playout.
    std::unique_ptr<AudioPlayer> player_;

    // Statistics.
    // NOTE(review): updated from the callbacks below with no synchronization
    // visible in this listing -- confirm how GetStats() is protected.
    Stats stats_;

    // Running RTP timestamp and loop flag. Both were used by the methods
    // below but never declared in the original listing. running_ is atomic
    // (requires <atomic>) because PlayLoop() presumably runs on its own
    // thread while Start()/Stop() flip the flag -- confirm.
    uint32_t timestamp_ = 0;
    std::atomic<bool> running_{false};

    /// Send-direction callback: process, encode and transmit one frame.
    void OnCaptureFrame(const AudioFrame& frame) {
        // Process a private copy so the caller's frame stays untouched.
        AudioFrame processed = frame;
        processing_->ProcessCapture(&processed);

        // Encode.
        auto encoded = encoder_->Encode(processed);

        // Advance the timestamp for every captured frame, even one that is
        // not sent, so the sample clock stays continuous.
        const uint32_t timestamp =
            timestamp_ + static_cast<uint32_t>(processed.samples.size());
        timestamp_ = timestamp;

        if (encoded.empty()) {
            return;  // nothing was produced; do not send an empty payload
        }

        // Send.
        transport_->SendRtp(encoded, timestamp);

        // Update statistics.
        stats_.sent_packets++;
        stats_.sent_bytes += encoded.size();
    }

    /// Receive-direction callback: queue the packet and count it.
    void OnRtpReceived(const RtpPacket& packet) {
        // Insert into the jitter buffer.
        jitter_->InsertPacket(packet);

        // Update statistics.
        stats_.received_packets++;
        stats_.received_bytes += packet.payload.size();
    }

    /// Playout loop: pulls one frame per 10 ms interval until running_ clears.
    void PlayLoop() {
        constexpr int64_t kFrameIntervalMs = 10;  // 10 ms frame interval

        // Pace against absolute deadlines. The original slept a fixed 10 ms
        // after each iteration, so the time spent in GetFrame()/Play() was
        // added to every period and playout ran slower than real time.
        int64_t next_deadline = GetTimeMs();
        while (running_) {
            const int64_t play_time = GetTimeMs() + config_.jitter.target_delay_ms;
            auto frame = jitter_->GetFrame(play_time);
            if (frame) {
                player_->Play(*frame);
            }

            next_deadline += kFrameIntervalMs;
            const int64_t remaining = next_deadline - GetTimeMs();
            if (remaining > 0) {
                Sleep(static_cast<int>(remaining));
            } else {
                // Fell behind; restart pacing from now instead of bursting.
                next_deadline = GetTimeMs();
            }
        }
    }
};
3.2 配置管理
// Default configuration.
//
// Builds an AudioEngine::Config with the capture, processing, encoder and
// jitter sections spelled out explicitly; the transport section keeps
// whatever NetworkTransport::Config default-initializes to.
AudioEngine::Config DefaultConfig() {
    AudioEngine::Config config{};

    // Capture.
    config.capture.sample_rate = 48000;
    config.capture.channels = 1;
    config.capture.frame_duration_ms = 10;

    // Processing.
    config.processing.aec_enabled = true;
    config.processing.agc_enabled = true;
    config.processing.ns_enabled = true;
    config.processing.vad_enabled = true;
    config.processing.aec_mode = 2;
    config.processing.ns_level = 2;
    config.processing.agc_target_level = 3;

    // Encoder.
    config.encoder.sample_rate = 48000;
    config.encoder.channels = 1;
    config.encoder.bitrate = 32000;
    config.encoder.complexity = 5;
    config.encoder.fec_enabled = true;
    config.encoder.packet_loss_perc = 10;

    // Jitter buffer.
    config.jitter.min_delay_ms = 20;
    config.jitter.max_delay_ms = 200;
    config.jitter.target_delay_ms = 60;

    return config;
}
// Scenario presets.
//
// Voice chat: the default configuration with aggressive AEC, moderate noise
// suppression and a 32 kbps encoder bitrate.
AudioEngine::Config VoiceChatConfig() {
    AudioEngine::Config preset = DefaultConfig();

    auto& processing = preset.processing;
    processing.aec_mode = 2;  // aggressive AEC
    processing.ns_level = 2;  // moderate noise suppression

    preset.encoder.bitrate = 32000;  // 32 kbps
    return preset;
}
// Conference: the default configuration with moderate AEC, light noise
// suppression and a 24 kbps encoder bitrate.
AudioEngine::Config ConferenceConfig() {
    AudioEngine::Config preset = DefaultConfig();

    auto& processing = preset.processing;
    processing.aec_mode = 1;  // moderate AEC
    processing.ns_level = 1;  // light noise suppression

    preset.encoder.bitrate = 24000;  // 24 kbps (multi-party scenario)
    return preset;
}
四、质量监控
4.1 统计信息收集
/// Snapshot of engine statistics.
///
/// Every field has a zero default so that a default-constructed instance is
/// fully defined; the original left all members uninitialized, so reading a
/// fresh instance yielded indeterminate values.
struct AudioEngineStats {
    // Send statistics.
    int64_t sent_packets = 0;
    int64_t sent_bytes = 0;
    int send_bitrate = 0;

    // Receive statistics.
    int64_t received_packets = 0;
    int64_t received_bytes = 0;
    int receive_bitrate = 0;
    int64_t lost_packets = 0;
    float loss_rate = 0.0f;

    // Network statistics.
    int64_t rtt_ms = 0;
    float jitter_ms = 0.0f;

    // Audio statistics.
    float audio_level = 0.0f;
    float noise_level = 0.0f;
    bool voice_detected = false;

    // AEC statistics.
    float erl = 0.0f;
    float erle = 0.0f;

    // Jitter buffer statistics.
    int jitter_buffer_delay_ms = 0;
    int jitter_buffer_size = 0;
    int underrun_count = 0;
    int overrun_count = 0;
};
4.2 质量评估
/// Quality grades, listed from best to worst. The explicit values match the
/// implicit numbering of the declaration order.
enum class QualityLevel {
    Excellent = 0,
    Good = 1,
    Fair = 2,
    Poor = 3,
    Bad = 4
};
/// Maps a statistics snapshot to a quality grade.
///
/// Starts from a score of 100, subtracts penalties for packet loss, round-trip
/// time, jitter and echo-canceller performance, then buckets the result.
///
/// @param stats Snapshot to evaluate; reads loss_rate, rtt_ms, jitter_ms and
///              erle.
/// @return The grade for the resulting score.
QualityLevel EvaluateQuality(const AudioEngineStats& stats) {
    int score = 100;

    // Packet-loss penalty. The thresholds are float literals because
    // loss_rate is a float: compared against a double literal, a float holding
    // exactly 0.1 (or 0.05) is promoted to a value slightly above the double
    // constant, so a loss of exactly 10% (or 5%) fell into the harsher bucket.
    if (stats.loss_rate > 0.1f) {
        score -= 30;
    } else if (stats.loss_rate > 0.05f) {
        score -= 20;
    } else if (stats.loss_rate > 0.02f) {
        score -= 10;
    }

    // Round-trip-time penalty.
    if (stats.rtt_ms > 300) {
        score -= 20;
    } else if (stats.rtt_ms > 200) {
        score -= 10;
    }

    // Jitter penalty.
    if (stats.jitter_ms > 50.0f) {
        score -= 15;
    } else if (stats.jitter_ms > 30.0f) {
        score -= 5;
    }

    // AEC-effectiveness penalty.
    // NOTE(review): an erle that was never filled in (e.g. 0) also takes the
    // full penalty -- confirm that is intended when AEC is disabled.
    if (stats.erle < 10.0f) {
        score -= 15;
    } else if (stats.erle < 20.0f) {
        score -= 5;
    }

    // Map the score to a grade.
    if (score >= 90) return QualityLevel::Excellent;
    if (score >= 70) return QualityLevel::Good;
    if (score >= 50) return QualityLevel::Fair;
    if (score >= 30) return QualityLevel::Poor;
    return QualityLevel::Bad;
}
4.3 自适应调整
/// Applies the jitter-delay / bitrate / FEC settings associated with a
/// quality grade to the engine.
///
/// Better grades get a shorter jitter delay and a higher bitrate; Poor and
/// Bad additionally switch FEC on. A value that is not one of the five
/// enumerators leaves the engine untouched.
void AdaptToQuality(AudioEngine* engine, QualityLevel quality) {
    // Settings selected for one grade.
    struct Tuning {
        int jitter_delay;
        int bitrate;
        bool turn_on_fec;
    };

    Tuning tuning{};
    if (quality == QualityLevel::Excellent) {
        // Good network: the delay can be lowered.
        tuning = Tuning{40, 48000, false};
    } else if (quality == QualityLevel::Good) {
        tuning = Tuning{60, 32000, false};
    } else if (quality == QualityLevel::Fair) {
        tuning = Tuning{100, 24000, false};
    } else if (quality == QualityLevel::Poor) {
        // Bad network: more delay plus FEC.
        tuning = Tuning{150, 16000, true};
    } else if (quality == QualityLevel::Bad) {
        tuning = Tuning{200, 12000, true};
    } else {
        return;
    }

    engine->SetJitterDelay(tuning.jitter_delay);
    engine->SetBitrate(tuning.bitrate);
    if (tuning.turn_on_fec) {
        engine->SetFecEnabled(true);
    }
}
五、测试与验证
5.1 单元测试
// AEC test: feeds a far-end tone as render reference, runs the capture path
// on an echo of that tone, and expects the remaining level to be low.
TEST(AecTest, EchoCancellation) {
    AudioProcessing::Config aec_config;
    aec_config.aec_enabled = true;
    AudioProcessing ap(aec_config);

    // Test signals: a 1 kHz tone and a copy of it with echo added.
    auto render_frame = GenerateSineWave(1000, 48000, 480);
    auto capture_frame = GenerateEcho(render_frame, 0.5);

    // Render reference first, then the capture path (in place).
    // NOTE(review): only a single frame is processed; confirm the canceller
    // meets the threshold without further frames to adapt on.
    ap.ProcessRender(render_frame);
    ap.ProcessCapture(&capture_frame);

    // The echo should be strongly suppressed.
    const float residual_level = ComputeLevel(capture_frame);
    EXPECT_LT(residual_level, 0.1);
}
// Encoder test: encoding a 480-sample frame must yield a non-empty payload
// smaller than 4000 bytes.
TEST(EncoderTest, OpusEncode) {
    AudioEncoder::Config encoder_config;
    encoder_config.bitrate = 32000;
    AudioEncoder encoder(encoder_config);

    const auto input_frame = GenerateTestFrame(480);
    const auto payload = encoder.Encode(input_frame);

    EXPECT_GT(payload.size(), 0u);
    EXPECT_LT(payload.size(), 4000u);
}
5.2 集成测试
// End-to-end test: two engines on the loopback interface send to each other.
TEST(AudioEngineTest, EndToEnd) {
    // The original built both engines from the identical default config, so
    // both carried the same port settings, while the remote ports used below
    // (5002 and 5000) assume the engines listen on different ports. Give each
    // engine its own port pair.
    // NOTE(review): assumes transport.rtp_port / rtcp_port are the local
    // listening ports -- confirm against NetworkTransport's constructor.
    AudioEngine::Config config1 = DefaultConfig();
    config1.transport.rtp_port = 5000;
    config1.transport.rtcp_port = 5001;

    AudioEngine::Config config2 = DefaultConfig();
    config2.transport.rtp_port = 5002;
    config2.transport.rtcp_port = 5003;

    AudioEngine engine1(config1);
    AudioEngine engine2(config2);

    // Start both engines; a failed start makes every later check meaningless.
    ASSERT_TRUE(engine1.Start());
    ASSERT_TRUE(engine2.Start());

    // Point each engine at the other one's port.
    engine1.SetRemote("127.0.0.1", 5002);
    engine2.SetRemote("127.0.0.1", 5000);

    // Let them run for a while.
    Sleep(5000);  // 5 seconds

    // Stop before sampling so the counters are no longer changing.
    engine1.Stop();
    engine2.Stop();

    // Check statistics.
    auto stats1 = engine1.GetStats();
    auto stats2 = engine2.GetStats();
    EXPECT_GT(stats1.sent_packets, 0);
    EXPECT_GT(stats2.received_packets, 0);
    EXPECT_LT(stats2.loss_rate, 0.01);  // loss should be very low
}
5.3 性能测试
// CPU usage test: runs the engine for ten seconds and expects the monitored
// usage to stay below 0.1 (10%).
TEST(PerformanceTest, CpuUsage) {
    AudioEngine engine(DefaultConfig());
    engine.Start();

    // Monitor CPU usage while the engine is running.
    CpuMonitor cpu_monitor;
    cpu_monitor.Start();
    Sleep(10000);  // 10 seconds
    const auto measured_usage = cpu_monitor.Stop();

    EXPECT_LT(measured_usage, 0.1);  // CPU usage should be below 10%
}
// Latency test: expects the measured end-to-end latency to be below 100 ms.
TEST(PerformanceTest, Latency) {
    AudioEngine engine(DefaultConfig());
    engine.Start();

    // Measure end-to-end latency.
    const auto begin_ms = GetTimeMs();
    // ... send and receive
    // NOTE(review): the send/receive step is a placeholder in this listing,
    // so as written only the gap between the two clock reads is measured.
    const auto finish_ms = GetTimeMs();

    const int64_t elapsed_ms = finish_ms - begin_ms;
    EXPECT_LT(elapsed_ms, 100);  // latency should be below 100 ms
}
六、本章小结
本章我们从零搭建了完整的RTC语音质量优化系统:
- 架构设计:模块划分、数据流设计
- 核心模块:采集、处理、编码、传输、缓冲、解码
- 系统集成:音频引擎、配置管理
- 质量监控:统计收集、质量评估、自适应调整
- 测试验证:单元测试、集成测试、性能测试
下一章,我们将探讨移动端优化,解决移动设备上的特殊挑战。