/* rtp_audio_frame.cpp
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs
 * Copyright 1998 Gerald Combs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

#include "rtp_audio_stream.h"

#ifdef QT_MULTIMEDIA_LIB

#ifdef HAVE_SPEEXDSP
#include <speex/speex_resampler.h>
#else
#include <codecs/speex/speex_resampler.h>
#endif /* HAVE_SPEEXDSP */

// NOTE(review): the header names below were stripped from the copy under
// review (angle-bracket contents lost in extraction). They were reconstructed
// from the identifiers used in this file -- confirm against the build.
#include <epan/rtp_pt.h>
#include <epan/dissectors/packet-rtp.h>

#include <ui/rtp_media.h>
#include <ui/rtp_stream.h>

#include <wsutil/nstime.h>

#include <QAudioDeviceInfo>
#include <QAudioFormat>
#include <QAudioOutput>
#include <QDir>
#include <QTemporaryFile>

// To do:
// - Only allow one rtp_stream_info_t per RtpAudioStream?

// Audio decoding defaults to this rate until the first decodable packet
// tells us otherwise; the waveform ("visual") samples are kept in memory,
// so they are decimated to a much lower rate.
static spx_int16_t default_audio_sample_rate_ = 8000;
static const spx_int16_t visual_sample_rate_ = 1000;

// Build a playable audio stream for one RTP stream. Decoded audio is
// written to a temporary file (tempfile_) which is later fed to a
// QAudioOutput; a low-rate copy of the samples is kept in memory for the
// waveform display.
RtpAudioStream::RtpAudioStream(QObject *parent, _rtp_stream_info *rtp_stream) :
    QObject(parent),
    decoders_hash_(rtp_decoder_hash_table_new()),
    global_start_rel_time_(0.0),
    start_abs_offset_(0.0),
    start_rel_time_(0.0),
    stop_rel_time_(0.0),
    audio_out_rate_(0),
    audio_resampler_(0),
    audio_output_(0),
    max_sample_val_(1),
    color_(0),
    jitter_buffer_size_(50),
    timing_mode_(RtpAudioStream::JitterBuffer)
{
    copy_address(&src_addr_, &rtp_stream->src_addr);
    src_port_ = rtp_stream->src_port;
    copy_address(&dst_addr_, &rtp_stream->dest_addr);
    dst_port_ = rtp_stream->dest_port;
    ssrc_ = rtp_stream->ssrc;

    // We keep visual samples in memory. Make fewer of them.
    visual_resampler_ = speex_resampler_init(1, default_audio_sample_rate_,
                                             visual_sample_rate_,
                                             SPEEX_RESAMPLER_QUALITY_MIN, NULL);
    speex_resampler_skip_zeros(visual_resampler_);

    QString tempname = QString("%1/wireshark_rtp_stream").arg(QDir::tempPath());
    tempfile_ = new QTemporaryFile(tempname, this);
    tempfile_->open();

    // RTP_STREAM_DEBUG("Writing to %s", tempname.toUtf8().constData());
}

RtpAudioStream::~RtpAudioStream()
{
    // Packets were deep-copied in addRtpPacket(); free each piece.
    for (int i = 0; i < rtp_packets_.size(); i++) {
        rtp_packet_t *rtp_packet = rtp_packets_[i];
        g_free(rtp_packet->info);
        g_free(rtp_packet->payload_data);
        g_free(rtp_packet);
    }
    g_hash_table_destroy(decoders_hash_);
    if (audio_resampler_) speex_resampler_destroy(audio_resampler_);
    speex_resampler_destroy(visual_resampler_);
    // tempfile_ and audio_output_ are QObject children; Qt deletes them.
}

// True if the given stream has the same 5-tuple + SSRC as this one.
bool RtpAudioStream::isMatch(const _rtp_stream_info *rtp_stream) const
{
    if (rtp_stream
            && addresses_equal(&rtp_stream->src_addr, &src_addr_)
            && rtp_stream->src_port == src_port_
            && addresses_equal(&rtp_stream->dest_addr, &dst_addr_)
            && rtp_stream->dest_port == dst_port_
            && rtp_stream->ssrc == ssrc_)
        return true;
    return false;
}

// True if the given packet belongs to this stream (same addresses, ports,
// and RTP sync source).
bool RtpAudioStream::isMatch(const _packet_info *pinfo, const _rtp_info *rtp_info) const
{
    if (pinfo && rtp_info
            && addresses_equal(&pinfo->src, &src_addr_)
            && pinfo->srcport == src_port_
            && addresses_equal(&pinfo->dst, &dst_addr_)
            && pinfo->destport == dst_port_
            && rtp_info->info_sync_src == ssrc_)
        return true;
    return false;
}

// XXX We add multiple RTP streams here because that's what the GTK+ UI does.
// Should we make these distinct, with their own waveforms? It seems like
// that would simplify a lot of things.
void RtpAudioStream::addRtpStream(const _rtp_stream_info *rtp_stream)
{
    if (!rtp_stream) return;

    // RTP_STREAM_DEBUG("added %d:%u packets", g_list_length(rtp_stream->rtp_packet_list), rtp_stream->packet_count);
    rtp_streams_ << rtp_stream;
}

// Deep-copy one RTP packet (header info + payload) into rtp_packets_ for
// later decoding. Mirrors gtk/rtp_player.c:decode_rtp_packet.
void RtpAudioStream::addRtpPacket(const struct _packet_info *pinfo, const struct _rtp_info *rtp_info)
{
    if (!rtp_info) return;

    rtp_packet_t *rtp_packet = g_new0(rtp_packet_t, 1);
    rtp_packet->info = (struct _rtp_info *) g_memdup(rtp_info, sizeof(struct _rtp_info));
    if (rtp_info->info_all_data_present && (rtp_info->info_payload_len != 0)) {
        rtp_packet->payload_data = (guint8 *) g_memdup(&(rtp_info->info_data[rtp_info->info_payload_offset]),
                                                       rtp_info->info_payload_len);
    }

    if (rtp_packets_.size() < 1) { // First packet
        // Set the relative start time *before* deriving the absolute offset
        // from it. The previous code computed start_abs_offset_ first, which
        // subtracted the stale zero-initialized start_rel_time_ and produced
        // a wrong offset whenever the stream didn't begin at capture time 0.
        start_rel_time_ = stop_rel_time_ = nstime_to_sec(&pinfo->rel_ts);
        start_abs_offset_ = nstime_to_sec(&pinfo->abs_ts) - start_rel_time_;
    }
    rtp_packet->frame_num = pinfo->num;
    rtp_packet->arrive_offset = nstime_to_sec(&pinfo->rel_ts) - start_rel_time_;

    rtp_packets_ << rtp_packet;
}

// Discard all decoded state so decode() can be run again, e.g. after the
// timing mode or jitter buffer size changes.
void RtpAudioStream::reset(double start_rel_time)
{
    global_start_rel_time_ = start_rel_time;
    stop_rel_time_ = start_rel_time_;
    audio_out_rate_ = 0;
    max_sample_val_ = 1;
    packet_timestamps_.clear();
    visual_samples_.clear();
    out_of_seq_timestamps_.clear();
    jitter_drop_timestamps_.clear();

    if (audio_resampler_) {
        speex_resampler_reset_mem(audio_resampler_);
    }
    if (visual_resampler_) {
        speex_resampler_reset_mem(visual_resampler_);
    }
    tempfile_->seek(0);
}

static const int sample_bytes_ = sizeof(SAMPLE) / sizeof(char);

/* Fix for bug 4119/5902: don't insert too many silence frames.
 * XXX - is there a better thing to do here?
 */
static const int max_silence_samples_ = MAX_SILENCE_FRAMES;

// Decode every collected RTP packet: run the payload through the codec,
// insert/drop silence according to the timing mode and jitter buffer,
// resample to the output device rate, write the audio to tempfile_, and
// collect decimated samples for the waveform.
void RtpAudioStream::decode()
{
    if (rtp_packets_.size() < 1) return;

    // gtk/rtp_player.c:decode_rtp_stream
    // XXX This is more messy than it should be.

    gsize resample_buff_len = 0x1000;
    SAMPLE *resample_buff = (SAMPLE *) g_malloc(resample_buff_len);
    spx_uint32_t cur_in_rate = 0, visual_out_rate = 0;
    char *write_buff = NULL;
    qint64 write_bytes = 0;
    unsigned channels = 0;
    unsigned sample_rate = 0;
    int last_sequence = 0;

    double rtp_time_prev = 0.0;
    double arrive_time_prev = 0.0;
    double pack_period = 0.0;
    double start_time = 0.0;
    double start_rtp_time = 0.0;
    guint32 start_timestamp = 0;

    size_t decoded_bytes_prev = 0;

    for (int cur_packet = 0; cur_packet < rtp_packets_.size(); cur_packet++) {
        SAMPLE *decode_buff = NULL;
        // XXX The GTK+ UI updates a progress bar here.
        rtp_packet_t *rtp_packet = rtp_packets_[cur_packet];
        stop_rel_time_ = start_rel_time_ + rtp_packet->arrive_offset;
        speex_resampler_get_rate(visual_resampler_, &cur_in_rate, &visual_out_rate);

        QString payload_name;
        if (rtp_packet->info->info_payload_type_str) {
            payload_name = rtp_packet->info->info_payload_type_str;
        } else {
            payload_name = try_val_to_str_ext(rtp_packet->info->info_payload_type,
                                              &rtp_payload_type_short_vals_ext);
        }
        if (!payload_name.isEmpty()) {
            payload_names_ << payload_name;
        }

        if (cur_packet < 1) { // First packet
            start_timestamp = rtp_packet->info->info_timestamp;
            start_rtp_time = 0;
            rtp_time_prev = 0;
            last_sequence = rtp_packet->info->info_seq_num - 1;
        }

        size_t decoded_bytes = decode_rtp_packet(rtp_packet, &decode_buff, decoders_hash_,
                                                 &channels, &sample_rate);

        unsigned rtp_clock_rate = sample_rate;
        if (rtp_packet->info->info_payload_type == PT_G722) {
            // G.722 sample rate is 16kHz, but RTP clock rate is 8kHz for historic reasons.
            rtp_clock_rate = 8000;
        }

        if (decoded_bytes == 0 || sample_rate == 0) {
            // We didn't decode anything. Clean up and prep for the next packet.
            last_sequence = rtp_packet->info->info_seq_num;
            g_free(decode_buff);
            continue;
        }

        if (audio_out_rate_ == 0) {
            // Use the first non-zero rate we find. Adjust it to match our audio hardware.
            QAudioDeviceInfo cur_out_device = QAudioDeviceInfo::defaultOutputDevice();
            QString cur_out_name = parent()->property("currentOutputDeviceName").toString();
            foreach (QAudioDeviceInfo out_device, QAudioDeviceInfo::availableDevices(QAudio::AudioOutput)) {
                if (cur_out_name == out_device.deviceName()) {
                    cur_out_device = out_device;
                }
            }

            QAudioFormat format;
            format.setSampleRate(sample_rate);
            format.setSampleSize(sample_bytes_ * 8); // bits
            format.setSampleType(QAudioFormat::SignedInt);
            format.setChannelCount(1);
            format.setCodec("audio/pcm");

            if (!cur_out_device.isFormatSupported(format)) {
                sample_rate = cur_out_device.nearestFormat(format).sampleRate();
            }

            audio_out_rate_ = sample_rate;
            RTP_STREAM_DEBUG("Audio sample rate is %u", audio_out_rate_);

            // Prepend silence to match our sibling streams.
            tempfile_->seek(0);
            int prepend_samples = (start_rel_time_ - global_start_rel_time_) * audio_out_rate_;
            if (prepend_samples > 0) {
                writeSilence(prepend_samples);
            }
        }

        if (rtp_packet->info->info_seq_num != last_sequence + 1) {
            out_of_seq_timestamps_.append(stop_rel_time_);
        }
        last_sequence = rtp_packet->info->info_seq_num;

        double rtp_time = (double)(rtp_packet->info->info_timestamp - start_timestamp) / rtp_clock_rate - start_rtp_time;
        double arrive_time;
        if (timing_mode_ == RtpTimestamp) {
            arrive_time = rtp_time;
        } else {
            arrive_time = rtp_packet->arrive_offset - start_time;
        }

        double diff = qAbs(arrive_time - rtp_time);
        if (diff * 1000 > jitter_buffer_size_ && timing_mode_ != Uninterrupted) {
            // rtp_player.c:628

            jitter_drop_timestamps_.append(stop_rel_time_);
            RTP_STREAM_DEBUG("Packet drop by jitter buffer exceeded %f > %d", diff * 1000, jitter_buffer_size_);

            /* if there was a silence period (more than two packetization period) resync the source */
            if ((rtp_time - rtp_time_prev) > pack_period * 2) {
                int silence_samples;
                RTP_STREAM_DEBUG("Resync...");

                silence_samples = (int)((arrive_time - arrive_time_prev) * sample_rate - decoded_bytes_prev / sample_bytes_);
                /* Fix for bug 4119/5902: don't insert too many silence frames.
                 * XXX - is there a better thing to do here?
                 */
                silence_samples = qMin(silence_samples, max_silence_samples_);
                writeSilence(silence_samples);
                silence_timestamps_.append(stop_rel_time_);

                decoded_bytes_prev = 0;
                /* defined start_timestmp to avoid overflow in timestamp. TODO: handle the timestamp correctly */
                /* XXX: if timestamps (RTP) are missing/ignored try use packet arrive time only (see also "rtp_time") */
                start_timestamp = rtp_packet->info->info_timestamp;
                start_rtp_time = 0;
                start_time = rtp_packet->arrive_offset;
                rtp_time_prev = 0;
            }

        } else {
            // rtp_player.c:664
            /* Add silence if it is necessary */
            int silence_samples;

            if (timing_mode_ == Uninterrupted) {
                silence_samples = 0;
            } else {
                silence_samples = (int)((rtp_time - rtp_time_prev) * sample_rate - decoded_bytes_prev / sample_bytes_);
            }

            if (silence_samples != 0) {
                wrong_timestamp_timestamps_.append(stop_rel_time_);
            }

            if (silence_samples > 0) {
                /* Fix for bug 4119/5902: don't insert too many silence frames.
                 * XXX - is there a better thing to do here?
                 */
                silence_samples = qMin(silence_samples, max_silence_samples_);
                writeSilence(silence_samples);
                silence_timestamps_.append(stop_rel_time_);
            }

            // XXX rtp_player.c:696 adds audio here.

            rtp_time_prev = rtp_time;
            pack_period = (double) decoded_bytes / sample_bytes_ / sample_rate;
            decoded_bytes_prev = decoded_bytes;
            arrive_time_prev = arrive_time;
        }

        // Write samples to our file.
        write_buff = (char *) decode_buff;
        write_bytes = decoded_bytes;

        if (audio_out_rate_ != sample_rate) {
            // Resample the audio to match our previous output rate.
            if (!audio_resampler_) {
                audio_resampler_ = speex_resampler_init(1, sample_rate, audio_out_rate_, 10, NULL);
                speex_resampler_skip_zeros(audio_resampler_);
                RTP_STREAM_DEBUG("Started resampling from %u to (out) %u Hz.", sample_rate, audio_out_rate_);
            } else {
                spx_uint32_t audio_out_rate;
                speex_resampler_get_rate(audio_resampler_, &cur_in_rate, &audio_out_rate);

                // Adjust rates if needed.
                if (sample_rate != cur_in_rate) {
                    speex_resampler_set_rate(audio_resampler_, sample_rate, audio_out_rate);
                    speex_resampler_set_rate(visual_resampler_, sample_rate, visual_out_rate);
                    RTP_STREAM_DEBUG("Changed input rate from %u to %u Hz. Out is %u.", cur_in_rate, sample_rate, audio_out_rate_);
                }
            }
            // speex_resampler_process_int takes lengths in *samples*, not
            // bytes. The previous code passed info_payload_len (encoded
            // payload bytes), which does not equal the decoded sample count
            // for any codec (e.g. G.711 decodes 1 byte -> one 16-bit sample).
            spx_uint32_t in_len = (spx_uint32_t)(decoded_bytes / sample_bytes_);
            spx_uint32_t out_len = (audio_out_rate_ * in_len / sample_rate) + (audio_out_rate_ % sample_rate != 0);
            if (out_len * sample_bytes_ > resample_buff_len) {
                while ((out_len * sample_bytes_ > resample_buff_len))
                    resample_buff_len *= 2;
                resample_buff = (SAMPLE *) g_realloc(resample_buff, resample_buff_len);
            }

            speex_resampler_process_int(audio_resampler_, 0, decode_buff, &in_len, resample_buff, &out_len);
            write_buff = (char *) resample_buff;
            write_bytes = out_len * sample_bytes_;
        }

        // Write the decoded, possibly-resampled audio to our temp file.
        tempfile_->write(write_buff, write_bytes);

        // Collect our visual samples. As above, lengths are in samples.
        spx_uint32_t in_len = (spx_uint32_t)(decoded_bytes / sample_bytes_);
        spx_uint32_t out_len = (visual_out_rate * in_len / sample_rate) + (visual_out_rate % sample_rate != 0);
        if (out_len * sample_bytes_ > resample_buff_len) {
            while ((out_len * sample_bytes_ > resample_buff_len))
                resample_buff_len *= 2;
            resample_buff = (SAMPLE *) g_realloc(resample_buff, resample_buff_len);
        }

        speex_resampler_process_int(visual_resampler_, 0, decode_buff, &in_len, resample_buff, &out_len);
        for (unsigned i = 0; i < out_len; i++) {
            packet_timestamps_[stop_rel_time_ + (double) i / visual_out_rate] = rtp_packet->frame_num;
            if (qAbs(resample_buff[i]) > max_sample_val_) max_sample_val_ = qAbs(resample_buff[i]);
            visual_samples_.append(resample_buff[i]);
        }

        // Finally, clean up per-packet decode state.
        g_free(decode_buff);
    }
    g_free(resample_buff);
}

// Sorted, deduplicated (via QSet payload_names_) list of payload type names
// seen in this stream.
const QStringList RtpAudioStream::payloadNames() const
{
    QStringList payload_names = payload_names_.toList();
    payload_names.sort();
    return payload_names;
}

// Timestamps of the visual samples, relative to capture start or absolute
// depending on the "relative" flag.
const QVector<double> RtpAudioStream::visualTimestamps(bool relative)
{
    QVector<double> ts_keys = packet_timestamps_.keys().toVector();
    if (relative) return ts_keys;

    QVector<double> adj_timestamps;
    for (int i = 0; i < ts_keys.size(); i++) {
        adj_timestamps.append(ts_keys[i] + start_abs_offset_);
    }
    return adj_timestamps;
}

// Scale the height of the waveform (max_sample_val_) and adjust its Y
// offset so that they overlap slightly (stack_offset_).

// XXX This means that waveforms can be misleading with respect to relative
// amplitude. We might want to add a "global" max_sample_val_.
static const double stack_offset_ = G_MAXINT16 / 3;
const QVector<double> RtpAudioStream::visualSamples(int y_offset)
{
    QVector<double> adj_samples;
    double scaled_offset = y_offset * stack_offset_;
    for (int i = 0; i < visual_samples_.size(); i++) {
        adj_samples.append(((double)visual_samples_[i] * G_MAXINT16 / max_sample_val_) + scaled_offset);
    }
    return adj_samples;
}

// Timestamps at which out-of-sequence packets were detected.
const QVector<double> RtpAudioStream::outOfSequenceTimestamps(bool relative)
{
    if (relative) return out_of_seq_timestamps_;

    QVector<double> adj_timestamps;
    for (int i = 0; i < out_of_seq_timestamps_.size(); i++) {
        adj_timestamps.append(out_of_seq_timestamps_[i] + start_abs_offset_);
    }
    return adj_timestamps;
}

// Y values (constant per stream) used to plot out-of-sequence markers.
const QVector<double> RtpAudioStream::outOfSequenceSamples(int y_offset)
{
    QVector<double> adj_samples;
    double scaled_offset = y_offset * stack_offset_; // XXX Should be different for seq, jitter, wrong & silence
    for (int i = 0; i < out_of_seq_timestamps_.size(); i++) {
        adj_samples.append(scaled_offset);
    }
    return adj_samples;
}

// Timestamps at which packets were dropped by the jitter buffer.
const QVector<double> RtpAudioStream::jitterDroppedTimestamps(bool relative)
{
    if (relative) return jitter_drop_timestamps_;

    QVector<double> adj_timestamps;
    for (int i = 0; i < jitter_drop_timestamps_.size(); i++) {
        adj_timestamps.append(jitter_drop_timestamps_[i] + start_abs_offset_);
    }
    return adj_timestamps;
}

// Y values (constant per stream) used to plot jitter-drop markers.
const QVector<double> RtpAudioStream::jitterDroppedSamples(int y_offset)
{
    QVector<double> adj_samples;
    double scaled_offset = y_offset * stack_offset_; // XXX Should be different for seq, jitter, wrong & silence
    for (int i = 0; i < jitter_drop_timestamps_.size(); i++) {
        adj_samples.append(scaled_offset);
    }
    return adj_samples;
}

// Timestamps at which packets arrived with unexpected RTP timestamps.
const QVector<double> RtpAudioStream::wrongTimestampTimestamps(bool relative)
{
    if (relative) return wrong_timestamp_timestamps_;

    QVector<double> adj_timestamps;
    for (int i = 0; i < wrong_timestamp_timestamps_.size(); i++) {
        adj_timestamps.append(wrong_timestamp_timestamps_[i] + start_abs_offset_);
    }
    return adj_timestamps;
}

// Y values (constant per stream) used to plot wrong-timestamp markers.
const QVector<double> RtpAudioStream::wrongTimestampSamples(int y_offset)
{
    QVector<double> adj_samples;
    double scaled_offset = y_offset * stack_offset_; // XXX Should be different for seq, jitter, wrong & silence
    for (int i = 0; i < wrong_timestamp_timestamps_.size(); i++) {
        adj_samples.append(scaled_offset);
    }
    return adj_samples;
}

// Timestamps at which silence was inserted.
const QVector<double> RtpAudioStream::insertedSilenceTimestamps(bool relative)
{
    if (relative) return silence_timestamps_;

    QVector<double> adj_timestamps;
    for (int i = 0; i < silence_timestamps_.size(); i++) {
        adj_timestamps.append(silence_timestamps_[i] + start_abs_offset_);
    }
    return adj_timestamps;
}

// Y values (constant per stream) used to plot inserted-silence markers.
const QVector<double> RtpAudioStream::insertedSilenceSamples(int y_offset)
{
    QVector<double> adj_samples;
    double scaled_offset = y_offset * stack_offset_; // XXX Should be different for seq, jitter, wrong & silence
    for (int i = 0; i < silence_timestamps_.size(); i++) {
        adj_samples.append(scaled_offset);
    }
    return adj_samples;
}

// Frame number of the packet nearest (at or after) the given timestamp,
// or 0 if there is none.
quint32 RtpAudioStream::nearestPacket(double timestamp, bool is_relative)
{
    if (packet_timestamps_.keys().count() < 1) return 0;

    if (!is_relative) timestamp -= start_abs_offset_;
    QMap<double, quint32>::const_iterator it = packet_timestamps_.lowerBound(timestamp);
    if (it == packet_timestamps_.end()) return 0;
    return it.value();
}

QAudio::State RtpAudioStream::outputState() const
{
    if (!audio_output_) return QAudio::IdleState;
    return audio_output_->state();
}

// Human-readable description of a QAudioFormat, e.g. "8000 Hz, Int16LE".
const QString RtpAudioStream::formatDescription(const QAudioFormat &format)
{
    QString fmt_descr = QString("%1 Hz, ").arg(format.sampleRate());
    switch (format.sampleType()) {
    case QAudioFormat::SignedInt:
        fmt_descr += "Int";
        break;
    case QAudioFormat::UnSignedInt:
        fmt_descr += "UInt";
        break;
    case QAudioFormat::Float:
        fmt_descr += "Float";
        break;
    default:
        fmt_descr += "Unknown";
        break;
    }
    fmt_descr += QString::number(format.sampleSize());
    fmt_descr += format.byteOrder() == QAudioFormat::BigEndian ? "BE" : "LE";

    return fmt_descr;
}

// Start playing the decoded audio in tempfile_ on the currently selected
// output device. Emits playbackError() if the stream is empty or the
// device can't play our PCM format.
void RtpAudioStream::startPlaying()
{
    if (audio_output_) return;

    if (audio_out_rate_ == 0) {
        emit playbackError(tr("RTP stream is empty or codec is unsupported."));
        return;
    }

    QAudioDeviceInfo cur_out_device = QAudioDeviceInfo::defaultOutputDevice();
    QString cur_out_name = parent()->property("currentOutputDeviceName").toString();
    foreach (QAudioDeviceInfo out_device, QAudioDeviceInfo::availableDevices(QAudio::AudioOutput)) {
        if (cur_out_name == out_device.deviceName()) {
            cur_out_device = out_device;
        }
    }

    QAudioFormat format;
    format.setSampleRate(audio_out_rate_);
    format.setSampleSize(sample_bytes_ * 8); // bits
    format.setSampleType(QAudioFormat::SignedInt);
    format.setChannelCount(1);
    format.setCodec("audio/pcm");

    // RTP_STREAM_DEBUG("playing %s %d samples @ %u Hz",
    //                 tempfile_->fileName().toUtf8().constData(),
    //                 (int) tempfile_->size(), audio_out_rate_);

    if (!cur_out_device.isFormatSupported(format)) {
        QString playback_error = tr("%1 does not support PCM at %2. Preferred format is %3")
                .arg(cur_out_device.deviceName())
                .arg(formatDescription(format))
                .arg(formatDescription(cur_out_device.nearestFormat(format)));
        emit playbackError(playback_error);
    }

    audio_output_ = new QAudioOutput(cur_out_device, format, this);
    audio_output_->setNotifyInterval(65); // ~15 fps
    connect(audio_output_, SIGNAL(stateChanged(QAudio::State)), this, SLOT(outputStateChanged(QAudio::State)));
    connect(audio_output_, SIGNAL(notify()), this, SLOT(outputNotify()));
    tempfile_->seek(0);
    audio_output_->start(tempfile_);
    emit startedPlaying();
    // QTBUG-6548 StoppedState is not always emitted on error, force a cleanup
    // in case playback fails immediately.
    if (audio_output_ && audio_output_->state() == QAudio::StoppedState) {
        outputStateChanged(QAudio::StoppedState);
    }
}

void RtpAudioStream::stopPlaying()
{
    if (audio_output_) {
        audio_output_->stop();
    }
}

// Append the given number of zero samples to the temp file and a
// proportionally-decimated run of zeros to the visual samples.
void RtpAudioStream::writeSilence(int samples)
{
    if (samples < 1 || audio_out_rate_ == 0) return;

    unsigned silence_bytes = samples * sample_bytes_;
    char *silence_buff = (char *) g_malloc0(silence_bytes);

    RTP_STREAM_DEBUG("Writing %u silence samples", samples);
    tempfile_->write(silence_buff, silence_bytes);
    g_free(silence_buff);

    QVector<qint16> visual_fill(samples * visual_sample_rate_ / audio_out_rate_, 0);
    visual_samples_ += visual_fill;
}

void RtpAudioStream::outputStateChanged(QAudio::State new_state)
{
    if (!audio_output_) return;

    // On some platforms including macOS and Windows, the stateChanged signal
    // is emitted while a QMutexLocker is active. As a result we shouldn't
    // delete audio_output_ here.
    switch (new_state) {
    case QAudio::StoppedState:
        // RTP_STREAM_DEBUG("stopped %f", audio_output_->processedUSecs() / 100000.0);
        // Detach from parent (RtpAudioStream) to prevent deleteLater from being
        // run during destruction of this class.
        audio_output_->setParent(0);
        audio_output_->disconnect();
        audio_output_->deleteLater();
        audio_output_ = NULL;
        emit finishedPlaying();
        break;
    case QAudio::IdleState:
        audio_output_->stop();
        break;
    default:
        break;
    }
}

void RtpAudioStream::outputNotify()
{
    emit processedSecs(audio_output_->processedUSecs() / 1000000.0);
}

#endif // QT_MULTIMEDIA_LIB

/*
 * Editor modelines
 *
 * Local Variables:
 * c-basic-offset: 4
 * tab-width: 8
 * indent-tabs-mode: nil
 * End:
 *
 * ex: set shiftwidth=4 tabstop=8 expandtab:
 * :indentSize=4:tabSize=8:noTabs=true:
 */