fix(encoder): fixed incorrect stream mapping for multi-stream files

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
k4yt3x 2024-11-06 00:00:00 +00:00
parent 3b7921a774
commit 33b7c53e16
No known key found for this signature in database
6 changed files with 82 additions and 58 deletions

View File

@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Stream mapping for cases where the video stream is not the first stream in the input file (#1217).
## [6.1.0] - 2024-11-04 ## [6.1.0] - 2024-11-04
### Added ### Added
@ -17,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed ### Fixed
- Wide character string paths support on Windows systems without UTF-8 support enabled. - Wide character string paths support on Windows systems without UTF-8 support enabled (#1201).
### Changed ### Changed

View File

@ -14,7 +14,7 @@ int init_decoder(
std::filesystem::path in_fpath, std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx, AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx, AVCodecContext **dec_ctx,
int *vstream_idx int *in_vstream_idx
); );
#endif // DECODER_H #endif // DECODER_H

View File

@ -19,7 +19,8 @@ int init_encoder(
AVCodecContext **enc_ctx, AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx, AVCodecContext *dec_ctx,
EncoderConfig *encoder_config, EncoderConfig *encoder_config,
int vstream_idx, int in_vstream_idx,
int *out_vstream_idx,
int **stream_map int **stream_map
); );
@ -27,9 +28,9 @@ int write_frame(
AVFrame *frame, AVFrame *frame,
AVCodecContext *enc_ctx, AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx, AVFormatContext *ofmt_ctx,
int vstream_idx int out_vstream_idx
); );
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx); int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx);
#endif // ENCODER_H #endif // ENCODER_H

View File

@ -25,7 +25,7 @@ int init_decoder(
std::filesystem::path in_fpath, std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx, AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx, AVCodecContext **dec_ctx,
int *vstream_idx int *in_vstream_idx
) { ) {
AVFormatContext *ifmt_ctx = NULL; AVFormatContext *ifmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL; AVCodecContext *codec_ctx = NULL;
@ -110,7 +110,7 @@ int init_decoder(
*fmt_ctx = ifmt_ctx; *fmt_ctx = ifmt_ctx;
*dec_ctx = codec_ctx; *dec_ctx = codec_ctx;
*vstream_idx = stream_index; *in_vstream_idx = stream_index;
return 0; return 0;
} }

View File

@ -25,12 +25,12 @@ int init_encoder(
AVCodecContext **enc_ctx, AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx, AVCodecContext *dec_ctx,
EncoderConfig *encoder_config, EncoderConfig *encoder_config,
int vstream_idx, int in_vstream_idx,
int *out_vstream_idx,
int **stream_map int **stream_map
) { ) {
AVFormatContext *fmt_ctx = NULL; AVFormatContext *fmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL; AVCodecContext *codec_ctx = NULL;
int stream_index = 0;
int ret; int ret;
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str()); avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
@ -49,11 +49,12 @@ int init_encoder(
} }
// Create a new video stream in the output file // Create a new video stream in the output file
AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL);
if (!out_stream) { if (!out_vstream) {
spdlog::error("Failed to allocate the output video stream"); spdlog::error("Failed to allocate the output video stream");
return AVERROR_UNKNOWN; return AVERROR_UNKNOWN;
} }
*out_vstream_idx = out_vstream->index;
codec_ctx = avcodec_alloc_context3(encoder); codec_ctx = avcodec_alloc_context3(encoder);
if (!codec_ctx) { if (!codec_ctx) {
@ -89,14 +90,14 @@ int init_encoder(
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) { if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
codec_ctx->time_base = dec_ctx->time_base; codec_ctx->time_base = dec_ctx->time_base;
} else { } else {
codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL)); codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL));
} }
// Set the output video's frame rate // Set the output video's frame rate
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) { if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
codec_ctx->framerate = dec_ctx->framerate; codec_ctx->framerate = dec_ctx->framerate;
} else { } else {
codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_stream, NULL); codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, NULL);
} }
// Set the CRF and preset for any codecs that support it // Set the CRF and preset for any codecs that support it
@ -113,15 +114,15 @@ int init_encoder(
return ret; return ret;
} }
ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx); ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx);
if (ret < 0) { if (ret < 0) {
spdlog::error("Failed to copy encoder parameters to output video stream"); spdlog::error("Failed to copy encoder parameters to output video stream");
return ret; return ret;
} }
out_stream->time_base = codec_ctx->time_base; out_vstream->time_base = codec_ctx->time_base;
out_stream->avg_frame_rate = codec_ctx->framerate; out_vstream->avg_frame_rate = codec_ctx->framerate;
out_stream->r_frame_rate = codec_ctx->framerate; out_vstream->r_frame_rate = codec_ctx->framerate;
if (encoder_config->copy_streams) { if (encoder_config->copy_streams) {
// Allocate the stream map // Allocate the stream map
@ -132,43 +133,46 @@ int init_encoder(
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
// Map the video stream // Map each input stream to an output stream
(*stream_map)[vstream_idx] = stream_index++;
// Loop through each stream in the input file
for (int i = 0; i < static_cast<int>(ifmt_ctx->nb_streams); i++) { for (int i = 0; i < static_cast<int>(ifmt_ctx->nb_streams); i++) {
AVStream *in_stream = ifmt_ctx->streams[i]; AVStream *in_stream = ifmt_ctx->streams[i];
AVCodecParameters *in_codecpar = in_stream->codecpar; AVCodecParameters *in_codecpar = in_stream->codecpar;
if (i == vstream_idx) { // Skip the input video stream as it's already processed
// Video stream is already handled if (i == in_vstream_idx) {
(*stream_map)[i] = *out_vstream_idx;
continue; continue;
} }
// Map only audio and subtitle streams (skip other types)
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
(*stream_map)[i] = -1; (*stream_map)[i] = -1; // Stream not mapped
spdlog::warn("Skipping unsupported stream type at index: {}", i);
continue; continue;
} }
// Create corresponding output stream // Create corresponding output stream for audio and subtitle streams
AVStream *out_copied_stream = avformat_new_stream(fmt_ctx, NULL); AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
if (!out_copied_stream) { if (!out_stream) {
spdlog::error("Failed allocating output stream"); spdlog::error("Failed allocating output stream");
return AVERROR_UNKNOWN; return AVERROR_UNKNOWN;
} }
ret = avcodec_parameters_copy(out_copied_stream->codecpar, in_codecpar); // Copy codec parameters from input to output
ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
if (ret < 0) { if (ret < 0) {
spdlog::error("Failed to copy codec parameters"); spdlog::error("Failed to copy codec parameters");
return ret; return ret;
} }
out_copied_stream->codecpar->codec_tag = 0; out_stream->codecpar->codec_tag = 0;
// Copy time base // Copy time base
out_copied_stream->time_base = in_stream->time_base; out_stream->time_base = in_stream->time_base;
(*stream_map)[i] = stream_index++; // Map input stream index to output stream index
spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index);
(*stream_map)[i] = out_stream->index;
} }
} }
@ -191,7 +195,7 @@ int write_frame(
AVFrame *frame, AVFrame *frame,
AVCodecContext *enc_ctx, AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx, AVFormatContext *ofmt_ctx,
int vstream_idx int out_vstream_idx
) { ) {
AVFrame *converted_frame = nullptr; AVFrame *converted_frame = nullptr;
int ret; int ret;
@ -238,9 +242,9 @@ int write_frame(
// Rescale packet timestamps // Rescale packet timestamps
av_packet_rescale_ts( av_packet_rescale_ts(
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[vstream_idx]->time_base enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
); );
enc_pkt->stream_index = vstream_idx; enc_pkt->stream_index = out_vstream_idx;
// Write the packet // Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
@ -256,7 +260,7 @@ int write_frame(
return 0; return 0;
} }
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) { int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) {
int ret; int ret;
AVPacket *enc_pkt = av_packet_alloc(); AVPacket *enc_pkt = av_packet_alloc();
if (!enc_pkt) { if (!enc_pkt) {
@ -265,26 +269,35 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
} }
ret = avcodec_send_frame(enc_ctx, NULL); ret = avcodec_send_frame(enc_ctx, NULL);
while (ret >= 0) { if (ret < 0) {
spdlog::error("Error sending NULL frame to encoder during flush");
av_packet_free(&enc_pkt);
return ret;
}
// Write the packets to the output file
while (true) {
ret = avcodec_receive_packet(enc_ctx, enc_pkt); ret = avcodec_receive_packet(enc_ctx, enc_pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
av_packet_unref(enc_pkt); av_packet_unref(enc_pkt);
break; break;
} else if (ret < 0) { } else if (ret < 0) {
spdlog::error("Error encoding frame"); spdlog::error("Error encoding packet during flush");
av_packet_free(&enc_pkt); av_packet_free(&enc_pkt);
return ret; return ret;
} }
// Rescale packet timestamps // Rescale packet timestamps
av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base); av_packet_rescale_ts(
enc_pkt->stream_index = ofmt_ctx->streams[0]->index; enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
);
enc_pkt->stream_index = out_vstream_idx;
// Write the packet // Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
av_packet_unref(enc_pkt); av_packet_unref(enc_pkt);
if (ret < 0) { if (ret < 0) {
spdlog::error("Error muxing packet"); spdlog::error("Error muxing packet during flush");
av_packet_free(&enc_pkt); av_packet_free(&enc_pkt);
return ret; return ret;
} }

View File

@ -26,7 +26,8 @@ static int process_frames(
AVCodecContext *dec_ctx, AVCodecContext *dec_ctx,
AVCodecContext *enc_ctx, AVCodecContext *enc_ctx,
Filter *filter, Filter *filter,
int vstream_idx, int in_vstream_idx,
int out_vstream_idx,
int *stream_map, int *stream_map,
bool benchmark = false bool benchmark = false
) { ) {
@ -36,7 +37,7 @@ static int process_frames(
// Get the total number of frames in the video with OpenCV // Get the total number of frames in the video with OpenCV
spdlog::debug("Reading total number of frames"); spdlog::debug("Reading total number of frames");
proc_ctx->total_frames = ifmt_ctx->streams[vstream_idx]->nb_frames; proc_ctx->total_frames = ifmt_ctx->streams[in_vstream_idx]->nb_frames;
if (proc_ctx->total_frames > 0) { if (proc_ctx->total_frames > 0) {
spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames); spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames);
} else { } else {
@ -47,27 +48,27 @@ static int process_frames(
if (ifmt_ctx->duration != AV_NOPTS_VALUE) { if (ifmt_ctx->duration != AV_NOPTS_VALUE) {
duration_secs = duration_secs =
static_cast<double>(ifmt_ctx->duration) / static_cast<double>(AV_TIME_BASE); static_cast<double>(ifmt_ctx->duration) / static_cast<double>(AV_TIME_BASE);
} else if (ifmt_ctx->streams[vstream_idx]->duration != AV_NOPTS_VALUE) { } else if (ifmt_ctx->streams[in_vstream_idx]->duration != AV_NOPTS_VALUE) {
duration_secs = static_cast<double>(ifmt_ctx->streams[vstream_idx]->duration) * duration_secs = static_cast<double>(ifmt_ctx->streams[in_vstream_idx]->duration) *
av_q2d(ifmt_ctx->streams[vstream_idx]->time_base); av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base);
} else { } else {
spdlog::warn("Unable to determine video duration"); spdlog::warn("Unable to determine video duration");
} }
spdlog::debug("Video duration: {}s", duration_secs); spdlog::debug("Video duration: {}s", duration_secs);
// Calculate average FPS // Calculate average FPS
double fps = av_q2d(ifmt_ctx->streams[vstream_idx]->avg_frame_rate); double fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate);
if (fps <= 0) { if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'"); spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[vstream_idx]->r_frame_rate); fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->r_frame_rate);
} }
if (fps <= 0) { if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'"); spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'");
fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[vstream_idx], nullptr)); fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[in_vstream_idx], nullptr));
} }
if (fps <= 0) { if (fps <= 0) {
spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'"); spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[vstream_idx]->time_base); fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base);
} }
if (fps <= 0 || duration_secs <= 0) { if (fps <= 0 || duration_secs <= 0) {
spdlog::warn("Unable to estimate the video's average frame rate"); spdlog::warn("Unable to estimate the video's average frame rate");
@ -123,7 +124,7 @@ static int process_frames(
return ret; return ret;
} }
if (packet->stream_index == vstream_idx) { if (packet->stream_index == in_vstream_idx) {
ret = avcodec_send_packet(dec_ctx, packet); ret = avcodec_send_packet(dec_ctx, packet);
if (ret < 0) { if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf)); av_strerror(ret, errbuf, sizeof(errbuf));
@ -161,7 +162,7 @@ static int process_frames(
return ret; return ret;
} else if (ret == 0 && processed_frame != nullptr) { } else if (ret == 0 && processed_frame != nullptr) {
if (!benchmark) { if (!benchmark) {
ret = write_frame(processed_frame, enc_ctx, ofmt_ctx, vstream_idx); ret = write_frame(processed_frame, enc_ctx, ofmt_ctx, out_vstream_idx);
if (ret < 0) { if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf)); av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error encoding/writing frame: {}", errbuf); spdlog::critical("Error encoding/writing frame: {}", errbuf);
@ -191,7 +192,7 @@ static int process_frames(
ret = av_interleaved_write_frame(ofmt_ctx, packet); ret = av_interleaved_write_frame(ofmt_ctx, packet);
if (ret < 0) { if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf)); av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error muxing packet: {}", errbuf); spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
av_packet_unref(packet); av_packet_unref(packet);
cleanup(); cleanup();
return ret; return ret;
@ -211,7 +212,7 @@ static int process_frames(
// Encode and write all flushed frames // Encode and write all flushed frames
for (AVFrame *&flushed_frame : flushed_frames) { for (AVFrame *&flushed_frame : flushed_frames) {
ret = write_frame(flushed_frame, enc_ctx, ofmt_ctx, vstream_idx); ret = write_frame(flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx);
if (ret < 0) { if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf)); av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error encoding/writing flushed frame: {}", errbuf); spdlog::critical("Error encoding/writing flushed frame: {}", errbuf);
@ -226,7 +227,7 @@ static int process_frames(
} }
// Flush the encoder // Flush the encoder
ret = flush_encoder(enc_ctx, ofmt_ctx); ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx);
if (ret < 0) { if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf)); av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error flushing encoder: {}", errbuf); spdlog::critical("Error flushing encoder: {}", errbuf);
@ -256,7 +257,8 @@ extern "C" int process_video(
AVBufferRef *hw_ctx = nullptr; AVBufferRef *hw_ctx = nullptr;
int *stream_map = nullptr; int *stream_map = nullptr;
Filter *filter = nullptr; Filter *filter = nullptr;
int vstream_idx = -1; int in_vstream_idx = -1;
int out_vstream_idx = -1;
char errbuf[AV_ERROR_MAX_STRING_SIZE]; char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0; int ret = 0;
@ -348,7 +350,7 @@ extern "C" int process_video(
} }
// Initialize input // Initialize input
ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &vstream_idx); ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx);
if (ret < 0) { if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf)); av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize decoder: {}", errbuf); spdlog::critical("Failed to initialize decoder: {}", errbuf);
@ -385,7 +387,8 @@ extern "C" int process_video(
&enc_ctx, &enc_ctx,
dec_ctx, dec_ctx,
encoder_config, encoder_config,
vstream_idx, in_vstream_idx,
&out_vstream_idx,
&stream_map &stream_map
); );
if (ret < 0) { if (ret < 0) {
@ -461,7 +464,8 @@ extern "C" int process_video(
dec_ctx, dec_ctx,
enc_ctx, enc_ctx,
filter, filter,
vstream_idx, in_vstream_idx,
out_vstream_idx,
stream_map, stream_map,
benchmark benchmark
); );