fix(encoder): fixed incorrect stream mapping for multi-stream files

Signed-off-by: k4yt3x <i@k4yt3x.com>
This commit is contained in:
k4yt3x 2024-11-06 00:00:00 +00:00
parent 3b7921a774
commit 33b7c53e16
No known key found for this signature in database
6 changed files with 82 additions and 58 deletions

View File

@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
### Fixed
- Stream mapping for cases where the video stream is not the first stream in the input file (#1217).
## [6.1.0] - 2024-11-04
### Added
@ -17,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Wide character string paths support on Windows systems without UTF-8 support enabled.
- Wide character string paths support on Windows systems without UTF-8 support enabled (#1201).
### Changed

View File

@ -14,7 +14,7 @@ int init_decoder(
std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *vstream_idx
int *in_vstream_idx
);
#endif // DECODER_H

View File

@ -19,7 +19,8 @@ int init_encoder(
AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
int vstream_idx,
int in_vstream_idx,
int *out_vstream_idx,
int **stream_map
);
@ -27,9 +28,9 @@ int write_frame(
AVFrame *frame,
AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx,
int vstream_idx
int out_vstream_idx
);
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx);
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx);
#endif // ENCODER_H

View File

@ -25,7 +25,7 @@ int init_decoder(
std::filesystem::path in_fpath,
AVFormatContext **fmt_ctx,
AVCodecContext **dec_ctx,
int *vstream_idx
int *in_vstream_idx
) {
AVFormatContext *ifmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL;
@ -110,7 +110,7 @@ int init_decoder(
*fmt_ctx = ifmt_ctx;
*dec_ctx = codec_ctx;
*vstream_idx = stream_index;
*in_vstream_idx = stream_index;
return 0;
}

View File

@ -25,12 +25,12 @@ int init_encoder(
AVCodecContext **enc_ctx,
AVCodecContext *dec_ctx,
EncoderConfig *encoder_config,
int vstream_idx,
int in_vstream_idx,
int *out_vstream_idx,
int **stream_map
) {
AVFormatContext *fmt_ctx = NULL;
AVCodecContext *codec_ctx = NULL;
int stream_index = 0;
int ret;
avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str());
@ -49,11 +49,12 @@ int init_encoder(
}
// Create a new video stream in the output file
AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
if (!out_stream) {
AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL);
if (!out_vstream) {
spdlog::error("Failed to allocate the output video stream");
return AVERROR_UNKNOWN;
}
*out_vstream_idx = out_vstream->index;
codec_ctx = avcodec_alloc_context3(encoder);
if (!codec_ctx) {
@ -89,14 +90,14 @@ int init_encoder(
if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) {
codec_ctx->time_base = dec_ctx->time_base;
} else {
codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL));
codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL));
}
// Set the output video's frame rate
if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) {
codec_ctx->framerate = dec_ctx->framerate;
} else {
codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_stream, NULL);
codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_vstream, NULL);
}
// Set the CRF and preset for any codecs that support it
@ -113,15 +114,15 @@ int init_encoder(
return ret;
}
ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx);
ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx);
if (ret < 0) {
spdlog::error("Failed to copy encoder parameters to output video stream");
return ret;
}
out_stream->time_base = codec_ctx->time_base;
out_stream->avg_frame_rate = codec_ctx->framerate;
out_stream->r_frame_rate = codec_ctx->framerate;
out_vstream->time_base = codec_ctx->time_base;
out_vstream->avg_frame_rate = codec_ctx->framerate;
out_vstream->r_frame_rate = codec_ctx->framerate;
if (encoder_config->copy_streams) {
// Allocate the stream map
@ -132,43 +133,46 @@ int init_encoder(
return AVERROR(ENOMEM);
}
// Map the video stream
(*stream_map)[vstream_idx] = stream_index++;
// Loop through each stream in the input file
// Map each input stream to an output stream
for (int i = 0; i < static_cast<int>(ifmt_ctx->nb_streams); i++) {
AVStream *in_stream = ifmt_ctx->streams[i];
AVCodecParameters *in_codecpar = in_stream->codecpar;
if (i == vstream_idx) {
// Video stream is already handled
// Skip the input video stream as it's already processed
if (i == in_vstream_idx) {
(*stream_map)[i] = *out_vstream_idx;
continue;
}
// Map only audio and subtitle streams (skip other types)
if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO &&
in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) {
(*stream_map)[i] = -1;
(*stream_map)[i] = -1; // Stream not mapped
spdlog::warn("Skipping unsupported stream type at index: {}", i);
continue;
}
// Create corresponding output stream
AVStream *out_copied_stream = avformat_new_stream(fmt_ctx, NULL);
if (!out_copied_stream) {
// Create corresponding output stream for audio and subtitle streams
AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL);
if (!out_stream) {
spdlog::error("Failed allocating output stream");
return AVERROR_UNKNOWN;
}
ret = avcodec_parameters_copy(out_copied_stream->codecpar, in_codecpar);
// Copy codec parameters from input to output
ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar);
if (ret < 0) {
spdlog::error("Failed to copy codec parameters");
return ret;
}
out_copied_stream->codecpar->codec_tag = 0;
out_stream->codecpar->codec_tag = 0;
// Copy time base
out_copied_stream->time_base = in_stream->time_base;
out_stream->time_base = in_stream->time_base;
(*stream_map)[i] = stream_index++;
// Map input stream index to output stream index
spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index);
(*stream_map)[i] = out_stream->index;
}
}
@ -191,7 +195,7 @@ int write_frame(
AVFrame *frame,
AVCodecContext *enc_ctx,
AVFormatContext *ofmt_ctx,
int vstream_idx
int out_vstream_idx
) {
AVFrame *converted_frame = nullptr;
int ret;
@ -238,9 +242,9 @@ int write_frame(
// Rescale packet timestamps
av_packet_rescale_ts(
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[vstream_idx]->time_base
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
);
enc_pkt->stream_index = vstream_idx;
enc_pkt->stream_index = out_vstream_idx;
// Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
@ -256,7 +260,7 @@ int write_frame(
return 0;
}
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) {
int ret;
AVPacket *enc_pkt = av_packet_alloc();
if (!enc_pkt) {
@ -265,26 +269,35 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) {
}
ret = avcodec_send_frame(enc_ctx, NULL);
while (ret >= 0) {
if (ret < 0) {
spdlog::error("Error sending NULL frame to encoder during flush");
av_packet_free(&enc_pkt);
return ret;
}
// Write the packets to the output file
while (true) {
ret = avcodec_receive_packet(enc_ctx, enc_pkt);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
av_packet_unref(enc_pkt);
break;
} else if (ret < 0) {
spdlog::error("Error encoding frame");
spdlog::error("Error encoding packet during flush");
av_packet_free(&enc_pkt);
return ret;
}
// Rescale packet timestamps
av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base);
enc_pkt->stream_index = ofmt_ctx->streams[0]->index;
av_packet_rescale_ts(
enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base
);
enc_pkt->stream_index = out_vstream_idx;
// Write the packet
ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt);
av_packet_unref(enc_pkt);
if (ret < 0) {
spdlog::error("Error muxing packet");
spdlog::error("Error muxing packet during flush");
av_packet_free(&enc_pkt);
return ret;
}

View File

@ -26,7 +26,8 @@ static int process_frames(
AVCodecContext *dec_ctx,
AVCodecContext *enc_ctx,
Filter *filter,
int vstream_idx,
int in_vstream_idx,
int out_vstream_idx,
int *stream_map,
bool benchmark = false
) {
@ -36,7 +37,7 @@ static int process_frames(
// Get the total number of frames in the video with OpenCV
spdlog::debug("Reading total number of frames");
proc_ctx->total_frames = ifmt_ctx->streams[vstream_idx]->nb_frames;
proc_ctx->total_frames = ifmt_ctx->streams[in_vstream_idx]->nb_frames;
if (proc_ctx->total_frames > 0) {
spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames);
} else {
@ -47,27 +48,27 @@ static int process_frames(
if (ifmt_ctx->duration != AV_NOPTS_VALUE) {
duration_secs =
static_cast<double>(ifmt_ctx->duration) / static_cast<double>(AV_TIME_BASE);
} else if (ifmt_ctx->streams[vstream_idx]->duration != AV_NOPTS_VALUE) {
duration_secs = static_cast<double>(ifmt_ctx->streams[vstream_idx]->duration) *
av_q2d(ifmt_ctx->streams[vstream_idx]->time_base);
} else if (ifmt_ctx->streams[in_vstream_idx]->duration != AV_NOPTS_VALUE) {
duration_secs = static_cast<double>(ifmt_ctx->streams[in_vstream_idx]->duration) *
av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base);
} else {
spdlog::warn("Unable to determine video duration");
}
spdlog::debug("Video duration: {}s", duration_secs);
// Calculate average FPS
double fps = av_q2d(ifmt_ctx->streams[vstream_idx]->avg_frame_rate);
double fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate);
if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[vstream_idx]->r_frame_rate);
fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->r_frame_rate);
}
if (fps <= 0) {
spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'");
fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[vstream_idx], nullptr));
fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[in_vstream_idx], nullptr));
}
if (fps <= 0) {
spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'");
fps = av_q2d(ifmt_ctx->streams[vstream_idx]->time_base);
fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base);
}
if (fps <= 0 || duration_secs <= 0) {
spdlog::warn("Unable to estimate the video's average frame rate");
@ -123,7 +124,7 @@ static int process_frames(
return ret;
}
if (packet->stream_index == vstream_idx) {
if (packet->stream_index == in_vstream_idx) {
ret = avcodec_send_packet(dec_ctx, packet);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
@ -161,7 +162,7 @@ static int process_frames(
return ret;
} else if (ret == 0 && processed_frame != nullptr) {
if (!benchmark) {
ret = write_frame(processed_frame, enc_ctx, ofmt_ctx, vstream_idx);
ret = write_frame(processed_frame, enc_ctx, ofmt_ctx, out_vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error encoding/writing frame: {}", errbuf);
@ -191,7 +192,7 @@ static int process_frames(
ret = av_interleaved_write_frame(ofmt_ctx, packet);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::error("Error muxing packet: {}", errbuf);
spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf);
av_packet_unref(packet);
cleanup();
return ret;
@ -211,7 +212,7 @@ static int process_frames(
// Encode and write all flushed frames
for (AVFrame *&flushed_frame : flushed_frames) {
ret = write_frame(flushed_frame, enc_ctx, ofmt_ctx, vstream_idx);
ret = write_frame(flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error encoding/writing flushed frame: {}", errbuf);
@ -226,7 +227,7 @@ static int process_frames(
}
// Flush the encoder
ret = flush_encoder(enc_ctx, ofmt_ctx);
ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Error flushing encoder: {}", errbuf);
@ -256,7 +257,8 @@ extern "C" int process_video(
AVBufferRef *hw_ctx = nullptr;
int *stream_map = nullptr;
Filter *filter = nullptr;
int vstream_idx = -1;
int in_vstream_idx = -1;
int out_vstream_idx = -1;
char errbuf[AV_ERROR_MAX_STRING_SIZE];
int ret = 0;
@ -348,7 +350,7 @@ extern "C" int process_video(
}
// Initialize input
ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &vstream_idx);
ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx);
if (ret < 0) {
av_strerror(ret, errbuf, sizeof(errbuf));
spdlog::critical("Failed to initialize decoder: {}", errbuf);
@ -385,7 +387,8 @@ extern "C" int process_video(
&enc_ctx,
dec_ctx,
encoder_config,
vstream_idx,
in_vstream_idx,
&out_vstream_idx,
&stream_map
);
if (ret < 0) {
@ -461,7 +464,8 @@ extern "C" int process_video(
dec_ctx,
enc_ctx,
filter,
vstream_idx,
in_vstream_idx,
out_vstream_idx,
stream_map,
benchmark
);