diff --git a/CHANGELOG.md b/CHANGELOG.md index 51fd5bc..0c63dd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Fixed + +- Stream mapping for cases where the video stream is not the first stream in the input file (#1217). + ## [6.1.0] - 2024-11-04 ### Added @@ -17,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Wide character string paths support on Windows systems without UTF-8 suppoprt enabled. +- Wide character string paths support on Windows systems without UTF-8 support enabled (#1201). ### Changed diff --git a/include/libvideo2x/decoder.h b/include/libvideo2x/decoder.h index dc90c3e..e6ed9f0 100644 --- a/include/libvideo2x/decoder.h +++ b/include/libvideo2x/decoder.h @@ -14,7 +14,7 @@ int init_decoder( std::filesystem::path in_fpath, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, - int *vstream_idx + int *in_vstream_idx ); #endif // DECODER_H diff --git a/include/libvideo2x/encoder.h b/include/libvideo2x/encoder.h index 65d39a4..64ecb35 100644 --- a/include/libvideo2x/encoder.h +++ b/include/libvideo2x/encoder.h @@ -19,7 +19,8 @@ int init_encoder( AVCodecContext **enc_ctx, AVCodecContext *dec_ctx, EncoderConfig *encoder_config, - int vstream_idx, + int in_vstream_idx, + int *out_vstream_idx, int **stream_map ); @@ -27,9 +28,9 @@ int write_frame( AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, - int vstream_idx + int out_vstream_idx ); -int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx); +int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx); #endif // ENCODER_H diff --git a/src/decoder.cpp b/src/decoder.cpp index d5a05ee..b6dd71d 100644 --- 
a/src/decoder.cpp +++ b/src/decoder.cpp @@ -25,7 +25,7 @@ int init_decoder( std::filesystem::path in_fpath, AVFormatContext **fmt_ctx, AVCodecContext **dec_ctx, - int *vstream_idx + int *in_vstream_idx ) { AVFormatContext *ifmt_ctx = NULL; AVCodecContext *codec_ctx = NULL; @@ -110,7 +110,7 @@ int init_decoder( *fmt_ctx = ifmt_ctx; *dec_ctx = codec_ctx; - *vstream_idx = stream_index; + *in_vstream_idx = stream_index; return 0; } diff --git a/src/encoder.cpp b/src/encoder.cpp index d5fe5ce..ad93ac6 100644 --- a/src/encoder.cpp +++ b/src/encoder.cpp @@ -25,12 +25,12 @@ int init_encoder( AVCodecContext **enc_ctx, AVCodecContext *dec_ctx, EncoderConfig *encoder_config, - int vstream_idx, + int in_vstream_idx, + int *out_vstream_idx, int **stream_map ) { AVFormatContext *fmt_ctx = NULL; AVCodecContext *codec_ctx = NULL; - int stream_index = 0; int ret; avformat_alloc_output_context2(&fmt_ctx, NULL, NULL, out_fpath.u8string().c_str()); @@ -49,11 +49,12 @@ int init_encoder( } // Create a new video stream in the output file - AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); - if (!out_stream) { + AVStream *out_vstream = avformat_new_stream(fmt_ctx, NULL); + if (!out_vstream) { spdlog::error("Failed to allocate the output video stream"); return AVERROR_UNKNOWN; } + *out_vstream_idx = out_vstream->index; codec_ctx = avcodec_alloc_context3(encoder); if (!codec_ctx) { @@ -89,14 +90,14 @@ int init_encoder( if (dec_ctx->time_base.num > 0 && dec_ctx->time_base.den > 0) { codec_ctx->time_base = dec_ctx->time_base; } else { - codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_stream, NULL)); + codec_ctx->time_base = av_inv_q(av_guess_frame_rate(ifmt_ctx, out_vstream, NULL)); } // Set the output video's frame rate if (dec_ctx->framerate.num > 0 && dec_ctx->framerate.den > 0) { codec_ctx->framerate = dec_ctx->framerate; } else { - codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, out_stream, NULL); + codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, 
out_vstream, NULL); } // Set the CRF and preset for any codecs that support it @@ -113,15 +114,15 @@ int init_encoder( return ret; } - ret = avcodec_parameters_from_context(out_stream->codecpar, codec_ctx); + ret = avcodec_parameters_from_context(out_vstream->codecpar, codec_ctx); if (ret < 0) { spdlog::error("Failed to copy encoder parameters to output video stream"); return ret; } - out_stream->time_base = codec_ctx->time_base; - out_stream->avg_frame_rate = codec_ctx->framerate; - out_stream->r_frame_rate = codec_ctx->framerate; + out_vstream->time_base = codec_ctx->time_base; + out_vstream->avg_frame_rate = codec_ctx->framerate; + out_vstream->r_frame_rate = codec_ctx->framerate; if (encoder_config->copy_streams) { // Allocate the stream map @@ -132,43 +133,46 @@ int init_encoder( return AVERROR(ENOMEM); } - // Map the video stream - (*stream_map)[vstream_idx] = stream_index++; - - // Loop through each stream in the input file + // Map each input stream to an output stream for (int i = 0; i < static_cast(ifmt_ctx->nb_streams); i++) { AVStream *in_stream = ifmt_ctx->streams[i]; AVCodecParameters *in_codecpar = in_stream->codecpar; - if (i == vstream_idx) { - // Video stream is already handled + // Skip the input video stream as it's already processed + if (i == in_vstream_idx) { + (*stream_map)[i] = *out_vstream_idx; continue; } + // Map only audio and subtitle streams (skip other types) if (in_codecpar->codec_type != AVMEDIA_TYPE_AUDIO && in_codecpar->codec_type != AVMEDIA_TYPE_SUBTITLE) { - (*stream_map)[i] = -1; + (*stream_map)[i] = -1; // Stream not mapped + spdlog::warn("Skipping unsupported stream type at index: {}", i); continue; } - // Create corresponding output stream - AVStream *out_copied_stream = avformat_new_stream(fmt_ctx, NULL); - if (!out_copied_stream) { + // Create corresponding output stream for audio and subtitle streams + AVStream *out_stream = avformat_new_stream(fmt_ctx, NULL); + if (!out_stream) { spdlog::error("Failed allocating output 
stream"); return AVERROR_UNKNOWN; } - ret = avcodec_parameters_copy(out_copied_stream->codecpar, in_codecpar); + // Copy codec parameters from input to output + ret = avcodec_parameters_copy(out_stream->codecpar, in_codecpar); if (ret < 0) { spdlog::error("Failed to copy codec parameters"); return ret; } - out_copied_stream->codecpar->codec_tag = 0; + out_stream->codecpar->codec_tag = 0; // Copy time base - out_copied_stream->time_base = in_stream->time_base; + out_stream->time_base = in_stream->time_base; - (*stream_map)[i] = stream_index++; + // Map input stream index to output stream index + spdlog::debug("Stream mapping: {} (in) -> {} (out)", i, out_stream->index); + (*stream_map)[i] = out_stream->index; } } @@ -191,7 +195,7 @@ int write_frame( AVFrame *frame, AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, - int vstream_idx + int out_vstream_idx ) { AVFrame *converted_frame = nullptr; int ret; @@ -238,9 +242,9 @@ int write_frame( // Rescale packet timestamps av_packet_rescale_ts( - enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[vstream_idx]->time_base + enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base ); - enc_pkt->stream_index = vstream_idx; + enc_pkt->stream_index = out_vstream_idx; // Write the packet ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); @@ -256,7 +260,7 @@ int write_frame( return 0; } -int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) { +int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx, int out_vstream_idx) { int ret; AVPacket *enc_pkt = av_packet_alloc(); if (!enc_pkt) { @@ -265,26 +269,35 @@ int flush_encoder(AVCodecContext *enc_ctx, AVFormatContext *ofmt_ctx) { } ret = avcodec_send_frame(enc_ctx, NULL); - while (ret >= 0) { + if (ret < 0) { + spdlog::error("Error sending NULL frame to encoder during flush"); + av_packet_free(&enc_pkt); + return ret; + } + + // Write the packets to the output file + while (true) { ret = avcodec_receive_packet(enc_ctx, enc_pkt); if (ret 
== AVERROR(EAGAIN) || ret == AVERROR_EOF) { av_packet_unref(enc_pkt); break; } else if (ret < 0) { - spdlog::error("Error encoding frame"); + spdlog::error("Error encoding packet during flush"); av_packet_free(&enc_pkt); return ret; } // Rescale packet timestamps - av_packet_rescale_ts(enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[0]->time_base); - enc_pkt->stream_index = ofmt_ctx->streams[0]->index; + av_packet_rescale_ts( + enc_pkt, enc_ctx->time_base, ofmt_ctx->streams[out_vstream_idx]->time_base + ); + enc_pkt->stream_index = out_vstream_idx; // Write the packet ret = av_interleaved_write_frame(ofmt_ctx, enc_pkt); av_packet_unref(enc_pkt); if (ret < 0) { - spdlog::error("Error muxing packet"); + spdlog::error("Error muxing packet during flush"); av_packet_free(&enc_pkt); return ret; } diff --git a/src/libvideo2x.cpp b/src/libvideo2x.cpp index 354c697..9e4aff2 100644 --- a/src/libvideo2x.cpp +++ b/src/libvideo2x.cpp @@ -26,7 +26,8 @@ static int process_frames( AVCodecContext *dec_ctx, AVCodecContext *enc_ctx, Filter *filter, - int vstream_idx, + int in_vstream_idx, + int out_vstream_idx, int *stream_map, bool benchmark = false ) { @@ -36,7 +37,7 @@ static int process_frames( // Get the total number of frames in the video with OpenCV spdlog::debug("Reading total number of frames"); - proc_ctx->total_frames = ifmt_ctx->streams[vstream_idx]->nb_frames; + proc_ctx->total_frames = ifmt_ctx->streams[in_vstream_idx]->nb_frames; if (proc_ctx->total_frames > 0) { spdlog::debug("Read total number of frames from 'nb_frames': {}", proc_ctx->total_frames); } else { @@ -47,27 +48,27 @@ static int process_frames( if (ifmt_ctx->duration != AV_NOPTS_VALUE) { duration_secs = static_cast(ifmt_ctx->duration) / static_cast(AV_TIME_BASE); - } else if (ifmt_ctx->streams[vstream_idx]->duration != AV_NOPTS_VALUE) { - duration_secs = static_cast(ifmt_ctx->streams[vstream_idx]->duration) * - av_q2d(ifmt_ctx->streams[vstream_idx]->time_base); + } else if 
(ifmt_ctx->streams[in_vstream_idx]->duration != AV_NOPTS_VALUE) { + duration_secs = static_cast(ifmt_ctx->streams[in_vstream_idx]->duration) * + av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base); } else { spdlog::warn("Unable to determine video duration"); } spdlog::debug("Video duration: {}s", duration_secs); // Calculate average FPS - double fps = av_q2d(ifmt_ctx->streams[vstream_idx]->avg_frame_rate); + double fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->avg_frame_rate); if (fps <= 0) { spdlog::debug("Unable to read the average frame rate from 'avg_frame_rate'"); - fps = av_q2d(ifmt_ctx->streams[vstream_idx]->r_frame_rate); + fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->r_frame_rate); } if (fps <= 0) { spdlog::debug("Unable to read the average frame rate from 'r_frame_rate'"); - fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[vstream_idx], nullptr)); + fps = av_q2d(av_guess_frame_rate(ifmt_ctx, ifmt_ctx->streams[in_vstream_idx], nullptr)); } if (fps <= 0) { spdlog::debug("Unable to estimate the average frame rate with 'av_guess_frame_rate'"); - fps = av_q2d(ifmt_ctx->streams[vstream_idx]->time_base); + fps = av_q2d(ifmt_ctx->streams[in_vstream_idx]->time_base); } if (fps <= 0 || duration_secs <= 0) { spdlog::warn("Unable to estimate the video's average frame rate"); @@ -123,7 +124,7 @@ static int process_frames( return ret; } - if (packet->stream_index == vstream_idx) { + if (packet->stream_index == in_vstream_idx) { ret = avcodec_send_packet(dec_ctx, packet); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); @@ -161,7 +162,7 @@ static int process_frames( return ret; } else if (ret == 0 && processed_frame != nullptr) { if (!benchmark) { - ret = write_frame(processed_frame, enc_ctx, ofmt_ctx, vstream_idx); + ret = write_frame(processed_frame, enc_ctx, ofmt_ctx, out_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing frame: {}", errbuf); @@ -191,7 +192,7 @@ static int 
process_frames( ret = av_interleaved_write_frame(ofmt_ctx, packet); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); - spdlog::error("Error muxing packet: {}", errbuf); + spdlog::critical("Error muxing audio/subtitle packet: {}", errbuf); av_packet_unref(packet); cleanup(); return ret; @@ -211,7 +212,7 @@ static int process_frames( // Encode and write all flushed frames for (AVFrame *&flushed_frame : flushed_frames) { - ret = write_frame(flushed_frame, enc_ctx, ofmt_ctx, vstream_idx); + ret = write_frame(flushed_frame, enc_ctx, ofmt_ctx, out_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error encoding/writing flushed frame: {}", errbuf); @@ -226,7 +227,7 @@ static int process_frames( } // Flush the encoder - ret = flush_encoder(enc_ctx, ofmt_ctx); + ret = flush_encoder(enc_ctx, ofmt_ctx, out_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Error flushing encoder: {}", errbuf); @@ -256,7 +257,8 @@ extern "C" int process_video( AVBufferRef *hw_ctx = nullptr; int *stream_map = nullptr; Filter *filter = nullptr; - int vstream_idx = -1; + int in_vstream_idx = -1; + int out_vstream_idx = -1; char errbuf[AV_ERROR_MAX_STRING_SIZE]; int ret = 0; @@ -348,7 +350,7 @@ extern "C" int process_video( } // Initialize input - ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &vstream_idx); + ret = init_decoder(hw_type, hw_ctx, in_fpath, &ifmt_ctx, &dec_ctx, &in_vstream_idx); if (ret < 0) { av_strerror(ret, errbuf, sizeof(errbuf)); spdlog::critical("Failed to initialize decoder: {}", errbuf); @@ -385,7 +387,8 @@ extern "C" int process_video( &enc_ctx, dec_ctx, encoder_config, - vstream_idx, + in_vstream_idx, + &out_vstream_idx, &stream_map ); if (ret < 0) { @@ -461,7 +464,8 @@ extern "C" int process_video( dec_ctx, enc_ctx, filter, - vstream_idx, + in_vstream_idx, + out_vstream_idx, stream_map, benchmark );