Nvidia Video Codec SDK: AppDec Walkthrough


    • Project layout
    • AppDec
      • Overall hardware-decoding pipeline
      • The main function
      • The decoding workflow function DecodeMediaFile
      • Initializing the decoder
      • The actual decoding function Decode
      • The HandlePictureDisplay callback
    • Data pointers
    • References

Project layout

The SDK version used here is Video_Codec_SDK_8.2.16; download link: NVIDIA VIDEO CODEC SDK 8.2.16.zip
The project structure is as follows:
(figure: project directory structure)
AppDecode: hardware decoding of a video source
AppEncode: video encoding
AppTranscode: transcoding between codec formats
This article examines the hardware-decoding sample AppDec.
(figure: the AppDec project)

AppDec

Overall hardware-decoding pipeline

(figure: overall hardware-decoding pipeline)

The main function

/**
*  This sample application illustrates the demuxing and decoding of media file with
*  resize and crop of the output image. The application supports both planar (YUV420P and YUV420P16)
*  and non-planar (NV12 and P016) output formats.
*/

int main(int argc, char **argv) 
{
    char szInFilePath[256] = "D:/H265/video/KiteFlite_3840x1920_0tile_22_0.h265", szOutFilePath[256] = "";
    bool bOutPlanar = true;
    int iGpu = 0;
    Rect cropRect = {};
    Dim resizeDim = {};
    try
    {
        
		// Read the input file (e.g. a test .h265 file) and other options from the command-line arguments
		ParseCommandLine(argc, argv, szInFilePath, szOutFilePath, bOutPlanar, iGpu, cropRect, resizeDim);
        CheckInputFile(szInFilePath);

        if (!*szOutFilePath) {
            sprintf(szOutFilePath, bOutPlanar ? "out.planar" : "out.native");
        }

		// Initialize the CUDA environment
        ck(cuInit(0));
        int nGpu = 0;
        ck(cuDeviceGetCount(&nGpu));
        if (iGpu < 0 || iGpu >= nGpu) {
            std::cout << "GPU ordinal out of range. Should be within [" << 0 << ", " << nGpu - 1 << "]" << std::endl;
            return 1;
        }
        CUdevice cuDevice = 0;
        ck(cuDeviceGet(&cuDevice, iGpu));
        char szDeviceName[80];
        ck(cuDeviceGetName(szDeviceName, sizeof(szDeviceName), cuDevice));
        std::cout << "GPU in use: " << szDeviceName << std::endl;
		// Create the CUDA context
        CUcontext cuContext = NULL;
        ck(cuCtxCreate(&cuContext, 0, cuDevice));
		// Run the decode
        std::cout << "Decode with demuxing." << std::endl;
        DecodeMediaFile(cuContext, szInFilePath, szOutFilePath, bOutPlanar, cropRect, resizeDim);
    }
    catch (const std::exception& ex)
    {
        std::cout << ex.what();
        exit(1);
    }

    return 0;
}
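
Every CUDA driver-API call in main is wrapped in ck(); the real helper lives in the sample's NvCodecUtils.h. A rough stand-in (my own approximation, not the SDK source) only needs to check the CUresult and throw, so that main's try/catch reports the failure:

// Approximation of the sample's ck() error-check helper (not the SDK source).
// It turns a failing CUDA driver call into an exception with file/line context.
#include <cuda.h>
#include <sstream>
#include <stdexcept>

#define ck(call) checkCu(call, #call, __FILE__, __LINE__)

inline void checkCu(CUresult e, const char *expr, const char *file, int line)
{
    if (e != CUDA_SUCCESS) {
        const char *name = nullptr;
        cuGetErrorName(e, &name); // map the error code to its symbolic name
        std::ostringstream err;
        err << file << ":" << line << " " << expr << " returned "
            << (name ? name : "unknown CUDA error");
        throw std::runtime_error(err.str());
    }
}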

The decoding workflow function DecodeMediaFile

void DecodeMediaFile(CUcontext cuContext, const char *szInFilePath, const char *szOutFilePath, bool bOutPlanar,
    const Rect &cropRect, const Dim &resizeDim)
{
    // Open the output file
	std::ofstream fpOut(szOutFilePath, std::ios::out | std::ios::binary);
    if (!fpOut)
    {
        std::ostringstream err;
        err << "Unable to open output file: " << szOutFilePath << std::endl;
        throw std::invalid_argument(err.str());
    }

	// Demux the input file; FFmpegDemuxer is a helper class that wraps FFmpeg to parse the container
    FFmpegDemuxer demuxer(szInFilePath);
	// Create the hardware decoder and register the three key callbacks; the bUseDeviceFrame argument (the false here) decides whether decoded frames stay in GPU memory; when true, the decoded data is not copied back to the CPU
    NvDecoder dec(cuContext, demuxer.GetWidth(), demuxer.GetHeight(), false, FFmpeg2NvCodecId(demuxer.GetVideoCodec()), NULL, false, false, &cropRect, &resizeDim);

    int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
    uint8_t *pVideo = NULL, **ppFrame;
    do {
		// Demux: fetch one packet of compressed bitstream; pVideo points at the data, nVideoBytes is its size in bytes
		// Note: Demux() rewrites the address stored in pVideo to pkt.data, i.e. it changes where pVideo points
        demuxer.Demux(&pVideo, &nVideoBytes);
		// The actual decoding entry point
        dec.Decode(pVideo, nVideoBytes, &ppFrame, &nFrameReturned);
        if (!nFrame && nFrameReturned)
            LOG(INFO) << dec.GetVideoInfo();// This function is used to print information about the video stream
		// Hardware decoding is asynchronous; nFrameReturned reports how many frames this call produced
        for (int i = 0; i < nFrameReturned; i++) {
            if (bOutPlanar) {
				// Convert to planar format
                ConvertToPlanar(ppFrame[i], dec.GetWidth(), dec.GetHeight(), dec.GetBitDepth());
            }
			// Write the frame to the output file; GetFrameSize() returns the current frame size based on the pixel format
            fpOut.write(reinterpret_cast<char*>(ppFrame[i]), dec.GetFrameSize());
        }
        nFrame += nFrameReturned;
    } while (nVideoBytes);

    std::cout << "Total frame decoded: " << nFrame << std::endl
            << "Saved in file " << szOutFilePath << " in "
            << (dec.GetBitDepth() == 8 ? (bOutPlanar ? "iyuv" : "nv12") : (bOutPlanar ? "yuv420p16" : "p016"))
            << " format" << std::endl;
    fpOut.close();
}
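
ConvertToPlanar is another helper from the sample utilities. For 8-bit output it rearranges the decoder's semi-planar NV12 frame (a full-resolution Y plane followed by interleaved UV) into planar IYUV (Y, then U, then V). A CPU-side sketch of that rearrangement, purely to illustrate the memory layout (the SDK's own implementation is not reproduced here):

// Illustrative CPU version of the NV12 -> IYUV rearrangement for 8-bit frames.
// Not the SDK code; it only shows the layout ConvertToPlanar has to produce.
#include <cstdint>
#include <vector>

void Nv12ToIyuv(uint8_t *pFrame, int width, int height)
{
    const int lumaSize   = width * height;   // Y plane: width x height bytes
    const int chromaSize = lumaSize / 4;     // each of U and V: (width/2) x (height/2) bytes

    // NV12 stores the UV samples interleaved right after the Y plane
    std::vector<uint8_t> uv(pFrame + lumaSize, pFrame + lumaSize + 2 * chromaSize);

    uint8_t *pU = pFrame + lumaSize;         // planar U goes right after Y
    uint8_t *pV = pU + chromaSize;           // planar V goes after U
    for (int i = 0; i < chromaSize; i++) {
        pU[i] = uv[2 * i];                   // even bytes are U samples
        pV[i] = uv[2 * i + 1];               // odd bytes are V samples
    }
}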

Initializing the decoder

(figure: decoder initialization flow)

// Decoder initialization code (NvDecoder constructor)
NvDecoder::NvDecoder(CUcontext cuContext, int nWidth, int nHeight, bool bUseDeviceFrame, cudaVideoCodec eCodec, std::mutex *pMutex,
    bool bLowLatency, bool bDeviceFramePitched, const Rect *pCropRect, const Dim *pResizeDim, int maxWidth, int maxHeight) :
    m_cuContext(cuContext), m_bUseDeviceFrame(bUseDeviceFrame), m_eCodec(eCodec), m_pMutex(pMutex), m_bDeviceFramePitched(bDeviceFramePitched),
    m_nMaxWidth (maxWidth), m_nMaxHeight(maxHeight)
{
    if (pCropRect) m_cropRect = *pCropRect;
    if (pResizeDim) m_resizeDim = *pResizeDim;
	//This API is used to create CtxLock object
    NVDEC_API_CALL(cuvidCtxLockCreate(&m_ctxLock, cuContext));

	// CUVIDPARSERPARAMS: the parameter struct used to create the video parser
	// The key fields are the three callbacks that handle the parsed data
    CUVIDPARSERPARAMS videoParserParameters = {};  // parameter struct
    videoParserParameters.CodecType = eCodec;  // codec type of the input, e.g. H264
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;  // maximum number of decode surfaces (the parser cycles through them)
    videoParserParameters.ulMaxDisplayDelay = bLowLatency ? 0 : 1;
    videoParserParameters.pUserData = this;
	// The three callbacks
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc; // called when a new video sequence is parsed
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc; // called when a picture is ready to be decoded
    videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc; // called when a decoded picture is ready for display
    if (m_pMutex) m_pMutex->lock(); // m_pMutex is an optional mutex protecting the cuvid API calls
	//Create video parser object and initialize
    NVDEC_API_CALL(cuvidCreateVideoParser(&m_hParser, &videoParserParameters));
    if (m_pMutex) m_pMutex->unlock();
}
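
HandleVideoSequenceProc, HandlePictureDecodeProc and HandlePictureDisplayProc are static trampolines: cuvid can only call plain function pointers, so each of them casts pUserData (registered as this above) back to the NvDecoder object and forwards to the corresponding member function. The sample defines them along these lines (the names match the SDK sample; the exact bodies here are my paraphrase):

// Static callback trampolines: cuvid passes back the pUserData pointer that was
// registered in videoParserParameters, which is the NvDecoder instance itself.
static int CUDAAPI HandleVideoSequenceProc(void *pUserData, CUVIDEOFORMAT *pVideoFormat)
{
    return ((NvDecoder *)pUserData)->HandleVideoSequence(pVideoFormat);
}
static int CUDAAPI HandlePictureDecodeProc(void *pUserData, CUVIDPICPARAMS *pPicParams)
{
    return ((NvDecoder *)pUserData)->HandlePictureDecode(pPicParams);
}
static int CUDAAPI HandlePictureDisplayProc(void *pUserData, CUVIDPARSERDISPINFO *pDispInfo)
{
    return ((NvDecoder *)pUserData)->HandlePictureDisplay(pDispInfo);
}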

The actual decoding function Decode

(figure: Decode call flow)

// The function that actually performs decoding; the compressed bitstream is in pData
bool NvDecoder::Decode(const uint8_t *pData, int nSize, uint8_t ***pppFrame, int *pnFrameReturned, uint32_t flags, int64_t **ppTimestamp, int64_t timestamp, CUstream stream)
{
    if (!m_hParser)
    {
        NVDEC_THROW_ERROR("Parser not initialized.", CUDA_ERROR_NOT_INITIALIZED);
        return false;
    }

    m_nDecodedFrame = 0;
	// Wrap the AVPacket data in a CUVIDSOURCEDATAPACKET and hand it to cuvidParseVideoData
    CUVIDSOURCEDATAPACKET packet = {0}; 
    packet.payload = pData; // pointer to the packet payload data
    packet.payload_size = nSize; // number of bytes in the payload
    packet.flags = flags | CUVID_PKT_TIMESTAMP;
    packet.timestamp = timestamp;
	// An empty packet marks the last packet of the stream
    if (!pData || nSize == 0) {
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    }
    m_cuvidStream = stream;
    if (m_pMutex) m_pMutex->lock(); // lock around the parse call
    NVDEC_API_CALL(cuvidParseVideoData(m_hParser, &packet));
    if (m_pMutex) m_pMutex->unlock(); // unlock
    m_cuvidStream = 0;

	// Check whether this call produced any decoded frames
    if (m_nDecodedFrame > 0)
    {
        if (pppFrame) 
        {
            m_vpFrameRet.clear(); // clear the list of frames to be returned
            std::lock_guard<std::mutex> lock(m_mtxVPFrame);
			// Copy the decoded frame pointers from m_vpFrame into m_vpFrameRet
            m_vpFrameRet.insert(m_vpFrameRet.begin(), m_vpFrame.begin(), m_vpFrame.begin() + m_nDecodedFrame);
            *pppFrame = &m_vpFrameRet[0]; // the address stored in *pppFrame now points at the start of m_vpFrameRet
        }
        if (ppTimestamp) 
        {
            *ppTimestamp = &m_vTimestamp[0];
        }
    }
    if (pnFrameReturned)
    {
        *pnFrameReturned = m_nDecodedFrame;
    }
    return true;
}
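
Note how the end of the stream is handled: when Demux() returns zero bytes, the packet gets the CUVID_PKT_ENDOFSTREAM flag, which flushes the frames the decoder is still holding for reordering. That is why the loop in DecodeMediaFile runs while (nVideoBytes) and effectively makes one final Decode call with an empty packet. A trimmed-down sketch of that loop (assuming demuxer and dec are constructed as in DecodeMediaFile above):

// Minimal decode loop including the flush: the last Demux() returns
// nVideoBytes == 0, and the matching Decode() call sends CUVID_PKT_ENDOFSTREAM,
// draining the remaining buffered frames.
uint8_t *pVideo = nullptr, **ppFrame = nullptr;
int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
do {
    demuxer.Demux(&pVideo, &nVideoBytes);              // nVideoBytes == 0 at end of file
    dec.Decode(pVideo, nVideoBytes, &ppFrame, &nFrameReturned);
    for (int i = 0; i < nFrameReturned; i++) {
        // consume ppFrame[i] here (host or device pointer, depending on bUseDeviceFrame)
    }
    nFrame += nFrameReturned;
} while (nVideoBytes);                                  // the last iteration is the flush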

The HandlePictureDisplay callback

My main goal is to find where the decoded data ends up in GPU memory and what the returned pointers point to, so HandlePictureDisplay is the function I looked at most closely.

/* The return value of HandlePictureDisplay() is interpreted as:
*  0: fail, >=1: succeeded
*/
int NvDecoder::HandlePictureDisplay(CUVIDPARSERDISPINFO *pDispInfo) {
    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field = pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = m_cuvidStream;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;

	// cuvidMapVideoFrame: returns a CUDA device pointer and the pitch of the video frame
	// i.e. dpSrcFrame ends up pointing at the decoded frame in GPU memory
    NVDEC_API_CALL(cuvidMapVideoFrame(m_hDecoder, pDispInfo->picture_index, &dpSrcFrame,
        &nSrcPitch, &videoProcessingParameters));

    CUVIDGETDECODESTATUS DecodeStatus;
    memset(&DecodeStatus, 0, sizeof(DecodeStatus));
    CUresult result = cuvidGetDecodeStatus(m_hDecoder, pDispInfo->picture_index, &DecodeStatus);
    if (result == CUDA_SUCCESS && (DecodeStatus.decodeStatus == cuvidDecodeStatus_Error || DecodeStatus.decodeStatus == cuvidDecodeStatus_Error_Concealed))
    {
        printf("Decode Error occurred for picture %d\n", m_nPicNumInDecodeOrder[pDispInfo->picture_index]);
    }
    uint8_t *pDecodedFrame = nullptr;
    {
		// lock_guard unlocks automatically: when control leaves the scope in which the lock_guard object was created, it is destroyed and the mutex is released
        std::lock_guard<std::mutex> lock(m_mtxVPFrame);
		// One more decoded frame (m_nDecodedFrame+1); if there is not enough space, allocate another buffer
        if ((unsigned)++m_nDecodedFrame > m_vpFrame.size())
        {
            // Not enough frames in stock
            m_nFrameAlloc++;
            uint8_t *pFrame = NULL;
			// m_bUseDeviceFrame was set when the decoder was constructed: if true, decoded data stays in GPU memory and is not copied back to CPU memory
            if (m_bUseDeviceFrame)
            {
				//Pushes the given context \p ctx onto the CPU thread's stack of current contexts.
				//The specified context becomes the CPU thread's current context, so all CUDA functions that operate on the current context are affected.
                CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext));				
                // Allocate device memory and return a pointer to it in pFrame
                if (m_bDeviceFramePitched)
                {
                    CUDA_DRVAPI_CALL(cuMemAllocPitch((CUdeviceptr *)&pFrame, &m_nDeviceFramePitch, m_nWidth * (m_nBitDepthMinus8 ? 2 : 1), m_nHeight * 3 / 2, 16));
                }
                else 
                {
					//GetFrameSize() is used to get the current frame size based on pixel format
					CUDA_DRVAPI_CALL(cuMemAlloc((CUdeviceptr *)&pFrame, GetFrameSize()));
                }
                CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL));
            }
            else // CPU (host) memory
            {
                pFrame = new uint8_t[GetFrameSize()]; // allocate host memory
            }
            m_vpFrame.push_back(pFrame);
        }
        pDecodedFrame = m_vpFrame[m_nDecodedFrame - 1]; // take the last (newly reserved) buffer
    }

    CUDA_DRVAPI_CALL(cuCtxPushCurrent(m_cuContext)); // make the context current
    CUDA_MEMCPY2D m = { 0 };
    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    m.srcDevice = dpSrcFrame; // pointer to the decoded frame in GPU memory
    m.srcPitch = nSrcPitch;
    m.dstMemoryType = m_bUseDeviceFrame ? CU_MEMORYTYPE_DEVICE : CU_MEMORYTYPE_HOST;
    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
    m.dstPitch = m_nDeviceFramePitch ? m_nDeviceFramePitch : m_nWidth * (m_nBitDepthMinus8 ? 2 : 1);
    m.WidthInBytes = m_nWidth * (m_nBitDepthMinus8 ? 2 : 1);
    m.Height = m_nHeight;
    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
    m.srcDevice = (CUdeviceptr)((uint8_t *)dpSrcFrame + m.srcPitch * m_nSurfaceHeight);
    m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame + m.dstPitch * m_nHeight);
    m.Height = m_nHeight / 2;
	// After both copies pDecodedFrame holds the complete NV12 frame,
	// which could then be converted further (e.g. NV12 to RGBA)
    CUDA_DRVAPI_CALL(cuMemcpy2DAsync(&m, m_cuvidStream));
    CUDA_DRVAPI_CALL(cuStreamSynchronize(m_cuvidStream));
    CUDA_DRVAPI_CALL(cuCtxPopCurrent(NULL)); // copies done, pop the context

    if ((int)m_vTimestamp.size() < m_nDecodedFrame) {
        m_vTimestamp.resize(m_vpFrame.size());
    }
    m_vTimestamp[m_nDecodedFrame - 1] = pDispInfo->timestamp;
	// Unmap the previously mapped video frame
    NVDEC_API_CALL(cuvidUnmapVideoFrame(m_hDecoder, dpSrcFrame));
    return 1;
}
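
The detail that matters most in the copy above is the pitch: cuvidMapVideoFrame returns a surface whose rows are nSrcPitch bytes apart (and whose chroma plane starts at srcPitch * m_nSurfaceHeight), while the destination uses m_nDeviceFramePitch or the tight row width. Reading a pitched NV12 surface directly therefore means computing addresses row by row, roughly like this (illustration only, 8-bit case):

// Addressing pixels in a pitched 8-bit NV12 surface (illustration only).
// Rows are `pitch` bytes apart even though only `width` bytes per row are payload,
// and the interleaved UV plane starts pitch * surfaceHeight bytes after the Y plane.
#include <cstdint>
#include <cstddef>

inline uint8_t LumaAt(const uint8_t *base, size_t pitch, int x, int y)
{
    return base[(size_t)y * pitch + x];
}

inline void ChromaAt(const uint8_t *base, size_t pitch, int surfaceHeight,
                     int x, int y, uint8_t &u, uint8_t &v)
{
    const uint8_t *uvPlane = base + pitch * (size_t)surfaceHeight;
    u = uvPlane[(size_t)(y / 2) * pitch + (x / 2) * 2];     // even byte: U sample
    v = uvPlane[(size_t)(y / 2) * pitch + (x / 2) * 2 + 1]; // odd byte: V sample
}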

Data pointers

The decoded data in memory is reached through the ppFrame pointer array.
To traverse the YUV values of all pixels:

void DecodeMediaFile(CUcontext cuContext, const char *szInFilePath, const char *szOutFilePath, bool bOutPlanar,
    const Rect &cropRect, const Dim &resizeDim)
{
    // Open the output file
	std::ofstream fpOut(szOutFilePath, std::ios::out | std::ios::binary);
    if (!fpOut)
    {
        std::ostringstream err;
        err << "Unable to open output file: " << szOutFilePath << std::endl;
        throw std::invalid_argument(err.str());
    }

	// Demux the input file; FFmpegDemuxer is a helper class that wraps FFmpeg to parse the container
    FFmpegDemuxer demuxer(szInFilePath);
	// Create the hardware decoder and register the three key callbacks; the bUseDeviceFrame argument (the false here) decides whether decoded frames stay in GPU memory; when true, the decoded data is not copied back to the CPU
    NvDecoder dec(cuContext, demuxer.GetWidth(), demuxer.GetHeight(), false, FFmpeg2NvCodecId(demuxer.GetVideoCodec()), NULL, false, false, &cropRect, &resizeDim);

    int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;
    uint8_t *pVideo = NULL, **ppFrame;
    do {
		// Demux: fetch one packet of compressed bitstream; pVideo points at the data, nVideoBytes is its size in bytes
		// Note: Demux() rewrites the address stored in pVideo to pkt.data, i.e. it changes where pVideo points
        demuxer.Demux(&pVideo, &nVideoBytes);
		// The actual decoding entry point
        dec.Decode(pVideo, nVideoBytes, &ppFrame, &nFrameReturned);
        if (!nFrame && nFrameReturned)
            LOG(INFO) << dec.GetVideoInfo();// This function is used to print information about the video stream
		// Hardware decoding is asynchronous; nFrameReturned reports how many frames this call produced
        for (int i = 0; i < nFrameReturned; i++) {
            if (bOutPlanar) {
				// Convert to planar format
                ConvertToPlanar(ppFrame[i], dec.GetWidth(), dec.GetHeight(), dec.GetBitDepth());
            }
            // Traverse the YUV values of all pixels and print them
            // (this assumes an 8-bit planar frame of width*height*3/2 bytes)
			for (int j = 0; j < dec.GetWidth() * dec.GetHeight() * 3 / 2; j++) {
				printf(" %d, ", ppFrame[i][j]);
			}

			// Write the frame to the output file; GetFrameSize() returns the current frame size based on the pixel format
            fpOut.write(reinterpret_cast<char*>(ppFrame[i]), dec.GetFrameSize());
        }
        nFrame += nFrameReturned;
    } while (nVideoBytes);

    std::cout << "Total frame decoded: " << nFrame << std::endl
            << "Saved in file " << szOutFilePath << " in "
            << (dec.GetBitDepth() == 8 ? (bOutPlanar ? "iyuv" : "nv12") : (bOutPlanar ? "yuv420p16" : "p016"))
            << " format" << std::endl;
    fpOut.close();
}
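
Since bUseDeviceFrame is false here, ppFrame[i] points at host memory, and after ConvertToPlanar the frame is planar IYUV, so individual samples can be indexed directly. For an 8-bit frame the plane offsets are fixed; the sketch below illustrates this (my own helper, assuming 8-bit data and a row stride equal to the frame width):

// Reading the Y, U and V samples of pixel (x, y) from an 8-bit planar IYUV frame
// stored contiguously in host memory (ppFrame[i] after ConvertToPlanar).
// Illustration only; assumes the row stride equals the frame width.
#include <cstdint>

struct Yuv { uint8_t y, u, v; };

inline Yuv SampleAt(const uint8_t *frame, int width, int height, int x, int y)
{
    const int lumaSize   = width * height;
    const int chromaSize = lumaSize / 4;
    const uint8_t *uPlane = frame + lumaSize;     // U plane follows Y
    const uint8_t *vPlane = uPlane + chromaSize;  // V plane follows U

    Yuv s;
    s.y = frame[y * width + x];
    s.u = uPlane[(y / 2) * (width / 2) + (x / 2)]; // 4:2:0 chroma subsampling
    s.v = vPlane[(y / 2) * (width / 2) + (x / 2)];
    return s;
}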

References

NVIDIA hardware decoding notes (1)
NVIDIA hardware decoding notes (2)
[Video Development] Nvidia hardware decoding summary

