Main Content

本页的翻译已过时。点击此处可查看最新英文版本。

使用 GPU Coder 优化车道检测

此示例说明如何从表示为 SeriesNetwork 对象的深度学习网络生成 CUDA® 代码。此示例中的串行网络是一个卷积神经网络,可以从图像中检测并输出车道标记边界。

前提条件

  • 支持 CUDA 的 NVIDIA® GPU。

  • NVIDIA CUDA 工具包和驱动程序。

  • NVIDIA cuDNN 库。

  • OpenCV 库,用于视频读取和图像显示操作。

  • 编译器和库的环境变量。有关支持的编译器和库的版本的信息,请参阅 Third-Party Hardware (GPU Coder)。有关设置环境变量的信息,请参阅 Setting Up the Prerequisite Products (GPU Coder)。

验证 GPU 环境

使用 coder.checkGpuInstall (GPU Coder) 函数验证运行此示例所需的编译器和库是否已正确设置。

% Create a GPU environment configuration object for the 'host' target and
% set the options used by this example before running the installation check.
envCfg = coder.gpuEnvConfig('host');
% Deep learning library to validate against; this example targets cuDNN.
envCfg.DeepLibTarget = 'cudnn';
% Enable the deep learning code generation check.
envCfg.DeepCodegen = 1;
% Quiet mode -- presumably suppresses the verbose report; confirm in the
% coder.gpuEnvConfig documentation.
envCfg.Quiet = 1;
% Run the verification with the configuration assembled above.
coder.checkGpuInstall(envCfg);

获得预训练的 SeriesNetwork

[laneNet, coeffMeans, coeffStds] = getLaneDetectionNetworkGPU();

该网络将图像作为输入并输出两个车道边界,分别对应于自车(ego vehicle)的左右车道。每个车道边界都由抛物线方程 $y = ax^2 + bx + c$ 表示,其中 $y$ 是横向偏移,$x$ 是与车辆的纵向距离。该网络为每个车道输出三个参数 a、b 和 c。网络架构类似于 AlexNet,但是最后几层会替换为较小的全连接层和回归输出层。要查看网络架构,请使用 analyzeNetwork 函数。

analyzeNetwork(laneNet)

检查主要入口函数

type detect_lane.m
function [laneFound, ltPts, rtPts] = detect_lane(frame, laneCoeffMeans, laneCoeffStds)
% DETECT_LANE Predict the ego-lane boundaries and map them to image points.
%
% From the networks output, compute left and right lane points in the image
% coordinates. The camera coordinates are described by the caltech mono
% camera model.
%
% Inputs:
%   frame          - image handed to the network after permute(frame,[2 1 3]).
%   laneCoeffMeans - per-coefficient means used to de-normalize the network
%                    output (six values: a,b,c for left then a,b,c for right).
%   laneCoeffStds  - per-coefficient standard deviations, same layout.
%
% Outputs:
%   laneFound - true only when both the left and the right lane are detected.
%   ltPts     - 28-by-2 single, image coordinates of the left boundary.
%   rtPts     - 28-by-2 single, image coordinates of the right boundary.
%               Both point arrays are all zeros when laneFound is false.

%#codegen

% A persistent object is used to hold the series network. At the first call
% to this function the object is constructed and set up; subsequent calls
% reuse it for predict, which avoids reconstructing and reloading the
% network on every frame.
persistent lanenet;

if isempty(lanenet)
    lanenet = coder.loadDeepLearningNetwork('laneNet.mat', 'lanenet');
end

lanecoeffsNetworkOutput = lanenet.predict(permute(frame, [2 1 3]));

% Recover original coeffs by reversing the normalization steps
params = lanecoeffsNetworkOutput .* laneCoeffStds + laneCoeffMeans;

% The constant term c (elements 3 and 6) must exceed 0.5 in magnitude for
% the corresponding lane to count as detected.
isRightLaneFound = abs(params(6)) > 0.5;
isLeftLaneFound  = abs(params(3)) > 0.5;

vehicleXPoints = 3:30; %meters, ahead of the sensor

% Zero-initialize the outputs. They are returned even when no lane is
% found, so they must not be left unassigned: coder.nullcopy would skip the
% initialization in generated code and hand uninitialized memory back to
% the caller on that path.
ltPts = zeros(28,2,'single');
rtPts = zeros(28,2,'single');

laneFound = isRightLaneFound && isLeftLaneFound;

if laneFound
    rtBoundary = params(4:6);
    rt_y = computeBoundaryModel(rtBoundary, vehicleXPoints);
    ltBoundary = params(1:3);
    lt_y = computeBoundaryModel(ltBoundary, vehicleXPoints);

    % Visualize lane boundaries of the ego vehicle
    tform = get_tformToImage;
    % map vehicle to image coordinates
    ltPts = tform.transformPointsInverse([vehicleXPoints', lt_y']);
    rtPts = tform.transformPointsInverse([vehicleXPoints', rt_y']);
end

end

function yWorld = computeBoundaryModel(model, xWorld)
% Evaluate the lane boundary polynomial at the given longitudinal positions.
%   model  - polynomial coefficients in polyval order (highest power first);
%            the caller passes the three parabola coefficients [a b c].
%   xWorld - positions at which to evaluate the polynomial.
%   yWorld - polynomial value at each element of xWorld.
	yWorld = polyval(model, xWorld);	
end

function tform = get_tformToImage
% Build the projective transform used to map vehicle coordinates to image
% coordinates, from the fixed camera setup hard-coded below.

% Camera orientation in degrees; only the pitch is non-zero.
yawDeg   = 0;
pitchDeg = 14;
rollDeg  = 0;

% Extrinsics: 3 rotation rows stacked on top of the translation row (4-by-3)
extrinsics = [rotationMatrix(yawDeg, pitchDeg, rollDeg); ...
              translationVector(yawDeg, pitchDeg, rollDeg)];

% Intrinsics of the camera
focal  = [309.4362, 344.2161];
center = [318.9034, 257.5352];
skewCoeff = 0;
intrinsics = intrinsicMatrix(focal, skewCoeff, center);

camMatrix = extrinsics * intrinsics;

% Reduce the 4-by-3 camera matrix to a 2-D homography by dropping the Z row
homography = camMatrix([1 2 4], :);

tform = projective2d(homography);
tform = tform.invert();
end

function translation = translationVector(yaw, pitch, roll)
% Translation row of the camera extrinsics for the given yaw, pitch and
% roll (degrees), using the fixed sensor location and mounting height below.

mountHeight = 2.1798;   % mounting height in meters from the ground
sensorXY    = [0 0];    % sensor location

% Rotations are written from the last applied to the first applied and
% multiplied left to right.
camRotation = rotZ(yaw) * rotX(90-pitch) * rotZ(roll);

% Adjust for the sensor location by adding a translation; note the swapped
% order of the two sensor-location components.
offsetWorld = [sensorXY(2), sensorXY(1), mountHeight];
translation = offsetWorld * camRotation;
end

%------------------------------------------------------------------
% Rotation around X-axis
function R = rotX(a)
% 3-by-3 rotation matrix about the X-axis; the angle a is given in degrees.
theta = deg2rad(a);
c = cos(theta);
s = sin(theta);
R = [1, 0,  0; ...
     0, c, -s; ...
     0, s,  c];
end

%------------------------------------------------------------------
% Rotation around Y-axis
function R = rotY(a)
% 3-by-3 rotation matrix about the Y-axis; the angle a is given in degrees.
theta = deg2rad(a);
c = cos(theta);
s = sin(theta);
R = [ c, 0, s; ...
      0, 1, 0; ...
     -s, 0, c];
end

%------------------------------------------------------------------
% Rotation around Z-axis
function R = rotZ(a)
% 3-by-3 rotation matrix about the Z-axis; the angle a is given in degrees.
theta = deg2rad(a);
c = cos(theta);
s = sin(theta);
R = [c, -s, 0; ...
     s,  c, 0; ...
     0,  0, 1];
end

%------------------------------------------------------------------
% Given the Yaw, Pitch, and Roll, determine the appropriate Euler angles
% and the sequence in which they are applied to align the camera's
% coordinate system with the vehicle coordinate system. The resulting
% matrix is a Rotation matrix that together with the Translation vector
% defines the extrinsic parameters of the camera.
function rotation = rotationMatrix(yaw, pitch, roll)
% Compose the camera rotation from yaw, pitch and roll (degrees).
% The factors are accumulated left to right, starting with the rotation
% that is applied last and ending with the one that is applied first.

rotation = rotY(180);                    % last rotation: point Z up
rotation = rotation * rotZ(-90);         % X-Y swap
rotation = rotation * rotZ(yaw);         % point the camera forward
rotation = rotation * rotX(90-pitch);    % "un-pitch"
rotation = rotation * rotZ(roll);        % 1st rotation: "un-roll"
end

function intrinsicMat = intrinsicMatrix(FocalLength, Skew, PrincipalPoint)
% Assemble the 3-by-3 intrinsic matrix in the row-vector (post-multiply)
% layout: focal lengths on the diagonal, skew at (2,1) and the principal
% point in the last row.
fx = FocalLength(1);
fy = FocalLength(2);
cx = PrincipalPoint(1);
cy = PrincipalPoint(2);

intrinsicMat = [fx,   0,  0; ...
                Skew, fy, 0; ...
                cx,   cy, 1];
end

生成网络代码和后处理代码

网络会计算参数 a、b 和 c,这些参数描述了左右车道边界的抛物线方程。

根据这些参数,计算与车道位置对应的 x 和 y 坐标。这些坐标必须映射到图像坐标。函数 detect_lane.m 执行所有这些计算。为 'lib' 目标创建一个 GPU 代码配置对象,从而为该函数生成 CUDA 代码,并将目标语言设置为 C++。使用 coder.DeepLearningConfig (GPU Coder) 函数创建一个 CuDNN 深度学习配置对象,并将其赋给 GPU 代码配置对象的 DeepLearningConfig 属性。运行 codegen 命令。

% GPU code configuration object for the 'lib' build target.
cfg = coder.gpuConfig('lib');
% Use the cuDNN deep learning configuration for the network code.
cfg.DeepLearningConfig = coder.DeepLearningConfig('cudnn');
% Produce a code generation report.
cfg.GenerateReport = true;
% Set the target language to C++.
cfg.TargetLang = 'C++';
% Example arguments that fix the entry-point input types: a 227x227x3 single
% frame followed by the 1x6 double coefficient means and standard deviations.
inputs = {ones(227,227,3,'single'),ones(1,6,'double'),ones(1,6,'double')};
% Generate CUDA code for detect_lane with the configuration above.
codegen -args inputs -config cfg detect_lane
Code generation successful: View report

生成的代码说明

串行网络生成为一个 C++ 类,其中包含由 23 个层类组成的数组。

// Generated network class: holds the layer array and exposes the
// setup / predict / cleanup entry points.
class c_lanenet {
 public:
  // Network state
  int32_T batchSize;
  int32_T numLayers;
  real32_T *inputData;
  real32_T *outputData;
  MWCNNLayer *layers[23];  // one entry per layer of the series network

  // Construction and teardown
  c_lanenet(void);
  ~c_lanenet(void);

  // Network operations
  void setup(void);
  void predict(void);
  void cleanup(void);
};

该类的 setup() 方法会设置句柄并为每个层对象分配内存。predict() 方法会针对网络中 23 个层的每个层调用预测。

cnn_lanenet_conv*_w 和 cnn_lanenet_conv*_b 文件是网络中卷积层的二进制权重和偏置文件。cnn_lanenet_fc*_w 和 cnn_lanenet_fc*_b 文件是网络中全连接层的二进制权重和偏置文件。

% Folder (relative to the current directory) where codegen placed the
% generated library sources and the binary weight files.
codegendir = fullfile('codegen', 'lib', 'detect_lane');
% List the generated files.
dir(codegendir)
.                                      MWReLULayer.o                          
..                                     MWReLULayerImpl.cu                     
.gitignore                             MWReLULayerImpl.hpp                    
DeepLearningNetwork.cu                 MWReLULayerImpl.o                      
DeepLearningNetwork.h                  MWTargetNetworkImpl.cu                 
DeepLearningNetwork.o                  MWTargetNetworkImpl.hpp                
MWCNNLayer.cpp                         MWTargetNetworkImpl.o                  
MWCNNLayer.hpp                         MWTensor.hpp                           
MWCNNLayer.o                           MWTensorBase.cpp                       
MWCNNLayerImpl.cu                      MWTensorBase.hpp                       
MWCNNLayerImpl.hpp                     MWTensorBase.o                         
MWCNNLayerImpl.o                       _clang-format                          
MWCUSOLVERUtils.cpp                    buildInfo.mat                          
MWCUSOLVERUtils.hpp                    cnn_lanenet0_0_conv1_b.bin             
MWCUSOLVERUtils.o                      cnn_lanenet0_0_conv1_w.bin             
MWCudaDimUtility.hpp                   cnn_lanenet0_0_conv2_b.bin             
MWCustomLayerForCuDNN.cpp              cnn_lanenet0_0_conv2_w.bin             
MWCustomLayerForCuDNN.hpp              cnn_lanenet0_0_conv3_b.bin             
MWCustomLayerForCuDNN.o                cnn_lanenet0_0_conv3_w.bin             
MWElementwiseAffineLayer.cpp           cnn_lanenet0_0_conv4_b.bin             
MWElementwiseAffineLayer.hpp           cnn_lanenet0_0_conv4_w.bin             
MWElementwiseAffineLayer.o             cnn_lanenet0_0_conv5_b.bin             
MWElementwiseAffineLayerImpl.cu        cnn_lanenet0_0_conv5_w.bin             
MWElementwiseAffineLayerImpl.hpp       cnn_lanenet0_0_data_offset.bin         
MWElementwiseAffineLayerImpl.o         cnn_lanenet0_0_data_scale.bin          
MWElementwiseAffineLayerImplKernel.cu  cnn_lanenet0_0_fc6_b.bin               
MWElementwiseAffineLayerImplKernel.o   cnn_lanenet0_0_fc6_w.bin               
MWFCLayer.cpp                          cnn_lanenet0_0_fcLane1_b.bin           
MWFCLayer.hpp                          cnn_lanenet0_0_fcLane1_w.bin           
MWFCLayer.o                            cnn_lanenet0_0_fcLane2_b.bin           
MWFCLayerImpl.cu                       cnn_lanenet0_0_fcLane2_w.bin           
MWFCLayerImpl.hpp                      cnn_lanenet0_0_responseNames.txt       
MWFCLayerImpl.o                        codeInfo.mat                           
MWFusedConvReLULayer.cpp               codedescriptor.dmr                     
MWFusedConvReLULayer.hpp               compileInfo.mat                        
MWFusedConvReLULayer.o                 defines.txt                            
MWFusedConvReLULayerImpl.cu            detect_lane.a                          
MWFusedConvReLULayerImpl.hpp           detect_lane.cu                         
MWFusedConvReLULayerImpl.o             detect_lane.h                          
MWInputLayer.cpp                       detect_lane.o                          
MWInputLayer.hpp                       detect_lane_data.cu                    
MWInputLayer.o                         detect_lane_data.h                     
MWInputLayerImpl.hpp                   detect_lane_data.o                     
MWKernelHeaders.hpp                    detect_lane_initialize.cu              
MWMaxPoolingLayer.cpp                  detect_lane_initialize.h               
MWMaxPoolingLayer.hpp                  detect_lane_initialize.o               
MWMaxPoolingLayer.o                    detect_lane_internal_types.h           
MWMaxPoolingLayerImpl.cu               detect_lane_rtw.mk                     
MWMaxPoolingLayerImpl.hpp              detect_lane_terminate.cu               
MWMaxPoolingLayerImpl.o                detect_lane_terminate.h                
MWNormLayer.cpp                        detect_lane_terminate.o                
MWNormLayer.hpp                        detect_lane_types.h                    
MWNormLayer.o                          examples                               
MWNormLayerImpl.cu                     gpu_codegen_info.mat                   
MWNormLayerImpl.hpp                    html                                   
MWNormLayerImpl.o                      interface                              
MWOutputLayer.cpp                      mean.bin                               
MWOutputLayer.hpp                      predict.cu                             
MWOutputLayer.o                        predict.h                              
MWOutputLayerImpl.cu                   predict.o                              
MWOutputLayerImpl.hpp                  rtw_proj.tmw                           
MWOutputLayerImpl.o                    rtwtypes.h                             
MWReLULayer.cpp                        
MWReLULayer.hpp                        

生成附加文件以对输出进行后处理

从经过训练的网络中导出均值和标准差,以便在执行过程中使用。

% Write the normalization statistics next to the generated code so the
% executable can load them at run time. The file holds the coefficient
% means followed by the standard deviations, stored as doubles.
codegendir = fullfile(pwd, 'codegen', 'lib','detect_lane');
[fid, errMsg] = fopen(fullfile(codegendir,'mean.bin'), 'w');
if fid == -1
    % Fail with a clear message (for example when code generation has not
    % been run yet) instead of an invalid-identifier error inside fwrite.
    error('Could not open mean.bin in %s for writing: %s', codegendir, errMsg);
end
A = [coeffMeans coeffStds];
fwrite(fid, A, 'double');
fclose(fid);

主文件

使用主文件编译网络代码。主文件使用 OpenCV VideoCapture 方法从输入视频中读取帧。对每个帧都进行处理和分类,直到不再读取帧为止。在显示每个帧的输出之前,使用 detect_lane.cu 中生成的 detect_lane 函数对输出进行后处理。

type main_lanenet.cu
/* Copyright 2016 The MathWorks, Inc. */

#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/core/core.hpp>
#include <opencv2/core/types.hpp>
#include <opencv2/highgui.hpp>
#include <list>
#include <cmath>
#include "detect_lane.h"

using namespace cv;
// Resize a captured frame to the network input size and unpack it into the
// planar float buffer `input` (three consecutive 227*227 planes).
//   input - destination buffer with room for 3*227*227 floats.
//   orig  - source frame.
//   im    - receives the resized 227x227 frame.
// The pixel bytes are read as interleaved 3-byte groups and the first and
// third bytes are swapped while unpacking (BGR to RGB).
// NOTE(review): assumes `im` holds continuous 8-bit 3-channel data - confirm
// for the video sources in use.
void readData(float *input, Mat& orig, Mat & im)
{
	const int kSide = 227;
	const int kPlane = kSide * kSide;

	resize(orig, im, Size(kSide, kSide), 0, 0, INTER_LINEAR);

	float *redPlane   = input;
	float *greenPlane = input + kPlane;
	float *bluePlane  = input + 2 * kPlane;

	const unsigned char *px = im.data;
	for (int idx = 0; idx < kPlane; ++idx, px += 3)
	{
		// BGR to RGB
		bluePlane[idx]  = (float)(px[0]);
		greenPlane[idx] = (float)(px[1]);
		redPlane[idx]   = (float)(px[2]);
	}
}

void addLane(float pts[28][2], Mat & im, int numPts)
{
    std::vector<Point2f> iArray;
    for(int k=0; k<numPts; k++) 
    {
        iArray.push_back(Point2f(pts[k][0],pts[k][1]));    
    }	
    Mat curve(iArray, true);
    curve.convertTo(curve, CV_32S); //adapt type for polylines
    polylines(im, curve, false, CV_RGB(255,255,0), 2, LINE_AA);
}


void writeData(float *outputBuffer, Mat & im, int N, double means[6], double stds[6])
{
    // get lane coordinates
    boolean_T laneFound = 0;	
    float ltPts[56];
    float rtPts[56];	
    detect_lane(outputBuffer, means, stds, &laneFound, ltPts, rtPts);    
	
	if (!laneFound)
	{
		return;
	}
	
	float ltPtsM[28][2];
	float rtPtsM[28][2];
	for(int k=0; k<28; k++)
	{
		ltPtsM[k][0] = ltPts[k];
		ltPtsM[k][1] = ltPts[k+28];
		rtPtsM[k][0] = rtPts[k];
		rtPtsM[k][1] = rtPts[k+28];   
	}		  

	addLane(ltPtsM, im, 28);
	addLane(rtPtsM, im, 28);
}

// Load the normalization statistics written by the MATLAB script.
//   filename - binary file holding at least 12 doubles: 6 means followed by
//              6 standard deviations.
//   means    - receives the first 6 values.
//   stds     - receives the next 6 values.
// On any failure a message is written to stderr and both output arrays are
// left untouched. The file is always closed before returning.
void readMeanAndStds(const char* filename, double means[6], double stds[6])
{
    const size_t kNumCoeffs = 6;
    const size_t kNumValues = 2 * kNumCoeffs;

    FILE* pFile = fopen(filename, "rb");
    if (pFile==NULL)
    {
        fputs ("File error",stderr);
        return;
    }

    // Only the first 12 doubles are ever used, so read exactly that many
    // into a fixed buffer. A short file is then reported as a read error
    // instead of being indexed past the end of a heap allocation.
    double buffer[12];
    const size_t numRead = fread(buffer, sizeof(double), kNumValues, pFile);
    fclose(pFile);

    if (numRead != kNumValues) {
        fputs ("Reading error",stderr);
        return;
    }

    for (size_t k = 0; k < kNumCoeffs; k++)
    {
        means[k] = buffer[k];
        stds[k] = buffer[k + kNumCoeffs];
    }
}


// Main function
int main(int argc, char* argv[])
{    
	
    float *inputBuffer = (float*)calloc(sizeof(float),227*227*3);
    float *outputBuffer = (float*)calloc(sizeof(float),6);

    if ((inputBuffer == NULL) || (outputBuffer == NULL)) {
        printf("ERROR: Input/Output buffers could not be allocated!\n");
        exit(-1);
    }
    
    // get ground truth mean and std
    double means[6];
    double stds[6];	
    readMeanAndStds("mean.bin", means, stds);	
	
	if (argc < 2)
    {
        printf("Pass in input video file name as argument\n");
        return -1;
    }
    
    VideoCapture cap(argv[1]);
    if (!cap.isOpened()) {
        printf("Could not open the video capture device.\n");
        return -1;
    }

    cudaEvent_t start, stop;
    float fps = 0;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);    
    Mat orig, im;    
    namedWindow("Lane detection demo",WINDOW_NORMAL);
    while(true)
    {
        cudaEventRecord(start);
        cap >> orig;
        if (orig.empty()) break;                
        readData(inputBuffer, orig, im);		

        writeData(inputBuffer, orig, 6, means, stds);
        
        cudaEventRecord(stop);
        cudaEventSynchronize(stop);
        
        char strbuf[50];
        float milliseconds = -1.0; 
        cudaEventElapsedTime(&milliseconds, start, stop);
        fps = fps*.9+1000.0/milliseconds*.1;
        sprintf (strbuf, "%.2f FPS", fps);
        putText(orig, strbuf, Point(200,30), FONT_HERSHEY_DUPLEX, 1, CV_RGB(0,0,0), 2);
        imshow("Lane detection demo", orig); 		
        if( waitKey(50)%256 == 27 ) break; // stop capturing by pressing ESC	*/       
    }
    destroyWindow("Lane detection demo");
	
    free(inputBuffer);
    free(outputBuffer);
        
    return 0;
}

下载示例视频

% Download the sample video only when it is not already present in the
% current folder.
if ~exist('./caltech_cordova1.avi', 'file')
    url = 'https://www.mathworks.com/supportfiles/gpucoder/media/caltech_cordova1.avi';
    % Save the download under the same file name in the current folder.
    websave('caltech_cordova1.avi', url);
end

编译可执行文件

% Build the executable with the platform-specific build script, then run it
% from the code generation folder on the downloaded sample video.

% Both platforms export the MATLAB root to the build script.
setenv('MATLAB_ROOT', matlabroot);

if ispc
    % Windows: additionally export the compiler's command-line shell setup.
    vcvarsall = mex.getCompilerConfigurations('C++').Details.CommandLineShell;
    setenv('VCVARSALL', vcvarsall);
    system('make_win_lane_detection.bat');
    cd(codegendir);
    system('lanenet.exe ..\..\..\caltech_cordova1.avi');
else
    system('make -f Makefile_lane_detection.mk');
    cd(codegendir);
    system('./lanenet ../../../caltech_cordova1.avi');
end

输入截图

输出截图

相关主题