Target Tracking Simulation Based on Fully-Convolutional Siamese Networks


1. Algorithm Overview

1. Working version combination: Win7 + MATLAB R2015b + CUDA 7.5 + VS2013

 

The CUDA 7.5 download address is:

 

developer.download.nvidia.com/compute/cud…

 

VS2013 must be the Professional edition.

 

As shown below:

[screenshot: 1.png]

After everything is installed, proceed as follows:

 

2. CPU configuration: in the CNN toolbox (MatConvNet), run the command shown in the screenshot below,

[screenshot: 2.png]

and then run

[screenshot: 3.png]

to complete the compilation of the CPP (MEX) files.
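The commands themselves are only visible in the screenshots above. As a reference, here is a minimal sketch of the usual MatConvNet CPU build sequence (the folder name `matconvnet-1.0-beta20` is taken from the cuDNN path used later in this article; the exact calls in the screenshots may differ):

```matlab
% Minimal sketch of a CPU-only MatConvNet build (not necessarily the exact
% commands in the screenshots; adjust the folder name to your local layout).
cd('matconvnet-1.0-beta20');      % MatConvNet root directory (assumed name)
mex -setup                        % select the VS2013 C++ compiler once per machine
run('matlab/vl_setupnn.m');       % put the MatConvNet functions on the MATLAB path
vl_compilenn;                     % compile the CPU MEX (CPP) files
```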

 

3. After a successful compilation, the following files are generated:

 

[screenshot: 4.png]

 

These must be compiled on your own machine; if you copy compiled files from someone else and their configuration differs from yours, errors may occur.

 

4. GPU configuration:

 

Install cuDNN: developer.nvidia.com/rdp/cudnn-a…

 

Then run `mex -setup`; the procedure is the same as for the CPU configuration.

 

Then execute the following MATLAB commands:

 

vl_setupnn;

 

vl_compilenn('enableGpu', true, 'cudaRoot', 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5', 'cudaMethod', 'nvcc', 'enableCudnn', true, 'cudnnRoot', 'E:\A_2016_FPGA_Test\FC_tracking\A_FC\matconvnet-1.0-beta20\local\cudnn');

 

The parts highlighted in red in the original post are the `cudaRoot` and `cudnnRoot` paths; change them to match your own CUDA and cuDNN installation directories.
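After the command above completes, a quick sanity check helps confirm that the GPU (and cuDNN) build actually works. The snippet below is only a sketch: it runs a tiny random convolution on the GPU with `vl_nnconv`; MatConvNet's own test suite can also be run with `vl_testnn`.

```matlab
% Quick GPU build sanity check (sketch).
gpuDevice                                    % confirm MATLAB can see the CUDA device
x = gpuArray(randn(8, 8, 3, 1, 'single'));   % dummy single-precision input
f = gpuArray(randn(3, 3, 3, 4, 'single'));   % dummy filter bank
y = vl_nnconv(x, f, []);                     % should run on the GPU without errors
% Optionally run the full MatConvNet unit tests on the GPU:
% vl_testnn('gpu', true);
```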

 

5. Then run the program and everything executes automatically. Running the original code was quite cumbersome; with this wrapper script defined, the results can be produced in a single step.

[screenshot: 5.png]
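For reference, with the `tracker` function listed in section 3, one sequence can be tracked in a single call along the lines of the following (the parameter names come from the defaults at the top of `tracker.m`; the sequence name and GPU index here are just example values):

```matlab
% One-step run (sketch); parameter names match the defaults in tracker.m.
bboxes = tracker('video', 'vot15_bag', ...   % sequence folder under p.seq_base_path
                 'visualization', true, ...  % draw the tracked box on every frame
                 'gpus', 1);                 % GPU to use (p.gpus)
```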

2. Simulation Results Preview

Win7 + MATLAB R2015b + CUDA 7.5 + VS2013

 

After running, you should see results like the following, which indicates everything is working:

 

[screenshot: 6.png]

 

3. Core MATLAB Code Preview

```matlab
function bboxes = tracker(varargin)

%TRACKER

%   is the main function that performs the tracking loop

%   Default parameters are overwritten by VARARGIN

%

%   Luca Bertinetto, Jack Valmadre, Joao F. Henriques, 2016

% -------------------------------------------------------------------------------------------------

    % These are the default hyper-params for SiamFC-3S

    % The ones for SiamFC (5 scales) are in params-5s.txt

    p.numScale = 3;

    p.scaleStep = 1.0375;

    p.scalePenalty = 0.9745;

    p.scaleLR = 0.59; % damping factor for scale update

    p.responseUp = 16; % upsampling the small 17x17 response helps with the accuracy

    p.windowing = 'cosine'; % to penalize large displacements

    p.wInfluence = 0.176; % windowing influence (in convex sum)

    p.net = '2016-08-17.net.mat';

    %% execution, visualization, benchmark

    p.video = 'vot15_bag';

    p.visualization = false;

    p.gpus = 1;

    p.bbox_output = false;

    p.fout = -1;

    %% Params from the network architecture, have to be consistent with the training

    p.exemplarSize = 127;  % input z size

    p.instanceSize = 255;  % input x size (search region)

    p.scoreSize = 17;

    p.totalStride = 8;

    p.contextAmount = 0.5; % context amount for the exemplar

    p.subMean = false;

    %% SiamFC prefix and ids

    p.prefix_z = 'a_'; % used to identify the layers of the exemplar

    p.prefix_x = 'b_'; % used to identify the layers of the instance

    p.prefix_join = 'xcorr';

    p.prefix_adj = 'adjust';

    p.id_feat_z = 'a_feat';

    p.id_score = 'score';

    % Overwrite default parameters with varargin

    p = vl_argparse(p, varargin);

% -------------------------------------------------------------------------------------------------

 

    % Get environment-specific default paths.

    p = env_paths_tracking(p);


    % Load ImageNet Video statistics

    if exist(p.stats_path,'file')

        stats = load(p.stats_path);

    else

        warning('No stats found at %s', p.stats_path);

        stats = [];

    end

    


    % Load two copies of the pre-trained network

    net_z = load_pretrained([p.net_base_path p.net], p.gpus);


    net_x = load_pretrained([p.net_base_path p.net], []);


    [imgFiles, targetPosition, targetSize] = load_video_info(p.seq_base_path, p.video);

    nImgs = numel(imgFiles);

    startFrame = 1;


    % Divide the net in 2

    % exemplar branch (used only once per video) computes features for the target

    remove_layers_from_prefix(net_z, p.prefix_x);

    remove_layers_from_prefix(net_z, p.prefix_join);

    remove_layers_from_prefix(net_z, p.prefix_adj);

    % instance branch computes features for search region x and cross-correlates with z features


    remove_layers_from_prefix(net_x, p.prefix_z);

    zFeatId = net_z.getVarIndex(p.id_feat_z);

    scoreId = net_x.getVarIndex(p.id_score);

    % get the first frame of the video

    im = gpuArray(single(imgFiles{startFrame}));

    % if grayscale repeat one channel to match filters size

    if(size(im, 3)==1)

        im = repmat(im, [1 1 3]);

    end

    % Init visualization

    videoPlayer = [];

    if p.visualization && isToolboxAvailable('Computer Vision System Toolbox')

        videoPlayer = vision.VideoPlayer('Position', [100 100 [size(im,2), size(im,1)]+30]);

    end


    % get avg for padding

    avgChans = gather([mean(mean(im(:,:,1))) mean(mean(im(:,:,2))) mean(mean(im(:,:,3)))]);

 

    wc_z = targetSize(2) + p.contextAmount*sum(targetSize);

    hc_z = targetSize(1) + p.contextAmount*sum(targetSize);

    s_z = sqrt(wc_z*hc_z);

    scale_z = p.exemplarSize / s_z;

    % initialize the exemplar

    [z_crop, ~] = get_subwindow_tracking(im, targetPosition, [p.exemplarSize p.exemplarSize], [round(s_z) round(s_z)], avgChans);

    if p.subMean

        z_crop = bsxfun(@minus, z_crop, reshape(stats.z.rgbMean, [1 1 3]));

    end

    d_search = (p.instanceSize - p.exemplarSize)/2;

    pad = d_search/scale_z;

    s_x = s_z + 2*pad;

    % arbitrary scale saturation

    min_s_x = 0.2*s_x;

    max_s_x = 5*s_x;

 

    switch p.windowing

        case 'cosine'

            window = single(hann(p.scoreSize*p.responseUp) * hann(p.scoreSize*p.responseUp)');

        case 'uniform'

            window = single(ones(p.scoreSize*p.responseUp, p.scoreSize*p.responseUp));

    end

    % make the window sum 1

    window = window / sum(window(:));

    scales = (p.scaleStep .^ ((ceil(p.numScale/2)-p.numScale) : floor(p.numScale/2)));

    % evaluate the offline-trained network for exemplar z features

    net_z.eval({'exemplar', z_crop});

    z_features = net_z.vars(zFeatId).value;

    z_features = repmat(z_features, [1 1 1 p.numScale]);

 

    bboxes = zeros(nImgs, 4);

    % start tracking

    tic;

    for i = startFrame:nImgs

        i % print the current frame index as a simple progress indicator

        if i>startFrame

            % load new frame on GPU

            im = gpuArray(single(imgFiles{i}));

            % if grayscale repeat one channel to match filters size

            if(size(im, 3)==1)

                im = repmat(im, [1 1 3]);

            end

            scaledInstance = s_x .* scales;

            scaledTarget = [targetSize(1) .* scales; targetSize(2) .* scales];

            % extract scaled crops for search region x at previous target position

            x_crops = make_scale_pyramid(im, targetPosition, scaledInstance, p.instanceSize, avgChans, stats, p);

            % evaluate the offline-trained network for exemplar x features

            [newTargetPosition, newScale] = tracker_eval(net_x, round(s_x), scoreId, z_features, x_crops, targetPosition, window, p);

            targetPosition = gather(newTargetPosition);

            % scale damping and saturation

            s_x = max(min_s_x, min(max_s_x, (1-p.scaleLR)*s_x + p.scaleLR*scaledInstance(newScale)));

            targetSize = (1-p.scaleLR)*targetSize + p.scaleLR*[scaledTarget(1,newScale) scaledTarget(2,newScale)];

        else

            % at the first frame output position and size passed as input (ground truth)

        end

 

        rectPosition = [targetPosition([2,1]) - targetSize([2,1])/2, targetSize([2,1])];

        % output bbox in the original frame coordinates

        oTargetPosition = targetPosition; % .* frameSize ./ newFrameSize;

        oTargetSize = targetSize; % .* frameSize ./ newFrameSize;

        bboxes(i, :) = [oTargetPosition([2,1]) - oTargetSize([2,1])/2, oTargetSize([2,1])];

 

%         if p.visualization

            if isempty(videoPlayer)

                figure(1), imshow(im/255);

                figure(1), rectangle('Position', rectPosition, 'LineWidth', 4, 'EdgeColor', 'y');

                drawnow

                fprintf('Frame %d\n', startFrame+i);

            else

                im = gather(im)/255;

                im = insertShape(im, 'Rectangle', rectPosition, 'LineWidth', 4, 'Color', 'yellow');

                % Display the annotated video frame using the video player object.

                step(videoPlayer, im);

            end

%         end

 

        if p.bbox_output

            fprintf(p.fout,'%.2f,%.2f,%.2f,%.2f\n', bboxes(i, :));

        end

 

    end

 

    bboxes = bboxes(startFrame : i, :);

 

end

```
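As a small worked example of the scale search in the code above: with the defaults `numScale = 3` and `scaleStep = 1.0375`, each frame is searched at three scales, roughly 0.964, 1.0, and 1.0375 times the previous one:

```matlab
% Worked example of the default scale pyramid in tracker.m.
numScale  = 3;
scaleStep = 1.0375;
scales = scaleStep .^ ((ceil(numScale/2) - numScale) : floor(numScale/2));
% scales = 1.0375 .^ (-1:1), approximately [0.9639  1.0000  1.0375]
```

Each row of the returned `bboxes` matrix is `[x_min, y_min, width, height]` in the original frame coordinates, as assembled near the end of the tracking loop.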