Target Tracking Simulation Based on Fully-Convolutional Siamese Networks


1. Algorithm Overview

1. Working version combination: Win7 + MATLAB R2015b + CUDA 7.5 + VS2013

 

The CUDA 7.5 download address is:

 

developer.download.nvidia.com/compute/cud…

 

VS2013 must be the Professional edition.

 

As shown below:

[screenshot: 1.png]

After everything is installed, proceed as follows:

 

2. CPU configuration: in the CNN toolbox (MatConvNet), run the command shown in the screenshot below,

[screenshot: 2.png]

and then run

[screenshot: 3.png]

to complete the compilation of the CPP (MEX) files.
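The commands themselves are only visible in the screenshots above. As a reference, here is a minimal sketch of the usual MatConvNet CPU build sequence (the folder name `matconvnet-1.0-beta20` is taken from the cuDNN path used later in this article; the exact calls in the screenshots may differ):

```matlab
% Minimal sketch of a CPU-only MatConvNet build (not necessarily the exact
% commands in the screenshots; adjust the folder name to your local layout).
cd('matconvnet-1.0-beta20');      % MatConvNet root directory (assumed name)
mex -setup                        % select the VS2013 C++ compiler once per machine
run('matlab/vl_setupnn.m');       % put the MatConvNet functions on the MATLAB path
vl_compilenn;                     % compile the CPU MEX (CPP) files
```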

 

3. After a successful compilation, the following files are generated:

 

[screenshot: 4.png]

 

These must be compiled on your own machine; if you copy compiled files from someone else and their configuration differs from yours, errors may occur.

 

4. GPU configuration:

 

Install cuDNN: developer.nvidia.com/rdp/cudnn-a…

 

Then run `mex -setup`; the procedure is the same as for the CPU configuration.

 

Then execute the following MATLAB commands:

 

vl_setupnn;

 

vl_compilenn('enableGpu', true, 'cudaRoot', 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5', 'cudaMethod', 'nvcc', 'enableCudnn', true, 'cudnnRoot', 'E:\A_2016_FPGA_Test\FC_tracking\A_FC\matconvnet-1.0-beta20\local\cudnn');

 

The parts highlighted in red in the original post are the `cudaRoot` and `cudnnRoot` paths; change them to match your own CUDA and cuDNN installation directories.
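After the command above completes, a quick sanity check helps confirm that the GPU (and cuDNN) build actually works. The snippet below is only a sketch: it runs a tiny random convolution on the GPU with `vl_nnconv`; MatConvNet's own test suite can also be run with `vl_testnn`.

```matlab
% Quick GPU build sanity check (sketch).
gpuDevice                                    % confirm MATLAB can see the CUDA device
x = gpuArray(randn(8, 8, 3, 1, 'single'));   % dummy single-precision input
f = gpuArray(randn(3, 3, 3, 4, 'single'));   % dummy filter bank
y = vl_nnconv(x, f, []);                     % should run on the GPU without errors
% Optionally run the full MatConvNet unit tests on the GPU:
% vl_testnn('gpu', true);
```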

 

5. Then run the program and everything executes automatically. Running the original code was quite cumbersome; with this wrapper script defined, the results can be produced in a single step.

[screenshot: 5.png]
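For reference, with the `tracker` function listed in section 3, one sequence can be tracked in a single call along the lines of the following (the parameter names come from the defaults at the top of `tracker.m`; the sequence name and GPU index here are just example values):

```matlab
% One-step run (sketch); parameter names match the defaults in tracker.m.
bboxes = tracker('video', 'vot15_bag', ...   % sequence folder under p.seq_base_path
                 'visualization', true, ...  % draw the tracked box on every frame
                 'gpus', 1);                 % GPU to use (p.gpus)
```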

2. Simulation Results Preview

Win7 + MATLAB R2015b + CUDA 7.5 + VS2013

 

After running, you should see results like the following, which indicates everything is working:

 

[screenshot: 6.png]

 

3. Core MATLAB Code Preview

```matlab
function bboxes = tracker(varargin)

%TRACKER

%   is the main function that performs the tracking loop

%   Default parameters are overwritten by VARARGIN

%

%   Luca Bertinetto, Jack Valmadre, Joao F. Henriques, 2016

% -------------------------------------------------------------------------------------------------

    % These are the default hyper-params for SiamFC-3S

    % The ones for SiamFC (5 scales) are in params-5s.txt

    p.numScale = 3;

    p.scaleStep = 1.0375;

    p.scalePenalty = 0.9745;

    p.scaleLR = 0.59; % damping factor for scale update

    p.responseUp = 16; % upsampling the small 17x17 response helps with the accuracy

    p.windowing = 'cosine'; % to penalize large displacements

    p.wInfluence = 0.176; % windowing influence (in convex sum)

    p.net = '2016-08-17.net.mat';

    %% execution, visualization, benchmark

    p.video = 'vot15_bag';

    p.visualization = false;

    p.gpus = 1;

    p.bbox_output = false;

    p.fout = -1;

    %% Params from the network architecture, have to be consistent with the training

    p.exemplarSize = 127;  % input z size

    p.instanceSize = 255;  % input x size (search region)

    p.scoreSize = 17;

    p.totalStride = 8;

    p.contextAmount = 0.5; % context amount for the exemplar

    p.subMean = false;

    %% SiamFC prefix and ids

    p.prefix_z = 'a_'; % used to identify the layers of the exemplar

    p.prefix_x = 'b_'; % used to identify the layers of the instance

    p.prefix_join = 'xcorr';

    p.prefix_adj = 'adjust';

    p.id_feat_z = 'a_feat';

    p.id_score = 'score';

    % Overwrite default parameters with varargin

    p = vl_argparse(p, varargin);

% -------------------------------------------------------------------------------------------------

 

    % Get environment-specific default paths.

    p = env_paths_tracking(p);


    % Load ImageNet Video statistics

    if exist(p.stats_path,'file')

        stats = load(p.stats_path);

    else

        warning('No stats found at %s', p.stats_path);

        stats = [];

    end

    


    % Load two copies of the pre-trained network

    net_z = load_pretrained([p.net_base_path p.net], p.gpus);


    net_x = load_pretrained([p.net_base_path p.net], []);


    [imgFiles, targetPosition, targetSize] = load_video_info(p.seq_base_path, p.video);

    nImgs = numel(imgFiles);

    startFrame = 1;


    % Divide the net in 2

    % exemplar branch (used only once per video) computes features for the target

    remove_layers_from_prefix(net_z, p.prefix_x);

    remove_layers_from_prefix(net_z, p.prefix_join);

    remove_layers_from_prefix(net_z, p.prefix_adj);

    % instance branch computes features for search region x and cross-correlates with z features


    remove_layers_from_prefix(net_x, p.prefix_z);

    zFeatId = net_z.getVarIndex(p.id_feat_z);

    scoreId = net_x.getVarIndex(p.id_score);

    % get the first frame of the video

    im = gpuArray(single(imgFiles{startFrame}));

    % if grayscale repeat one channel to match filters size

    if(size(im, 3)==1)

        im = repmat(im, [1 1 3]);

    end

    % Init visualization

    videoPlayer = [];

    if p.visualization && isToolboxAvailable('Computer Vision System Toolbox')

        videoPlayer = vision.VideoPlayer('Position', [100 100 [size(im,2), size(im,1)]+30]);

    end


    % get avg for padding

    avgChans = gather([mean(mean(im(:,:,1))) mean(mean(im(:,:,2))) mean(mean(im(:,:,3)))]);

 

    wc_z = targetSize(2) + p.contextAmount*sum(targetSize);

    hc_z = targetSize(1) + p.contextAmount*sum(targetSize);

    s_z = sqrt(wc_z*hc_z);

    scale_z = p.exemplarSize / s_z;

    % initialize the exemplar

    [z_crop, ~] = get_subwindow_tracking(im, targetPosition, [p.exemplarSize p.exemplarSize], [round(s_z) round(s_z)], avgChans);

    if p.subMean

        z_crop = bsxfun(@minus, z_crop, reshape(stats.z.rgbMean, [1 1 3]));

    end

    d_search = (p.instanceSize - p.exemplarSize)/2;

    pad = d_search/scale_z;

    s_x = s_z + 2*pad;

    % arbitrary scale saturation

    min_s_x = 0.2*s_x;

    max_s_x = 5*s_x;

 

    switch p.windowing

        case 'cosine'

            window = single(hann(p.scoreSize*p.responseUp) * hann(p.scoreSize*p.responseUp)');

        case 'uniform'

            window = single(ones(p.scoreSize*p.responseUp, p.scoreSize*p.responseUp));

    end

    % make the window sum 1

    window = window / sum(window(:));

    scales = (p.scaleStep .^ ((ceil(p.numScale/2)-p.numScale) : floor(p.numScale/2)));

    % evaluate the offline-trained network for exemplar z features

    net_z.eval({'exemplar', z_crop});

    z_features = net_z.vars(zFeatId).value;

    z_features = repmat(z_features, [1 1 1 p.numScale]);

 

    bboxes = zeros(nImgs, 4);

    % start tracking

    tic;

    for i = startFrame:nImgs

        i % print the current frame index as a simple progress indicator

        if i>startFrame

            % load new frame on GPU

            im = gpuArray(single(imgFiles{i}));

            % if grayscale repeat one channel to match filters size

            if(size(im, 3)==1)

                im = repmat(im, [1 1 3]);

            end

            scaledInstance = s_x .* scales;

            scaledTarget = [targetSize(1) .* scales; targetSize(2) .* scales];

            % extract scaled crops for search region x at previous target position

            x_crops = make_scale_pyramid(im, targetPosition, scaledInstance, p.instanceSize, avgChans, stats, p);

            % evaluate the offline-trained network for exemplar x features

            [newTargetPosition, newScale] = tracker_eval(net_x, round(s_x), scoreId, z_features, x_crops, targetPosition, window, p);

            targetPosition = gather(newTargetPosition);

            % scale damping and saturation

            s_x = max(min_s_x, min(max_s_x, (1-p.scaleLR)*s_x + p.scaleLR*scaledInstance(newScale)));

            targetSize = (1-p.scaleLR)*targetSize + p.scaleLR*[scaledTarget(1,newScale) scaledTarget(2,newScale)];

        else

            % at the first frame output position and size passed as input (ground truth)

        end

 

        rectPosition = [targetPosition([2,1]) - targetSize([2,1])/2, targetSize([2,1])];

        % output bbox in the original frame coordinates

        oTargetPosition = targetPosition; % .* frameSize ./ newFrameSize;

        oTargetSize = targetSize; % .* frameSize ./ newFrameSize;

        bboxes(i, :) = [oTargetPosition([2,1]) - oTargetSize([2,1])/2, oTargetSize([2,1])];

 

%         if p.visualization

            if isempty(videoPlayer)

                figure(1), imshow(im/255);

                figure(1), rectangle('Position', rectPosition, 'LineWidth', 4, 'EdgeColor', 'y');

                drawnow

                fprintf('Frame %d\n', startFrame+i);

            else

                im = gather(im)/255;

                im = insertShape(im, 'Rectangle', rectPosition, 'LineWidth', 4, 'Color', 'yellow');

                % Display the annotated video frame using the video player object.

                step(videoPlayer, im);

            end

%         end

 

        if p.bbox_output

            fprintf(p.fout,'%.2f,%.2f,%.2f,%.2f\n', bboxes(i, :));

        end

 

    end

 

    bboxes = bboxes(startFrame : i, :);

 

end

```
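As a small worked example of the scale search in the code above: with the defaults `numScale = 3` and `scaleStep = 1.0375`, each frame is searched at three scales, roughly 0.964, 1.0, and 1.0375 times the previous one:

```matlab
% Worked example of the default scale pyramid in tracker.m.
numScale  = 3;
scaleStep = 1.0375;
scales = scaleStep .^ ((ceil(numScale/2) - numScale) : floor(numScale/2));
% scales = 1.0375 .^ (-1:1), approximately [0.9639  1.0000  1.0375]
```

Each row of the returned `bboxes` matrix is `[x_min, y_min, width, height]` in the original frame coordinates, as assembled near the end of the tracking loop.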