%   This code tests the performance of R-graph for outlier detection on the 
%   Caltech256 image database. The code generates results in 
%   Table 2 of the paper
%
%   Chong You, Daniel Robinson, Rene Vidal,
%   "Provable Self-Representation Based Outlier Detection in a Union of 
%   Subspaces", CVPR 2017.
%
%   Regarding the dataset: to run the code, one should download the dataset
%   (at http://www.vision.caltech.edu/Image_Datasets/Caltech256/), extract
%   features using VGG-16 (at
%   http://www.robots.ox.ac.uk/~vgg/research/very_deep/), and save the
%   features in .mat files. The code assumes that for each of the 257
%   categories there exists a file vgg_[CategoryFolderName].mat, e.g.
%   vgg_020.brain-101.mat, containing the variable [data], which is a
%   2-by-N cell array. data(1, ii) contains the 4096-dimensional feature of
%   the ii-th image in the category. data(2, ii) contains the corresponding
%   name of the image file. Some examples are given in data/.
%   
%   Note: the code will run outlier detection where inliers are images from
%   categories 20, 40, 60 (files for these three categories are provided).
%   If you have the entire dataset as described above then the code can be
%   modified to randomly pick inlier categories (see lines 9, 51).

% Copyright Chong You @ Johns Hopkins University, 2017
% chong.you1987@gmail.com

% Start from an empty workspace and close all open figures.
clear all;close all;

% Put the feature files (data/) and the tools/ folder on the MATLAB path.
addpath('data')
addpath('tools')

% Read the list of category folder names (whitespace-delimited strings).
% Caltech256_info{1}{k} is later used to build the file name of category k.
% Note: because of the "clear all" above this guard is always taken; it is
% kept so that the script still works if the "clear all" is removed.
if ~exist('Caltech256_info', 'var')
    [fid, fopen_msg] = fopen( 'Caltech256_info.dat', 'r');
    if fid == -1
        % fopen signals failure by returning -1; fail here with a clear
        % message instead of letting textscan fail on an invalid file ID.
        error('Caltech256:infoFileNotFound', ...
            'Cannot open Caltech256_info.dat: %s', fopen_msg);
    end
    Caltech256_info = textscan(fid, '%s');
    fclose(fid);
end
%% Set param
Num_Inlier_Group = 3; % number of inlier groups 
outlier_perc = 0.5; % fraction of outliers in the test data (between 0 and 1)
Nexperiment = 50; % number of repetitions of the experiment

fprintf('Caltech256: Num_Inlier_Group = %d, outlier perc = %f, Nexperiment = %d\n', Num_Inlier_Group, outlier_perc, Nexperiment);

flag_ROC = false; % set true to generate ROC curve plots for each experiment 
%(set Nexperiment to be small if flag_ROC is set to be true!)

%% Experiment
% Column k of result holds [AUC; F1; running time in seconds] of experiment k.
result = zeros(3, Nexperiment);
for iexperiment = 1:Nexperiment
%     subjectIdx = randperm(256, Num_Inlier_Group);
    subjectIdx = [20 40 60];
    
% Prepare data  
    % inliers: up to Max_Inlier_Per_Group images from each selected category.
    % Buffers are preallocated for the maximum size and trimmed afterwards.
    Max_Inlier_Per_Group = 150;
    data_inlier = zeros(4096, length(subjectIdx) * Max_Inlier_Per_Group);
    filename_inlier = cell(1, length(subjectIdx) * Max_Inlier_Per_Group);
    counter = 0; % number of inlier columns filled so far
    for ii = 1:length(subjectIdx)
        category_name = Caltech256_info{1}{subjectIdx(ii)};
        % Load into a struct so that the file's variable [data] does not
        % silently appear in (and collide with) the script workspace.
        category = load(['vgg_' category_name '.mat'], 'data');
        Nimages = size(category.data, 2);
        % Stack the per-image features as the columns of a 4096-by-N matrix.
        data_tmp = cell2mat(category.data(1,:));
        data_tmp = reshape(data_tmp, [4096, Nimages]);
        % Keep only the first Max_Inlier_Per_Group images of the category.
        Nimages = min([Max_Inlier_Per_Group, Nimages]);
        data_inlier(:, counter + 1:counter + Nimages) = data_tmp(:, 1:Nimages);
        filename_inlier(counter + 1:counter + Nimages) = cellfun(@(x) strcat(category_name,'/',x), category.data(2, 1:Nimages),'UniformOutput', false);
        counter = counter + Nimages;
    end
    % Drop the unused tail of the preallocated buffers.
    data_inlier = data_inlier(:, 1:counter);
    filename_inlier = filename_inlier(:, 1:counter);
    Ninlier = size(data_inlier, 2);

    % outlier: drawn from category 257; the category is loaded only once and
    % reused in the subsequent experiments.
    if ~exist('data_257', 'var')
        outlier_name = Caltech256_info{1}{257};
        outlier_src = load(['vgg_' outlier_name '.mat'], 'data');
        N_257 = size(outlier_src.data, 2);
        data_257 = cell2mat(outlier_src.data(1,:));
        data_257 = reshape(data_257, [4096, N_257]);
        filename_257 = cellfun(@(x) strcat(outlier_name,'/',x), outlier_src.data(2, :),'UniformOutput', false);
    end
    
    % Number of outliers such that they make up a fraction outlier_perc of
    % the test data: Noutlier / (Noutlier + Ninlier) = outlier_perc.
    Noutlier = round(outlier_perc / (1-outlier_perc) * Ninlier);
    if Noutlier > N_257
        % Not enough outlier images available: use all of them.
        fprintf('Using all outliers, outlier percentage is %f\n', N_257/(N_257+Ninlier))
        Noutlier = N_257;
        data_outlier = data_257;
        filename_outlier = filename_257;
    else
        % Random subset (without repetition) of the outlier images.
        outlier_index = randperm(N_257, Noutlier);
        data_outlier = data_257(:, outlier_index);
        filename_outlier = filename_257(outlier_index);
    end

    % compose test data: inlier columns first, then outlier columns
    data = double([data_inlier, data_outlier]);
    s = [zeros(1, Ninlier), ones(1, Noutlier)]; % ground truth, 1 = outlier
    filename = [filename_inlier, filename_outlier];
    N = Ninlier + Noutlier;

    
% R-graph outlier detection
    tic;
    
    data = dimReduction(data, min(size(data)));
    % step 1: compute representation R from data (line 1 of Alg. 1)
    lambda = 0.95;
    alpha = 20;
    % Data-dependent scaling of the solver parameters. Named gamma_fun so
    % that the builtin gamma function is not shadowed.
    gamma_fun = @(X, y) alpha*lambda/max(abs(X'*y));
    % The scaling is evaluated once per call and shared by both parameters.
    rfss_scaled = @(X, y, g) rfss( full(X), full(y), lambda / g, (1-lambda) / g );
    EN_solver =  @(X, y) rfss_scaled(X, y, gamma_fun(X, y));
    R = selfRepresentation(data, EN_solver);
    % step 2: compute transition P from R (line 2 of Alg. 1)
    P = cnormalize(abs(R), 1)';
    % step 3: compute \pi from P (line 3 - 7 of Alg. 1)
    % Start from the uniform distribution, apply P for Niter steps and
    % average the resulting distributions. The state is named pi_t so that
    % the builtin constant pi is not shadowed.
    Niter = 1000;
    pi_t = ones(1, N) / N;
    pi_bar = zeros(1, N);
    for t = 1:Niter
        pi_t = pi_t * P;
        pi_bar = pi_bar + pi_t;
    end
    pi_bar = pi_bar / Niter;
    %
    feat = - pi_bar; % larger values in feat indicate higher "outlierness"
      
% Evaluation
    time = toc;
    
    % ROC curve and area under it; the threshold output is not needed.
    [FPR, TPR, ~, AUC] = perfcurve(s, feat, 1);
    % Precision/recall pairs over all thresholds; report the best F1 score.
    % max skips NaN entries (e.g. where PREC + RECA is zero).
    [PREC, RECA] = perfcurve(s, feat, 1, 'XCrit', 'prec', 'YCrit', 'reca');
    F1 = max(2 * (PREC .* RECA) ./ (PREC + RECA));
    fprintf('Experiment %d: AUC = %f, F1 = %f, time = %f\n', iexperiment, AUC, F1, time);

    result(:, iexperiment) = [AUC, F1, time]';
    if flag_ROC
        figure;
        plot(FPR, TPR, '-r');
        xlabel('False positive rate')
        ylabel('True positive rate')
    end
end
% Averages over all experiments.
fprintf('Mean AUROC: %f, Mean F1: %f, Mean time: %f sec.\n', mean(result(1, :)), mean(result(2, :)), mean(result(3, :)));




    
