% LOAD DATASET AND DO PATH FOLLOWING WITH CROSS-VALIDATION
% FROM KERNEL MATRICES

% Start from an empty workspace. Plain CLEAR removes the variables only;
% CLEAR ALL would also flush cached functions and MEX files, which is not
% needed here and slows down the calls that follow.
clear

% simulation parameters
ntotal = 200;       % maximum total number of points - useful for subsampling dataset
proptrain = .5;     % proportion of training data
nsplits = 2;        % number of splits
seed = 22;          % seed of random number generator
DS_POWER = 1;       % parameter of the weight computations
NORMALIZE = 1;      % if 1, normalize data to zero mean/unit variance features
                    % NOTE(review): not referenced anywhere in this script as shown

% DATA
% requires : Ks             cell array of kernel matrices (FORMAT = SINGLE if MATLAB7)
%            y              array of labels or responses
%            problem_type   classification, classification_unbalanced, regression
load('kernels_labels_demo');
if ~exist('Ks','var') || ~exist('y','var')
    % fail here rather than with an obscure "undefined variable" further down
    error('kernels_labels_demo must provide the variables Ks and y.');
end
% no terminating semicolon: the selected problem type is echoed to the console
problem_type = 'classification_unbalanced'



% Prepare the label/response array according to the problem type.
%   regression                : responses are centred and scaled to unit std
%   classification            : labels are used unchanged
%   classification_unbalanced : a second column of per-point costs is appended
n = length(y);          % number of data points
m = length(Ks);         % number of kernels
switch problem_type
    case 'regression'
        % normalized responses
        y = y - mean(y);
        y = y / std(y);

    case 'classification'
        % nothing to do: labels are passed on as they are

    case 'classification_unbalanced'
        % define usual cost asymmetries: each class receives a total
        % weight of n/2, shared equally among its members
        is_pos = ( y == 1 );
        is_neg = ( y == -1 );
        nplus  = sum(is_pos);
        nminus = sum(is_neg);
        rho = zeros(n,1);
        rho(is_pos) = n / nplus / 2;
        rho(is_neg) = n / nminus / 2;
        % y(:) forces a column so that the concatenation also works when
        % the labels were stored as a row vector
        y = [ y(:) rho ];

    otherwise
        % a mistyped problem type would otherwise go unnoticed until the
        % loss is needed much later in the script
        error('Unknown problem_type ''%s''.', problem_type);
end

% Seed the uniform and the normal generator (legacy 'state' syntax) so that
% the permutations drawn below are reproducible, then draw one global
% permutation of all n points.
rand('state', seed);
randn('state', seed);
ind_global = randperm(n);

% Never use more points than are available; the training set is a fixed
% proportion of the (possibly reduced) total.
ntotal = min( ntotal, n );
ntrain = round( proptrain * ntotal );


% ---- settings that are identical for every split: built once ----

% loss used by the path-following routine
switch problem_type
    case 'regression',                loss.type = 'regression';
    case 'classification',            loss.type = 'logistic';
    case 'classification_unbalanced', loss.type = 'logistic_unbalanced';
    otherwise
        error('Unknown problem_type ''%s''.', problem_type);
end

% argument passed to build_efficient_Ks for the training kernels
efficient_type = 1;

% path following parameters
path_params.mu                  = 1e-3;      % parameter of log-barrier
path_params.EPS1                = 1e-10;     % precision parameters of Newton steps (very small and fixed)
path_params.EPS2                = 1e-2;      % precision parameter of tube around path (adaptive)
path_params.predictor_type      = 2;         % 1 : first order predictor, 2 : second order
path_params.efficient_predictor = 0;         % 1 : efficient predictor steps, 0 : full steps
path_params.efficient_eta       = 1;         % real value : threshold eta for corrector steps, 0 : no threshold
path_params.maxsigma            = 20;        % maximum value of sigma = -log(lambda);
path_params.newton_iter1        = 10;        % number of iterations with no modification
path_params.newton_iter2        = 6;         % delta number of iterations under which EPS2 is divided/multiplied by 2
path_params.maxdsigma           = 1;         % maximal step
path_params.mindsigma           = .004;      % minimal step
path_params.maxvalue_EPS2       = 1e-2;      % maximum value of tolerance for predictor steps

% preallocate what the loop fills in
paths = cell(1,nsplits);            % one path structure per split
trace_normalizer = zeros(1,m);      % trace of each training kernel (overwritten at every split)

% ---- one path per random train/test split ----
for isplit = 1:nsplits
    isplit      % no semicolon: echoes the split index as a progress indicator

    % separate training set and testing set: a fresh permutation of the
    % first ntotal entries of the global permutation
    ind      = randperm(ntotal);
    indtrain = ind_global(ind(1:ntrain));
    indtest  = ind_global(ind(ntrain+1:end));
    ytrain   = y(indtrain,:);
    ytest    = y(indtest,:);

    % compute training and testing kernel matrices
    Ks_train = cell(1,m);
    Ks_test  = cell(1,m);
    for j = 1:m
        Ks_train{j} = Ks{j}(indtrain,indtrain);
        Ks_test{j}  = Ks{j}(indtrain,indtest);
        % normalize to unit trace; the test block is scaled by the
        % training trace so that both share the same scale
        trace_normalizer(j) = trace(Ks_train{j});
        Ks_train{j} = Ks_train{j} / trace_normalizer(j);
        Ks_test{j}  = Ks_test{j}  / trace_normalizer(j);
    end

    % weights of the kernels, rescaled to unit Euclidean norm
    ds = compute_ds(Ks_train,DS_POWER);
    ds = ds / norm(ds);

    % representations of the kernels expected by follow_entire_path
    Kse_train = build_efficient_Ks(Ks_train,efficient_type);
    Kse_test  = build_efficient_Ks_test(Ks_test,0);

    % follow the regularization path and keep the result of this split.
    % NOTE(review): the variable name "path" hides MATLAB's built-in path
    % function in this workspace; kept because it is left behind for later use.
    path = follow_entire_path(Kse_train,ytrain,loss,ds,path_params,Kse_test,ytest);
    paths{isplit} = path;
end


% Now compute averages over all splits
%
% Note that the optimal thing to do to obtain an error
% for a value of lambda which has not already been sampled is to
% take alpha as a linear interpolation of the closest sampled points,
% and then compute the prediction and the error.
% In what follows, we simply linearly interpolate the errors.

% Range of the common sigma grid: the upper end is the smallest of the
% per-split maxima (capped at 20), the lower end is the smallest of the
% per-split minima (capped at 0).
minsigma = 0;
maxsigma = 20;
for isplit = 1:nsplits
    split_sigmas = paths{isplit}.sigmas;
    maxsigma = min( maxsigma, max(split_sigmas) );
    minsigma = min( minsigma, min(split_sigmas) );
end

% regular grid of nsigmas values between minsigma and maxsigma
nsigmas = 1000;
sigmas = minsigma + (0:(nsigmas-1)) * ( maxsigma-minsigma ) / (nsigmas - 1);
training_errors = zeros(nsplits,nsigmas);
testing_errors = zeros(nsplits,nsigmas);
netas = zeros(nsplits,nsigmas);

for isplit = 1:nsplits
    isplit      % no semicolon: echoes the split index as a progress indicator
    cur = paths{isplit};

    % extreme sampled sigmas of this split (do not depend on the grid point)
    sig_lo = min(cur.sigmas);
    sig_hi = max(cur.sigmas);

    for isigma = 1:nsigmas
        sigma = sigmas(isigma);

        if sigma <= sig_lo
            % at or below the sampled range: take the first recorded values
            training_errors(isplit,isigma) = cur.training_errors(1);
            testing_errors(isplit,isigma)  = cur.testing_errors(1);
            netas(isplit,isigma)           = cur.netas(1);

        elseif sigma >= sig_hi
            % at or above the sampled range: take the last recorded values
            training_errors(isplit,isigma) = cur.training_errors(end);
            testing_errors(isplit,isigma)  = cur.testing_errors(end);
            netas(isplit,isigma)           = cur.netas(end);

        else
            % first sampled sigma that is >= the grid value, and the one
            % just before it, bracket the grid value
            k  = find( cur.sigmas >= sigma, 1 );
            k0 = k - 1;

            % relative position of sigma inside the bracket
            w = ( sigma - cur.sigmas(k0) ) / ( cur.sigmas(k) - cur.sigmas(k0) );

            % linear interpolation of the three recorded quantities
            training_errors(isplit,isigma) = cur.training_errors(k0) + ...
                w * ( cur.training_errors(k) - cur.training_errors(k0) );
            testing_errors(isplit,isigma) = cur.testing_errors(k0) + ...
                w * ( cur.testing_errors(k) - cur.testing_errors(k0) );
            netas(isplit,isigma) = cur.netas(k0) + ...
                w * ( cur.netas(k) - cur.netas(k0) );
        end
    end
end

% Plot the quantities averaged over the splits against sigma.
% The horizontal axis is sigma = -log(lambda), hence the minus sign in the
% axis labels.

% left panel: training error (solid) and testing error (dotted)
subplot(1,2,1);
plot(sigmas,mean(training_errors,1),'k');
hold on;
plot(sigmas,mean(testing_errors,1),'k:');
hold off
xlabel('-log(\lambda)');
ylabel('error (mean over splits)');
legend('training','testing');

% right panel: interpolated netas values, averaged over the splits
subplot(1,2,2);
plot(sigmas,mean(netas,1),'k');
xlabel('-log(\lambda)');
ylabel('number of \eta''s');
