%% demo for "Submodular Functions: from Discrete to Continuous Domains"
%% Minimizing a continuous submodular function by discretization


%% initialization
% Wipes the workspace, seeds both legacy random generators, and builds the
% problem: a noiseless 1-D test signal on n grid points in [-1,1], a noisy
% copy stored in param, and a handle F to the objective.
clear all
seed = 2;
randn('state',seed);
rand('state',seed);

% problem size: n variables, each taking one of k discrete values
n = 51;
k = 51;

% regular grid of n points spanning [-1,1]
t = (0:(n-1))/(n-1)*2-1;

% the noiseless signal is zero except on two windows: a positive quartic
% bump on (-.8,-.2) and its negated mirror image on (.2,.8)
window_neg = (t<-.2) .* (t>-.8);
window_pos = (t>.2) .* (t<.8);
bump_neg = ((t+.5).^2-.3^2).^2;
bump_pos = ((t-.5).^2-.3^2).^2;
signal = 100 *(  window_neg .* bump_neg + window_pos .* -bump_pos );

% parameters handed to the objective function
param.k = k;
param.lambda = 2/10; % regularization parameter
param.mu = 8/10; % regularization parameter
param.signal =  signal   + randn(1,n)/10; % observation = signal + Gaussian noise (std 0.1)

% objective, passed by handle to the solvers below
F = @submodular_fct_1D_sparse_smooth;





%% subgradient descent on [0,1]
% Projected subgradient descent on rho, an n x (k-1) matrix whose rows are
% kept non-increasing with entries in [0,1]. Each iteration queries
% greedy_algorithm at rho, uses its first output w as the subgradient
% (presumably a maximizer of <w,rho> over the base polytope; confirm
% against greedy_algorithm), and averages the w's into a dual candidate.
%
% Outputs left in the workspace:
%   primal_subgradient(iter)     - value f returned by greedy_algorithm
%   primal_subgradient_min(iter) - value Fmin returned by greedy_algorithm
%   dual_subgradient(iter)       - dual value of the averaged iterate w_ave
%   rho_subgradient              - final rho
H0 = submodular_fct_1D_sparse_smooth(zeros(1,n),param);
maxiter = 400;

% random initialization: cumulative sums of uniform draws, flipped so every
% row is non-increasing, then shifted/rescaled globally to span [0,1]
rho = fliplr(cumsum(rand(n,k-1),2));
rho = rho - min(rho(:));
rho = rho / max(rho(:));
w_ave = 0;

% preallocate the logs instead of growing them inside the loop
primal_subgradient = zeros(1,maxiter);
primal_subgradient_min = zeros(1,maxiter);
dual_subgradient = zeros(1,maxiter);
% running maximum of the dual values seen so far; tracked separately because
% the preallocated zeros must not take part in the maximum
best_dual = -Inf;

fprintf('Subgradient - %d iterations\n', maxiter);

for iter=1:maxiter
    
    if mod(iter,10)==0, fprintf('%d ', iter); end
    [w,f,Fmin] = greedy_algorithm(rho,F,param);
    
    % computing a dual candidate: running average of all w's so far
    w_ave = ( w_ave * ( iter - 1) + w ) / (iter);
    
    primal_subgradient(iter)=f;
    primal_subgradient_min(iter) = Fmin;
    % per row: smallest prefix sum of w_ave, capped at 0; summed over rows
    dual_subgradient(iter) = sum( min(min( cumsum(w_ave,2) , [],2),0) );
    best_dual = max(best_dual, dual_subgradient(iter));
    
    % subgradient step (Polyak rule): step = (f - best lower bound) / ||w||^2.
    % rho is a matrix variable, so the Euclidean norm of the subgradient is
    % the Frobenius norm; norm(w) alone would be the spectral norm.
    w_sqnorm = sum( w(:).^2 );
    if w_sqnorm > 0
        rho = rho - ( f - best_dual ) * w / w_sqnorm;
    end
    % (a zero subgradient leaves rho unchanged instead of producing NaNs)
    
    % orthogonal projection onto non-increasing rows with entries in [0,1]:
    % isotonic regression first, clipping second. The reverse order
    % (clip, then pav) is not the Euclidean projection in general.
    % pav is assumed to return the non-decreasing isotonic fit - confirm.
    for i=1:n
        rho(i,:) = min(1,max(-pav(-rho(i,:)),0));
    end
    
end
rho_subgradient = rho;
fprintf('\n');





%% Frank-Wolfe on B(F)
% Every iteration maps the current iterate w to rho by applying -pav row by
% row, queries greedy_algorithm at rho, records primal/dual values and then
% moves w towards the returned point wbar using a clipped exact line search.
%
% Outputs left in the workspace: primal_fw, dual_fw, primal_fw_min,
% dual_fw_min (one entry per iteration) and rho_fw (last rho).
H0 = submodular_fct_1D_sparse_smooth(zeros(1,n),param);
maxiter = 400;

% random initialization: greedy output for a random rho with non-increasing rows
w = greedy_algorithm(fliplr(cumsum(rand(n,k-1),2)),F,param);


fprintf('Frank Wolfe - 400 iterations\n');

for iter = 1:maxiter
    % progress indicator every 10 iterations
    if rem(iter,10) == 0
        fprintf('%d ', iter);
    end
    
    % rho associated with the current w, one row at a time
    rho = zeros(n,k-1);
    for row = 1:n
        rho(row,:) = -pav(w(row,:));
    end
    
    % linear oracle at rho
    [wbar,f,Fmin] = greedy_algorithm(rho,F,param);
    
    % bookkeeping: the quadratic term is shared by both objective values
    half_sqnorm_rho = .5 * sum( rho(:).^2 );
    dual_fw(iter) = half_sqnorm_rho + sum( rho(:) .* w(:) );
    primal_fw(iter) = f + half_sqnorm_rho;
    primal_fw_min(iter) = Fmin;
    % per row: smallest prefix sum of w, capped at 0; summed over rows
    smallest_prefix_sum = min( cumsum(w,2) , [],2);
    dual_fw_min(iter) = sum( min(smallest_prefix_sum,0) );
    
    % alternative without line search:
    % w = ( 1 - 2 / (iter + 1) ) * w + 2 / (iter+1) * wbar;
    
    % line search along wbar - w, with the step clipped to [0,1]
    towards_wbar = wbar(:) - w(:);
    a = sum( rho(:) .* towards_wbar );
    b = sum( towards_wbar.^2 );
    step = min(1,max(a/b,0));
    w = ( 1 - step ) * w + step * wbar;
    
end
rho_fw = rho;
fprintf('\n');




%% Frank-Wolfe on B(F) with pairwise steps
%% from Lacoste-Julien and Jaggi (2015)
% Keeps every point returned by the linear oracle as a column of ws together
% with its weight in convex_combinations (w is maintained as the weighted
% combination of those columns). Each iteration moves weight from the active
% column with the smallest inner product with rho (the "away" column) to the
% newly returned point wbar.
%
% Outputs left in the workspace: primal_fw_pair, dual_fw_pair,
% primal_fw_pair_min, dual_fw_pair_min (one entry per iteration) and
% rho_fw_pair (last rho).

H0 = submodular_fct_1D_sparse_smooth(zeros(1,n),param);
maxiter = 400;

% random initialization: greedy output for a random rho with non-increasing rows
w = greedy_algorithm(fliplr(cumsum(rand(n,k-1),2)),F,param);

% column j of ws is the j-th stored point (vectorized); the starting point
% is column 1 and carries all the weight
ws = zeros( n*(k-1) ,maxiter+1 );
ws(:,1) = reshape(w,n*(k-1),1);
convex_combinations = zeros(1,maxiter+1);
convex_combinations(1) = 1;

% preallocate the logs instead of growing them inside the loop
dual_fw_pair = zeros(1,maxiter);
primal_fw_pair = zeros(1,maxiter);
primal_fw_pair_min = zeros(1,maxiter);
dual_fw_pair_min = zeros(1,maxiter);

fprintf('Pairwise Frank Wolfe - %d iterations\n', maxiter);

for iter=1:maxiter
    if mod(iter,10)==0, fprintf('%d ', iter); end
    
    % compute gradient direction
    rho = zeros(n,k-1);
    for i=1:n
        rho(i,:) = -pav(w(i,:));
    end
    
    % linear oracle; the returned point is stored as column iter+1
    [wbar,f,Fmin] = greedy_algorithm(rho,F,param);
    ws(:,iter+1) = reshape(wbar,n*(k-1),1);
    
    % away column: among the columns with positive weight, the one with the
    % smallest inner product with rho (b is its index into ws)
    ind = find(convex_combinations(1:iter)>0);
    [away_score,b] = min( ws(:,ind)'* reshape(rho,n*(k-1),1) ); %#ok<ASGLU>
    b = ind(b);
    
    % pairwise direction = (wbar - w) + (w - away column) = wbar - away column;
    % the step cannot exceed the weight currently held by the away column
    direction = wbar - reshape(ws(:,b),n,k-1);
    max_step = convex_combinations(b);
    
    dual_fw_pair(iter) = .5 * sum( rho(:).^2 ) + sum( rho(:) .* w(:) );
    primal_fw_pair(iter) = f + .5 * sum( rho(:).^2 );
    primal_fw_pair_min(iter) = Fmin;
    % per row: smallest prefix sum of w, capped at 0; summed over rows
    dual_fw_pair_min(iter) = sum( min(min( cumsum(w,2) , [],2),0) );
    
    % line search, clipped to [0,max_step]. If direction is zero, aa/bb is
    % NaN and max(NaN,0) returns 0, so the step is 0.
    aa = sum( rho(:) .* ( direction(:) ) );
    bb = sum( ( direction(:) ).^2 );
    step = min(max_step,max(aa/bb,0));
    
    % move a weight of "step" from the away column to the new column
    % (b <= iter, so the two indices never coincide)
    convex_combinations(iter+1) = convex_combinations(iter+1) + step;
    convex_combinations(b) = convex_combinations(b) - step;
    w = w + step * direction;
    
end
rho_fw_pair = rho;
fprintf('\n');



%% plot gaps
% Log10 of (primal - dual - H0) per iteration for the three solvers, drawn on
% one set of axes; waits for a key press afterwards.
gap_subgradient = primal_subgradient_min-dual_subgradient-H0;
gap_fw = primal_fw_min-dual_fw_min-H0;
gap_fw_pair = primal_fw_pair_min-dual_fw_pair_min-H0+1e-13; % small offset added before taking the log

plot(log10(gap_subgradient),'k','linewidth',2);
hold('on');
plot(log10(gap_fw),'r','linewidth',2);
plot(log10(gap_fw_pair),'b','linewidth',2);
hold('off');

legend('subgradient','Frank-Wolfe','Pair-wise FW');
axis([1 maxiter -8 4]);
xlabel('number of iterations');
ylabel('(certified) gaps');

pause


%% plot signals
% Denoised estimate obtained from the pairwise Frank-Wolfe solution, drawn
% against the noiseless signal; waits for a key press afterwards.

[xmin,Fmin] = theta_minimizer(rho_fw_pair,F,param);

% xmin/(k-1)*2-1 rescales xmin linearly (0 -> -1, k-1 -> 1)
sample_index = 1:n;
denoised = xmin/(k-1)*2-1;

plot(sample_index,denoised,'linewidth',2);
hold('on');
% noisy observations, disabled:
% plot(param.signal,'r','linewidth',2);
plot(sample_index,signal,'--r','linewidth',2);
hold('off');

axis([1 n -1 1]);
set(gca,'fontsize',16);
xlabel('n');
ylabel('signal');
legend('denoised','noiseless');

pause


% noisy observations (solid black) against the noiseless signal (dashed
% red); waits for a key press afterwards
sample_index = 1:n;
plot(sample_index,param.signal,'k','linewidth',2);
hold('on');
plot(sample_index,signal,'--r','linewidth',2);
hold('off');

axis([1 n -1 1]);
set(gca,'fontsize',16);
xlabel('n');
ylabel('signal');
legend('noisy','noiseless');

pause

%% plot solutions
% Heat maps of the final rho matrices: subgradient descent first, then
% (after a key press) pairwise Frank-Wolfe. Rows are indexed by 1:n on the
% vertical axis; the horizontal axis is labelled with the range of t.
imagesc(t,1:n,rho_subgradient);
xlabel('x');
ylabel('n');
colorbar;

pause

imagesc(t,1:n,rho_fw_pair);
xlabel('x');
ylabel('n');
colorbar;







