function  [CM_aggr, Acc, predict, modelli, index, prob] = cross_valid_svm(inst, labels, nfold, sc_low, sc_up, kernel, C, gamma, Pb, termination, maxlabelnum)
% CROSS_VALID_SVM  n-fold cross validation of a LIBSVM classifier.
%
% The data are shuffled, split into nfold consecutive folds, and for each
% fold a model is trained on the remaining folds (after feature scaling
% computed on the training folds only) and evaluated on the held-out fold.
%
% Input arguments:
% ----------------------
% inst        = data (NxF)
% labels      = annotations (Nx1); they are used as row/column indices of
%               the confusion matrix, so they must be positive integers
% nfold       = number of folds for cross validation (integer, 2 <= nfold <= N)
% sc_low, sc_up = extremes for min-max data normalization (usually -1 and
%               +1 respectively); only used when the min-max scaling
%               branch is enabled (see scaling1 below)
% kernel      = LIBSVM kernel type, passed to the '-t' option
% C, gamma    = SVM parameters, passed to the '-c' and '-g' options
% Pb          = soft assignment (probability estimates) if true; passed to
%               the '-b' option of both svmtrain and svmpredict
% termination = tolerance of the termination criterion ('-e' option)
% maxlabelnum = maximum label ID, i.e. number of columns of each prob{n}
%
% Output arguments:
% ----------------------
% CM_aggr = confusion matrix summed over all folds
% Acc     = classification accuracy averaged over the folds (first entry
%           of the accuracy vector returned by svmpredict)
% predict = 1 x nfold cell array of predicted labels. Fold order: the
%           first iteration tests on fold nfold, the second on fold
%           nfold-1, and so on (the fold index vector is rotated by one
%           position after every iteration).
% modelli = 1 x nfold cell array of trained models
% index   = data randomization index (inst(index,:) is the shuffled data)
% prob    = 1 x nfold cell array; prob{n} has one row per test sample and
%           maxlabelnum columns. When Pb is true, column c holds the
%           probability estimate for class label c; otherwise it is zeros.
%
% Requires svmtrain/svmpredict (LIBSVM MATLAB interface) and confmat on
% the path.
%
% [Andrea Mannini: a.mannini@sssup.it; Last modified: ?]


% Scaling mode switches: scaling1 = min-max to [sc_low, sc_up],
% scaling2 = z-score (zero mean, unit standard deviation).
scaling1 = 0;
scaling2 = 1;

N = size(inst, 1);
labels = labels(:);     % force a column so folds can be stacked vertically

if ~isscalar(nfold) || nfold ~= floor(nfold) || nfold < 2 || nfold > N
    error('cross_valid_svm:invalidNfold', ...
          'nfold must be an integer between 2 and the number of samples (%d).', N);
end

%% Dataset randomization: the variable index contains the random order
index = randperm(N);
label_rnd = labels(index);
inst_rnd = inst(index,:);


%% The dataset is divided in "nfold" folds of floor(N/nfold) samples.
% The last fold is longer if N/nfold is not an integer.
foldLen = floor(N / nfold);
label_split = cell(nfold, 1);
inst_split = cell(nfold, 1);
for n = 1:nfold
    first = (n-1)*foldLen + 1;
    if n < nfold
        last = n*foldLen;
    else
        last = N;       % the last fold absorbs the remainder
    end
    label_split{n} = label_rnd(first:last);
    inst_split{n} = inst_rnd(first:last, :);
end


%% Following nfold cross validation approach:
% scaling, training, classification,
% confusion matrix evaluation.
% The result is saved and the algorithm continues until all folds are tested.

lab = unique(labels);
CM = zeros(max(lab), max(lab), nfold);

model = cell(1, nfold);
predict = cell(1, nfold);
prob_tmp = cell(1, nfold);
prob = cell(1, nfold);
accuracy = zeros(3, nfold);     % svmpredict returns a 3x1 accuracy vector

% The option strings do not depend on the fold: build them once.
train_opts = [' -t ' num2str(kernel)  ' -c ' num2str(C, '%12.12f') ' -g ' num2str(gamma, '%12.12f') ' -b ' num2str(Pb) ' -e ' num2str(termination, '%12.12f')  ];
test_opts = [' -b ' num2str(Pb)];

ind = 1:nfold;
for n = 1:nfold
    % Assign training set (TS) and validation set (VS): the first nfold-1
    % entries of ind are the training folds, the last one is held out.
    train_folds = ind(1:nfold-1);
    TS_label = vertcat(label_split{train_folds});
    TS_inst = vertcat(inst_split{train_folds});
    VS_label = label_split{ind(nfold)};
    VS_inst = inst_split{ind(nfold)};

    % Scaling parameters are estimated on the training set only and then
    % applied to both sets. The explicit dimension argument keeps the
    % statistics per-feature even when the training set has a single row.
    if scaling1
        % min-max scaling to [sc_low, sc_up]
        M = max(TS_inst, [], 1);
        m = min(TS_inst, [], 1);
        span = M - m;
        span(span == 0) = 1;    % constant feature: avoid 0/0, maps to sc_low
        TS_inst_sc = sc_low + bsxfun(@rdivide, bsxfun(@minus, TS_inst, m) * (sc_up - sc_low), span);
        VS_inst_sc = sc_low + bsxfun(@rdivide, bsxfun(@minus, VS_inst, m) * (sc_up - sc_low), span);
    elseif scaling2
        % z-score scaling
        me = mean(TS_inst, 1);
        st = std(TS_inst, 0, 1);
        st(st == 0) = 1;        % constant feature: avoid division by zero
        TS_inst_sc = bsxfun(@rdivide, bsxfun(@minus, TS_inst, me), st);
        VS_inst_sc = bsxfun(@rdivide, bsxfun(@minus, VS_inst, me), st);
    else
        % no scaling requested: use the raw features
        TS_inst_sc = TS_inst;
        VS_inst_sc = VS_inst;
    end


    % train & test
    model{n} = svmtrain(TS_label, TS_inst_sc, train_opts);
    [predict{n}, accuracy(:,n), prob_tmp{n}] = svmpredict(VS_label, VS_inst_sc, model{n}, test_opts);

    % Confusion matrix evaluation: the block returned by confmat is placed
    % at the rows/columns of the labels present in this fold.
    % NOTE(review): confmat is defined elsewhere; presumably rows are true
    % labels and columns are predicted labels - confirm against confmat.
    lab1 = unique(VS_label);
    lab2 = unique(predict{n});
    [CM(lab1,lab2,n), ~, ~] = confmat(VS_label, predict{n});

    % Label - probabilities pairing. With '-b 1' LIBSVM orders the columns
    % of the estimates like model.Label, so each column is moved to the
    % column whose index equals its class label. Without probability
    % estimates the third svmpredict output holds decision values, which
    % cannot be mapped to labels: prob{n} is then left as zeros.
    n_test = size(prob_tmp{n}, 1);
    prob{n} = zeros(n_test, maxlabelnum);
    if Pb && ~isempty(prob_tmp{n})
        prob{n}(:, model{n}.Label) = prob_tmp{n};
    end

    % rotate index so that the next iteration holds out a different fold
    ind = circshift(ind,[0 1]);
end

%% Results aggregation
CM_aggr = sum(CM,3);
Acc = sum(accuracy(1,:))/nfold;
modelli = model;


%% SVM PARAMS (memo)
% options:
% -s svm_type : set type of SVM (default 0)
% 	0 -- C-SVC
% 	1 -- nu-SVC
% 	2 -- one-class SVM
% 	3 -- epsilon-SVR
% 	4 -- nu-SVR
% -t kernel_type : set type of kernel function (default 2)
% 	0 -- linear: u'*v
% 	1 -- polynomial: (gamma*u'*v + coef0)^degree
% 	2 -- radial basis function: exp(-gamma*|u-v|^2)
% 	3 -- sigmoid: tanh(gamma*u'*v + coef0)
% -d degree : set degree in kernel function (default 3)
% -g gamma : set gamma in kernel function (default 1/num_features)
% -r coef0 : set coef0 in kernel function (default 0)
% -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
% -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
% -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
% -m cachesize : set cache memory size in MB (default 100)
% -e epsilon : set tolerance of termination criterion (default 0.001)
% -h shrinking: whether to use the shrinking heuristics, 0 or 1 (default 1)
% -b probability_estimates: whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
% -wi weight: set the parameter C of class i to weight*C, for C-SVC (default 1)
% 
% In the -g option, num_features is the number of attributes in the input data.
% To install this tool, please read the README file in the package. There are Windows, X, and Java versions in the package.

