FuzzySystems/Work 3/source/preprocess_data.m

39 lines
1.3 KiB
Matlab
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

function [X_trn_s, X_val_s, X_chk_s, stats] = preprocess_data(X_trn, X_val, X_chk, mode)
%PREPROCESS_DATA Scale train/validation/check features using TRAIN statistics.
%   [X_trn_s, X_val_s, X_chk_s, stats] = PREPROCESS_DATA(X_trn, X_val, X_chk, mode)
%   computes per-column statistics along the first dimension of X_trn and
%   applies the same transform to all three sets.
%
%   Inputs
%     X_trn : non-empty array; statistics are taken along dimension 1.
%     X_val : array with the same size as X_trn in every dimension except
%             the first, or [] (returned unchanged).
%     X_chk : same requirements as X_val.
%     mode  : 1 -> min-max scaling, (X - xmin) ./ (xmax - xmin)   [default]
%             2 -> z-score,         (X - mu)   ./ sig
%             Omitted or [] selects the default.
%
%   Outputs
%     X_trn_s, X_val_s, X_chk_s : the scaled sets.
%     stats : struct describing the transform
%             mode 1: fields 'type' ('minmax'), 'xmin', 'xmax'
%             mode 2: fields 'type' ('zscore'), 'mu', 'sig'
%             ('sig' is stored after zero entries were replaced by 1).
%
%   Columns that are constant in X_trn (zero range / zero std) use a
%   divisor of 1, so they map to 0 instead of NaN/Inf.
%
%   Errors
%     preprocess_data:emptyTrain    X_trn is empty.
%     preprocess_data:sizeMismatch  X_val / X_chk is incompatible with X_trn.
%     preprocess_data:unknownMode   mode is neither 1 nor 2.

    if nargin < 4 || isempty(mode)
        mode = 1; % 1: min-max, 2: z-score
    end

    % Statistics of an empty training set are undefined (empty or NaN), so
    % fail early rather than propagate them into the outputs.
    if isempty(X_trn)
        error('preprocess_data:emptyTrain', ...
            'X_trn must be non-empty to compute scaling statistics.');
    end

    switch mode
        case 1 % Min-max using TRAIN stats
            xmin = min(X_trn, [], 1);
            xmax = max(X_trn, [], 1);
            % Named "span" (not "range") to avoid shadowing the toolbox
            % function of that name.
            span = xmax - xmin;
            span(span == 0) = 1; % constant column: avoid division by zero
            offset = xmin;
            scale  = span;
            stats = struct( ...
                'type', 'minmax', ...
                'xmin', xmin, ...
                'xmax', xmax ...
            );
        case 2 % Z-score using TRAIN stats
            mu  = mean(X_trn, 1);
            sig = std(X_trn, 0, 1);
            sig(sig == 0) = 1; % constant column: avoid division by zero
            offset = mu;
            scale  = sig;
            stats = struct( ...
                'type', 'zscore', ...
                'mu', mu, ...
                'sig', sig ...
            );
        otherwise
            error('preprocess_data:unknownMode', 'Unknown mode.');
    end

    X_trn_s = (X_trn - offset) ./ scale;
    X_val_s = applyScaling(X_val, offset, scale, 'X_val');
    X_chk_s = applyScaling(X_chk, offset, scale, 'X_chk');
end

function Xs = applyScaling(X, offset, scale, name)
%APPLYSCALING Return (X - offset) ./ scale after checking X against offset.
%   A 0-by-0 input is returned unchanged. Otherwise every dimension of X
%   except the first must equal the corresponding dimension of offset;
%   without this check implicit expansion would silently broadcast, e.g.,
%   a single-column X against multi-column statistics.

    if isequal(size(X), [0 0])
        Xs = X; % set not provided: nothing to scale
        return;
    end

    szX   = size(X);
    szRef = size(offset);
    if ~isequal(szX(2:end), szRef(2:end))
        error('preprocess_data:sizeMismatch', ...
            '%s has size %s, but its trailing dimensions must be %s to match X_trn.', ...
            name, mat2str(szX), mat2str(szRef(2:end)));
    end

    Xs = (X - offset) ./ scale;
end