I have matlab problem removing outliers

2 次查看(过去 30 天)
I want to remove outliers by year, but the values are same every year.
And my plot.
My code:
clear all ; clc ; clf ;
set(gcf,'color','w')
%%
filehtm = dir('moving_windown_test.xlsx') ;
for ii = 1 : length(filehtm);
filehtm(ii).name
[num,str,xlsdata] = xlsread(filehtm(ii).name) ; %num數值 str字串
end
time = num(:,1) ;
tide_detrend = num(:,2) ./ 1000 ;
tide = tide_detrend ;
tide_outline = tide_detrend;
%% set time
YYYY = fix(time/1000000) ;
MM = mod(fix(time/10000),100) ;
DD = mod(fix(time/100),100) ;
HH = mod(time,100) ;
tt = datenum(YYYY,MM,DD,HH,0,0) ;
%%
kkk= 0;
%% 設定時間序列
for yyy = 1961 :1: 1965
t1 = datenum(yyy,1,1) : 1/24 : datenum(yyy+1,1,1) ; % 不能使用t1(yyy)
t1(end)=[];
%% step 1 - Q1 ..... 先篩選離群值
kkk = kkk + 1 ;
Q1(kkk) = prctile(tide_outline, 25)
Q3(kkk) = prctile(tide_outline, 75)
IQR(kkk) = Q3(kkk) - Q1(kkk) ;
upper(kkk) = Q3(kkk) + 1.5*IQR(kkk);
lower(kkk) = Q1(kkk) - 1.5*IQR(kkk) ;
tide_outline(tide_outline > upper | tide_outline<lower) = nan ;
%% step 2 - 將空白日期補上nan
for j = 1 : length(t1); % 建立兩個tide變數 與篩選前後做比較
if (isempty(find(tt==t1(j))))
r_tide(j) = nan ;
else
r_tide(j) = tide(find(tt==t1(j))) ;
end
end
for j = 1 : length(t1);
if (isempty(find(tt==t1(j))));
r_tide_removeoutline(j) = nan ;
% kkk = kkk + 1
else
r_tide_removeoutline(j) = tide_outline(find(tt==t1(j))) ;
end
end
for i = 1 : length(t1) ; % 補上完整日期(調和用)
field_day(i) = (yyy+(i - 1) * 60 * 60 / 86400/365) ;
end
% plot(field_day,r_tide);hold on
% plot([yyy,yyy+1],[upper(kkk),upper(kkk)],'--.k','DisplayName','upper');hold on
% plot([yyy,yyy+1],[lower,lower],'--.k','DisplayName','upper');
pause
end

采纳的回答

Image Analyst
Image Analyst 2022-1-18
Try calling sgolayfilt() to smooth the data, if that's what you want to do.
% Initialization Steps.
clc; % Clear the command window.
close all; % Close all figures (except those of imtool.)
clear; % Erase all existing variables. Or clearvars if you want.
workspace; % Make sure the workspace panel is showing.
format long g;
format compact;
fontSize = 18;
set(gcf,'color','w')
%%
filehtm = dir('moving_windown_test.xlsx') ;
for ii = 1 : length(filehtm);
filehtm(ii).name
[num,str,xlsdata] = xlsread(filehtm(ii).name) ; %num數值 str字串
end
time = num(:,1) ;
tide_detrend = num(:,2) ./ 1000 ;
tide = tide_detrend ;
tide_outline = tide_detrend;
%% set time
YYYY = fix(time/1000000) ;
MM = mod(fix(time/10000),100) ;
DD = mod(fix(time/100),100) ;
HH = mod(time,100) ;
tt = datenum(YYYY,MM,DD,HH,0,0) ;
%%
kkk= 0;
%% 設定時間序列
for yyy = 1961 :1: 1965
t1 = datenum(yyy,1,1) : 1/24 : datenum(yyy+1,1,1) ; % 不能使用t1(yyy)
t1(end)=[];
%% step 1 - Q1 ..... 先篩選離群值
kkk = kkk + 1 ;
Q1(kkk) = prctile(tide_outline, 25)
Q3(kkk) = prctile(tide_outline, 75)
IQR(kkk) = Q3(kkk) - Q1(kkk) ;
upperLimit(kkk) = Q3(kkk) + 1.5*IQR(kkk);
lowerLimit(kkk) = Q1(kkk) - 1.5*IQR(kkk) ;
tide_outline(tide_outline > upperLimit | tide_outline<lowerLimit) = nan ;
%% step 2 - 將空白日期補上nan
for j = 1 : length(t1); % 建立兩個tide變數 與篩選前後做比較
if (isempty(find(tt==t1(j))))
r_tide(j) = nan ;
else
r_tide(j) = tide(find(tt==t1(j))) ;
end
end
for j = 1 : length(t1);
if (isempty(find(tt==t1(j))));
r_tide_removeoutline(j) = nan ;
% kkk = kkk + 1
else
r_tide_removeoutline(j) = tide_outline(find(tt==t1(j))) ;
end
end
for i = 1 : length(t1) ; % 補上完整日期(調和用)
field_day(i) = (yyy+(i - 1) * 60 * 60 / 86400/365) ;
end
% Remove nan's
nanIndexes = isnan(r_tide);
field_day(nanIndexes) = [];
r_tide(nanIndexes) = [];
% Plot good data only.
plot(field_day,r_tide);hold on
plot([yyy,yyy+1],[upperLimit(kkk),upperLimit(kkk)],'--.k','DisplayName','upper');
plot([yyy,yyy+1],[lowerLimit(end),lowerLimit(end)],'--.k','DisplayName','upper');
grid on;
% Smooth the data with a Savitzky-Golay filter.
windowWidth = 1001; % Some large odd number.
smoothTide = sgolayfilt(r_tide, 2, 1001); % Requires Signal Processing Toolbox.
% smoothTide = movmean(r_tide, windowWidth); % Requires no toolbox.
plot(field_day, smoothTide, 'r-', 'LineWidth', 5);
if yyy < 1965
promptMessage = sprintf('Showing year %d\nDo you want to Continue processing,\nor Quit processing?', yyy);
titleBarCaption = 'Continue?';
buttonText = questdlg(promptMessage, titleBarCaption, 'Continue', 'Quit', 'Continue');
if contains(buttonText, 'Quit', 'IgnoreCase', true)
return; % or break or continue.
end
end
end
  5 个评论
Image Analyst
Image Analyst 2022-1-19
You don't need to smooth apparently. You just need to delete values above or below your control limits. So with your final array you can do
outlierIndexes = r_tide > upperLimit(1) | r_tide < lowerLimit(1);
r_tide(outlierIndexes) = [];
field_day(outlierIndexes) = [];

请先登录,再进行评论。

更多回答(0 个)

类别

Help CenterFile Exchange 中查找有关 Get Started with Curve Fitting Toolbox 的更多信息

Community Treasure Hunt

Find the treasures in MATLAB Central and discover how the community can help you!

Start Hunting!

Translated by