
I have a MATLAB problem with removing outliers
    3 次查看(过去 30 天)
  
       显示 更早的评论
    
I want to remove outliers year by year, but the values I get are the same every year.
Here is my plot (image not shown).
My code:
% Script as posted in the question: reads a two-column numeric table from
% moving_windown_test.xlsx, decodes the time stamps, and for each year
% 1961-1965 computes IQR-based limits and builds hourly series in which
% hours without a sample are NaN.
clear all ; clc ; clf ;
set(gcf,'color','w')
%%
filehtm = dir('moving_windown_test.xlsx') ;
for ii = 1 : length(filehtm);
    filehtm(ii).name
    [num,str,xlsdata] = xlsread(filehtm(ii).name) ;    % num = numeric values, str = strings
end
time = num(:,1) ;                   % column 1: time stamp
tide_detrend = num(:,2) ./ 1000 ;   % column 2 scaled by 1/1000
tide = tide_detrend ;               % copy kept unchanged for comparison
tide_outline = tide_detrend;        % copy in which outliers are replaced by NaN
%% set time
% The arithmetic below treats each time stamp as a number of the form YYYYMMDDHH.
YYYY = fix(time/1000000) ;
MM   = mod(fix(time/10000),100) ;
DD   = mod(fix(time/100),100) ;
HH   = mod(time,100) ;
tt   = datenum(YYYY,MM,DD,HH,0,0) ;
%%
kkk= 0;   % index of the current year within Q1/Q3/IQR/upper/lower
%% Set up the time series
for yyy = 1961 :1: 1965 
    % Hourly grid covering the whole year yyy (the first hour of yyy+1 is dropped).
    t1 = datenum(yyy,1,1) : 1/24 : datenum(yyy+1,1,1) ; % cannot use t1(yyy)
    t1(end)=[];    
    %% step 1 - Q1 ..... first screen out the outliers
    kkk = kkk + 1 ;
    % NOTE(review): the percentiles are taken over the whole of tide_outline,
    % not only over the samples of year yyy, so the limits are not per-year.
    Q1(kkk) = prctile(tide_outline, 25) 
    Q3(kkk) = prctile(tide_outline, 75) 
    IQR(kkk) = Q3(kkk) - Q1(kkk)   ;
    % NOTE(review): the names upper/lower shadow the built-in functions
    % upper() and lower().
    upper(kkk) = Q3(kkk) + 1.5*IQR(kkk);
    lower(kkk) = Q1(kkk) - 1.5*IQR(kkk)  ;
    % NOTE(review): this compares against the entire upper/lower arrays rather
    % than upper(kkk)/lower(kkk); from the second year on they hold more than
    % one element - confirm this is intended.
    tide_outline(tide_outline > upper |  tide_outline<lower) = nan ;
    %%  step 2   - fill missing dates with NaN
    for j = 1 : length(t1);   % build two tide variables to compare before and after filtering
        if  (isempty(find(tt==t1(j))))
            r_tide(j) = nan ;
        else
            r_tide(j) = tide(find(tt==t1(j))) ;
        end 
    end
    % Same gap filling, but taken from the outlier-screened series.
    for j = 1 : length(t1);
        if  (isempty(find(tt==t1(j))));
            r_tide_removeoutline(j) = nan ;
            %          kkk = kkk + 1
        else
            r_tide_removeoutline(j) = tide_outline(find(tt==t1(j))) ;
        end 
    end
    for  i = 1 : length(t1) ; % fill in the complete dates (for harmonic analysis)
        % Time axis in years: one hour is 3600/86400 of a day, and a year is
        % taken as 365 days here.
        field_day(i) = (yyy+(i - 1) * 60 * 60 / 86400/365) ;
    end
    %   plot(field_day,r_tide);hold on
    %   plot([yyy,yyy+1],[upper(kkk),upper(kkk)],'--.k','DisplayName','upper');hold on
    % plot([yyy,yyy+1],[lower,lower],'--.k','DisplayName','upper');
    pause   % wait for a key press before moving on to the next year
end
0 个评论
采纳的回答
  Image Analyst
      
      
 2022-1-18
        Try calling sgolayfilt() to smooth the data, if that's what you want to do.
% Remove outliers from an hourly tide record one calendar year at a time.
%
% For every year the script
%   1. selects the samples that belong to that year,
%   2. computes Tukey fences (Q1 - 1.5*IQR, Q3 + 1.5*IQR) from THOSE samples
%      only, so every year gets its own limits,
%   3. places the samples on a complete hourly grid (missing hours are NaN),
%   4. plots the raw data, the outlier-free data, the limits and a
%      Savitzky-Golay smoothed curve.
%
% Requires: Statistics and Machine Learning Toolbox (prctile) and
%           Signal Processing Toolbox (sgolayfilt).

% Initialization steps.
clc;        % Clear the command window.
close all;  % Close all figures (except those of imtool).
clear;      % Erase all existing variables. Or clearvars if you want.
workspace;  % Make sure the workspace panel is showing.
format long g;
format compact;
set(gcf,'color','w')

%% Load the data
dataFile = 'moving_windown_test.xlsx';
if exist(dataFile, 'file') ~= 2
    % Fail early with a clear message instead of an "undefined variable" error later.
    error('Data file "%s" was not found.', dataFile);
end
num  = xlsread(dataFile);       % numeric cells only
time = num(:,1);                % column 1: time stamp
tide = num(:,2) ./ 1000;        % column 2 scaled by 1/1000

%% Decode the time stamps
% The arithmetic below treats each time stamp as a number of the form YYYYMMDDHH.
YYYY = fix(time/1000000);
MM   = mod(fix(time/10000),100);
DD   = mod(fix(time/100),100);
HH   = mod(time,100);
tt   = datenum(YYYY,MM,DD,HH,0,0);

%% Settings
firstYear   = 1961;
lastYear    = 1965;
windowWidth = 1001;   % Savitzky-Golay frame length: some large odd number.
polyOrder   = 2;      % Savitzky-Golay polynomial order.

nYears     = lastYear - firstYear + 1;
Q1         = nan(1, nYears);
Q3         = nan(1, nYears);
IQR        = nan(1, nYears);
upperLimit = nan(1, nYears);
lowerLimit = nan(1, nYears);

%% Process one year at a time
for yyy = firstYear : lastYear
    kkk = yyy - firstYear + 1;

    % Hour-of-year index (1-based) of every sample. Rounding to whole hours
    % avoids exact floating-point comparison of datenum values.
    yearStart = datenum(yyy,1,1);
    nHours    = round((datenum(yyy+1,1,1) - yearStart) * 24);   % 8760 or 8784
    hourIdx   = round((tt - yearStart) * 24) + 1;
    inYear    = hourIdx >= 1 & hourIdx <= nHours & ~isnan(tide);

    if ~any(inYear)
        warning('No samples found for year %d; skipping.', yyy);
        continue
    end

    %% Step 1 - limits computed from this year's samples only
    yearTide  = tide(inYear);
    yearHours = hourIdx(inYear);
    Q1(kkk)   = prctile(yearTide, 25);
    Q3(kkk)   = prctile(yearTide, 75);
    IQR(kkk)  = Q3(kkk) - Q1(kkk);
    upperLimit(kkk) = Q3(kkk) + 1.5*IQR(kkk);
    lowerLimit(kkk) = Q1(kkk) - 1.5*IQR(kkk);
    isOutlier = yearTide > upperLimit(kkk) | yearTide < lowerLimit(kkk);

    %% Step 2 - complete hourly grid, NaN where no sample exists
    % The arrays are rebuilt from scratch every year so that nothing from a
    % longer (leap) year is left behind.
    field_day = yyy + (0 : nHours-1) / nHours;   % time axis in years, spans exactly [yyy, yyy+1)
    r_tide    = nan(1, nHours);                  % before outlier removal
    r_tide(yearHours) = yearTide;
    r_tide_removeoutline = r_tide;               % after outlier removal
    r_tide_removeoutline(yearHours(isOutlier)) = nan;

    %% Plot
    hasRaw   = ~isnan(r_tide);
    hasClean = ~isnan(r_tide_removeoutline);
    plot(field_day(hasRaw), r_tide(hasRaw), '-', 'Color', [0.7 0.7 0.7], ...
        'DisplayName', sprintf('%d raw', yyy));
    hold on
    plot(field_day(hasClean), r_tide_removeoutline(hasClean), 'b-', ...
        'DisplayName', sprintf('%d outliers removed', yyy));
    plot([yyy,yyy+1], [upperLimit(kkk),upperLimit(kkk)], '--.k', 'DisplayName', 'upper');
    plot([yyy,yyy+1], [lowerLimit(kkk),lowerLimit(kkk)], '--.k', 'DisplayName', 'lower');
    grid on;

    % Smooth the outlier-free data with a Savitzky-Golay filter. Gaps are
    % closed up first, so samples are treated as evenly spaced. The filter
    % needs at least windowWidth samples.
    cleanTide = r_tide_removeoutline(hasClean);
    if numel(cleanTide) >= windowWidth
        smoothTide = sgolayfilt(cleanTide, polyOrder, windowWidth); % Requires Signal Processing Toolbox.
        % smoothTide = movmean(cleanTide, windowWidth); % Requires no toolbox.
        plot(field_day(hasClean), smoothTide, 'r-', 'LineWidth', 5, ...
            'DisplayName', sprintf('%d smoothed', yyy));
    else
        warning('Year %d has fewer than %d valid samples; smoothing skipped.', yyy, windowWidth);
    end

    if yyy < lastYear
        promptMessage = sprintf('Showing year %d\nDo you want to Continue processing,\nor Quit processing?', yyy);
        titleBarCaption = 'Continue?';
        buttonText = questdlg(promptMessage, titleBarCaption, 'Continue', 'Quit', 'Continue');
        if contains(buttonText, 'Quit', 'IgnoreCase', true)
            break;
        end
    end
end

5 个评论
  Image Analyst
      
      
 2022-1-19
Apparently you don't need to smooth the data. You just need to delete the values that lie above or below your control limits. So, with your final array, you can do:
% Flag every sample outside the first pair of control limits, then drop the
% flagged samples from both the data and its time axis.
tooHigh = r_tide > upperLimit(1);
tooLow  = r_tide < lowerLimit(1);
outlierIndexes = tooHigh | tooLow;
keepIndexes = ~outlierIndexes;
r_tide    = r_tide(keepIndexes);
field_day = field_day(keepIndexes);
更多回答(0 个)
另请参阅
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!


