%%%% Code for loading snowflake statistics and diagnostics file.
%%%% Requires LABEL_PARAMS, CAM_PARAMS, INSERTNANS and CAMSTATS %%%%

%   Copyright Tim Garrett, University of Utah. This code is freely available for
%   non-commercial distribution and modification.

%  This is a version modified by Kyle Fitch to read the output text files
%  produced by mascpy

% Dependencies:
% 	cam_params.m
% 	label_params.m
% 	matchMASC.m
%  matchMASC2.m
% 	uploaddirs_ARM3.m
% 	uploaddirs.m
% 	windeffects_paths.m
% also required: processed masc images and output files (processed with 
% mascpy code located at https://doi.org/10.7278/S50DVA5JK2PD), 'met' ARM 
% data for surface meteorological values, and 'kollias' ARM VAP data for 
% cloud base height and mean Doppler velocity

% user can avoid running this often computationally intensive script by
% instead using the provided 'oli_data_windeffects.mat' with the 
% 'wind_effects_plots2.m' script


% Note. If you get this error: 

%    Error in uploaddirs (line 20)
%    d = regexp( r{2}, '\.','split' );

%    then UPLOADDIRS needs to be modified
% Start from a clean workspace with no open figures.
clear;
close all

% Put the project's working directories on the MATLAB path.
windeffects_paths;

% Version tag for this data set (appended to the saved file name).
%myversion = '3_nomdv';
%myversion = 'NWS';
myversion = '_test';

% Switch: 1 requests parallel processing.
parproc = 1;

% Open a parallel pool on the default cluster only when parallel processing
% is requested and no pool is running yet ('nocreate' only queries).
if parproc == 1 && isempty(gcp('nocreate'))
    parpool(parcluster())
end

%%%%%%%%%OUTPUT PARAMETERS%%%%%%%%%%%%

% statsoutputfile = 'Statsdata'; %To go in directory DIRALL as specified below
% 
% stripstatsoutputfile = 'StripStatsdata'; %To go in directory DIRALL as specified below

%%%%%%%%%%% Load user specified parameters %%%%%%%%%%%%
    
 %labelling convention and analysis bounds for input and output
    label_params% outputs: campaigndir,labelformat
 % Camera and lens details (not needed)
    cam_params %outputs: MASCtype, fovmat, colorcammat, interarrivaltime

 
%%%%The following range could be the same as that specified originally in
%%%%MASC_process
starthr = [2016 09 04 00]; % Specifies the starting hour for a desired range of upload
endhr = [2016 09 05 00]; % Specifies the ending hour for a desired range of upload


flakedirs = dir(campaigndir);
dirall = campaigndir;
dirlistall = {flakedirs.name};

% upload flake directory names
dirlist = uploaddirs(dirall,dirlistall,starthr,endhr); %outputs: directory list

% Fail early with a clear message instead of an index error on dirlist(1).
if isempty(dirlist)
    error('load_stats:noDirectories', ...
        'UPLOADDIRS returned no directories under %s for the requested time range.', dirall);
end

%index locations of first and last directories
% Build the candidate paths once; they are compared against both the first
% and the last entry of DIRLIST.
candidatepaths = strcat(dirall,'/',dirlistall,'/');
STARTind = find(strcmp(candidatepaths,dirlist(1)),1,'first');
ENDind = find(strcmp(candidatepaths,dirlist(end)),1,'last');

% Without a match the loop range below would be empty and the script would
% only fail later, far from the cause.
if isempty(STARTind) || isempty(ENDind)
    error('load_stats:directoryNotFound', ...
        'Could not locate the first/last selected directory in the listing of %s.', dirall);
end

%If multiple directories, order with most recent first: .e.g
%flakedir = {'./11082012/' './11092012/' './11092012_2/'};

% Accumulators filled by the concatenation loop.
statsdata = [];
iddata = [];
datedata = [];
timedata = [];
% Number of numeric columns every statistics table is padded to.
max_cols = 13;

%% Concatenate statistics files

% Honor the PARPROC switch. A worker limit of 0 makes PARFOR execute serially
% on the client, so no pool is opened behind the user's back when PARPROC is
% not 1. With PARPROC == 1 the current pool (all of its workers) is used.
nworkers = 0;
if parproc == 1
    pool = gcp; % returns the running pool (or auto-creates one, as a plain PARFOR would)
    if ~isempty(pool)
        nworkers = pool.NumWorkers;
    end
end

% STATSDATA, IDDATA, DATEDATA and TIMEDATA are concatenation reduction
% variables: each iteration appends the rows read from one directory.
parfor (i = STARTind:ENDind, nworkers)
    flakedir = [dirall '/' dirlistall{i} '/'];
    statsname = [flakedir 'outAnalysisParticles.txt'];

    % A single DIR call covers both skip conditions: the statistics file is
    % missing (empty result) or it is too small to be read.
    % NOTE(review): 372 bytes is presumably the size of a header-only file -
    % confirm against the mascpy output format.
    s = dir(statsname);
    if isempty(s) || s.bytes <= 372
        continue
    end

    % Read in statistics, diagnostics, and file paths (tab delimited, one
    % header line).
    parsed = importdata(statsname,'\t',1);
    data = parsed.data;

    % Pad narrower tables with NaN columns so every file contributes
    % MAX_COLS numeric columns.
    missingcols = max_cols - size(data,2);
    if missingcols > 0
        data = [data, nan(size(data,1),missingcols)];
    end

    % Text columns 1-3 of the data rows (header row skipped) hold the
    % particle id, date string and time string.
    statsdata = [statsdata; data];
    iddata = [iddata; str2double(parsed.textdata(2:end,1))];
    datedata = [datedata; cell2mat(parsed.textdata(2:end,2))];
    timedata = [timedata; cell2mat(parsed.textdata(2:end,3))];
end

%% extract yr,month,day,hr,min,sec

% Stop with a clear message when no records were read; the fixed-column
% indexing below would otherwise fail with an opaque index error.
if isempty(datedata) || isempty(timedata)
    error('load_stats:noData', ...
        'No particle records were read from the selected directories.');
end

% Convert a char matrix (one record per row) to a numeric column. STR2DOUBLE
% parses the text directly instead of evaluating it as code like STR2NUM, and
% yields NaN for a malformed row instead of failing for the whole column.
chars2col = @(c) str2double(cellstr(c));

% Fixed character positions: month in 1:2, day in 4:5, year in 7:10 of the
% date string; hour in 1:2, minute in 4:5, seconds from 7 on in the time string.
yrdata = chars2col(datedata(:,7:10));
monthdata = chars2col(datedata(:,1:2));
daydata = chars2col(datedata(:,4:5));
hrdata = chars2col(timedata(:,1:2));
mindata = chars2col(timedata(:,4:5));
secdata = chars2col(timedata(:,7:end));
% numeric array w/o filename strings
statsdata = [iddata yrdata monthdata daydata hrdata mindata secdata statsdata];

% The former re-import of a header from the hard-coded directory index 2012
% was removed: its result was never used and it errored whenever the campaign
% listing had fewer than 2012 entries.

% sort rows chronologically (columns 2:7 hold year..second)
[~,order] = sort(datenum(statsdata(:,2:7)));
statsdata = statsdata(order,:);

 % all numeric headers (filename strings not included): 7 id/date/time
 % columns followed by the 13 statistics columns
 statsheaderstring = {'oli_id','oli_year','oli_month','oli_day','oli_hour','oli_minute','oli_second',...
                       'fallspeed','numviews',...
                       'd_max','area','ae_rad','perimeter','p_orientation',...
                       'aspRatio','oli_complexity','csarea','mpi','mpiv','flatness'};
%  statsheader = genvarname(statsheaderstring);
 statsheader = matlab.lang.makeValidName(statsheaderstring);

disp('data read');

idcol = 1; %column where the camera id is in the diagnostics and statistics files

%%%%%%%%%%%% ASSIGN STATS DATA TO HEADER LABELS %%%%%%%%%%%%%%

% Create one workspace variable per header name, holding the matching column
% of STATSDATA (header k <-> column k).
for k = 1:numel(statsheader)
    eval(sprintf('%s= statsdata(:, %d);', statsheader{k}, k))
end

%% Gather other ARM data

% One row per particle: [year month day hour minute second].
masc_datetime = horzcat(oli_year, oli_month, oli_day, oli_hour, oli_minute, oli_second);

% Surface meteorology (MET): temperature, wind speed, wind direction, pressure.
% NOTE(review): the meaning of the numeric arguments (1, 0) is defined by
% matchMASC, which is not visible here - confirm there.
disp('Starting MASC-MET matching loop')
metfields = {'temp_mean','wspd_arith_mean','wdir_vec_mean','atmos_pressure'};
[met_temp,met_wspd,met_wdir,met_press] = matchMASC('met',metfields,'oli',1,0,double(masc_datetime));

% Replace out-of-range / sentinel values with NaN.
met_temp(met_temp < -900) = NaN;
met_wspd(met_wspd < 0) = NaN;
met_wdir(met_wdir < 0) = NaN;
met_press(met_press == -9999) = NaN;

% Mean Doppler velocity below cloud base ('kollias' data).
% NOTE(review): the numeric arguments (10, 5) are interpreted by matchMASC2 -
% confirm their meaning there.
disp('Starting MASC-MDV matching loop')
mdvfields = {'mean_doppler_velocity'};
mdv = matchMASC2('kollias',mdvfields,'oli',10,5,double(masc_datetime));

mdv(mdv == -9999) = NaN;

%% keep only the variables that go into the saved file

% Copy selected columns to the names used in the output file.
cams4avg = numviews;
orientation = p_orientation;
oli_flatness = flatness;
aspect_ratio = aspRatio;
ae_radius = ae_rad;
a_cs = csarea;

% Everything not listed here is removed from the workspace. KEEPVARS is not
% in its own list, so it is cleared as well and does not end up in the file.
keepvars = {'a_cs','ae_radius','aspect_ratio','cams4avg','d_max','fallspeed', ...
            'masc_datetime','mdv','met_press','met_temp','met_wspd','met_wdir', ...
            'mpi','mpiv','oli_complexity','oli_flatness','oli_id','orientation', ...
            'perimeter','myversion'};
clearvars('-except', keepvars{:});

%% save

% SAVE without a variable list writes every remaining workspace variable.
save(sprintf('%s%s','/uufs/chpc.utah.edu/common/home/garrett-group3/fitch/Data/saved/oli_data_windeffects_',myversion))
