Making Pickle Files for Environmental Drivers of Spring Bloom Timing Analysis

(model version 201905 only)

To use this notebook, change values in the second code cell and rerun the entire notebook for each year.
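
If many years are needed, the per-year rerun can be scripted instead of done by hand. A minimal sketch using papermill (not used by this notebook itself), assuming the parameter cell below is tagged 'parameters'; note that the dateslist computation would have to move to a later cell, since papermill injects its overrides immediately after the tagged cell:

import papermill as pm

# hypothetical driver script: execute one copy of this notebook per year
for yr in range(2007, 2021):
    pm.execute_notebook(
        'makePickles201905.ipynb',        # this notebook
        f'makePickles201905_{yr}.ipynb',  # per-year executed copy
        parameters={'iyear': yr},
    )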

[1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib as mpl
import netCDF4 as nc
import datetime as dt
from salishsea_tools import evaltools as et, places, viz_tools, visualisations, bloomdrivers
import xarray as xr
import pandas as pd
import pickle
import os

%matplotlib inline

To recreate this notebook at a different location, change only the following cell:

[2]:
# Change this to the directory where the pickle files should be stored:
savedir='/ocean/aisabell/MEOPAR/extracted_files'
# Change 'S3' to the location of interest
loc='S3'
# To create the time series for a range of years, set iyear to each year in the range
#    and rerun all cells each time (or see the papermill sketch above).
iyear=2020

# Leap years in 'dateslist':
if iyear % 4 == 0 and (iyear % 100 != 0 or iyear % 400 == 0):  # leap year: February ends on the 29th
    dateslist=[[dt.datetime(iyear,1,1),dt.datetime(iyear,1,31)],
               [dt.datetime(iyear,1,31),dt.datetime(iyear,2,29)],
               [dt.datetime(iyear,2,29),dt.datetime(iyear,4,1)]]
else:
    dateslist=[[dt.datetime(iyear,1,1),dt.datetime(iyear,1,31)],
               [dt.datetime(iyear,1,31),dt.datetime(iyear,2,28)],
               [dt.datetime(iyear,2,28),dt.datetime(iyear,4,1)]]

# What are the start year and end year+1 of the time range of interest?
startyear=2007
endyear=2021 # does NOT include this value

# Note: non-location-specific variables only need to be extracted once per year, not once per location
# Note: the x and y limits on the location map may need to be changed
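
Equivalently, the three month-pair windows can be built for any year with calendar.monthrange, which removes the need for a hard-coded leap-year branch. A sketch only, using the iyear set above; the notebook itself uses the explicit branch in the previous cell:

import calendar
import datetime as dt

# monthrange(year, month) returns (weekday of the 1st, number of days in the month)
jan_end = dt.datetime(iyear, 1, calendar.monthrange(iyear, 1)[1])
feb_end = dt.datetime(iyear, 2, calendar.monthrange(iyear, 2)[1])
dateslist = [[dt.datetime(iyear, 1, 1), jan_end],
             [jan_end, feb_end],
             [feb_end, dt.datetime(iyear, 4, 1)]]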
[3]:
startjan=dt.datetime(iyear,1,1)  # January start date
endmar=dt.datetime(iyear,4,1)   # end of the Jan-Mar window (exclusive: does not include this day)
fraserend=dt.datetime(iyear,3,31) # end date for the Fraser River calculations (inclusive: includes this day)
forbloomstart=dt.datetime(iyear,2,15) # longer window to capture the spring bloom date
forbloomend=dt.datetime(iyear,6,15)

year=str(iyear)
modver='201905'

fname=f'JanToMarch_TimeSeries_{year}_{loc}_{modver}.pkl' # for location specific variables
fname2=f'JanToMarch_TimeSeries_{year}_{modver}.pkl' # for non-location specific variables
fname3=f'springBloomTime_{year}_{loc}_{modver}.pkl' # for spring bloom timing calculation
fname4=f'JanToMarch_Mixing_{year}_{loc}_{modver}.pkl' # for location specific mixing variables
savepath=os.path.join(savedir,fname)
savepath2=os.path.join(savedir,fname2)
savepath3=os.path.join(savedir,fname3)
savepath4=os.path.join(savedir,fname4)
recalc=False # set to True to force re-extraction even when the pickle files already exist
[4]:
# lat and lon information for place:
lon,lat=places.PLACES[loc]['lon lat']
# get place information on SalishSeaCast grid:
ij,ii=places.PLACES[loc]['NEMO grid ji']
jw,iw=places.PLACES[loc]['GEM2.5 grid ji']

fig, ax = plt.subplots(1,1,figsize = (6,6))
with xr.open_dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/mesh_mask201702.nc') as mesh:
    ax.contour(mesh.nav_lon,mesh.nav_lat,mesh.tmask.isel(t=0,z=0),[0.1,],colors='k')
    tmask=np.array(mesh.tmask)
    gdept_1d=np.array(mesh.gdept_1d)
    e3t_0=np.array(mesh.e3t_0)
ax.plot(lon, lat, '.', markersize=14, color='red')
ax.set_ylim(48,50)
ax.set_xlim(-125,-122)
ax.set_title('Location of Station %s'%loc)
ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
viz_tools.set_aspect(ax,coords='map')
[4]:
1.1363636363636362
[Figure: map showing the location of Station S3]

Creating pickle files for location-specific variables

[5]:
if recalc==True or not os.path.isfile(savepath):
    basedir='/results2/SalishSea/nowcast-green.201905/'
    nam_fmt='nowcast'
    flen=1 # files contain 1 day of data each
    ftype= 'ptrc_T' # loads bio files
    tres=24 # 1: hourly resolution; 24: daily resolution
    bio_time=list()
    diat_alld=list()
    no3_alld=list()
    flag_alld=list()
    cili_alld=list()
    microzoo_alld=list()
    mesozoo_alld=list()
    intdiat=list()
    intphyto=list()
    spar=list()
    intmesoz=list()
    intmicroz=list()
    grid_time=list()
    temp=list()
    salinity=list()
    u_wind=list()
    v_wind=list()
    twind=list()
    solar=list()
    ik=0 # surface depth index
    for ind, datepair in enumerate(dateslist):
        start=datepair[0]
        end=datepair[1]
        flist=et.index_model_files(start,end,basedir,nam_fmt,flen,ftype,tres)
        flist3 = et.index_model_files(start,end,basedir,nam_fmt,flen,"grid_T",tres)
        fliste3t = et.index_model_files(start,end,basedir,nam_fmt,flen,"carp_T",tres)
        with xr.open_mfdataset(flist['paths']) as bio:
            bio_time.append(np.array([pd.to_datetime(t)+dt.timedelta(minutes=30) for t in bio.time_counter.values]))
            no3_alld.append(np.array(bio.nitrate.isel(y=ij,x=ii)))  # 'all_d' = all depths
            diat_alld.append(np.array(bio.diatoms.isel(y=ij,x=ii)))
            flag_alld.append(np.array(bio.flagellates.isel(y=ij,x=ii)))
            cili_alld.append(np.array(bio.ciliates.isel(y=ij,x=ii)))
            microzoo_alld.append(np.array(bio.microzooplankton.isel(y=ij,x=ii)))
            mesozoo_alld.append(np.array(bio.mesozooplankton.isel(y=ij,x=ii)))

            with xr.open_mfdataset(fliste3t['paths']) as carp:
                intdiat.append(np.array(np.sum(bio.diatoms.isel(y=ij,x=ii)*carp.e3t.isel(y=ij,x=ii),1))) # 'int' = depth integrated
                intphyto.append(np.array(np.sum((bio.diatoms.isel(y=ij,x=ii)+bio.flagellates.isel(y=ij,x=ii)\
                                +bio.ciliates.isel(y=ij,x=ii))*carp.e3t.isel(y=ij,x=ii),1)))
                spar.append(np.array(carp.PAR.isel(deptht=ik,y=ij,x=ii))) # surface PAR
                intmesoz.append(np.array(np.sum(bio.mesozooplankton.isel(y=ij,x=ii)*carp.e3t.isel(y=ij,x=ii),1)))
                intmicroz.append(np.array(np.sum(bio.microzooplankton.isel(y=ij,x=ii)*carp.e3t.isel(y=ij,x=ii),1)))

        with xr.open_mfdataset(flist3['paths']) as grid:
            grid_time.append(np.array([pd.to_datetime(t)+dt.timedelta(minutes=30) for t in grid.time_counter.values]))
            temp.append(np.array(grid.votemper.isel(deptht=ik,y=ij,x=ii)) )#surface temperature
            salinity.append(np.array(grid.vosaline.isel(deptht=ik,y=ij,x=ii))) #surface salinity

        jW,iW,wopsdir,wnam_fmt=bloomdrivers.getWindVarsYear(iyear,loc)
        if start==dt.datetime(2007,1,1):
            start=dt.datetime(2007,1,3) # the wind record for 2007 begins on Jan 3

        flist2=et.index_model_files(start,end,wopsdir,wnam_fmt,flen=1,ftype='None',tres=24)
        with xr.open_mfdataset(flist2['paths']) as winds:
            u_wind.append(np.array(winds.u_wind.isel(y=jW,x=iW)))
            v_wind.append(np.array(winds.v_wind.isel(y=jW,x=iW)))
            twind.append(np.array(winds.time_counter))
            solar.append(np.array(winds.solar.isel(y=jW,x=iW)))

    bio_time=np.concatenate(bio_time,axis=0)
    diat_alld=np.concatenate(diat_alld,axis=0)
    no3_alld=np.concatenate(no3_alld,axis=0)
    flag_alld=np.concatenate(flag_alld,axis=0)
    cili_alld=np.concatenate(cili_alld,axis=0)
    microzoo_alld=np.concatenate(microzoo_alld,axis=0)
    mesozoo_alld=np.concatenate(mesozoo_alld,axis=0)
    intdiat=np.concatenate(intdiat,axis=0)
    intphyto=np.concatenate(intphyto,axis=0)
    spar=np.concatenate(spar,axis=0)
    intmesoz=np.concatenate(intmesoz,axis=0)
    intmicroz=np.concatenate(intmicroz,axis=0)
    grid_time=np.concatenate(grid_time,axis=0)
    temp=np.concatenate(temp,axis=0)
    salinity=np.concatenate(salinity,axis=0)
    u_wind=np.concatenate(u_wind,axis=0)
    v_wind=np.concatenate(v_wind,axis=0)
    twind=np.concatenate(twind,axis=0)
    solar=np.concatenate(solar,axis=0)

    # Calculations based on saved values:
    no3_30to90m=np.sum(no3_alld[:,22:26]*e3t_0[:,22:26,ij,ii],1)/np.sum(e3t_0[:,22:26,ij,ii]) # thickness-weighted average nitrate over ~30-90 m (depth indices 22:26)
    sno3=no3_alld[:,0] # surface nitrate
    sdiat=diat_alld[:,0] # surface diatoms
    sflag=flag_alld[:,0] # surface flagellates
    scili=cili_alld[:,0] # surface ciliates
    intzoop=intmesoz+intmicroz # depth-integrated zooplankton
    fracdiat=intdiat/intphyto # fraction of depth-integrated phytoplankton that is diatoms
    zoop_alld=microzoo_alld+mesozoo_alld # zooplankton at all depths
    sphyto=sdiat+sflag+scili # surface phytoplankton
    phyto_alld=diat_alld+flag_alld+cili_alld # phytoplankton at all depths
    percdiat=sdiat/sphyto # fraction of surface phytoplankton that is diatoms

    # wind speed:
    wspeed=np.sqrt(u_wind**2 + v_wind**2)
    # wind direction in degrees CCW from east (the direction the wind blows toward):
    d = np.arctan2(v_wind, u_wind)
    winddirec=np.rad2deg(d + (d < 0)*2*np.pi)
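    # quick check of the convention: u_wind=1, v_wind=0 -> 0 deg (toward the east);
    # u_wind=0, v_wind=1 -> 90 deg (toward the north); u_wind=0, v_wind=-1 -> 270 deg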

    allvars=(bio_time,diat_alld,no3_alld,flag_alld,cili_alld,microzoo_alld,mesozoo_alld,
              intdiat,intphyto,spar,intmesoz,intmicroz,
            grid_time,temp,salinity,u_wind,v_wind,twind,solar,
            no3_30to90m,sno3,sdiat,sflag,scili,intzoop,fracdiat,zoop_alld,sphyto,phyto_alld,percdiat,
            wspeed,winddirec)
    pickle.dump(allvars,open(savepath,'wb'))
else:
    pvars=pickle.load(open(savepath,'rb'))
    (bio_time,diat_alld,no3_alld,flag_alld,cili_alld,microzoo_alld,mesozoo_alld,
              intdiat,intphyto,spar,intmesoz,intmicroz,
            grid_time,temp,salinity,u_wind,v_wind,twind,solar,
            no3_30to90m,sno3,sdiat,sflag,scili,intzoop,fracdiat,zoop_alld,sphyto,phyto_alld,percdiat,
            wspeed,winddirec)=pvars
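
A note on the pickle layout: allvars is one long positional tuple, so the unpacking order in the load branch must exactly match the dump order. An order-independent alternative is to pickle a dict keyed by variable name; a sketch only (the files saved here keep the tuple layout, and savepath_named is a hypothetical path):

savepath_named = savepath.replace('.pkl', '_named.pkl')  # hypothetical file
named = {'bio_time': bio_time, 'temp': temp, 'salinity': salinity,
         'wspeed': wspeed, 'winddirec': winddirec}  # a few of the variables above, for illustration
with open(savepath_named, 'wb') as f:
    pickle.dump(named, f)
with open(savepath_named, 'rb') as f:
    loaded = pickle.load(f)
wspeed = loaded['wspeed']  # access by name rather than by position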

Creating pickle files for location-specific mixing variables

[6]:
if recalc==True or not os.path.isfile(savepath4):
    basedir='/results2/SalishSea/nowcast-green.201905/'
    nam_fmt='nowcast'
    flen=1 # files contain 1 day of data each
    tres=24 # 1: hourly resolution; 24: daily resolution
    halocline=list() # daily average depth of halocline
    eddy=list() # daily average eddy diffusivity
    flist=et.index_model_files(startjan,endmar,basedir,nam_fmt,flen,"grid_T",tres)
    flist2=et.index_model_files(startjan,endmar,basedir,nam_fmt,flen,"grid_W",1)

    for filedate in flist['paths']:
        halocline.append(bloomdrivers.halo_de(filedate,ii,ij))

    for day in flist2['paths']:  # each grid_W file holds one day of hourly data
        with xr.open_dataset(day) as gridw:
            eddy.append(np.mean(np.array(gridw.vert_eddy_diff.isel(y=ij,x=ii)),axis=0)) # daily mean eddy diffusivity profile
            depth=np.array(gridw.depthw) # identical in every file; the last one is kept

    with xr.open_mfdataset(flist['paths']) as gridt:
        grid_time=np.array([pd.to_datetime(t)+dt.timedelta(minutes=30) for t in gridt.time_counter.values])
        temp=np.array(gridt.votemper.isel(y=ij,x=ii)) # all depths temperature
        salinity=np.array(gridt.vosaline.isel(y=ij,x=ii)) # all depths salinity

    allvars=(halocline,eddy,depth,grid_time,temp,salinity)
    pickle.dump(allvars,open(savepath4,'wb'))
else:
    pvars=pickle.load(open(savepath4,'rb'))
    (halocline,eddy,depth,grid_time,temp,salinity)=pvars
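
For a quick look at the mixing results, the daily halocline depths can be plotted against the grid times (a sketch; assumes one halocline value per daily file, as produced above):

fig, ax = plt.subplots(figsize=(8, 3))
ax.plot(grid_time, halocline)
ax.invert_yaxis()  # depth increases downward
ax.set_ylabel('Halocline depth (m)')
ax.set_title(f'{loc} {year}: daily halocline depth')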

Creating pickle files for bloom timing calculations

[7]:
if recalc==True or not os.path.isfile(savepath3):
    basedir='/results2/SalishSea/nowcast-green.201905/'
    nam_fmt='nowcast'
    flen=1 # files contain 1 day of data each
    ftype= 'ptrc_T' # load bio files
    tres=24 # 1: hourly resolution; 24: daily resolution
    flist=et.index_model_files(forbloomstart,forbloomend,basedir,nam_fmt,flen,ftype,tres)
    flist2=et.index_model_files(forbloomstart,forbloomend,basedir,nam_fmt,flen,"carp_T",tres)

    ik=0 # surface depth index
    with xr.open_mfdataset(flist['paths']) as bio:
        bio_time0=np.array([pd.to_datetime(t)+dt.timedelta(minutes=30) for t in bio.time_counter.values])
        sno30=np.array(bio.nitrate.isel(deptht=ik,y=ij,x=ii))
        sdiat0=np.array(bio.diatoms.isel(deptht=ik,y=ij,x=ii))
        sflag0=np.array(bio.flagellates.isel(deptht=ik,y=ij,x=ii))
        scili0=np.array(bio.ciliates.isel(deptht=ik,y=ij,x=ii))
        no3_alld0=np.array(bio.nitrate.isel(y=ij,x=ii))
        diat_alld0=np.array(bio.diatoms.isel(y=ij,x=ii))
        flag_alld0=np.array(bio.flagellates.isel(y=ij,x=ii))
        cili_alld0=np.array(bio.ciliates.isel(y=ij,x=ii))
        with xr.open_mfdataset(flist2['paths']) as carp:
            intdiat0=np.array(np.sum(bio.diatoms.isel(y=ij,x=ii)*carp.e3t.isel(y=ij,x=ii),1)) # depth-integrated diatoms
            intphyto0=np.array(np.sum((bio.diatoms.isel(y=ij,x=ii)+bio.flagellates.isel(y=ij,x=ii)\
                            +bio.ciliates.isel(y=ij,x=ii))*carp.e3t.isel(y=ij,x=ii),1))
    fracdiat0=intdiat0/intphyto0 # fraction of depth-integrated phytoplankton that is diatoms

    sphyto0=sdiat0+sflag0+scili0
    phyto_alld0=diat_alld0+flag_alld0+cili_alld0
    percdiat0=sdiat0/sphyto0 # fraction of surface phytoplankton that is diatoms

    pickle.dump((bio_time0,sno30,sdiat0,sflag0,scili0,diat_alld0,no3_alld0,flag_alld0,cili_alld0,phyto_alld0,\
                 intdiat0,intphyto0,fracdiat0,sphyto0,percdiat0),open(savepath3,'wb'))
else:
    bio_time0,sno30,sdiat0,sflag0,scili0,diat_alld0,no3_alld0,flag_alld0,cili_alld0,phyto_alld0,\
                 intdiat0,intphyto0,fracdiat0,sphyto0,percdiat0=pickle.load(open(savepath3,'rb'))
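
The bloom date itself is computed elsewhere from these saved series (via bloomdrivers). Purely as an illustration of how such a metric can be read off the arrays, a naive peak-based sketch, not the definition used in the analysis:

ipeak = np.argmax(sphyto0)  # index of the surface phytoplankton maximum
print('peak surface phytoplankton:', bio_time0[ipeak])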

Creating pickle files for non-location-specific variables:

[8]:
# define the Strait of Georgia (SoG) region:
fig, ax = plt.subplots(1,2,figsize = (6,6))
with xr.open_dataset('/data/vdo/MEOPAR/NEMO-forcing/grid/bathymetry_201702.nc') as bathy:
    bath=np.array(bathy.Bathymetry)
ax[0].contourf(bath,np.arange(0,250,10))
viz_tools.set_aspect(ax[0],coords='grid')
sogmask=np.copy(tmask[:,:,:,:]) # start from the full water mask, then zero out everything outside the SoG
sogmask[:,:,740:,:]=0
sogmask[:,:,700:,170:]=0
sogmask[:,:,550:,250:]=0
sogmask[:,:,:,302:]=0
sogmask[:,:,:400,:]=0
sogmask[:,:,:,:100]=0
#sogmask250[bath<250]=0
ax[1].contourf(np.ma.masked_where(sogmask[0,0,:,:]==0,bathy.Bathymetry),[0,100,250,550])
[8]:
<matplotlib.contour.QuadContourSet at 0x7f2f90145160>
[Figure: bathymetry (left) and the masked SoG region (right)]
[9]:
k250=32 # approximate index for 250 m
if recalc==True or not os.path.isfile(savepath2):

    basedir='/results2/SalishSea/nowcast-green.201905/'
    nam_fmt='nowcast'
    flen=1 # files contain 1 day of data each
    ftype= 'ptrc_T' # load bio files
    tres=24 # 1: hourly resolution; 24: daily resolution
    flist=et.index_model_files(startjan,endmar,basedir,nam_fmt,flen,ftype,tres)
    flist3 = et.index_model_files(startjan,endmar,basedir,nam_fmt,flen,"grid_T",tres)
    fliste3t = et.index_model_files(startjan,endmar,basedir,nam_fmt,flen,"carp_T",tres)

    ik=0
    with xr.open_mfdataset(flist['paths']) as bio:
        no3_past250m=np.array(np.sum(np.sum(np.sum(bio.nitrate.isel(deptht=slice(k250,40))*sogmask[:,k250:,:,:]*e3t_0[:,k250:,:,:],3),2),1)\
                /np.sum(sogmask[0,k250:,:,:]*e3t_0[0,k250:,:,:])) # thickness-weighted mean nitrate below ~250 m over the SoG region

    if iyear !=2020:
        # read the Fraser River flow file (Hope gauge):
        dfFra=pd.read_csv('/ocean/eolson/MEOPAR/obs/ECRivers/Flow/FraserHopeDaily__Feb-8-2021_06_29_29AM.csv',
                          skiprows=1)
        # the original file mixes flow and water level data in the same 'Value' field:
        # keep only the flow rows (PARAM=1) and drop the water level rows (PARAM=2)
        # flow units are m3/s
        # DD is the year day (i.e. 1 is Jan 1)
        dfFra.drop(dfFra.loc[dfFra.PARAM==2].index,inplace=True)

        # rename the 'Value' column to 'Flow' now that all the water level rows are removed
        dfFra.rename(columns={'Value':'Flow'}, inplace=True) # inplace=True modifies the original dataframe

        # no time-of-day information, so use dt.date
        dfFra['Date']=[dt.date(iyr,1,1)+dt.timedelta(days=idd-1) for iyr, idd in zip(dfFra['YEAR'],dfFra['DD'])]
        # Jan 1 of the YEAR column plus (DD-1) days, since DD counts Jan 1 as day 1
        dfFra.head(2)

        # select portion of dataframe in desired date range
        dfFra2=dfFra.loc[(dfFra.Date>=startjan.date())&(dfFra.Date<=fraserend.date())]
        riv_time=dfFra2['Date'].values
        rivFlow=dfFra2['Flow'].values
        # startjan and fraserend are datetime objects; .date() converts them for comparison with the Date column
    else:
        dfFra=pd.read_csv('/data/dlatorne/SOG-projects/SOG-forcing/ECget/Fraser_flow',sep=r'\s+',
                  comment='#',names=('Year','Month','Day','Flow'))
        dfFra['Date']=[dt.datetime(int(y),int(m),int(d)) for ind,(y,m,d,f) in dfFra.iterrows()]
        dfFra2=dfFra.loc[(dfFra.Date>=startjan)&(dfFra.Date<=fraserend)]
        riv_time=dfFra2['Date'].values
        rivFlow=dfFra2['Flow'].values

    allvars=(no3_past250m,riv_time,rivFlow)
    pickle.dump(allvars,open(savepath2,'wb'))
else:
    pvars=pickle.load(open(savepath2,'rb'))
    (no3_past250m,riv_time,rivFlow)=pvars
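
After the full notebook has run, a quick check (not part of the original workflow) confirms that all four pickle files were written for this year and location:

for p in (savepath, savepath2, savepath3, savepath4):
    print(os.path.isfile(p), p)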