Skip to contents

# ObsPack dataset categories to look for when indexing the files.
cate <- c(
  "aircraft-pfp",
  "aircraft-insitu",
  "aircraft-flask",
  "surface-insitu",
  "surface-flask",
  "surface-pfp",
  "tower-insitu",
  "aircore",
  "shipboard-insitu",
  "shipboard-flask"
)

# Directory holding the ObsPack NetCDF files.
# NOTE(review): this is a machine-specific path; adjust it to your local copy.
obs <- "Z:/torf/obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08/data/nc/"
# Build the index of files found under `obs` for the categories in `cate`.
index <- obs_summary(obs = obs, 
                     categories = cate)
#> Number of files of index: 429
#>               sector     N
#>               <char> <int>
#>  1:     aircraft-pfp    40
#>  2:  aircraft-insitu    15
#>  3:    surface-flask   106
#>  4:   surface-insitu   174
#>  5:   aircraft-flask     4
#>  6:          aircore     1
#>  7:      surface-pfp    33
#>  8:     tower-insitu    51
#>  9:  shipboard-flask     4
#> 10: shipboard-insitu     1
#> 11:    Total sectors   429
#> Detected 190 files with agl
#> Detected 239 files without agl

Now we read the surface-flask data using the function obs_read_nc. solar_time is included for surface data, so we set that argument to TRUE.

# Category to process; it is reused further down to name the output files.
datasetid <- "surface-flask"

# Read every indexed file of that category, asking for the solar-time columns.
df <- obs_read_nc(
  index = index,
  categories = datasetid,
  solar_time = TRUE,
  verbose = TRUE
)
#> Searching surface-flask...
#> 1: ch4_abp_surface-flask_1_representative.nc
#> 2: ch4_alt_surface-flask_1_representative.nc
#> 3: ch4_alt_surface-flask_2_representative.nc
#> 4: ch4_alt_surface-flask_45_representative.nc
#> 5: ch4_ams_surface-flask_1_representative.nc
#> 6: ch4_amt_surface-flask_1_representative.nc
#> 7: ch4_amy_surface-flask_1_representative.nc
#> 8: ch4_ara_surface-flask_2_representative.nc
#> 9: ch4_asc_surface-flask_1_representative.nc
#> 10: ch4_ask_surface-flask_1_representative.nc
#> 11: ch4_avi_surface-flask_1_representative.nc
#> 12: ch4_azr_surface-flask_1_representative.nc
#> 13: ch4_bal_surface-flask_1_representative.nc
#> 14: ch4_bhd_surface-flask_1_representative.nc
#> 15: ch4_bik_surface-flask_45_representative.nc
#> 16: ch4_bkt_surface-flask_1_representative.nc
#> 17: ch4_bme_surface-flask_1_representative.nc
#> 18: ch4_bmw_surface-flask_1_representative.nc
#> 19: ch4_bnt_surface-flask_1_representative.nc
#> 20: ch4_brw_surface-flask_1_representative.nc
#> 21: ch4_bsc_surface-flask_1_representative.nc
#> 22: ch4_cba_surface-flask_1_representative.nc
#> 23: ch4_cfa_surface-flask_2_representative.nc
#> 24: ch4_cgo_surface-flask_1_representative.nc
#> 25: ch4_cgo_surface-flask_2_representative.nc
#> 26: ch4_cgo_surface-flask_45_representative.nc
#> 27: ch4_chr_surface-flask_1_representative.nc
#> 28: ch4_cib_surface-flask_1_representative.nc
#> 29: ch4_cmo_surface-flask_1_representative.nc
#> 30: ch4_cpa_surface-flask_2_representative.nc
#> 31: ch4_cpt_surface-flask_1_representative.nc
#> 32: ch4_cri_surface-flask_2_representative.nc
#> 33: ch4_crz_surface-flask_1_representative.nc
#> 34: ch4_cvo_surface-flask_45_representative.nc
#> 35: ch4_cya_surface-flask_2_representative.nc
#> 36: ch4_dsi_surface-flask_1_representative.nc
#> 37: ch4_eic_surface-flask_1_representative.nc
#> 38: ch4_esp_surface-flask_2_representative.nc
#> 39: ch4_gmi_surface-flask_1_representative.nc
#> 40: ch4_goz_surface-flask_1_representative.nc
#> 41: ch4_gpa_surface-flask_2_representative.nc
#> 42: ch4_gvn_surface-flask_45_representative.nc
#> 43: ch4_hba_surface-flask_1_representative.nc
#> 44: ch4_hpb_surface-flask_1_representative.nc
#> 45: ch4_hsu_surface-flask_1_representative.nc
#> 46: ch4_hun_surface-flask_1_representative.nc
#> 47: ch4_ice_surface-flask_1_representative.nc
#> 48: ch4_izo_surface-flask_1_representative.nc
#> 49: ch4_jfj_surface-flask_45_representative.nc
#> 50: ch4_key_surface-flask_1_representative.nc
#> 51: ch4_kjn_surface-flask_45_representative.nc
#> 52: ch4_kum_surface-flask_1_representative.nc
#> 53: ch4_kzd_surface-flask_1_representative.nc
#> 54: ch4_kzm_surface-flask_1_representative.nc
#> 55: ch4_lef_surface-flask_1_representative.nc
#> 56: ch4_llb_surface-flask_1_representative.nc
#> 57: ch4_lln_surface-flask_1_representative.nc
#> 58: ch4_lmp_surface-flask_1_representative.nc
#> 59: ch4_maa_surface-flask_2_representative.nc
#> 60: ch4_mbc_surface-flask_1_representative.nc
#> 61: ch4_mex_surface-flask_1_representative.nc
#> 62: ch4_mhd_surface-flask_1_representative.nc
#> 63: ch4_mid_surface-flask_1_representative.nc
#> 64: ch4_mkn_surface-flask_1_representative.nc
#> 65: ch4_mlo_surface-flask_1_representative.nc
#> 66: ch4_mlo_surface-flask_2_representative.nc
#> 67: ch4_mqa_surface-flask_2_representative.nc
#> 68: ch4_nam_surface-flask_45_representative.nc
#> 69: ch4_nat_surface-flask_1_representative.nc
#> 70: ch4_nmb_surface-flask_1_representative.nc
#> 71: ch4_nwr_surface-flask_1_representative.nc
#> 72: ch4_obn_surface-flask_1_representative.nc
#> 73: ch4_opw_surface-flask_1_representative.nc
#> 74: ch4_ota_surface-flask_2_representative.nc
#> 75: ch4_oxk_surface-flask_1_representative.nc
#> 76: ch4_oxk_surface-flask_45_representative.nc
#> 77: ch4_pal_surface-flask_1_representative.nc
#> 78: ch4_psa_surface-flask_1_representative.nc
#> 79: ch4_pta_surface-flask_1_representative.nc
#> 80: ch4_rpb_surface-flask_1_representative.nc
#> 81: ch4_sdz_surface-flask_1_representative.nc
#> 82: ch4_sey_surface-flask_1_representative.nc
#> 83: ch4_sgi_surface-flask_1_representative.nc
#> 84: ch4_sgp_surface-flask_1_representative.nc
#> 85: ch4_shm_surface-flask_1_representative.nc
#> 86: ch4_sis_surface-flask_2_representative.nc
#> 87: ch4_sis_surface-flask_45_representative.nc
#> 88: ch4_smo_surface-flask_1_representative.nc
#> 89: ch4_spo_surface-flask_1_representative.nc
#> 90: ch4_spo_surface-flask_2_representative.nc
#> 91: ch4_stm_surface-flask_1_representative.nc
#> 92: ch4_sum_surface-flask_1_representative.nc
#> 93: ch4_syo_surface-flask_1_representative.nc
#> 94: ch4_tac_surface-flask_1_representative.nc
#> 95: ch4_tap_surface-flask_1_representative.nc
#> 96: ch4_thd_surface-flask_1_representative.nc
#> 97: ch4_tik_surface-flask_1_representative.nc
#> 98: ch4_ush_surface-flask_1_representative.nc
#> 99: ch4_uta_surface-flask_1_representative.nc
#> 100: ch4_uum_surface-flask_1_representative.nc
#> 101: ch4_vrs_surface-flask_45_representative.nc
#> 102: ch4_wis_surface-flask_1_representative.nc
#> 103: ch4_wkt_surface-flask_1_representative.nc
#> 104: ch4_wlg_surface-flask_1_representative.nc
#> 105: ch4_zep_surface-flask_1_representative.nc
#> 106: ch4_zot_surface-flask_45_representative.nc

Now we check the data

# Print the table that was just read to inspect its columns and first/last rows.
df
#>          year month   day  hour minute second       time start_time
#>         <int> <int> <int> <int>  <int>  <int>      <int>      <int>
#>      1:  2006    10    27    15     50      0 1161964200 1161964200
#>      2:  2006    11     3    16     24      0 1162571040 1162571040
#>      3:  2006    11    18    15     47      0 1163864820 1163864820
#>      4:  2006    11    24    15     54      0 1164383640 1164383640
#>      5:  2006    12     1    17      2      0 1164992520 1164992520
#>     ---                                                            
#> 107797:  2021    11    28    12     30      0 1638102600 1638102600
#> 107798:  2021    12     5     8     56      0 1638694560 1638694560
#> 107799:  2021    12    12     9     46      0 1639302360 1639302360
#> 107800:  2021    12    19     7     59      0 1639900740 1639900740
#> 107801:  2021    12    26    12     51      0 1640523060 1640523060
#>         midpoint_time             datetime time_decimal time_interval
#>                 <int>               <char>        <num>         <int>
#>      1:    1161964200 2006-10-27T15:50:00Z     2006.821          3600
#>      2:    1162571040 2006-11-03T16:24:00Z     2006.840          3600
#>      3:    1163864820 2006-11-18T15:47:00Z     2006.881          3600
#>      4:    1164383640 2006-11-24T15:54:00Z     2006.898          3600
#>      5:    1164992520 2006-12-01T17:02:00Z     2006.917          3600
#>     ---                                                              
#> 107797:    1638102600 2021-11-28T12:30:00Z     2021.908          3600
#> 107798:    1638694560 2021-12-05T08:56:00Z     2021.927          3600
#> 107799:    1639302360 2021-12-12T09:46:00Z     2021.946          3600
#> 107800:    1639900740 2021-12-19T07:59:00Z     2021.965          3600
#> 107801:    1640523060 2021-12-26T12:51:00Z     2021.985          3600
#>                value value_unc nvalue value_std_dev latitude longitude altitude
#>                <num>     <num>  <int>         <num>    <num>     <num>    <num>
#>      1: 1.736280e-06  2.05e-09      2  7.636753e-10   -12.76    -38.16        6
#>      2: 1.739605e-06  2.05e-09      2  5.444722e-10   -12.76    -38.16        6
#>      3: 1.731770e-06  2.05e-09      2  1.159655e-09   -12.76    -38.16        6
#>      4: 1.733130e-06  2.05e-09      2  1.131371e-10   -12.76    -38.16        6
#>      5: 1.745230e-06  2.05e-09      2  5.232590e-10   -12.76    -38.16        6
#>     ---                                                                        
#> 107797: 2.039500e-06        NA      1  8.600000e-10    60.75     89.38      411
#> 107798: 2.102740e-06        NA      1  7.900000e-10    60.75     89.38      411
#> 107799: 2.087280e-06        NA      1  3.100000e-10    60.75     89.38      411
#> 107800: 2.067420e-06        NA      1  2.000000e-09    60.75     89.38      411
#> 107801: 2.024140e-06        NA      1  6.000000e-09    60.75     89.38      411
#>         elevation intake_height qcflag instrument   analysis_datetime method
#>             <num>         <num> <char>     <char>              <char> <char>
#>      1:         1             5    ...         H4 2007-04-02T12:43:00      G
#>      2:         1             5    ...         H4 2007-02-20T13:29:00      G
#>      3:         1             5    ...         H4 2007-02-20T13:00:00      G
#>      4:         1             5    ...         H4 2007-02-20T09:55:00      G
#>      5:         1             5    ...         H4 2007-02-16T15:39:00      G
#>     ---                                                                     
#> 107797:       111           300      0       <NA>                <NA>   <NA>
#> 107798:       111           300      0       <NA>                <NA>   <NA>
#> 107799:       111           300      0       <NA>                <NA>   <NA>
#> 107800:       111           300      0       <NA>                <NA>   <NA>
#> 107801:       111           300      0       <NA>                <NA>   <NA>
#>          event_number air_sample_container_id obs_flag obspack_num
#>                <char>                  <char>    <int>       <int>
#>      1: 231671,231672         1779-99,1780-99        1     1033168
#>      2: 229105,229106         6717-66,6718-66        1     1033169
#>      3: 229103,229104         2969-99,2970-99        1     1033170
#>      4: 229081,229082         3217-99,3218-99        1     1033171
#>      5: 229079,229080           759-99,760-99        1     1033172
#>     ---                                                           
#> 107797:          <NA>                    <NA>        1    10187962
#> 107798:          <NA>                    <NA>        1    10187963
#> 107799:          <NA>                    <NA>        1    10187964
#> 107800:          <NA>                    <NA>        1    10187965
#> 107801:          <NA>                    <NA>        1    10187966
#>                                                                                            obspack_id
#>                                                                                                <char>
#>      1:   obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033168
#>      2:   obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033169
#>      3:   obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033170
#>      4:   obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033171
#>      5:   obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033172
#>     ---                                                                                              
#> 107797: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187962
#> 107798: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187963
#> 107799: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187964
#> 107800: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187965
#> 107801: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187966
#>         unique_sample_location_num year_st month_st day_st hour_st minute_st
#>                              <int>   <int>    <int>  <int>   <int>     <int>
#>      1:                   21955180    2006       10     27      13        33
#>      2:                   21955181    2006       11      3      14         7
#>      3:                   21955182    2006       11     18      13        28
#>      4:                   21955183    2006       11     24      13        34
#>      5:                   21955184    2006       12      1      14        39
#>     ---                                                                     
#> 107797:                   54363784    2021       11     28      18        38
#> 107798:                   54363785    2021       12      5      15         2
#> 107799:                   54363786    2021       12     12      15        49
#> 107800:                   54887377    2021       12     19      13        59
#> 107801:                   54363787    2021       12     26      18        47
#>         second_st          scale site_elevation_unit dataset_project
#>             <int>         <char>              <char>          <char>
#>      1:        36 WMO CH4 X2004A                masl   surface-flask
#>      2:        42 WMO CH4 X2004A                masl   surface-flask
#>      3:        40 WMO CH4 X2004A                masl   surface-flask
#>      4:         4 WMO CH4 X2004A                masl   surface-flask
#>      5:        42 WMO CH4 X2004A                masl   surface-flask
#>     ---                                                             
#> 107797:        56 WMO CH4 X2004A                masl   surface-flask
#> 107798:        19 WMO CH4 X2004A                masl   surface-flask
#> 107799:        21 WMO CH4 X2004A                masl   surface-flask
#> 107800:         9 WMO CH4 X2004A                masl   surface-flask
#> 107801:        54 WMO CH4 X2004A                masl   surface-flask
#>         dataset_selection_tag       site_name site_elevation site_latitude
#>                        <char>          <char>          <num>         <num>
#>      1:        representative Arembepe, Bahia              1        -12.77
#>      2:        representative Arembepe, Bahia              1        -12.77
#>      3:        representative Arembepe, Bahia              1        -12.77
#>      4:        representative Arembepe, Bahia              1        -12.77
#>      5:        representative Arembepe, Bahia              1        -12.77
#>     ---                                                                   
#> 107797:        representative Zotino, Siberia            111         60.75
#> 107798:        representative Zotino, Siberia            111         60.75
#> 107799:        representative Zotino, Siberia            111         60.75
#> 107800:        representative Zotino, Siberia            111         60.75
#> 107801:        representative Zotino, Siberia            111         60.75
#>         site_longitude site_country site_code site_utc2lst lab_1_abbr
#>                  <num>       <char>    <char>        <num>     <char>
#>      1:         -38.17       Brazil       ABP           -3       NOAA
#>      2:         -38.17       Brazil       ABP           -3       NOAA
#>      3:         -38.17       Brazil       ABP           -3       NOAA
#>      4:         -38.17       Brazil       ABP           -3       NOAA
#>      5:         -38.17       Brazil       ABP           -3       NOAA
#>     ---                                                              
#> 107797:          89.38       Russia       ZOT            7    MPI-BGC
#> 107798:          89.38       Russia       ZOT            7    MPI-BGC
#> 107799:          89.38       Russia       ZOT            7    MPI-BGC
#> 107800:          89.38       Russia       ZOT            7    MPI-BGC
#> 107801:          89.38       Russia       ZOT            7    MPI-BGC
#>         dataset_calibration_scale altitude_final type_altitude
#>                            <char>          <num>         <num>
#>      1:            WMO CH4 X2004A              5            NA
#>      2:            WMO CH4 X2004A              5            NA
#>      3:            WMO CH4 X2004A              5            NA
#>      4:            WMO CH4 X2004A              5            NA
#>      5:            WMO CH4 X2004A              5            NA
#>     ---                                                       
#> 107797:            WMO CH4 X2004A            300             0
#> 107798:            WMO CH4 X2004A            300             0
#> 107799:            WMO CH4 X2004A            300             0
#> 107800:            WMO CH4 X2004A            300             0
#> 107801:            WMO CH4 X2004A            300             0
#>         dataset_intake_ht_unit
#>                         <char>
#>      1:                   <NA>
#>      2:                   <NA>
#>      3:                   <NA>
#>      4:                   <NA>
#>      5:                   <NA>
#>     ---                       
#> 107797:                   magl
#> 107798:                   magl
#> 107799:                   magl
#> 107800:                   magl
#> 107801:                   magl

Now we can process the data. We first filter for observations within our spatial domain:

Checks and definitions

# Spatial domain: bounds compared against the latitude/longitude columns.
north <- 80
south <- 10
west <- -170
east <- -50
# Upper bound applied to altitude_final.
max_altitude <- 8000
# Year of observations to keep.
yy <- 2020
# Solar-time hour(s) to keep; compared against hour_st further below.
evening <- 14

We check altitude, intake_height, altitude_final and elevation. altitude_final is a column derived from intake_height, added to match the column produced by obs_read for text files.

# Show the height-related columns next to the site identification columns.
df[,
   c("altitude",
     "altitude_final",
     "intake_height",
     "elevation",
     "dataset_selection_tag",
     "site_name"),
   with = FALSE]
#>         altitude altitude_final intake_height elevation dataset_selection_tag
#>            <num>          <num>         <num>     <num>                <char>
#>      1:        6              5             5         1        representative
#>      2:        6              5             5         1        representative
#>      3:        6              5             5         1        representative
#>      4:        6              5             5         1        representative
#>      5:        6              5             5         1        representative
#>     ---                                                                      
#> 107797:      411            300           300       111        representative
#> 107798:      411            300           300       111        representative
#> 107799:      411            300           300       111        representative
#> 107800:      411            300           300       111        representative
#> 107801:      411            300           300       111        representative
#>               site_name
#>                  <char>
#>      1: Arembepe, Bahia
#>      2: Arembepe, Bahia
#>      3: Arembepe, Bahia
#>      4: Arembepe, Bahia
#>      5: Arembepe, Bahia
#>     ---                
#> 107797: Zotino, Siberia
#> 107798: Zotino, Siberia
#> 107799: Zotino, Siberia
#> 107800: Zotino, Siberia
#> 107801: Zotino, Siberia

The temporal range of data is

# First and last year present in the data.
range(df$year)
#> [1] 1983 2021

We also check for dimensions of data

# Number of rows and columns before filtering.
dim(df)
#> [1] 107801     53

Filters

# Temporal filter: keep only the year of interest.
df <- df[year == yy]

# Spatial filter: strictly inside the lat/lon box and below the altitude cap.
df <- df[
  latitude > south & latitude < north &
    longitude > west & longitude < east &
    altitude_final < max_altitude
]
dim(df)
#> [1] 799  53

Towers can have observations at different heights. Here we need to select one site with the observations registered at the highest height. The column with the height is named altitude_final and the max altitude was named max_altitude.

# Highest sampling height registered at each site.
# The result column is named directly in `j`, so no rename is needed
# afterwards; grouping by site_code already yields one row per site, so
# no de-duplication is needed either.
dfa <- df[,
          .(max_altitude = max(altitude_final)),
          by = site_code]
dfa
#>     site_code max_altitude
#>        <char>        <num>
#>  1:       BMW         21.3
#>  2:       BRW         16.5
#>  3:       CBA         35.7
#>  4:       KEY          5.0
#>  5:       KUM          5.0
#>  6:       LEF        396.0
#>  7:       MEX          5.0
#>  8:       MLO         40.0
#>  9:       NWR          3.0
#> 10:       RPB          5.0
#> 11:       SGP         60.0
#> 12:       UTA          5.0

Key Time

Here we need to add time columns. The function obs_addtime adds the time columns timeUTC, timeUTC_start, which shows the start time of each observation, and timeUTC_end, which shows the end time of each observation.

# Add the UTC time columns (timeUTC, timeUTC_start, timeUTC_end) to the table.
df2 <- obs_addtime(df)
#> Adding timeUTC
#> Adding timeUTC_start
#> Adding timeUTC_end
#> Found time_interval

Then we need a key_time to aggregate data. This can be done using UTC, solar, or local time. The normal approach is using afternoon solar or local time.

Hierarchy of solar or local time

  1. Solar time
  2. Local time with columns site_utc2lst
  3. Local time longitude

solar time (default)

Here we select the hours of interest and then aggregate the data by year, month and day of solar time. In this way, we will have one value per day. However, this approach is not appropriate for aircraft data, which are aggregated every 10 or 20 seconds. Hence, we need to aggregate the data by a single time column. This also helps to generate the receptor info files, which include hour, minute and second. Hence, we need to add a solar or local time column.

# Store the solar time returned by obs_addstime() as a new column.
df2$solar_time <- obs_addstime(df2)

local time with column site_utc2lst

Then we need to identify the local time with the function add_ltime. This is important for identifying observations in the evening in local time. add_ltime uses two methods: first, it identifies the time difference with UTC using the metadata column “site_utc2lst”. This is the option used if solar time is not available. Now we need to cut the solar time for the frequency needed.

local time longitude

If this information is not available, with the aircrafts for instance, the local time is calculated with an approximation based on longitude:

$$lt = UTC + \frac{longitude}{15} \times 60 \times 60$$

where $lt$ is the local time, $UTC$ is the UTC time and $longitude$ is the longitude coordinate. Then, the time is cut every two hours. Now, we identify the local time to select evening hours.

Cut time

Now that we have the key time column, we can cut it accordingly.

# Bin the solar time into 1-hour intervals and keep the bin label as text.
df2$solar_time_cut <- as.character(
  cut(x = df2$solar_time, breaks = "1 hour")
)

Now we can check the solar time and the cut solar time. Please note that solar_time_cut is the column that will be used to aggregate the data.

Now we filter for the required solar time, in this case 14.

# Keep only rows whose solar-time hour (hour_st) is one of the hours in `evening`.
df3 <- df2[hour_st %in% evening]
# Compare the solar time with its hourly-cut version.
df3[, c("solar_time", "solar_time_cut")]
#>              solar_time      solar_time_cut
#>                  <POSc>              <char>
#>  1: 2020-03-10 14:22:19 2020-03-10 14:00:00
#>  2: 2020-04-01 14:28:02 2020-04-01 14:00:00
#>  3: 2020-04-14 14:52:45 2020-04-14 14:00:00
#>  4: 2020-10-02 14:24:04 2020-10-02 14:00:00
#>  5: 2020-10-02 14:44:04 2020-10-02 14:00:00
#>  6: 2020-10-05 14:15:59 2020-10-05 14:00:00
#>  7: 2020-10-30 14:06:56 2020-10-30 14:00:00
#>  8: 2020-10-30 14:24:56 2020-10-30 14:00:00
#>  9: 2020-11-10 14:04:10 2020-11-10 14:00:00
#> 10: 2020-11-10 14:22:10 2020-11-10 14:00:00
#> 11: 2020-03-27 14:23:57 2020-03-27 14:00:00
#> 12: 2020-04-22 14:09:09 2020-04-22 14:00:00
#> 13: 2020-05-28 14:19:16 2020-05-28 14:00:00
#> 14: 2020-06-09 14:38:15 2020-06-09 14:00:00
#> 15: 2020-06-18 14:29:22 2020-06-18 14:00:00
#> 16: 2020-06-25 14:18:50 2020-06-25 14:00:00
#> 17: 2020-07-15 14:18:25 2020-07-15 14:00:00
#> 18: 2020-12-12 14:41:10 2020-12-12 14:00:00
#> 19: 2020-03-25 14:30:43 2020-03-25 14:00:00
#> 20: 2020-04-09 14:27:40 2020-04-09 14:00:00
#> 21: 2020-04-23 14:23:37 2020-04-23 14:00:00
#> 22: 2020-04-27 14:54:16 2020-04-27 14:00:00
#> 23: 2020-05-18 14:16:12 2020-05-18 14:00:00
#> 24: 2020-06-15 14:03:53 2020-06-15 14:00:00
#> 25: 2020-06-15 14:17:53 2020-06-15 14:00:00
#> 26: 2020-06-22 14:24:21 2020-06-22 14:00:00
#> 27: 2020-06-22 14:41:21 2020-06-22 14:00:00
#> 28: 2020-08-05 14:25:26 2020-08-05 14:00:00
#> 29: 2020-08-05 14:39:26 2020-08-05 14:00:00
#> 30: 2020-11-02 14:18:47 2020-11-02 14:00:00
#> 31: 2020-11-02 14:32:47 2020-11-02 14:00:00
#> 32: 2020-11-12 14:13:45 2020-11-12 14:00:00
#> 33: 2020-11-27 14:14:50 2020-11-27 14:00:00
#> 34: 2020-11-27 14:28:50 2020-11-27 14:00:00
#> 35: 2020-12-22 14:11:13 2020-12-22 14:00:00
#> 36: 2020-12-22 14:28:13 2020-12-22 14:00:00
#> 37: 2020-02-18 14:27:33 2020-02-18 14:00:00
#> 38: 2020-01-06 14:27:37 2020-01-06 14:00:00
#> 39: 2020-04-22 14:26:55 2020-04-22 14:00:00
#> 40: 2020-05-05 14:25:44 2020-05-05 14:00:00
#> 41: 2020-05-22 14:34:41 2020-05-22 14:00:00
#> 42: 2020-06-04 14:51:57 2020-06-04 14:00:00
#> 43: 2020-07-09 14:04:58 2020-07-09 14:00:00
#> 44: 2020-08-26 14:52:18 2020-08-26 14:00:00
#> 45: 2020-09-26 14:44:19 2020-09-26 14:00:00
#> 46: 2020-12-13 14:33:29 2020-12-13 14:00:00
#> 47: 2020-12-31 14:17:13 2020-12-31 14:00:00
#>              solar_time      solar_time_cut

At this point we can calculate the averages of several columns by the cut time. The function obs_agg does this aggregation, as shown in the following lines of code. The argument gby establishes the function used to aggregate cols. We need to aggregate the data by date (year, month, day), because it is already filtered by the hours of interest. Then, we will have one observation per day.

As standard, let us define key_time from the solar time, using the cut column solar_time_cut. The obs_agg function will aggregate the desired data by that column.

# Use the hourly-cut solar time as the key_time column.
df3$key_time <- df3$solar_time_cut

# Aggregate the listed columns with obs_agg().
# NOTE(review): byalt = TRUE appears to keep sampling heights separate
# ("Selecting by alt" is printed) -- confirm against the obs_agg documentation.
df4 <- obs_agg(dt = df3,
               cols = c("value",
                        "latitude",
                        "longitude",
                        "site_utc2lst"),
               verbose = TRUE,
               byalt = TRUE)
#> Selecting by alt
#> Adding time

Here we add the column max_altitude to identify the max altitude by site.

# Add, by reference, the highest altitude_final found for each site.
df4[, max_altitude := max(altitude_final), by = site_code]

# Distinct site / height combinations next to the per-site maximum.
unique(df4[, c("site_code", "altitude_final", "max_altitude")])
#>    site_code altitude_final max_altitude
#>       <char>          <num>        <num>
#> 1:       BRW            5.0         16.5
#> 2:       BRW           16.5         16.5
#> 3:       KEY            5.0          5.0
#> 4:       KUM            5.0          5.0
#> 5:       NWR            3.0          3.0
#> 6:       UTA            5.0          5.0

Master

Before generating the receptors list, we have the database with all the required information

# `master` is the aggregated table from which the output files are written.
master <- df4

We may replace missing values with nine nines (999999999). Here, this step is commented out.

#master[is.na(master)] <- 999999999

We transform the time variables to character and round the coordinates to 4 decimal places

# Store the time columns as character so they are written as plain text.
# NOTE(review): the master table printed further down shows local_time as NA
# in every row - confirm that this is expected.
master$timeUTC <- as.character(master$timeUTC)
master$local_time <- as.character(master$local_time)
# Round the coordinates to 4 decimal places.
master$latitude <- round(master$latitude, 4)
master$longitude <- round(master$longitude, 4)

Save master

Finally we save the master file

# Temporary path used as the common prefix of every output file name.
out <- tempfile()

txt

# Report the destination, then write master as a space-separated text file.
message(paste0(out, "_", datasetid, ".txt\n"))
fwrite(
  x = master,
  file = paste0(out, "_", datasetid, ".txt"),
  sep = " "
)
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.txt

csv

# Report the destination, then write master as a comma-separated file.
message(paste0(out, "_", datasetid, ".csv\n"))
fwrite(
  x = master,
  file = paste0(out, "_", datasetid, ".csv"),
  sep = ","
)
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.csv

csvy

CSVY files are CSV files with a YAML header that stores metadata alongside the tabulated text data

# Build the metadata lines that go into the YAML header of the csvy file.
cat("\nAdding notes in csvy:\n")
notes <- c(
  paste0("sector: ", datasetid),
  paste0("timespan: ", yy),
  paste0(
    "spatial_limits: north = ", north,
    ", south = ", south,
    ", east = ", east,
    ", west = ", west
  ),
  paste0("altitude: < ", max_altitude),
  paste0("hours: ", evening),
  "local_time: used solar_time"
)

# Print the notes, one per line.
cat(notes, sep = "\n")

# Report the destination, then write master together with the notes.
message(paste0(out, "_", datasetid, ".csvy\n"))
obs_write_csvy(
  dt = master,
  notes = notes,
  out = paste0(out, "_", datasetid, ".csvy")
)
#> Adding notes in csvy:
#> sector: surface-insitu
#> timespan: 2020
#> spatial_limits: north = 80, south = 10, east = -50, west = -170
#> data: Data averaged every 20 seconds
#> altitude: < 8000
#> hours: 14
#> local_time: used solar_time
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.csvy
# Read the csvy file back from the path it was just written to.
obs_read_csvy(paste0(out,"_", datasetid, ".csvy"))
#>  [1] "---"                                                              
#>  [2] "name: Metadata "                                                  
#>  [3] "sector: surface-flask"                                            
#>  [4] "timespan: 2020"                                                   
#>  [5] "spatial_limits: north = 80, south = 10, east = -50, west = -170"  
#>  [6] "data: Data averaged every 20 seconds"                             
#>  [7] "altitude: < 8000"                                                 
#>  [8] "hours: 14"                                                        
#>  [9] "local_time: used solar_time"                                      
#> [10] "structure: "                                                      
#> [11] "Classes 'data.table' and 'data.frame':\t41 obs. of  20 variables:"
#> [12] " $ timeUTC                  : chr  \"2020-03-10 14:00:00\" \".."  
#> [13] " $ site_code                : chr  \"BRW\" \"BRW\" ..."           
#> [14] " $ altitude_final           : num  5 5 5 16.5 5 ..."              
#> [15] " $ type_altitude            : num  NA NA NA NA NA ..."            
#> [16] " $ lab_1_abbr               : chr  \"NOAA\" \"NOAA\" ..."         
#> [17] " $ dataset_calibration_scale: chr  \"WMO CH4 X2004A\" \"WMO \".." 
#> [18] " $ value                    : num  1.98e-06 1.98e-06 ..."         
#> [19] " $ latitude                 : num  71.3 71.3 ..."                 
#> [20] " $ longitude                : num  -157 -157 ..."                 
#> [21] " $ site_utc2lst             : num  -9 -9 -9 -9 -9 ..."            
#> [22] " $ year                     : int  2020 2020 2020 2020 202.."     
#> [23] " $ month                    : int  3 4 4 10 10 ..."               
#> [24] " $ day                      : chr  \"10\" \"01\" ..."             
#> [25] " $ hour                     : int  14 14 14 14 14 ..."            
#> [26] " $ minute                   : int  0 0 0 0 0 ..."                 
#> [27] " $ second                   : int  0 0 0 0 0 ..."                 
#> [28] " $ time                     : num  1.58e+09 1.59e+09 ..."         
#> [29] " $ time_decimal             : num  2020 2020 ..."                 
#> [30] " $ max_altitude             : num  16.5 16.5 16.5 16.5 16..."     
#> [31] " $ local_time               : chr  NA NA ..."                     
#> [32] " - attr(*, \".internal.selfref\")=<externalptr> "                 
#> [33] "NULL"                                                             
#> [34] "---"
#>                 timeUTC site_code altitude_final type_altitude lab_1_abbr
#>                  <POSc>    <char>          <num>        <lgcl>     <char>
#>  1: 2020-03-10 14:00:00       BRW            5.0            NA       NOAA
#>  2: 2020-04-01 14:00:00       BRW            5.0            NA       NOAA
#>  3: 2020-04-14 14:00:00       BRW            5.0            NA       NOAA
#>  4: 2020-10-02 14:00:00       BRW           16.5            NA       NOAA
#>  5: 2020-10-02 14:00:00       BRW            5.0            NA       NOAA
#>  6: 2020-10-05 14:00:00       BRW           16.5            NA       NOAA
#>  7: 2020-10-30 14:00:00       BRW           16.5            NA       NOAA
#>  8: 2020-10-30 14:00:00       BRW            5.0            NA       NOAA
#>  9: 2020-11-10 14:00:00       BRW           16.5            NA       NOAA
#> 10: 2020-11-10 14:00:00       BRW            5.0            NA       NOAA
#> 11: 2020-03-27 14:00:00       KEY            5.0            NA       NOAA
#> 12: 2020-04-22 14:00:00       KEY            5.0            NA       NOAA
#> 13: 2020-05-28 14:00:00       KEY            5.0            NA       NOAA
#> 14: 2020-06-09 14:00:00       KEY            5.0            NA       NOAA
#> 15: 2020-06-18 14:00:00       KEY            5.0            NA       NOAA
#> 16: 2020-06-25 14:00:00       KEY            5.0            NA       NOAA
#> 17: 2020-07-15 14:00:00       KEY            5.0            NA       NOAA
#> 18: 2020-12-12 14:00:00       KEY            5.0            NA       NOAA
#> 19: 2020-03-25 14:00:00       KUM            5.0            NA       NOAA
#> 20: 2020-04-09 14:00:00       KUM            5.0            NA       NOAA
#> 21: 2020-04-23 14:00:00       KUM            5.0            NA       NOAA
#> 22: 2020-04-27 14:00:00       KUM            5.0            NA       NOAA
#> 23: 2020-05-18 14:00:00       KUM            5.0            NA       NOAA
#> 24: 2020-06-15 14:00:00       KUM            5.0            NA       NOAA
#> 25: 2020-06-22 14:00:00       KUM            5.0            NA       NOAA
#> 26: 2020-08-05 14:00:00       KUM            5.0            NA       NOAA
#> 27: 2020-11-02 14:00:00       KUM            5.0            NA       NOAA
#> 28: 2020-11-12 14:00:00       KUM            5.0            NA       NOAA
#> 29: 2020-11-27 14:00:00       KUM            5.0            NA       NOAA
#> 30: 2020-12-22 14:00:00       KUM            5.0            NA       NOAA
#> 31: 2020-02-18 14:00:00       NWR            3.0            NA       NOAA
#> 32: 2020-01-06 14:00:00       UTA            5.0            NA       NOAA
#> 33: 2020-04-22 14:00:00       UTA            5.0            NA       NOAA
#> 34: 2020-05-05 14:00:00       UTA            5.0            NA       NOAA
#> 35: 2020-05-22 14:00:00       UTA            5.0            NA       NOAA
#> 36: 2020-06-04 14:00:00       UTA            5.0            NA       NOAA
#> 37: 2020-07-09 14:00:00       UTA            5.0            NA       NOAA
#> 38: 2020-08-26 14:00:00       UTA            5.0            NA       NOAA
#> 39: 2020-09-26 14:00:00       UTA            5.0            NA       NOAA
#> 40: 2020-12-13 14:00:00       UTA            5.0            NA       NOAA
#> 41: 2020-12-31 14:00:00       UTA            5.0            NA       NOAA
#>                 timeUTC site_code altitude_final type_altitude lab_1_abbr
#>     dataset_calibration_scale        value latitude longitude site_utc2lst
#>                        <char>        <num>    <num>     <num>        <int>
#>  1:            WMO CH4 X2004A 1.981330e-06  71.3230 -156.6114           -9
#>  2:            WMO CH4 X2004A 1.976885e-06  71.3230 -156.6114           -9
#>  3:            WMO CH4 X2004A 1.974370e-06  71.3230 -156.6114           -9
#>  4:            WMO CH4 X2004A 1.983600e-06  71.3230 -156.6114           -9
#>  5:            WMO CH4 X2004A 1.984495e-06  71.3230 -156.6114           -9
#>  6:            WMO CH4 X2004A 1.978800e-06  71.3230 -156.6114           -9
#>  7:            WMO CH4 X2004A 2.047570e-06  71.3230 -156.6114           -9
#>  8:            WMO CH4 X2004A 2.071295e-06  71.3230 -156.6114           -9
#>  9:            WMO CH4 X2004A 1.993950e-06  71.3230 -156.6114           -9
#> 10:            WMO CH4 X2004A 1.994955e-06  71.3230 -156.6114           -9
#> 11:            WMO CH4 X2004A 1.944370e-06  25.6654  -80.1580           -5
#> 12:            WMO CH4 X2004A 2.000845e-06  25.6654  -80.1580           -5
#> 13:            WMO CH4 X2004A 1.886655e-06  25.6654  -80.1580           -5
#> 14:            WMO CH4 X2004A 1.895580e-06  25.6654  -80.1580           -5
#> 15:            WMO CH4 X2004A 1.891915e-06  25.6654  -80.1580           -5
#> 16:            WMO CH4 X2004A 1.880020e-06  25.6654  -80.1580           -5
#> 17:            WMO CH4 X2004A 1.918770e-06  25.6654  -80.1580           -5
#> 18:            WMO CH4 X2004A 1.965620e-06  25.6654  -80.1580           -5
#> 19:            WMO CH4 X2004A 1.927310e-06  19.5610 -154.8880          -10
#> 20:            WMO CH4 X2004A 1.895440e-06  19.7333 -155.0500          -10
#> 21:            WMO CH4 X2004A 1.914920e-06  19.5610 -154.8880          -10
#> 22:            WMO CH4 X2004A 1.901675e-06  19.5610 -154.8880          -10
#> 23:            WMO CH4 X2004A 1.908090e-06  19.5608 -154.8880          -10
#> 24:            WMO CH4 X2004A 1.907615e-06  19.5608 -154.8880          -10
#> 25:            WMO CH4 X2004A 1.911838e-06  19.5609 -154.8880          -10
#> 26:            WMO CH4 X2004A 1.888437e-06  19.5608 -154.8880          -10
#> 27:            WMO CH4 X2004A 1.891255e-06  19.5610 -154.8880          -10
#> 28:            WMO CH4 X2004A 1.944140e-06  19.5610 -154.8880          -10
#> 29:            WMO CH4 X2004A 1.939297e-06  19.5609 -154.8880          -10
#> 30:            WMO CH4 X2004A 1.938703e-06  19.5610 -154.8880          -10
#> 31:            WMO CH4 X2004A 1.942230e-06  40.0531 -105.5864           -7
#> 32:            WMO CH4 X2004A 1.945065e-06  39.9018 -113.7181           -7
#> 33:            WMO CH4 X2004A 1.938295e-06  39.9018 -113.7181           -7
#> 34:            WMO CH4 X2004A 1.936620e-06  39.9018 -113.7181           -7
#> 35:            WMO CH4 X2004A 1.941100e-06  39.9018 -113.7181           -7
#> 36:            WMO CH4 X2004A 1.895945e-06  39.9018 -113.7181           -7
#> 37:            WMO CH4 X2004A 1.932505e-06  39.9018 -113.7181           -7
#> 38:            WMO CH4 X2004A 1.906940e-06  39.9018 -113.7181           -7
#> 39:            WMO CH4 X2004A 1.939845e-06  39.9018 -113.7181           -7
#> 40:            WMO CH4 X2004A 1.969730e-06  39.9018 -113.7181           -7
#> 41:            WMO CH4 X2004A 1.971970e-06  39.9018 -113.7181           -7
#>     dataset_calibration_scale        value latitude longitude site_utc2lst
#>      year month   day  hour minute second       time time_decimal max_altitude
#>     <int> <int> <int> <int>  <int>  <int>      <int>        <num>        <num>
#>  1:  2020     3    10    14      0      0 1583848800     2020.190         16.5
#>  2:  2020     4     1    14      0      0 1585749600     2020.250         16.5
#>  3:  2020     4    14    14      0      0 1586872800     2020.286         16.5
#>  4:  2020    10     2    14      0      0 1601647200     2020.753         16.5
#>  5:  2020    10     2    14      0      0 1601647200     2020.753         16.5
#>  6:  2020    10     5    14      0      0 1601906400     2020.761         16.5
#>  7:  2020    10    30    14      0      0 1604066400     2020.829         16.5
#>  8:  2020    10    30    14      0      0 1604066400     2020.829         16.5
#>  9:  2020    11    10    14      0      0 1605016800     2020.860         16.5
#> 10:  2020    11    10    14      0      0 1605016800     2020.860         16.5
#> 11:  2020     3    27    14      0      0 1585317600     2020.237          5.0
#> 12:  2020     4    22    14      0      0 1587564000     2020.308          5.0
#> 13:  2020     5    28    14      0      0 1590674400     2020.406          5.0
#> 14:  2020     6     9    14      0      0 1591711200     2020.439          5.0
#> 15:  2020     6    18    14      0      0 1592488800     2020.463          5.0
#> 16:  2020     6    25    14      0      0 1593093600     2020.482          5.0
#> 17:  2020     7    15    14      0      0 1594821600     2020.537          5.0
#> 18:  2020    12    12    14      0      0 1607781600     2020.947          5.0
#> 19:  2020     3    25    14      0      0 1585144800     2020.231          5.0
#> 20:  2020     4     9    14      0      0 1586440800     2020.272          5.0
#> 21:  2020     4    23    14      0      0 1587650400     2020.310          5.0
#> 22:  2020     4    27    14      0      0 1587996000     2020.321          5.0
#> 23:  2020     5    18    14      0      0 1589810400     2020.379          5.0
#> 24:  2020     6    15    14      0      0 1592229600     2020.455          5.0
#> 25:  2020     6    22    14      0      0 1592834400     2020.474          5.0
#> 26:  2020     8     5    14      0      0 1596636000     2020.594          5.0
#> 27:  2020    11     2    14      0      0 1604325600     2020.838          5.0
#> 28:  2020    11    12    14      0      0 1605189600     2020.865          5.0
#> 29:  2020    11    27    14      0      0 1606485600     2020.906          5.0
#> 30:  2020    12    22    14      0      0 1608645600     2020.974          5.0
#> 31:  2020     2    18    14      0      0 1582034400     2020.133          3.0
#> 32:  2020     1     6    14      0      0 1578319200     2020.015          5.0
#> 33:  2020     4    22    14      0      0 1587564000     2020.308          5.0
#> 34:  2020     5     5    14      0      0 1588687200     2020.343          5.0
#> 35:  2020     5    22    14      0      0 1590156000     2020.390          5.0
#> 36:  2020     6     4    14      0      0 1591279200     2020.425          5.0
#> 37:  2020     7     9    14      0      0 1594303200     2020.521          5.0
#> 38:  2020     8    26    14      0      0 1598450400     2020.652          5.0
#> 39:  2020     9    26    14      0      0 1601128800     2020.737          5.0
#> 40:  2020    12    13    14      0      0 1607868000     2020.950          5.0
#> 41:  2020    12    31    14      0      0 1609423200     2020.999          5.0
#>      year month   day  hour minute second       time time_decimal max_altitude
#>     local_time
#>         <lgcl>
#>  1:         NA
#>  2:         NA
#>  3:         NA
#>  4:         NA
#>  5:         NA
#>  6:         NA
#>  7:         NA
#>  8:         NA
#>  9:         NA
#> 10:         NA
#> 11:         NA
#> 12:         NA
#> 13:         NA
#> 14:         NA
#> 15:         NA
#> 16:         NA
#> 17:         NA
#> 18:         NA
#> 19:         NA
#> 20:         NA
#> 21:         NA
#> 22:         NA
#> 23:         NA
#> 24:         NA
#> 25:         NA
#> 26:         NA
#> 27:         NA
#> 28:         NA
#> 29:         NA
#> 30:         NA
#> 31:         NA
#> 32:         NA
#> 33:         NA
#> 34:         NA
#> 35:         NA
#> 36:         NA
#> 37:         NA
#> 38:         NA
#> 39:         NA
#> 40:         NA
#> 41:         NA
#>     local_time

Receptors

Now we can do the last step, which is generating the receptor list files. First, we select the required columns

# Keep only the columns that go into the receptor files.
receptor <- master[, c(
  "site_code",
  "year", "month", "day",
  "hour", "minute", "second",
  "latitude", "longitude",
  "altitude_final", "type_altitude",
  "time_decimal"
)]

We can round altitude also

# Round altitude_final with round()'s default of 0 decimal places.
receptor$altitude_final <- round(receptor$altitude_final)

Now we can format time variables with two digits

# Format the listed date/time columns through obs_format's spf argument.
receptor <- obs_format(
  receptor,
  spf = c("month", "day", "hour", "minute", "second")
)

We have a column that indicates AGL or ASL

# Split the receptors on the type_altitude flag: 0 goes to the AGL table,
# 1 goes to the ASL table.
# NOTE(review): rows whose type_altitude is NA satisfy neither comparison; the
# master table printed above shows NA in every row, so confirm whether empty
# receptor tables are expected for this dataset.
receptor_agl <- receptor[type_altitude == 0]
receptor_asl <- receptor[type_altitude == 1]

Finally, we save the receptors

# Write the AGL receptor list only when it has at least one row.
if (nrow(receptor_agl) > 0) {
  agl_file <- paste0(out, "_", datasetid, "_receptor_AGL.txt")
  message(agl_file, "\n")

  fwrite(x = receptor_agl,
         file = agl_file,
         sep = " ")
}

# Write the ASL receptor list only when it has at least one row.
# The path is built once so the reported name and the written name always
# agree; the written name previously lacked the "_" before "receptor".
if (nrow(receptor_asl) > 0) {
  asl_file <- paste0(out, "_", datasetid, "_receptor_ASL.txt")
  message(asl_file, "\n")

  fwrite(x = receptor_asl,
         file = asl_file,
         sep = " ")
}

Plot

Finally, we plot some of the data; run this part locally

# Plot the aggregated data using timeUTC as the time column.
# NOTE(review): yfactor = 1e9 presumably rescales value (about 1.9e-06 in the
# table above) to ppb - confirm in the obs_plot documentation.
obs_plot(df4, time = "timeUTC", yfactor = 1e9)
#> Found the following sites: 
#>  [1] ABT   BCK   BRA   BRW   CARL  CBY   CHL   CPS   EGB   ESP   EST   ETL  
#> [13] FNE   FORT  FSD   GHG06 GHG09 HNP   HOBB  INU   INX01 INX02 INX07 INX08
#> [25] INX09 INX10 INX13 INX14 LLB   MALJ  MLO   OLI   SGP   THD   TPD   WSA  
#> Plotting the following sites: 
#> [1] ABT BCK
Time series
Time series
library(sf)
# Mean of value for every distinct latitude/longitude pair.
dx <- df4[,
    lapply(.SD, mean),
    .SDcols = "value",
    by = .(latitude, longitude)]
# Convert to sf points in geographic coordinates (EPSG:4326).
x <- st_as_sf(dx, coords = c("longitude", "latitude"), crs = 4326)
# reset = FALSE leaves the plot open so the map outline can be added on top.
plot(x["value"], axes = TRUE, reset = FALSE)
maps::map(add = TRUE)
Map
Map