# ObsPack dataset categories (sectors) to look for when indexing the files.
cate <- c(
  "aircraft-pfp",
  "aircraft-insitu",
  "aircraft-flask",
  "surface-insitu",
  "surface-flask",
  "surface-pfp",
  "tower-insitu",
  "aircore",
  "shipboard-insitu",
  "shipboard-flask"
)
# Directory that holds the ObsPack NetCDF files (machine-specific path).
obs <- "Z:/torf/obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08/data/nc/"
# Index the files found under `obs` for the requested categories.
index <- obs_summary(
  obs = obs,
  categories = cate
)
#> Number of files of index: 429
#> sector N
#> <char> <int>
#> 1: aircraft-pfp 40
#> 2: aircraft-insitu 15
#> 3: surface-flask 106
#> 4: surface-insitu 174
#> 5: aircraft-flask 4
#> 6: aircore 1
#> 7: surface-pfp 33
#> 8: tower-insitu 51
#> 9: shipboard-flask 4
#> 10: shipboard-insitu 1
#> 11: Total sectors 429
#> Detected 190 files with agl
#> Detected 239 files without agl
Now we read the surface-flask data
using the function
obs_read_nc
. solar_time
is included for
surface, so we set
that argument to TRUE.
# Category to read; the same id is reused later to name the output files.
datasetid <- "surface-flask"
# Read the files of that category listed in the index, requesting the
# solar-time fields as well.
df <- obs_read_nc(
  index = index,
  categories = datasetid,
  solar_time = TRUE,
  verbose = TRUE
)
#> Searching surface-flask...
#> 1: ch4_abp_surface-flask_1_representative.nc
#> 2: ch4_alt_surface-flask_1_representative.nc
#> 3: ch4_alt_surface-flask_2_representative.nc
#> 4: ch4_alt_surface-flask_45_representative.nc
#> 5: ch4_ams_surface-flask_1_representative.nc
#> 6: ch4_amt_surface-flask_1_representative.nc
#> 7: ch4_amy_surface-flask_1_representative.nc
#> 8: ch4_ara_surface-flask_2_representative.nc
#> 9: ch4_asc_surface-flask_1_representative.nc
#> 10: ch4_ask_surface-flask_1_representative.nc
#> 11: ch4_avi_surface-flask_1_representative.nc
#> 12: ch4_azr_surface-flask_1_representative.nc
#> 13: ch4_bal_surface-flask_1_representative.nc
#> 14: ch4_bhd_surface-flask_1_representative.nc
#> 15: ch4_bik_surface-flask_45_representative.nc
#> 16: ch4_bkt_surface-flask_1_representative.nc
#> 17: ch4_bme_surface-flask_1_representative.nc
#> 18: ch4_bmw_surface-flask_1_representative.nc
#> 19: ch4_bnt_surface-flask_1_representative.nc
#> 20: ch4_brw_surface-flask_1_representative.nc
#> 21: ch4_bsc_surface-flask_1_representative.nc
#> 22: ch4_cba_surface-flask_1_representative.nc
#> 23: ch4_cfa_surface-flask_2_representative.nc
#> 24: ch4_cgo_surface-flask_1_representative.nc
#> 25: ch4_cgo_surface-flask_2_representative.nc
#> 26: ch4_cgo_surface-flask_45_representative.nc
#> 27: ch4_chr_surface-flask_1_representative.nc
#> 28: ch4_cib_surface-flask_1_representative.nc
#> 29: ch4_cmo_surface-flask_1_representative.nc
#> 30: ch4_cpa_surface-flask_2_representative.nc
#> 31: ch4_cpt_surface-flask_1_representative.nc
#> 32: ch4_cri_surface-flask_2_representative.nc
#> 33: ch4_crz_surface-flask_1_representative.nc
#> 34: ch4_cvo_surface-flask_45_representative.nc
#> 35: ch4_cya_surface-flask_2_representative.nc
#> 36: ch4_dsi_surface-flask_1_representative.nc
#> 37: ch4_eic_surface-flask_1_representative.nc
#> 38: ch4_esp_surface-flask_2_representative.nc
#> 39: ch4_gmi_surface-flask_1_representative.nc
#> 40: ch4_goz_surface-flask_1_representative.nc
#> 41: ch4_gpa_surface-flask_2_representative.nc
#> 42: ch4_gvn_surface-flask_45_representative.nc
#> 43: ch4_hba_surface-flask_1_representative.nc
#> 44: ch4_hpb_surface-flask_1_representative.nc
#> 45: ch4_hsu_surface-flask_1_representative.nc
#> 46: ch4_hun_surface-flask_1_representative.nc
#> 47: ch4_ice_surface-flask_1_representative.nc
#> 48: ch4_izo_surface-flask_1_representative.nc
#> 49: ch4_jfj_surface-flask_45_representative.nc
#> 50: ch4_key_surface-flask_1_representative.nc
#> 51: ch4_kjn_surface-flask_45_representative.nc
#> 52: ch4_kum_surface-flask_1_representative.nc
#> 53: ch4_kzd_surface-flask_1_representative.nc
#> 54: ch4_kzm_surface-flask_1_representative.nc
#> 55: ch4_lef_surface-flask_1_representative.nc
#> 56: ch4_llb_surface-flask_1_representative.nc
#> 57: ch4_lln_surface-flask_1_representative.nc
#> 58: ch4_lmp_surface-flask_1_representative.nc
#> 59: ch4_maa_surface-flask_2_representative.nc
#> 60: ch4_mbc_surface-flask_1_representative.nc
#> 61: ch4_mex_surface-flask_1_representative.nc
#> 62: ch4_mhd_surface-flask_1_representative.nc
#> 63: ch4_mid_surface-flask_1_representative.nc
#> 64: ch4_mkn_surface-flask_1_representative.nc
#> 65: ch4_mlo_surface-flask_1_representative.nc
#> 66: ch4_mlo_surface-flask_2_representative.nc
#> 67: ch4_mqa_surface-flask_2_representative.nc
#> 68: ch4_nam_surface-flask_45_representative.nc
#> 69: ch4_nat_surface-flask_1_representative.nc
#> 70: ch4_nmb_surface-flask_1_representative.nc
#> 71: ch4_nwr_surface-flask_1_representative.nc
#> 72: ch4_obn_surface-flask_1_representative.nc
#> 73: ch4_opw_surface-flask_1_representative.nc
#> 74: ch4_ota_surface-flask_2_representative.nc
#> 75: ch4_oxk_surface-flask_1_representative.nc
#> 76: ch4_oxk_surface-flask_45_representative.nc
#> 77: ch4_pal_surface-flask_1_representative.nc
#> 78: ch4_psa_surface-flask_1_representative.nc
#> 79: ch4_pta_surface-flask_1_representative.nc
#> 80: ch4_rpb_surface-flask_1_representative.nc
#> 81: ch4_sdz_surface-flask_1_representative.nc
#> 82: ch4_sey_surface-flask_1_representative.nc
#> 83: ch4_sgi_surface-flask_1_representative.nc
#> 84: ch4_sgp_surface-flask_1_representative.nc
#> 85: ch4_shm_surface-flask_1_representative.nc
#> 86: ch4_sis_surface-flask_2_representative.nc
#> 87: ch4_sis_surface-flask_45_representative.nc
#> 88: ch4_smo_surface-flask_1_representative.nc
#> 89: ch4_spo_surface-flask_1_representative.nc
#> 90: ch4_spo_surface-flask_2_representative.nc
#> 91: ch4_stm_surface-flask_1_representative.nc
#> 92: ch4_sum_surface-flask_1_representative.nc
#> 93: ch4_syo_surface-flask_1_representative.nc
#> 94: ch4_tac_surface-flask_1_representative.nc
#> 95: ch4_tap_surface-flask_1_representative.nc
#> 96: ch4_thd_surface-flask_1_representative.nc
#> 97: ch4_tik_surface-flask_1_representative.nc
#> 98: ch4_ush_surface-flask_1_representative.nc
#> 99: ch4_uta_surface-flask_1_representative.nc
#> 100: ch4_uum_surface-flask_1_representative.nc
#> 101: ch4_vrs_surface-flask_45_representative.nc
#> 102: ch4_wis_surface-flask_1_representative.nc
#> 103: ch4_wkt_surface-flask_1_representative.nc
#> 104: ch4_wlg_surface-flask_1_representative.nc
#> 105: ch4_zep_surface-flask_1_representative.nc
#> 106: ch4_zot_surface-flask_45_representative.nc
Now we check the data
# Display the table that was just read.
print(df)
#> year month day hour minute second time start_time
#> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1: 2006 10 27 15 50 0 1161964200 1161964200
#> 2: 2006 11 3 16 24 0 1162571040 1162571040
#> 3: 2006 11 18 15 47 0 1163864820 1163864820
#> 4: 2006 11 24 15 54 0 1164383640 1164383640
#> 5: 2006 12 1 17 2 0 1164992520 1164992520
#> ---
#> 107797: 2021 11 28 12 30 0 1638102600 1638102600
#> 107798: 2021 12 5 8 56 0 1638694560 1638694560
#> 107799: 2021 12 12 9 46 0 1639302360 1639302360
#> 107800: 2021 12 19 7 59 0 1639900740 1639900740
#> 107801: 2021 12 26 12 51 0 1640523060 1640523060
#> midpoint_time datetime time_decimal time_interval
#> <int> <char> <num> <int>
#> 1: 1161964200 2006-10-27T15:50:00Z 2006.821 3600
#> 2: 1162571040 2006-11-03T16:24:00Z 2006.840 3600
#> 3: 1163864820 2006-11-18T15:47:00Z 2006.881 3600
#> 4: 1164383640 2006-11-24T15:54:00Z 2006.898 3600
#> 5: 1164992520 2006-12-01T17:02:00Z 2006.917 3600
#> ---
#> 107797: 1638102600 2021-11-28T12:30:00Z 2021.908 3600
#> 107798: 1638694560 2021-12-05T08:56:00Z 2021.927 3600
#> 107799: 1639302360 2021-12-12T09:46:00Z 2021.946 3600
#> 107800: 1639900740 2021-12-19T07:59:00Z 2021.965 3600
#> 107801: 1640523060 2021-12-26T12:51:00Z 2021.985 3600
#> value value_unc nvalue value_std_dev latitude longitude altitude
#> <num> <num> <int> <num> <num> <num> <num>
#> 1: 1.736280e-06 2.05e-09 2 7.636753e-10 -12.76 -38.16 6
#> 2: 1.739605e-06 2.05e-09 2 5.444722e-10 -12.76 -38.16 6
#> 3: 1.731770e-06 2.05e-09 2 1.159655e-09 -12.76 -38.16 6
#> 4: 1.733130e-06 2.05e-09 2 1.131371e-10 -12.76 -38.16 6
#> 5: 1.745230e-06 2.05e-09 2 5.232590e-10 -12.76 -38.16 6
#> ---
#> 107797: 2.039500e-06 NA 1 8.600000e-10 60.75 89.38 411
#> 107798: 2.102740e-06 NA 1 7.900000e-10 60.75 89.38 411
#> 107799: 2.087280e-06 NA 1 3.100000e-10 60.75 89.38 411
#> 107800: 2.067420e-06 NA 1 2.000000e-09 60.75 89.38 411
#> 107801: 2.024140e-06 NA 1 6.000000e-09 60.75 89.38 411
#> elevation intake_height qcflag instrument analysis_datetime method
#> <num> <num> <char> <char> <char> <char>
#> 1: 1 5 ... H4 2007-04-02T12:43:00 G
#> 2: 1 5 ... H4 2007-02-20T13:29:00 G
#> 3: 1 5 ... H4 2007-02-20T13:00:00 G
#> 4: 1 5 ... H4 2007-02-20T09:55:00 G
#> 5: 1 5 ... H4 2007-02-16T15:39:00 G
#> ---
#> 107797: 111 300 0 <NA> <NA> <NA>
#> 107798: 111 300 0 <NA> <NA> <NA>
#> 107799: 111 300 0 <NA> <NA> <NA>
#> 107800: 111 300 0 <NA> <NA> <NA>
#> 107801: 111 300 0 <NA> <NA> <NA>
#> event_number air_sample_container_id obs_flag obspack_num
#> <char> <char> <int> <int>
#> 1: 231671,231672 1779-99,1780-99 1 1033168
#> 2: 229105,229106 6717-66,6718-66 1 1033169
#> 3: 229103,229104 2969-99,2970-99 1 1033170
#> 4: 229081,229082 3217-99,3218-99 1 1033171
#> 5: 229079,229080 759-99,760-99 1 1033172
#> ---
#> 107797: <NA> <NA> 1 10187962
#> 107798: <NA> <NA> 1 10187963
#> 107799: <NA> <NA> 1 10187964
#> 107800: <NA> <NA> 1 10187965
#> 107801: <NA> <NA> 1 10187966
#> obspack_id
#> <char>
#> 1: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033168
#> 2: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033169
#> 3: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033170
#> 4: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033171
#> 5: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_abp_surface-flask_1_representative~1033172
#> ---
#> 107797: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187962
#> 107798: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187963
#> 107799: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187964
#> 107800: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187965
#> 107801: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_zot_surface-flask_45_representative~10187966
#> unique_sample_location_num year_st month_st day_st hour_st minute_st
#> <int> <int> <int> <int> <int> <int>
#> 1: 21955180 2006 10 27 13 33
#> 2: 21955181 2006 11 3 14 7
#> 3: 21955182 2006 11 18 13 28
#> 4: 21955183 2006 11 24 13 34
#> 5: 21955184 2006 12 1 14 39
#> ---
#> 107797: 54363784 2021 11 28 18 38
#> 107798: 54363785 2021 12 5 15 2
#> 107799: 54363786 2021 12 12 15 49
#> 107800: 54887377 2021 12 19 13 59
#> 107801: 54363787 2021 12 26 18 47
#> second_st scale site_elevation_unit dataset_project
#> <int> <char> <char> <char>
#> 1: 36 WMO CH4 X2004A masl surface-flask
#> 2: 42 WMO CH4 X2004A masl surface-flask
#> 3: 40 WMO CH4 X2004A masl surface-flask
#> 4: 4 WMO CH4 X2004A masl surface-flask
#> 5: 42 WMO CH4 X2004A masl surface-flask
#> ---
#> 107797: 56 WMO CH4 X2004A masl surface-flask
#> 107798: 19 WMO CH4 X2004A masl surface-flask
#> 107799: 21 WMO CH4 X2004A masl surface-flask
#> 107800: 9 WMO CH4 X2004A masl surface-flask
#> 107801: 54 WMO CH4 X2004A masl surface-flask
#> dataset_selection_tag site_name site_elevation site_latitude
#> <char> <char> <num> <num>
#> 1: representative Arembepe, Bahia 1 -12.77
#> 2: representative Arembepe, Bahia 1 -12.77
#> 3: representative Arembepe, Bahia 1 -12.77
#> 4: representative Arembepe, Bahia 1 -12.77
#> 5: representative Arembepe, Bahia 1 -12.77
#> ---
#> 107797: representative Zotino, Siberia 111 60.75
#> 107798: representative Zotino, Siberia 111 60.75
#> 107799: representative Zotino, Siberia 111 60.75
#> 107800: representative Zotino, Siberia 111 60.75
#> 107801: representative Zotino, Siberia 111 60.75
#> site_longitude site_country site_code site_utc2lst lab_1_abbr
#> <num> <char> <char> <num> <char>
#> 1: -38.17 Brazil ABP -3 NOAA
#> 2: -38.17 Brazil ABP -3 NOAA
#> 3: -38.17 Brazil ABP -3 NOAA
#> 4: -38.17 Brazil ABP -3 NOAA
#> 5: -38.17 Brazil ABP -3 NOAA
#> ---
#> 107797: 89.38 Russia ZOT 7 MPI-BGC
#> 107798: 89.38 Russia ZOT 7 MPI-BGC
#> 107799: 89.38 Russia ZOT 7 MPI-BGC
#> 107800: 89.38 Russia ZOT 7 MPI-BGC
#> 107801: 89.38 Russia ZOT 7 MPI-BGC
#> dataset_calibration_scale altitude_final type_altitude
#> <char> <num> <num>
#> 1: WMO CH4 X2004A 5 NA
#> 2: WMO CH4 X2004A 5 NA
#> 3: WMO CH4 X2004A 5 NA
#> 4: WMO CH4 X2004A 5 NA
#> 5: WMO CH4 X2004A 5 NA
#> ---
#> 107797: WMO CH4 X2004A 300 0
#> 107798: WMO CH4 X2004A 300 0
#> 107799: WMO CH4 X2004A 300 0
#> 107800: WMO CH4 X2004A 300 0
#> 107801: WMO CH4 X2004A 300 0
#> dataset_intake_ht_unit
#> <char>
#> 1: <NA>
#> 2: <NA>
#> 3: <NA>
#> 4: <NA>
#> 5: <NA>
#> ---
#> 107797: magl
#> 107798: magl
#> 107799: magl
#> 107800: magl
#> 107801: magl
Now we can process the data. We first filter for observations within our spatial domain:
Checks and definitions
# Spatial domain (bounding box used by the latitude/longitude filters).
north <- 80
south <- 10
west <- -170
east <- -50

# Upper limit applied to altitude_final.
max_altitude <- 8000

# Year to keep.
yy <- 2020

# Hour recorded in the metadata notes as the selected hour.
evening <- 14
We check altitude, intake_height, altitude_final and elevation. altitude_final is a column derived from intake_height, added to match the column produced by obs_read for text files.
# Inspect the height-related columns next to the site information.
df[,
   .SD,
   .SDcols = c("altitude",
               "altitude_final",
               "intake_height",
               "elevation",
               "dataset_selection_tag",
               "site_name")]
#> altitude altitude_final intake_height elevation dataset_selection_tag
#> <num> <num> <num> <num> <char>
#> 1: 6 5 5 1 representative
#> 2: 6 5 5 1 representative
#> 3: 6 5 5 1 representative
#> 4: 6 5 5 1 representative
#> 5: 6 5 5 1 representative
#> ---
#> 107797: 411 300 300 111 representative
#> 107798: 411 300 300 111 representative
#> 107799: 411 300 300 111 representative
#> 107800: 411 300 300 111 representative
#> 107801: 411 300 300 111 representative
#> site_name
#> <char>
#> 1: Arembepe, Bahia
#> 2: Arembepe, Bahia
#> 3: Arembepe, Bahia
#> 4: Arembepe, Bahia
#> 5: Arembepe, Bahia
#> ---
#> 107797: Zotino, Siberia
#> 107798: Zotino, Siberia
#> 107799: Zotino, Siberia
#> 107800: Zotino, Siberia
#> 107801: Zotino, Siberia
The temporal range of data is
# First and last year present in the data.
range(df[["year"]])
#> [1] 1983 2021
We also check for dimensions of data
# Number of rows and columns.
c(nrow(df), ncol(df))
#> [1] 107801 53
Filters
# Keep only the target year.
df <- df[year == yy, ]
# Keep observations strictly inside the bounding box and below the
# altitude limit.
df <- df[
  latitude > south & latitude < north &
    longitude > west & longitude < east &
    altitude_final < max_altitude,
]
# Rows and columns left after filtering.
dim(df)
#> [1] 799 53
Towers can have observations at different heights. Here we need to
select one site with the observations registered at the highest height.
The column with the height is named altitude_final
and the
max altitude was named max_altitude
.
#> site_code max_altitude
#> <char> <num>
#> 1: BMW 21.3
#> 2: BRW 16.5
#> 3: CBA 35.7
#> 4: KEY 5.0
#> 5: KUM 5.0
#> 6: LEF 396.0
#> 7: MEX 5.0
#> 8: MLO 40.0
#> 9: NWR 3.0
#> 10: RPB 5.0
#> 11: SGP 60.0
#> 12: UTA 5.0
Key Time
Here we need to add time columns. The function
obs_addtime
adds time columns timeUTC
,
timeUTC_start
which shows the start time of each
observation and timeUTC_end
which shows the end time for
each observation.
# Add the UTC time columns (timeUTC, timeUTC_start, timeUTC_end).
df2 <- df |> obs_addtime()
#> Adding timeUTC
#> Adding timeUTC_start
#> Adding timeUTC_end
#> Found time_interval
Then we need a key_time to aggregate data. This can be done using UTC, solar, or local time. The normal approach is using afternoon solar or local time.
Hierarchy of solar or local time
- Solar time
- Local time with columns
site_utc2lst
- Local time longitude
solar time (default)
Here we select the hours of interest and then aggregate data by year, month and day of solar time. In this way, we will have one observation per day. However, this approach is not appropriate for aircraft data, which are aggregated every 10 or 20 seconds. Hence we need to aggregate data by one time column. Also, this helps to generate the receptor info files including hour, minute and second. Hence, we need to add a solar or local time column.
# Store the solar time returned by obs_addstime() as a new column.
df2$solar_time <- df2 |> obs_addstime()
local time with column
site_utc2lst
Then we need to identify the local time with the function
add_ltime
. This is important for identifying
observations in the evening in local time. add_ltime
uses
two methods. The first identifies the time difference with UTC from
the metadata column “site_utc2lst”. If solar time is not available #now
we need to cut solar time for the frequency needed. As we will work
with
local time longitude
If this information is not available, with the aircrafts for instance, the local time is calculated with an approximation based on longitude:
$$lt = UTC + longitude / 15 \times 60 \times 60$$
Where $lt$ is the local time, $UTC$ the time, $longitude$ the coordinate. Then, the time is cut every two hours. Now, we identify the local time to select evening hours.
Cut time
Now that we have the key time column, we can cut it accordingly.
# Bin solar time into 1-hour intervals and keep the bin labels as text.
df2$solar_time_cut <- as.character(
  cut(x = df2$solar_time, breaks = "1 hour")
)
Now we can check the solar time and the cut solar time. Please note that solar_time_cut is the column that will be used to aggregate data.
Now we filter for the required solar time, in this case 14.
#> solar_time solar_time_cut
#> <POSc> <char>
#> 1: 2020-03-10 14:22:19 2020-03-10 14:00:00
#> 2: 2020-04-01 14:28:02 2020-04-01 14:00:00
#> 3: 2020-04-14 14:52:45 2020-04-14 14:00:00
#> 4: 2020-10-02 14:24:04 2020-10-02 14:00:00
#> 5: 2020-10-02 14:44:04 2020-10-02 14:00:00
#> 6: 2020-10-05 14:15:59 2020-10-05 14:00:00
#> 7: 2020-10-30 14:06:56 2020-10-30 14:00:00
#> 8: 2020-10-30 14:24:56 2020-10-30 14:00:00
#> 9: 2020-11-10 14:04:10 2020-11-10 14:00:00
#> 10: 2020-11-10 14:22:10 2020-11-10 14:00:00
#> 11: 2020-03-27 14:23:57 2020-03-27 14:00:00
#> 12: 2020-04-22 14:09:09 2020-04-22 14:00:00
#> 13: 2020-05-28 14:19:16 2020-05-28 14:00:00
#> 14: 2020-06-09 14:38:15 2020-06-09 14:00:00
#> 15: 2020-06-18 14:29:22 2020-06-18 14:00:00
#> 16: 2020-06-25 14:18:50 2020-06-25 14:00:00
#> 17: 2020-07-15 14:18:25 2020-07-15 14:00:00
#> 18: 2020-12-12 14:41:10 2020-12-12 14:00:00
#> 19: 2020-03-25 14:30:43 2020-03-25 14:00:00
#> 20: 2020-04-09 14:27:40 2020-04-09 14:00:00
#> 21: 2020-04-23 14:23:37 2020-04-23 14:00:00
#> 22: 2020-04-27 14:54:16 2020-04-27 14:00:00
#> 23: 2020-05-18 14:16:12 2020-05-18 14:00:00
#> 24: 2020-06-15 14:03:53 2020-06-15 14:00:00
#> 25: 2020-06-15 14:17:53 2020-06-15 14:00:00
#> 26: 2020-06-22 14:24:21 2020-06-22 14:00:00
#> 27: 2020-06-22 14:41:21 2020-06-22 14:00:00
#> 28: 2020-08-05 14:25:26 2020-08-05 14:00:00
#> 29: 2020-08-05 14:39:26 2020-08-05 14:00:00
#> 30: 2020-11-02 14:18:47 2020-11-02 14:00:00
#> 31: 2020-11-02 14:32:47 2020-11-02 14:00:00
#> 32: 2020-11-12 14:13:45 2020-11-12 14:00:00
#> 33: 2020-11-27 14:14:50 2020-11-27 14:00:00
#> 34: 2020-11-27 14:28:50 2020-11-27 14:00:00
#> 35: 2020-12-22 14:11:13 2020-12-22 14:00:00
#> 36: 2020-12-22 14:28:13 2020-12-22 14:00:00
#> 37: 2020-02-18 14:27:33 2020-02-18 14:00:00
#> 38: 2020-01-06 14:27:37 2020-01-06 14:00:00
#> 39: 2020-04-22 14:26:55 2020-04-22 14:00:00
#> 40: 2020-05-05 14:25:44 2020-05-05 14:00:00
#> 41: 2020-05-22 14:34:41 2020-05-22 14:00:00
#> 42: 2020-06-04 14:51:57 2020-06-04 14:00:00
#> 43: 2020-07-09 14:04:58 2020-07-09 14:00:00
#> 44: 2020-08-26 14:52:18 2020-08-26 14:00:00
#> 45: 2020-09-26 14:44:19 2020-09-26 14:00:00
#> 46: 2020-12-13 14:33:29 2020-12-13 14:00:00
#> 47: 2020-12-31 14:17:13 2020-12-31 14:00:00
#> solar_time solar_time_cut
At this point we can calculate the averages of several columns by the
cut time. The function obs_agg
does this aggregation as
shown in the following lines of code. The argument gby
establish the function used to aggregate cols
. I need to
aggregate the data by date (year, month, date), because it is already
filtered by the hours of interest. Then, I would have 1 observation per
day.
As standard, let us define key_time
as
solar_time_cut
. The obs_agg
function will
aggregate the desired data by that column.
# Use the hourly solar-time bin as the aggregation key.
df3$key_time <- df3$solar_time_cut
# Aggregate the listed columns with obs_agg().
# NOTE(review): byalt = TRUE presumably keeps separate rows per altitude
# (the run prints "Selecting by alt") -- confirm against the obs_agg docs.
df4 <- obs_agg(
  dt = df3,
  cols = c("value",
           "latitude",
           "longitude",
           "site_utc2lst"),
  verbose = TRUE, # spelled out: `T` is an ordinary variable and can be reassigned
  byalt = TRUE
)
#> Selecting by alt
#> Adding time
Here we add the column max_altitude
to identify the max
altitude by site.
# Tag every row with the highest altitude_final recorded at its site
# (the column is added by reference).
df4[, max_altitude := max(altitude_final), by = "site_code"]
# List the distinct site / height / maximum-height combinations.
unique(df4[, c("site_code", "altitude_final", "max_altitude")])
#> site_code altitude_final max_altitude
#> <char> <num> <num>
#> 1: BRW 5.0 16.5
#> 2: BRW 16.5 16.5
#> 3: KEY 5.0 5.0
#> 4: KUM 5.0 5.0
#> 5: NWR 3.0 3.0
#> 6: UTA 5.0 5.0
Master
Before generating the receptors list, we have the database with all the required information
# `master` is the aggregated table that the output files are written from.
# NOTE(review): for a data.table a plain `<-` does not copy the object;
# use data.table::copy(df4) if an independent copy is required -- confirm.
master <- df4
We may replace missing values with nine nines (999999999). This step is commented out here:
#master[is.na(master)] <- 999999999
We transform the time variables to character and round coordinates with 4 digits
# Time columns are stored as text in the output files.
master$timeUTC <- as.character(master$timeUTC)
# NOTE(review): local_time looks absent from the aggregated table, which
# would explain the all-NA column in the printed output -- confirm intent.
master$local_time <- as.character(master$local_time)

# Coordinates are rounded to 4 decimal places.
master$latitude <- round(master$latitude, digits = 4)
master$longitude <- round(master$longitude, digits = 4)
Save master
Finally we save the master file
# Temporary path used as the common prefix of every output file.
out <- tempfile(pattern = "file", tmpdir = tempdir(), fileext = "")
txt
# Announce the destination, then write the master table space-separated.
message(out, "_", datasetid, ".txt\n")
fwrite(x = master,
       file = paste0(out, "_", datasetid, ".txt"),
       sep = " ")
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.txt
csv
# Announce the destination, then write the master table comma-separated.
message(out, "_", datasetid, ".csv\n")
fwrite(x = master,
       file = paste0(out, "_", datasetid, ".csv"),
       sep = ",")
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.csv
csvy
CSVY are csv files with a YAML header to include metadata in tabulated text files
# Header line for the console listing of the metadata notes.
cat("\nAdding notes in csvy:\n")

# Metadata lines passed to obs_write_csvy() as the notes of the CSVY file.
notes <- c(
  paste0("sector: ", datasetid),
  paste0("timespan: ", yy),
  paste0(
    "spatial_limits: north = ", north,
    ", south = ", south,
    ", east = ", east,
    ", west = ", west
  ),
  paste0("altitude: < ", max_altitude),
  paste0("hours: ", evening),
  "local_time: used solar_time"
)
cat(notes, sep = "\n")

# Announce the destination and write the table together with the notes.
message(out, "_", datasetid, ".csvy\n")
obs_write_csvy(
  dt = master,
  notes = notes,
  out = paste0(out, "_", datasetid, ".csvy")
)
#> Adding notes in csvy:
#> sector: surface-flask
#> timespan: 2020
#> spatial_limits: north = 80, south = 10, east = -50, west = -170
#> data: Data averaged every 20 seconds
#> altitude: < 8000
#> hours: 14
#> local_time: used solar_time
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.csvy
# Read the CSVY file back; the run shows the YAML header, then the table.
paste0(out, "_", datasetid, ".csvy") |>
  obs_read_csvy()
#> [1] "---"
#> [2] "name: Metadata "
#> [3] "sector: surface-flask"
#> [4] "timespan: 2020"
#> [5] "spatial_limits: north = 80, south = 10, east = -50, west = -170"
#> [6] "data: Data averaged every 20 seconds"
#> [7] "altitude: < 8000"
#> [8] "hours: 14"
#> [9] "local_time: used solar_time"
#> [10] "structure: "
#> [11] "Classes 'data.table' and 'data.frame':\t41 obs. of 20 variables:"
#> [12] " $ timeUTC : chr \"2020-03-10 14:00:00\" \".."
#> [13] " $ site_code : chr \"BRW\" \"BRW\" ..."
#> [14] " $ altitude_final : num 5 5 5 16.5 5 ..."
#> [15] " $ type_altitude : num NA NA NA NA NA ..."
#> [16] " $ lab_1_abbr : chr \"NOAA\" \"NOAA\" ..."
#> [17] " $ dataset_calibration_scale: chr \"WMO CH4 X2004A\" \"WMO \".."
#> [18] " $ value : num 1.98e-06 1.98e-06 ..."
#> [19] " $ latitude : num 71.3 71.3 ..."
#> [20] " $ longitude : num -157 -157 ..."
#> [21] " $ site_utc2lst : num -9 -9 -9 -9 -9 ..."
#> [22] " $ year : int 2020 2020 2020 2020 202.."
#> [23] " $ month : int 3 4 4 10 10 ..."
#> [24] " $ day : chr \"10\" \"01\" ..."
#> [25] " $ hour : int 14 14 14 14 14 ..."
#> [26] " $ minute : int 0 0 0 0 0 ..."
#> [27] " $ second : int 0 0 0 0 0 ..."
#> [28] " $ time : num 1.58e+09 1.59e+09 ..."
#> [29] " $ time_decimal : num 2020 2020 ..."
#> [30] " $ max_altitude : num 16.5 16.5 16.5 16.5 16..."
#> [31] " $ local_time : chr NA NA ..."
#> [32] " - attr(*, \".internal.selfref\")=<externalptr> "
#> [33] "NULL"
#> [34] "---"
#> timeUTC site_code altitude_final type_altitude lab_1_abbr
#> <POSc> <char> <num> <lgcl> <char>
#> 1: 2020-03-10 14:00:00 BRW 5.0 NA NOAA
#> 2: 2020-04-01 14:00:00 BRW 5.0 NA NOAA
#> 3: 2020-04-14 14:00:00 BRW 5.0 NA NOAA
#> 4: 2020-10-02 14:00:00 BRW 16.5 NA NOAA
#> 5: 2020-10-02 14:00:00 BRW 5.0 NA NOAA
#> 6: 2020-10-05 14:00:00 BRW 16.5 NA NOAA
#> 7: 2020-10-30 14:00:00 BRW 16.5 NA NOAA
#> 8: 2020-10-30 14:00:00 BRW 5.0 NA NOAA
#> 9: 2020-11-10 14:00:00 BRW 16.5 NA NOAA
#> 10: 2020-11-10 14:00:00 BRW 5.0 NA NOAA
#> 11: 2020-03-27 14:00:00 KEY 5.0 NA NOAA
#> 12: 2020-04-22 14:00:00 KEY 5.0 NA NOAA
#> 13: 2020-05-28 14:00:00 KEY 5.0 NA NOAA
#> 14: 2020-06-09 14:00:00 KEY 5.0 NA NOAA
#> 15: 2020-06-18 14:00:00 KEY 5.0 NA NOAA
#> 16: 2020-06-25 14:00:00 KEY 5.0 NA NOAA
#> 17: 2020-07-15 14:00:00 KEY 5.0 NA NOAA
#> 18: 2020-12-12 14:00:00 KEY 5.0 NA NOAA
#> 19: 2020-03-25 14:00:00 KUM 5.0 NA NOAA
#> 20: 2020-04-09 14:00:00 KUM 5.0 NA NOAA
#> 21: 2020-04-23 14:00:00 KUM 5.0 NA NOAA
#> 22: 2020-04-27 14:00:00 KUM 5.0 NA NOAA
#> 23: 2020-05-18 14:00:00 KUM 5.0 NA NOAA
#> 24: 2020-06-15 14:00:00 KUM 5.0 NA NOAA
#> 25: 2020-06-22 14:00:00 KUM 5.0 NA NOAA
#> 26: 2020-08-05 14:00:00 KUM 5.0 NA NOAA
#> 27: 2020-11-02 14:00:00 KUM 5.0 NA NOAA
#> 28: 2020-11-12 14:00:00 KUM 5.0 NA NOAA
#> 29: 2020-11-27 14:00:00 KUM 5.0 NA NOAA
#> 30: 2020-12-22 14:00:00 KUM 5.0 NA NOAA
#> 31: 2020-02-18 14:00:00 NWR 3.0 NA NOAA
#> 32: 2020-01-06 14:00:00 UTA 5.0 NA NOAA
#> 33: 2020-04-22 14:00:00 UTA 5.0 NA NOAA
#> 34: 2020-05-05 14:00:00 UTA 5.0 NA NOAA
#> 35: 2020-05-22 14:00:00 UTA 5.0 NA NOAA
#> 36: 2020-06-04 14:00:00 UTA 5.0 NA NOAA
#> 37: 2020-07-09 14:00:00 UTA 5.0 NA NOAA
#> 38: 2020-08-26 14:00:00 UTA 5.0 NA NOAA
#> 39: 2020-09-26 14:00:00 UTA 5.0 NA NOAA
#> 40: 2020-12-13 14:00:00 UTA 5.0 NA NOAA
#> 41: 2020-12-31 14:00:00 UTA 5.0 NA NOAA
#> timeUTC site_code altitude_final type_altitude lab_1_abbr
#> dataset_calibration_scale value latitude longitude site_utc2lst
#> <char> <num> <num> <num> <int>
#> 1: WMO CH4 X2004A 1.981330e-06 71.3230 -156.6114 -9
#> 2: WMO CH4 X2004A 1.976885e-06 71.3230 -156.6114 -9
#> 3: WMO CH4 X2004A 1.974370e-06 71.3230 -156.6114 -9
#> 4: WMO CH4 X2004A 1.983600e-06 71.3230 -156.6114 -9
#> 5: WMO CH4 X2004A 1.984495e-06 71.3230 -156.6114 -9
#> 6: WMO CH4 X2004A 1.978800e-06 71.3230 -156.6114 -9
#> 7: WMO CH4 X2004A 2.047570e-06 71.3230 -156.6114 -9
#> 8: WMO CH4 X2004A 2.071295e-06 71.3230 -156.6114 -9
#> 9: WMO CH4 X2004A 1.993950e-06 71.3230 -156.6114 -9
#> 10: WMO CH4 X2004A 1.994955e-06 71.3230 -156.6114 -9
#> 11: WMO CH4 X2004A 1.944370e-06 25.6654 -80.1580 -5
#> 12: WMO CH4 X2004A 2.000845e-06 25.6654 -80.1580 -5
#> 13: WMO CH4 X2004A 1.886655e-06 25.6654 -80.1580 -5
#> 14: WMO CH4 X2004A 1.895580e-06 25.6654 -80.1580 -5
#> 15: WMO CH4 X2004A 1.891915e-06 25.6654 -80.1580 -5
#> 16: WMO CH4 X2004A 1.880020e-06 25.6654 -80.1580 -5
#> 17: WMO CH4 X2004A 1.918770e-06 25.6654 -80.1580 -5
#> 18: WMO CH4 X2004A 1.965620e-06 25.6654 -80.1580 -5
#> 19: WMO CH4 X2004A 1.927310e-06 19.5610 -154.8880 -10
#> 20: WMO CH4 X2004A 1.895440e-06 19.7333 -155.0500 -10
#> 21: WMO CH4 X2004A 1.914920e-06 19.5610 -154.8880 -10
#> 22: WMO CH4 X2004A 1.901675e-06 19.5610 -154.8880 -10
#> 23: WMO CH4 X2004A 1.908090e-06 19.5608 -154.8880 -10
#> 24: WMO CH4 X2004A 1.907615e-06 19.5608 -154.8880 -10
#> 25: WMO CH4 X2004A 1.911838e-06 19.5609 -154.8880 -10
#> 26: WMO CH4 X2004A 1.888437e-06 19.5608 -154.8880 -10
#> 27: WMO CH4 X2004A 1.891255e-06 19.5610 -154.8880 -10
#> 28: WMO CH4 X2004A 1.944140e-06 19.5610 -154.8880 -10
#> 29: WMO CH4 X2004A 1.939297e-06 19.5609 -154.8880 -10
#> 30: WMO CH4 X2004A 1.938703e-06 19.5610 -154.8880 -10
#> 31: WMO CH4 X2004A 1.942230e-06 40.0531 -105.5864 -7
#> 32: WMO CH4 X2004A 1.945065e-06 39.9018 -113.7181 -7
#> 33: WMO CH4 X2004A 1.938295e-06 39.9018 -113.7181 -7
#> 34: WMO CH4 X2004A 1.936620e-06 39.9018 -113.7181 -7
#> 35: WMO CH4 X2004A 1.941100e-06 39.9018 -113.7181 -7
#> 36: WMO CH4 X2004A 1.895945e-06 39.9018 -113.7181 -7
#> 37: WMO CH4 X2004A 1.932505e-06 39.9018 -113.7181 -7
#> 38: WMO CH4 X2004A 1.906940e-06 39.9018 -113.7181 -7
#> 39: WMO CH4 X2004A 1.939845e-06 39.9018 -113.7181 -7
#> 40: WMO CH4 X2004A 1.969730e-06 39.9018 -113.7181 -7
#> 41: WMO CH4 X2004A 1.971970e-06 39.9018 -113.7181 -7
#> dataset_calibration_scale value latitude longitude site_utc2lst
#> year month day hour minute second time time_decimal max_altitude
#> <int> <int> <int> <int> <int> <int> <int> <num> <num>
#> 1: 2020 3 10 14 0 0 1583848800 2020.190 16.5
#> 2: 2020 4 1 14 0 0 1585749600 2020.250 16.5
#> 3: 2020 4 14 14 0 0 1586872800 2020.286 16.5
#> 4: 2020 10 2 14 0 0 1601647200 2020.753 16.5
#> 5: 2020 10 2 14 0 0 1601647200 2020.753 16.5
#> 6: 2020 10 5 14 0 0 1601906400 2020.761 16.5
#> 7: 2020 10 30 14 0 0 1604066400 2020.829 16.5
#> 8: 2020 10 30 14 0 0 1604066400 2020.829 16.5
#> 9: 2020 11 10 14 0 0 1605016800 2020.860 16.5
#> 10: 2020 11 10 14 0 0 1605016800 2020.860 16.5
#> 11: 2020 3 27 14 0 0 1585317600 2020.237 5.0
#> 12: 2020 4 22 14 0 0 1587564000 2020.308 5.0
#> 13: 2020 5 28 14 0 0 1590674400 2020.406 5.0
#> 14: 2020 6 9 14 0 0 1591711200 2020.439 5.0
#> 15: 2020 6 18 14 0 0 1592488800 2020.463 5.0
#> 16: 2020 6 25 14 0 0 1593093600 2020.482 5.0
#> 17: 2020 7 15 14 0 0 1594821600 2020.537 5.0
#> 18: 2020 12 12 14 0 0 1607781600 2020.947 5.0
#> 19: 2020 3 25 14 0 0 1585144800 2020.231 5.0
#> 20: 2020 4 9 14 0 0 1586440800 2020.272 5.0
#> 21: 2020 4 23 14 0 0 1587650400 2020.310 5.0
#> 22: 2020 4 27 14 0 0 1587996000 2020.321 5.0
#> 23: 2020 5 18 14 0 0 1589810400 2020.379 5.0
#> 24: 2020 6 15 14 0 0 1592229600 2020.455 5.0
#> 25: 2020 6 22 14 0 0 1592834400 2020.474 5.0
#> 26: 2020 8 5 14 0 0 1596636000 2020.594 5.0
#> 27: 2020 11 2 14 0 0 1604325600 2020.838 5.0
#> 28: 2020 11 12 14 0 0 1605189600 2020.865 5.0
#> 29: 2020 11 27 14 0 0 1606485600 2020.906 5.0
#> 30: 2020 12 22 14 0 0 1608645600 2020.974 5.0
#> 31: 2020 2 18 14 0 0 1582034400 2020.133 3.0
#> 32: 2020 1 6 14 0 0 1578319200 2020.015 5.0
#> 33: 2020 4 22 14 0 0 1587564000 2020.308 5.0
#> 34: 2020 5 5 14 0 0 1588687200 2020.343 5.0
#> 35: 2020 5 22 14 0 0 1590156000 2020.390 5.0
#> 36: 2020 6 4 14 0 0 1591279200 2020.425 5.0
#> 37: 2020 7 9 14 0 0 1594303200 2020.521 5.0
#> 38: 2020 8 26 14 0 0 1598450400 2020.652 5.0
#> 39: 2020 9 26 14 0 0 1601128800 2020.737 5.0
#> 40: 2020 12 13 14 0 0 1607868000 2020.950 5.0
#> 41: 2020 12 31 14 0 0 1609423200 2020.999 5.0
#> year month day hour minute second time time_decimal max_altitude
#> local_time
#> <lgcl>
#> 1: NA
#> 2: NA
#> 3: NA
#> 4: NA
#> 5: NA
#> 6: NA
#> 7: NA
#> 8: NA
#> 9: NA
#> 10: NA
#> 11: NA
#> 12: NA
#> 13: NA
#> 14: NA
#> 15: NA
#> 16: NA
#> 17: NA
#> 18: NA
#> 19: NA
#> 20: NA
#> 21: NA
#> 22: NA
#> 23: NA
#> 24: NA
#> 25: NA
#> 26: NA
#> 27: NA
#> 28: NA
#> 29: NA
#> 30: NA
#> 31: NA
#> 32: NA
#> 33: NA
#> 34: NA
#> 35: NA
#> 36: NA
#> 37: NA
#> 38: NA
#> 39: NA
#> 40: NA
#> 41: NA
#> local_time
Receptors
Now we can do the last step which is generating the receptor list files. Now we filter selected columns
# Keep only the columns needed for the receptor list, in this order.
receptor <- master[,
                   .SD,
                   .SDcols = c("site_code",
                               "year",
                               "month",
                               "day",
                               "hour",
                               "minute",
                               "second",
                               "latitude",
                               "longitude",
                               "altitude_final",
                               "type_altitude",
                               "time_decimal")]
We can round altitude also
# Round the receptor height to a whole number.
receptor$altitude_final <- round(receptor$altitude_final, digits = 0)
Now we can format time variables with two digits
# Format the listed time fields via obs_format().
# NOTE(review): presumably zero-pads each field to two digits -- confirm.
receptor <- receptor |>
  obs_format(spf = c("month", "day", "hour", "minute", "second"))
We have a column, type_altitude, that indicates AGL (0) or ASL (1)
# Split the receptors on the type_altitude flag.
# Rows where the flag is NA match neither comparison and land in neither table.
receptor_agl <- receptor[type_altitude == 0, ]
receptor_asl <- receptor[type_altitude == 1, ]
Finally, we save the receptors
# Write each receptor list only when it has rows.
# The path is built once per file so the announced path and the written
# path cannot drift apart (the ASL path previously lacked the "_"
# separator before "receptor").
if (nrow(receptor_agl) > 0) {
  agl_file <- paste0(out, "_", datasetid, "_receptor_AGL.txt")
  message(agl_file, "\n")
  fwrite(x = receptor_agl,
         file = agl_file,
         sep = " ")
}
if (nrow(receptor_asl) > 0) {
  asl_file <- paste0(out, "_", datasetid, "_receptor_ASL.txt")
  message(asl_file, "\n")
  fwrite(x = receptor_asl,
         file = asl_file,
         sep = " ")
}
Plot
Finally, we just plot some data, run it locally
# Plot the aggregated values against timeUTC, with yfactor = 1e9.
df4 |>
  obs_plot(time = "timeUTC", yfactor = 1e9)
#> Found the following sites:
#> [1] ABT BCK BRA BRW CARL CBY CHL CPS EGB ESP EST ETL
#> [13] FNE FORT FSD GHG06 GHG09 HNP HOBB INU INX01 INX02 INX07 INX08
#> [25] INX09 INX10 INX13 INX14 LLB MALJ MLO OLI SGP THD TPD WSA
#> Plotting the following sites:
#> [1] ABT BCK
library(sf)
# Mean value per unique coordinate pair.
dx <- df4[,
          lapply(.SD, mean),
          .SDcols = "value",
          by = .(latitude, longitude)]
# Convert to an sf point layer (EPSG:4326).
x <- st_as_sf(dx, coords = c("longitude", "latitude"), crs = 4326)
# reset = FALSE keeps the plot open so the map outline can be added on top.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
plot(x["value"], axes = TRUE, reset = FALSE)
maps::map(add = TRUE)