cate = c("aircraft-pfp",
"aircraft-insitu",
"aircraft-flask",
"surface-insitu",
"surface-flask",
"surface-pfp",
"tower-insitu",
"aircore",
"shipboard-insitu",
"shipboard-flask")
obs <- "Z:/torf/obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08/data/nc/"
index <- obs_summary(obs = obs,
categories = cate)
#> Number of files of index: 429
#> sector N
#> <char> <int>
#> 1: aircraft-pfp 40
#> 2: aircraft-insitu 15
#> 3: surface-flask 106
#> 4: surface-insitu 174
#> 5: aircraft-flask 4
#> 6: aircore 1
#> 7: surface-pfp 33
#> 8: tower-insitu 51
#> 9: shipboard-flask 4
#> 10: shipboard-insitu 1
#> 11: Total sectors 429
#> Detected 190 files with agl
#> Detected 239 files without agl
Now we read the shipboard-flask
using the function
obs_read_nc
. solar_time
is included for
surface, so we TRUE
that argument.
datasetid <- "surface-flask"
df <- obs_read_nc(index = index,
categories = datasetid,
solar_time = TRUE,
verbose = TRUE)
#> Searching shipboard-flask...
#> 1: ch4_drp_shipboard-flask_1_representative.nc
#> 2: ch4_poc_shipboard-flask_1_representative.nc
#> 3: ch4_scs_shipboard-flask_1_representative.nc
#> 4: ch4_wpc_shipboard-flask_1_representative.nc
Now we check the data
df
#> year month day hour minute second time start_time midpoint_time
#> <int> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1: 2006 3 22 20 40 0 1143060000 1143060000 1143060000
#> 2: 2006 3 23 13 0 0 1143118800 1143118800 1143118800
#> 3: 2006 3 24 8 10 0 1143187800 1143187800 1143187800
#> 4: 2006 3 25 12 10 0 1143288600 1143288600 1143288600
#> 5: 2006 3 25 17 54 0 1143309240 1143309240 1143309240
#> ---
#> 8721: 2013 5 30 14 46 0 1369925160 1369925160 1369925160
#> 8722: 2013 5 31 0 14 0 1369959240 1369959240 1369959240
#> 8723: 2013 5 31 10 8 0 1369994880 1369994880 1369994880
#> 8724: 2013 6 1 4 2 0 1370059320 1370059320 1370059320
#> 8725: 2013 6 1 23 26 0 1370129160 1370129160 1370129160
#> datetime time_decimal time_interval value value_unc
#> <char> <num> <int> <num> <num>
#> 1: 2006-03-22T20:40:00Z 2006.222 3600 1.724095e-06 2.05e-09
#> 2: 2006-03-23T13:00:00Z 2006.223 3600 1.711290e-06 2.05e-09
#> 3: 2006-03-24T08:10:00Z 2006.226 3600 1.711190e-06 2.05e-09
#> 4: 2006-03-25T12:10:00Z 2006.229 3600 1.709910e-06 2.05e-09
#> 5: 2006-03-25T17:54:00Z 2006.229 3600 1.710660e-06 2.05e-09
#> ---
#> 8721: 2013-05-30T14:46:00Z 2013.410 3600 1.803340e-06 9.10e-10
#> 8722: 2013-05-31T00:14:00Z 2013.411 3600 1.809640e-06 9.10e-10
#> 8723: 2013-05-31T10:08:00Z 2013.412 3600 1.846435e-06 9.10e-10
#> 8724: 2013-06-01T04:02:00Z 2013.414 3600 1.862995e-06 9.10e-10
#> 8725: 2013-06-01T23:26:00Z 2013.416 3600 1.865480e-06 9.10e-10
#> nvalue value_std_dev latitude longitude altitude elevation intake_height
#> <int> <num> <num> <num> <num> <num> <num>
#> 1: 2 1.251579e-09 -55.00 -64.91 10 0 10
#> 2: 2 1.612203e-09 -56.06 -64.60 10 0 10
#> 3: 2 1.979899e-09 -57.11 -64.25 10 0 10
#> 4: 2 3.917372e-09 -57.83 -64.01 10 0 10
#> 5: 2 1.103087e-09 -58.20 -63.88 10 0 10
#> ---
#> 8721: 2 2.404163e-10 15.01 144.52 10 0 10
#> 8722: 2 2.404163e-10 17.53 143.31 10 0 10
#> 8723: 2 4.030509e-10 20.09 142.05 10 0 10
#> 8724: 2 1.343503e-10 25.00 139.59 10 0 10
#> 8725: 2 9.333809e-10 30.11 136.91 10 0 10
#> qcflag instrument analysis_datetime method event_number
#> <char> <char> <char> <char> <char>
#> 1: ... H4 2006-04-20T11:00:00 N 215659
#> 2: ... H4 2006-04-20T13:10:00 N 215648
#> 3: ... H4 2006-04-20T14:50:00 N 215645
#> 4: ... H4 2006-04-20T15:19:00 N 215643
#> 5: ... H4 2006-04-20T12:27:00 N 215657
#> ---
#> 8721: ... H11 2013-07-31T11:21:00 G 354983
#> 8722: ... H11 2013-07-30T10:12:00 G 354993
#> 8723: ... H11 2013-07-30T09:40:00 G 354995
#> 8724: ... H11 2013-07-30T09:24:00 G 354990
#> 8725: ... H11 2013-07-29T16:34:00 G 354992
#> air_sample_container_id obs_flag obspack_num
#> <char> <int> <int>
#> 1: 4114-99 1 1059927
#> 2: 3943-99 1 1059928
#> 3: 3356-99 1 1059929
#> 4: 2962-99 1 1059930
#> 5: 1248-99 1 1059931
#> ---
#> 8721: 2783-99 1 1130189
#> 8722: 1042-99 1 1130190
#> 8723: 4167-99 1 1130191
#> 8724: 3595-99 1 1130192
#> 8725: 4080-99 1 1130193
#> obspack_id
#> <char>
#> 1: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_drp_shipboard-flask_1_representative~1059927
#> 2: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_drp_shipboard-flask_1_representative~1059928
#> 3: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_drp_shipboard-flask_1_representative~1059929
#> 4: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_drp_shipboard-flask_1_representative~1059930
#> 5: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_drp_shipboard-flask_1_representative~1059931
#> ---
#> 8721: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_wpc_shipboard-flask_1_representative~1130189
#> 8722: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_wpc_shipboard-flask_1_representative~1130190
#> 8723: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_wpc_shipboard-flask_1_representative~1130191
#> 8724: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_wpc_shipboard-flask_1_representative~1130192
#> 8725: obspack_ch4_1_GLOBALVIEWplus_v5.1_2023-03-08~ch4_wpc_shipboard-flask_1_representative~1130193
#> unique_sample_location_num year_st month_st day_st hour_st minute_st
#> <int> <int> <int> <int> <int> <int>
#> 1: 1047155 2006 3 22 16 13
#> 2: 1047150 2006 3 23 8 34
#> 3: 1047146 2006 3 24 3 46
#> 4: 1047142 2006 3 25 7 47
#> 5: 1047139 2006 3 25 13 32
#> ---
#> 8721: 22039456 2013 5 31 0 26
#> 8722: 22039471 2013 5 31 9 49
#> 8723: 22039489 2013 5 31 19 38
#> 8724: 22039499 2013 6 1 13 22
#> 8725: 22039524 2013 6 2 8 36
#> second_st scale site_elevation_unit dataset_project
#> <int> <char> <char> <char>
#> 1: 7 WMO CH4 X2004A masl shipboard-flask
#> 2: 41 WMO CH4 X2004A masl shipboard-flask
#> 3: 24 WMO CH4 X2004A masl shipboard-flask
#> 4: 40 WMO CH4 X2004A masl shipboard-flask
#> 5: 11 WMO CH4 X2004A masl shipboard-flask
#> ---
#> 8721: 48 WMO CH4 X2004A masl shipboard-flask
#> 8722: 48 WMO CH4 X2004A masl shipboard-flask
#> 8723: 46 WMO CH4 X2004A masl shipboard-flask
#> 8724: 47 WMO CH4 X2004A masl shipboard-flask
#> 8725: 3 WMO CH4 X2004A masl shipboard-flask
#> dataset_selection_tag site_name site_elevation site_latitude
#> <char> <char> <num> <num>
#> 1: representative Drake Passage 0 -5.9e+01
#> 2: representative Drake Passage 0 -5.9e+01
#> 3: representative Drake Passage 0 -5.9e+01
#> 4: representative Drake Passage 0 -5.9e+01
#> 5: representative Drake Passage 0 -5.9e+01
#> ---
#> 8721: representative Western Pacific Cruise 0 -1.0e+34
#> 8722: representative Western Pacific Cruise 0 -1.0e+34
#> 8723: representative Western Pacific Cruise 0 -1.0e+34
#> 8724: representative Western Pacific Cruise 0 -1.0e+34
#> 8725: representative Western Pacific Cruise 0 -1.0e+34
#> site_longitude site_country site_code site_utc2lst lab_1_abbr
#> <num> <char> <char> <num> <char>
#> 1: -6.469e+01 N/A DRP -4 NOAA
#> 2: -6.469e+01 N/A DRP -4 NOAA
#> 3: -6.469e+01 N/A DRP -4 NOAA
#> 4: -6.469e+01 N/A DRP -4 NOAA
#> 5: -6.469e+01 N/A DRP -4 NOAA
#> ---
#> 8721: -1.000e+34 N/A WPC NA NOAA
#> 8722: -1.000e+34 N/A WPC NA NOAA
#> 8723: -1.000e+34 N/A WPC NA NOAA
#> 8724: -1.000e+34 N/A WPC NA NOAA
#> 8725: -1.000e+34 N/A WPC NA NOAA
#> dataset_calibration_scale altitude_final type_altitude
#> <char> <num> <lgcl>
#> 1: WMO CH4 X2004A 10 NA
#> 2: WMO CH4 X2004A 10 NA
#> 3: WMO CH4 X2004A 10 NA
#> 4: WMO CH4 X2004A 10 NA
#> 5: WMO CH4 X2004A 10 NA
#> ---
#> 8721: WMO CH4 X2004A 10 NA
#> 8722: WMO CH4 X2004A 10 NA
#> 8723: WMO CH4 X2004A 10 NA
#> 8724: WMO CH4 X2004A 10 NA
#> 8725: WMO CH4 X2004A 10 NA
Now we can process the data. We first filter for observations within our spatial domain:
Checks and definitions
yy <- 2020
evening <- 14
We check altitude, intake_height, altitude_final and elevation. altitude_final is a column from intake_height, added to match column from obs_read text files.
df[, c("altitude", "altitude_final", "intake_height", "elevation",
"dataset_selection_tag",
"site_name")]
#> altitude altitude_final intake_height elevation dataset_selection_tag
#> <num> <num> <num> <num> <char>
#> 1: 10 10 10 0 representative
#> 2: 10 10 10 0 representative
#> 3: 10 10 10 0 representative
#> 4: 10 10 10 0 representative
#> 5: 10 10 10 0 representative
#> ---
#> 8721: 10 10 10 0 representative
#> 8722: 10 10 10 0 representative
#> 8723: 10 10 10 0 representative
#> 8724: 10 10 10 0 representative
#> 8725: 10 10 10 0 representative
#> site_name
#> <char>
#> 1: Drake Passage
#> 2: Drake Passage
#> 3: Drake Passage
#> 4: Drake Passage
#> 5: Drake Passage
#> ---
#> 8721: Western Pacific Cruise
#> 8722: Western Pacific Cruise
#> 8723: Western Pacific Cruise
#> 8724: Western Pacific Cruise
#> 8725: Western Pacific Cruise
The temporal range of data is
range(df$year)
#> [1] 1986 2021
We also check for dimensions of data
dim(df)
#> [1] 8725 52
Filters
df <- df[year == yy]
dim(df)
#> [1] 8 52
Towers can have observations at different heights. Here we need to
select one site with the observations registered at the highest height.
The column with the height is named altitude_final
and the
max altitude was named max_altitude
.
#> site_code max_altitude
#> <char> <num>
#> 1: DRP 10
Key Time
Here we need to start time columns. The function
obs_addtime
adds time columns timeUTC
,
timeUTC_start
which shows the start time of each
observation and timeUTC_end
which shows the end time for
each observation.
df2 <- obs_addtime(df)
#> Adding timeUTC
#> Adding timeUTC_start
#> Adding timeUTC_end
#> Found time_interval
Then we need a key_time to aggregate data. This can be done using UTC, solar, or local time. The normal approach is using afternoon solar or local time.
Hierarchy of solar or local time
- Solar time
- Local time with columns
site_utc2lst
- Local time longitude
solar time (default)
Here we select the hours of interest and then aggregate data by year, month and day of solar time. In this way, we will have one information per day. however this approach is not appropriate for aircraft which are aggregated every 10 or 20 seconds. Hence we need to aggregate data by one time column. Also, this helps to generate the receptor info files including hour, minute and second. Hence, we need to add solar or local time column.
df2$solar_time <- obs_addstime(df2)
local time with column
site_utc2lst
Then we need to identify the local time with the function
add_ltime
. This is important because to identifying
observations in the evening in local time. add_ltime
uses
two methods, first identify the time difference with utc by identifying
the metadata column “site_utc2lst”. If solar time is not available #now
we need to cut solar time for the frequency needed. As we will work
with
local time longitude
If this information is not available, with the aircrafts for instance, the local time is calculated with an approximation based on longitude:
Where is the local time, the time, the coordinate. Then, the time is cut every two hours. Now, we identify the local time to select evening hours.
Cut time
Now we have they key column time, we can cut it accordingly.
df2$solar_time_cut <- cut(x = df2$solar_time,
breaks = "1 hour") |>
as.character()
How we can check the solar time and the cut solar time. Please note that solar_time_cut, the column that it will be used to aggregate data
How we filter for the required solar time, in this case 14.
df3 <- df2
df3[, c("solar_time", "solar_time_cut")]
#> solar_time solar_time_cut
#> <POSc> <char>
#> 1: 2020-01-01 15:15:55 2020-01-01 15:00:00
#> 2: 2020-02-09 20:49:04 2020-02-09 20:00:00
#> 3: 2020-02-18 22:09:41 2020-02-18 22:00:00
#> 4: 2020-03-18 15:56:42 2020-03-18 15:00:00
#> 5: 2020-06-07 17:50:25 2020-06-07 17:00:00
#> 6: 2020-06-19 03:58:54 2020-06-19 03:00:00
#> 7: 2020-12-01 07:08:23 2020-12-01 07:00:00
#> 8: 2020-12-27 03:03:01 2020-12-27 03:00:00
At this point we can calculate the averages of several columns by the
cut time. The function obs_agg
does this aggregation as
shown in the following lines of code. The argument gby
establish the function used to aggregate cols
. I need to
aggregate the data by date (year, month, date), because it is already
filtered by the hours of interest. Then, I would have 1 observation per
day.
As standard, let us define key_time
as
solar_time
. The obs_agg
function will
aggregate the desired data by that column.
df3$key_time <- df3$solar_time_cut
df4 <- obs_agg(dt = df3,
cols = c("value",
"latitude",
"longitude",
"site_utc2lst"),
verbose = T,
byalt = TRUE)
#> Selecting by alt
#> Adding time
Here we add the column max_altitude
to identify the max
altitude by site.
df4[,
max_altitude := max(altitude_final),
by = site_code]
df4[,
c("site_code",
"altitude_final",
"max_altitude")] |> unique()
#> site_code altitude_final max_altitude
#> <char> <num> <num>
#> 1: DRP 10 10
Master
Before generating the receptors list, we have the database with all the required information
master <- df4
We may replace missing values with a nine nines. Here is commented
#master[is.na(master)] <- 999999999
We transform the time variables to character and round coordinates with 4 digits
master$timeUTC <- as.character(master$timeUTC)
master$local_time <- as.character(master$local_time)
master$latitude <- round(master$latitude, 4)
master$longitude <- round(master$longitude, 4)
Save master
Finally we save the master file
out <- tempfile()
txt
message(paste0(out,"_", datasetid, ".txt\n"))
fwrite(master,
paste0(out,"_", datasetid, ".txt"),
sep = " ")
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpMzbU8P\file5e287aa53d25_shipboard-flask.txt
csv
message(paste0(out,"_", datasetid, ".csv\n"))
fwrite(master,
paste0(out,"_", datasetid, ".csv"),
sep = ",")
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpMzbU8P\file5e287aa53d25_shipboard-flask.csv
csvy
CSVY are csv files with a YAML header to include metadata in tabulated text files
cat("\nAdding notes in csvy:\n")
notes <- c(paste0("sector: ", datasetid),
paste0("timespan: ", yy),
paste0("hours: ", evening),
"local_time: used solar_time")
cat(notes, sep = "\n")
message(paste0(out,"_", datasetid, ".csvy\n"))
obs_write_csvy(dt = master,
notes = notes,
out = paste0(out,"_", datasetid, ".csvy"))
#> Adding notes in csvy:
#> sector: surface-insitu
#> timespan: 2020
#> spatial_limits: north = 80, south = 10, east = -50, west = -170
#> data: Data averaged every 20 seconds
#> altitude: < 8000
#> hours: 14
#> local_time: used solar_time
#> C:\Users\sibarrae\AppData\Local\Temp\RtmpSMAWpf\file9558428068b2_surface-flask.csvy
obs_read_csvy(paste0(out,"_", datasetid, ".csvy"))
#> [1] "---"
#> [2] "name: Metadata "
#> [3] "sector: shipboard-flask"
#> [4] "timespan: 2020"
#> [5] "hours: 14"
#> [6] "local_time: used solar_time"
#> [7] "structure: "
#> [8] "Classes 'data.table' and 'data.frame':\t8 obs. of 20 variables:"
#> [9] " $ timeUTC : chr \"2020-01-01 15:00:00\" \".."
#> [10] " $ site_code : chr \"DRP\" \"DRP\" ..."
#> [11] " $ altitude_final : num 10 10 10 10 10 ..."
#> [12] " $ type_altitude : logi NA NA NA ..."
#> [13] " $ lab_1_abbr : chr \"NOAA\" \"NOAA\" ..."
#> [14] " $ dataset_calibration_scale: chr \"WMO CH4 X2004A\" \"WMO \".."
#> [15] " $ value : num 1.81e-06 1.81e-06 ..."
#> [16] " $ latitude : num -59 -59 ..."
#> [17] " $ longitude : num -63.7 -63.7 ..."
#> [18] " $ site_utc2lst : num -4 -4 -4 -4 -4 ..."
#> [19] " $ year : int 2020 2020 2020 2020 202.."
#> [20] " $ month : int 1 2 2 3 6 ..."
#> [21] " $ day : chr \"01\" \"09\" ..."
#> [22] " $ hour : int 15 20 22 15 17 ..."
#> [23] " $ minute : int 0 0 0 0 0 ..."
#> [24] " $ second : int 0 0 0 0 0 ..."
#> [25] " $ time : num 1.58e+09 1.58e+09 ..."
#> [26] " $ time_decimal : num 2020 2020 ..."
#> [27] " $ max_altitude : num 10 10 10 10 10 ..."
#> [28] " $ local_time : chr NA NA ..."
#> [29] " - attr(*, \".internal.selfref\")=<externalptr> "
#> [30] "NULL"
#> [31] "---"
#> timeUTC site_code altitude_final type_altitude lab_1_abbr
#> <POSc> <char> <int> <lgcl> <char>
#> 1: 2020-01-01 15:00:00 DRP 10 NA NOAA
#> 2: 2020-02-09 20:00:00 DRP 10 NA NOAA
#> 3: 2020-02-18 22:00:00 DRP 10 NA NOAA
#> 4: 2020-03-18 15:00:00 DRP 10 NA NOAA
#> 5: 2020-06-07 17:00:00 DRP 10 NA NOAA
#> 6: 2020-06-19 03:00:00 DRP 10 NA NOAA
#> 7: 2020-12-01 07:00:00 DRP 10 NA NOAA
#> 8: 2020-12-27 03:00:00 DRP 10 NA NOAA
#> dataset_calibration_scale value latitude longitude site_utc2lst year
#> <char> <num> <num> <num> <int> <int>
#> 1: WMO CH4 X2004A 1.810345e-06 -59.0000 -63.6833 -4 2020
#> 2: WMO CH4 X2004A 1.805695e-06 -58.9833 -63.6833 -4 2020
#> 3: WMO CH4 X2004A 1.804080e-06 -59.0167 -62.8167 -4 2020
#> 4: WMO CH4 X2004A 1.803575e-06 -58.9500 -64.2667 -4 2020
#> 5: WMO CH4 X2004A 1.821020e-06 -58.9500 -68.2167 -4 2020
#> 6: WMO CH4 X2004A 1.824280e-06 -58.9833 -64.9667 -4 2020
#> 7: WMO CH4 X2004A 1.833670e-06 -59.0050 -67.7335 -4 2020
#> 8: WMO CH4 X2004A 1.825290e-06 -58.9750 -63.7260 -4 2020
#> month day hour minute second time time_decimal max_altitude
#> <int> <int> <int> <int> <int> <int> <num> <int>
#> 1: 1 1 15 0 0 1577890800 2020.002 10
#> 2: 2 9 20 0 0 1581278400 2020.109 10
#> 3: 2 18 22 0 0 1582063200 2020.134 10
#> 4: 3 18 15 0 0 1584543600 2020.212 10
#> 5: 6 7 17 0 0 1591549200 2020.434 10
#> 6: 6 19 3 0 0 1592535600 2020.465 10
#> 7: 12 1 7 0 0 1606806000 2020.916 10
#> 8: 12 27 3 0 0 1609038000 2020.987 10
#> local_time
#> <lgcl>
#> 1: NA
#> 2: NA
#> 3: NA
#> 4: NA
#> 5: NA
#> 6: NA
#> 7: NA
#> 8: NA
Receptors
Now we can do the last step which is generating the receptor list files. Now we filter selected columns
receptor <- master[, c("site_code",
"year",
"month",
"day",
"hour",
"minute",
"second",
"latitude",
"longitude",
"altitude_final",
"type_altitude",
"time_decimal")]
We can round altitude also
receptor$altitude_final <- round(receptor$altitude_final)
Now we can format time variables with two digits
receptor <- obs_format(receptor,
spf = c("month", "day",
"hour", "minute", "second"))
We have a column that indicate AGL or ASL
receptor_agl <- receptor[type_altitude == 0]
receptor_asl <- receptor[type_altitude == 1]
Finally, we save the receptors
if(nrow(receptor_agl) > 0) {
message(paste0(out, "_", datasetid, "_receptor_AGL.txt"), "\n")
fwrite(x = receptor_agl,
file = paste0(out, "_", datasetid, "_receptor_AGL.txt"),
sep = " ")
}
if(nrow(receptor_asl) > 0) {
message(paste0(out, "_", datasetid, "_receptor_ASL.txt"), "\n")
fwrite(x = receptor_asl,
file = paste0(out, "_", datasetid, "receptor_ASL.txt"),
sep = " ")
}
Plot
Finally, we just plot some data, run it locally
obs_plot(df4, time = "timeUTC", yfactor = 1e9)
#> Found the following sites:
#> [1] DRP
#> Plotting the following sites:
#> [1] DRP
#> png
#> 2