hysplit • rtorf

library(rtorf)
library(data.table)
library(rslurm)

Assuming you have a receptor file as shown in the previous articles/vignettes:

Get sure you know your working directory. You can use getwd() to check it. You can set it as

setwd("/path/to/myfootprints/")

Read receptor

x <- fread("receptor.csv")

Rename variables and add the expected footprint id name


x$altitude  <- x$altitude_final

x[, id := obs_footname(year = year, 
                       month = month, 
                       day = day, 
                       hour = hour, 
                       minute = minute, 
                       lat = latitude, 
                       lon = longitude, 
                       alt = altitude)]

for instance

x$id[1]
[1] "2020x12x30x09x54x03.2133Nx030.9131Ex00497"

Which include

year (for digits)
x
month (two digits)
x
day (two digits)
x
hour (two digits)
x
minute (two digits)
x
latitude (floating point 6 digits, 4 decimals)
Capital N or S
longitude (floating point 7 digits, 4 decimals)
Capital for E or W
x
altitude (five digits)

Now we need to create the directories where we will be running hysplit

x[, dir := paste0("/Path/To/Footprints/",
                  year, 
                  "/", 
                  sprintf("%02d", 
                          month), 
                  "/tmp_", 
                  id)]

For instance

x$dir[1]
[1] "/Path/To/Footprints/2020/12/tmp_2020x12x30x09x54x03.2133Nx030.9131Ex00497"

The structure is:

path
year (four digits)
month (2 digits)
tmp_ + id

Now we can create the directories recursively. If you run this command again you will receive warning that the directory exists and nothing will be overwritten

invisible(sapply(x$dir, dir.create, recursive = T))

We can add an index for each row

x[, idn := 1:.N]

Now we can create the SETUP in each directory

for(i in seq_along(x$dir)) {
  obs_hysplit_setup(setup = paste0(x$dir[i], "/SETUP.CFG"))
}

Now we can create the ASCDATA in each directory

for(i in seq_along(x$dir)) {
  obs_hysplit_ascdata(ascdata = paste0(x$dir[i], "/ASCDATA.CFG"))
}

And now the same with the CONTROL files:

for(i in seq_along(x$dir)) {
  # print(paste0(x$dir[i], "/CONTROL"))
  obs_hysplit_control(df = x[i], 
                      top_model_domain = 10000,
                      met = "gfs0p25",
                      metpath = "/Path/To/metfiles/gfs0p25/",
                      emissions_rate = 0,
                      hour_emissions = 0.01,
                      center_conc_grids = c(5, 45),
                      grid_spacing = c(1, 1),
                      grid_span = c(69, 69),
                      height_vert_levels = 50,
                      sampling_interval_type = c(0, 1, 0),
                      control = paste0(x$dir[i], "/CONTROL"))
  
}

We can order the data.table by id, just in case

setorderv(x, "idn")

We have some internal R scripts to transform hysplit outputs into a NetCDF file. We are currently porting the functions into an R package, well documented, available and easy to use. So we need to check if the file exists:

x[, nc := paste0(dir,  "/hysplit", id, ".nc")]

x[, nc_exists := file.exists(paste0(dir,  "/hysplit", id, ".nc"))]

for instance:

x$nc[1]
[1] "/Path/To/Footprints/2020/12/tmp_2020x12x30x09x54x03.2133Nx030.9131Ex00497/hysplit2020x12x30x09x54x03.2133Nx030.9131Ex00497.nc"

We can check the number of NetCDF generated:

x[,.N, by = nc_exists]

x <- x[nc_exists == FALSE]

if(nrow(x) == 0) stop("ALL FOOTPRINTS GENERATED")

Now we can write the function to run parallel hysplit:

fx <- function(dir, idn){

  torf <- "
          *@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@&         
      %@@@@@.                                    %@@@@@     
    @@@@                          @                  ,@@@   
   @@@                       *@@@@                     %@@& 
  @@@             @@@@@@.@@@@@@@@@@@#                   @@@ 
  @@@          @@@@%@@* @@@@@@@@@            /@@@@@(    ,@@,
  @@@        @@@@@ @@@@@@@@@@@#       ,@(           *   ,@@,
  @@@      @@@@@@% (@@@@@@@@@                           ,@@,
  @@@    &@@@@@@@@  @@@@@@@@@, @@*          (@@@@@@@#   ,@@,
  @@@   @@@@@@@@@@   @@@@@@@@@@@(                       ,@@,
  @@@     @@@@@@@@@  @@@@@@@@@@@@@@@&                   ,@@,
  @@@  @.  .@@@@@@,  @@@@@@@.                           ,@@,
  @@@  @@@   @@@@@  %@@ @@@                             ,@@,
  @@@  @@@@.       &   /                                ,@@,
  @@@                                                   ,@@,
  @@@                                                   ,@@,
  @@@   @@@@@@@@@* @@@@@@@@@@   @@@@@@@@@@  @@@@@@@@@@  ,@@,
  @@@      @@@    @@@      @@@@ @@@    @@@  @@@         ,@@,
  @@@      @@@   #@@@       @@@ @@@@@@@@@.  @@@@@@@.    ,@@,
  @@@      @@@    @@@@*   @@@@  @@@  @@@@   @@@         @@@ 
   @@@#    @@@      (@@@@@@@    @@@    @@@. @@@        @@@  
     @@@@                                           .@@@@   
       %@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@      
                                                          
  "
  

  setwd(dir)
  
  system("/Path/To/hysplit/exec/hycs_std")
  
  sink("log.txt")
  cat("")
  
  cat(torf)
  cat("\n\n")
  
  utils::sessionInfo()
  cat("\n\n")
  
  cat("Receptor:\n")
  cat("receptor.csv\n\n")
  
  cat("logs:\n")
  cat("_rslurm_rtorf_job/slurm_*.out\n\n")

  sink()
  

  rdir="/Path/To/rscripts/"
    
  system(
    paste0('Rscript ',
           rdir,
           '/hysplit_netcdf.r ',
           '--rsource=',
           rdir,
           ' --gridspecs=',
           rdir, 
           '/gridspecs_uganda.txt',
           ' --plotfoot',
           ' --footnearfield',
           ' --thinpart',
           ' --outpath=', 
           dir,
           '/')) 

}

and to submit parallel jobs we use:

sjob <- slurm_apply(fx,
                    x[, c("dir", "idn")], 
                    jobname = 'rtorf_job',
                    nodes = 8, 
                    cpus_per_node = 4, 
                    submit = T)

You can use submit = FALSE and check and edit the script

file.edit("_rslurm_rtorf_job/submit.sh")
#!/bin/bash
#
#SBATCH --array=0-7
#SBATCH --cpus-per-task=4
#SBATCH --job-name=rtorf_job
#SBATCH --output=slurm_%a.out
/Path/To/R/bin/Rscript --vanilla slurm_run.R