chopin
automatically distributes geospatial data
computation over multiple threads.chopin
workflowpar_pad_*
functions to par_grid
,
or running par_hierarchy
, or par_multirasters
functions at once.temp_dir <- tempdir(check = TRUE)
url_nccnty <-
paste0(
"https://raw.githubusercontent.com/",
"ropensci/chopin/refs/heads/main/",
"tests/testdata/nc_hierarchy.gpkg"
)
url_ncelev <-
paste0(
"https://raw.githubusercontent.com/",
"ropensci/chopin/refs/heads/main/",
"tests/testdata/nc_srtm15_otm.tif"
)
nccnty_path <- file.path(temp_dir, "nc_hierarchy.gpkg")
ncelev_path <- file.path(temp_dir, "nc_srtm15_otm.tif")
# download data
download.file(url_nccnty, nccnty_path, mode = "wb", quiet = TRUE)
download.file(url_ncelev, ncelev_path, mode = "wb", quiet = TRUE)
nccnty <- terra::vect(nccnty_path, layer = "county")
ncelev <- terra::rast(ncelev_path)
ncgrid <- par_pad_grid(ncsamp, mode = "grid", nx = 4L, ny = 2L, padding = 10000)
plot(ncgrid$original)
par_*
functions operate on
future
backends, users should define the future plan before
running the functions. multicore
plan supports
terra
objects which may lead to faster computation, but it
is not supported in Windows. An alternative is
future.mirai
’s mirai_multisession
plan, which
is supported in many platforms and generally faster than plain future
multisession plan.workers
argument should be defined with an integer
value to specify the number of threads to be used.extract_at
runs on the grid
polygons.pg <-
par_grid(
grids = ncgrid,
pad_y = FALSE,
.debug = TRUE,
fun_dist = extract_at,
x = ncelev_path,
y = sf::st_as_sf(ncsamp),
id = "pid",
radius = 1e4,
func = "mean"
)
# also possible in mirai with par_*_mirai functions
# mirai daemons should be created first
mirai::daemons(n = 2L)
pg <-
par_grid_mirai(
grids = ncgrid,
pad_y = FALSE,
.debug = TRUE,
fun_dist = extract_at,
x = ncelev_path,
y = sf::st_as_sf(ncsamp),
id = "pid",
radius = 1e4,
func = "mean"
)
mirai::daemons(n = 0L)
## Reading layer `county' from data source `/tmp/RtmpbClS6n/nc_hierarchy.gpkg' using driver `GPKG'
## Simple feature collection with 100 features and 1 field
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 1054155 ymin: 1341756 xmax: 1838923 ymax: 1690176
## Projected CRS: NAD83 / Conus Albers
## Reading layer `tracts' from data source `/tmp/RtmpbClS6n/nc_hierarchy.gpkg' using driver `GPKG'
## Simple feature collection with 2672 features and 1 field
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 1054155 ymin: 1341756 xmax: 1838923 ymax: 1690176
## Projected CRS: NAD83 / Conus Albers
px <-
par_hierarchy(
# from here the par_hierarchy-specific arguments
regions = nctrct,
regions_id = "GEOID",
length_left = 5,
pad = 10000,
pad_y = FALSE,
.debug = TRUE,
# from here are the dispatched function definition
# for parallel workers
fun_dist = extract_at,
# below should follow the arguments of the dispatched function
x = ncelev,
y = sf::st_as_sf(ncsamp),
id = "pid",
radius = 1e4,
func = "mean"
)
dim(px)
## [1] 10000 2
## pid mean
## 1 55 -2.242934
## 2 83 7.638708
## 3 89 7.517975
## 4 190 8.162208
## 5 271 -8.555017
## 6 358 9.314404
## pid mean
## 9995 9213 -4.7030168
## 9996 9483 5.4337306
## 9997 9608 6.7779775
## 9998 9766 -4.9676342
## 9999 9907 -0.5347484
## 10000 9932 -4.1729984
ncelev <- terra::rast(ncelev_path)
tdir <- tempdir(check = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test1.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test2.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test3.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test4.tif"), overwrite = TRUE)
terra::writeRaster(ncelev, file.path(tdir, "test5.tif"), overwrite = TRUE)
rasts <- list.files(tdir, pattern = "tif$", full.names = TRUE)
pm <-
par_multirasters(
filenames = rasts,
fun_dist = extract_at,
x = NA,
y = sf::st_as_sf(ncsamp)[1:500, ],
id = "pid",
radius = 1e4,
func = "mean",
.debug = TRUE
)
dim(pm)
## [1] 3000 2
## mean base_raster
## 1 -5.211319 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 2 304.373932 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 3 158.474487 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 4 37.591179 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 5 10.516018 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## 6 18.762609 /tmp/RtmpbClS6n/nc_srtm15_otm.tif
## mean base_raster
## 2995 36.50384 /tmp/RtmpbClS6n/test5.tif
## 2996 220.32663 /tmp/RtmpbClS6n/test5.tif
## 2997 51.63021 /tmp/RtmpbClS6n/test5.tif
## 2998 151.88278 /tmp/RtmpbClS6n/test5.tif
## 2999 755.19885 /tmp/RtmpbClS6n/test5.tif
## 3000 225.11125 /tmp/RtmpbClS6n/test5.tif
From version 0.9.9, a custom function that takes sf
objects in x
and y
arguments can be used with
par_grid
and par_hierarchy
functions. The
custom function should return a data.frame or tibble object.
An example below demonstrates to compute the floor area ratio (or area-enclosed ratio, AER) in 100 meters circular buffers of points from random points in central Seoul, South Korea.
gpkg_path <- system.file("extdata/osm_seoul.gpkg", package = "chopin")
bldg <- sf::st_read(gpkg_path, layer = "buildings")
## Reading layer `buildings' from data source
## `/tmp/Rtmpns0kJS/Rinst3f5f861c2ced8/chopin/extdata/osm_seoul.gpkg'
## using driver `GPKG'
## Simple feature collection with 11112 features and 2 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 319332.8 ymin: 4159652 xmax: 325432.1 ymax: 4166901
## Projected CRS: WGS 84 / UTM zone 52N
## Reading layer `points' from data source
## `/tmp/Rtmpns0kJS/Rinst3f5f861c2ced8/chopin/extdata/osm_seoul.gpkg'
## using driver `GPKG'
## Simple feature collection with 595 features and 1 field
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: 319096.8 ymin: 4159684 xmax: 325296.8 ymax: 4166884
## Projected CRS: WGS 84 / UTM zone 52N
# plot
plot(sf::st_geometry(bldg), col = "lightgrey")
plot(sf::st_geometry(grdpnt), add = TRUE, pch = 20, col = "blue")
# Radius for AER (meters)
radius_m <- 100
# This function will be dispatched in parallel over computational grids.
aer_at_points <-
function(
x, y,
radius,
id_col = "pid",
floors_col = "floors",
area_col = "foot_m2"
) {
if (!inherits(x, "sf") && !inherits(y, "sf")) {
x <- sf::st_as_sf(x)
y <- sf::st_as_sf(y)
}
yorig <- y
y <- sf::st_geometry(y)
# Buffers around each point
buf <- sf::st_buffer(y, radius)
# spatial join (buildings intersecting buffers)
j <- sf::st_intersects(buf, sf::st_geometry(x))
# Compute
out <- vapply(seq_along(j), function(i) {
if (length(j[[i]]) == 0) return(NA_real_)
sub <- x[j[[i]], ]
sub <- sub[!is.na(sub[[floors_col]]) & !is.na(sub[[area_col]]), ]
if (nrow(sub) == 0) return(NA_real_)
aer_num <- sum(sub[[area_col]] * sub[[floors_col]], na.rm = TRUE)
aer_den <- pi * radius^2
aer_num / aer_den
}, numeric(1))
res <-
data.frame(
pid = yorig[[id_col]],
aer = out
)
res
}
## Parallel processing
# Initiate mirai damons
plan(mirai_multisession, workers = 4L)
grd <- chopin::par_pad_grid(
bldg,
mode = "grid",
nx = 1L,
ny = 4L,
padding = 300
)
bldg_cast <- sf::st_cast(bldg, "POLYGON")
radius_m <- 300
res <- par_grid_mirai(
grids = grd,
fun_dist = aer_at_points,
x = bldg_cast,
y = grdpnt,
radius = radius_m,
id_col = "pid",
.debug = TRUE
)
future::plan(future::sequential)
mirai::daemons(n = 0L)
## [1] 0
## pid aer
## 1 1 0.2847932
## 2 2 0.7716681
## 3 3 0.4084137
## 4 4 0.4438036
## 5 5 0.6123537
## 6 6 0.5815721
chopin
works best with two-dimensional
(planar) geometries. Users should disable
s2
spherical geometry mode in sf
by setting
sf::sf_use_s2(FALSE)
. Running any chopin
functions at spherical or three-dimensional (e.g., including M/Z
dimensions) geometries may produce incorrect or unexpected results.