Skip to content

Calculate Module

assign_voxels(arr, voxel_resolution)

Assigns voxel grids to spatial data points based on the specified resolutions.

Parameters:

Name Type Description Default
arr ndarray

Input array-like object containing point cloud data with 'X', 'Y', and 'HeightAboveGround' fields.

required
voxel_resolution tuple of floats

The resolution for x, y, and z dimensions of the voxel grid.

required

Returns:

Type Description
Tuple[ndarray, List]

tuple of (numpy.ndarray, List): A tuple containing the histogram of the voxel grid (with corrected orientation) and the extent of the point cloud.

Source code in pyforestscan/calculate.py
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def assign_voxels(arr, voxel_resolution) -> Tuple[np.ndarray, List]:
    """
    Assigns voxel grids to spatial data points based on the specified resolutions.

    Points with a negative 'HeightAboveGround' are discarded before binning.
    The X edges start at the largest multiple of ``dx`` that is <= min(X), the
    Y edges start at the smallest multiple of ``dy`` that is >= max(Y) and run
    downwards, and the Z edges start at 0.

    Args:
        arr (numpy.ndarray): Input array-like object containing point cloud data with 'X', 'Y', and 'HeightAboveGround' fields.
        voxel_resolution (tuple of floats): The resolution for x, y, and z dimensions of the voxel grid.

    Returns:
        tuple of (numpy.ndarray, List): A tuple containing the histogram of the voxel grid (with corrected orientation) and the extent of the point cloud.
            The histogram has shape (nx, ny, nz); its Y axis is reversed after binning
            so that index 0 along Y is the highest-Y row. The extent is
            [x_min, x_max, y_min, y_max] taken from the outermost bin edges.

    Raises:
        ValueError: If any resolution component is not > 0, or if no points have
            'HeightAboveGround' >= 0.
    """
    dx, dy, dz = voxel_resolution
    if dx <= 0 or dy <= 0 or dz <= 0:
        raise ValueError("voxel_resolution components must be > 0")

    pts = arr[arr['HeightAboveGround'] >= 0]
    if pts.size == 0:
        raise ValueError("No points available (all HeightAboveGround < 0)")

    x0 = np.floor(pts['X'].min() / dx) * dx
    y0 = np.ceil(pts['Y'].max() / dy) * dy

    # np.arange yields a single edge when every coordinate sits exactly on the
    # starting grid line (e.g. a single point); histogramdd needs at least two
    # edges per axis, so fall back to one voxel-sized bin in that case.
    x_bins = np.arange(x0, pts['X'].max() + dx, dx)
    if x_bins.size < 2:
        x_bins = np.array([x0, x0 + dx])

    y_bins = np.arange(y0, pts['Y'].min() - dy, -dy)
    if y_bins.size < 2:
        y_bins = np.array([y0, y0 - dy])

    z_bins = np.arange(0.0, pts['HeightAboveGround'].max() + dz, dz)
    if z_bins.size < 2:
        z_bins = np.array([0.0, dz])

    # histogramdd requires ascending edges, so bin on the reversed Y edges and
    # flip the Y axis of the result afterwards.
    hist, _ = np.histogramdd(
        np.column_stack((pts['X'], pts['Y'], pts['HeightAboveGround'])),
        bins=(x_bins, y_bins[::-1], z_bins)
    )
    hist = hist[:, ::-1, :]

    extent = [x_bins[0], x_bins[-1], y_bins[-1], y_bins[0]]
    return hist, extent

calculate_canopy_cover(pad, voxel_height, min_height=2.0, max_height=None, k=0.5)

Calculate GEDI-style canopy cover at a height threshold using PAD.

Uses the Beer–Lambert relation: Cover(z) = 1 - exp(-k * PAI_above(z)), where PAI_above(z) is the integrated Plant Area Index above height z.

Parameters:

Name Type Description Default
pad ndarray

3D array of PAD values with shape (X, Y, Z).

required
voxel_height float

Height of each voxel in meters (> 0).

required
min_height float

Height-above-ground threshold z (in meters) at which to compute canopy cover. Defaults to 2.0 m (GEDI convention).

2.0
max_height float or None

Maximum height to integrate up to. If None, integrates to the top of the PAD volume. Defaults to None.

None
k float

Extinction coefficient (Beer–Lambert constant). Defaults to 0.5.

0.5

Returns:

Type Description
ndarray

np.ndarray: 2D array (X, Y) of canopy cover values in [0, 1], with NaN where PAD is entirely missing for the integration range. If the requested integration range is empty (e.g., min_height >= available max height), returns a zeros array (no canopy above the threshold).

Raises:

Type Description
ValueError

If voxel_height is not positive or k < 0. An empty integration range (e.g., min_height >= max_height) does not raise; it returns a zeros array as described under Returns.

Source code in pyforestscan/calculate.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def calculate_canopy_cover(pad: np.ndarray,
                           voxel_height: float,
                           min_height: float = 2.0,
                           max_height: float | None = None,
                           k: float = 0.5) -> np.ndarray:
    """
    Calculate GEDI-style canopy cover at a height threshold using PAD.

    Uses the Beer–Lambert relation: Cover(z) = 1 - exp(-k * PAI_above(z)), where
    PAI_above(z) is the integrated Plant Area Index above height z.

    Args:
        pad (np.ndarray): 3D array of PAD values with shape (X, Y, Z).
        voxel_height (float): Height of each voxel in meters (> 0).
        min_height (float, optional): Height-above-ground threshold z (in meters) at which
            to compute canopy cover. Defaults to 2.0 m (GEDI convention).
        max_height (float or None, optional): Maximum height to integrate up to. If None,
            integrates to the top of the PAD volume. Defaults to None.
        k (float, optional): Extinction coefficient (Beer–Lambert constant). Defaults to 0.5.

    Returns:
        np.ndarray: 2D array (X, Y) of canopy cover values in [0, 1], with NaN where
            PAD is entirely missing for the integration range. If the requested
            integration range is empty (e.g., min_height >= available max height),
            returns a zeros array (no canopy above the threshold).

    Raises:
        ValueError: If parameters are invalid (e.g., non-positive voxel_height, k < 0,
            or min_height >= max_height).
    """
    if voxel_height <= 0:
        raise ValueError(f"voxel_height must be > 0 metres (got {voxel_height})")
    if k < 0:
        raise ValueError(f"k must be >= 0 (got {k})")

    # Determine effective max height and handle empty integration range
    effective_max_height = max_height if max_height is not None else pad.shape[2] * voxel_height
    if min_height >= effective_max_height:
        # No foliage above threshold: cover is zero everywhere
        return np.zeros((pad.shape[0], pad.shape[1]), dtype=float)

    # Compute PAI integrated from min_height up to effective_max_height/top
    pai_above = calculate_pai(pad, voxel_height, min_height=min_height, max_height=max_height)

    # Identify columns that are entirely NaN within the integration range
    start_idx = int(np.ceil(min_height / voxel_height))
    end_idx = int(np.floor(effective_max_height / voxel_height))
    range_slice = pad[:, :, start_idx:end_idx]
    all_nan_mask = np.all(np.isnan(range_slice), axis=2)

    # Beer–Lambert canopy cover
    cover = 1.0 - np.exp(-k * pai_above)

    # Clamp to [0,1] and set invalids
    cover = np.where(np.isfinite(cover), cover, np.nan)
    cover = np.clip(cover, 0.0, 1.0)
    cover[all_nan_mask] = np.nan
    return cover

calculate_chm(arr, voxel_resolution, interpolation='linear', interp_valid_region=False, interp_clean_edges=False)

Calculate the Canopy Height Model (CHM) for a given voxel grid.

The CHM is computed as the maximum 'HeightAboveGround' value within each (X, Y) voxel. Optionally, gaps in the CHM can be filled using interpolation.

Parameters:

Name Type Description Default
arr ndarray

Input structured numpy array containing point cloud data with fields 'X', 'Y', and 'HeightAboveGround'.

required
voxel_resolution tuple of float

The resolution for the X and Y dimensions of the voxel grid, specified as (x_resolution, y_resolution).

required
interpolation str or None

Method for interpolating gaps in the CHM. Supported methods are "nearest", "linear", "cubic", or None. If None, no interpolation is performed. Defaults to "linear".

'linear'
interp_valid_region bool

Whether to calculate a valid region mask using morphological operations for interpolation. If True, interpolation is only applied within the valid data region. If False (default), interpolation is applied to all NaN values. Ignored if interpolation is None.

False
interp_clean_edges bool

Whether to clean edge fringes of the interpolated CHM. Default is False. Ignored if interpolation is None.

False

Returns:

Name Type Description
tuple Tuple[ndarray, List]
  • np.ndarray: 2D numpy array representing the CHM, with each value corresponding to the maximum height in that (X, Y) voxel.
  • list: The spatial extent as [x_min, x_max, y_min, y_max].

Raises:

Type Description
ValueError

If input array does not contain the required fields.

ValueError

If interpolation is specified but not one of the supported methods.

Source code in pyforestscan/calculate.py
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
def calculate_chm(arr, voxel_resolution, interpolation="linear",
                  interp_valid_region=False, interp_clean_edges=False) -> Tuple[np.ndarray, List]:
    """
    Calculate the Canopy Height Model (CHM) for a given voxel grid.

    The CHM is computed as the maximum 'HeightAboveGround' value within each (X, Y) voxel.
    Optionally, gaps in the CHM can be filled using interpolation.

    Args:
        arr (np.ndarray): Input structured numpy array containing point cloud data
            with fields 'X', 'Y', and 'HeightAboveGround'.
        voxel_resolution (tuple of float): The resolution for the X and Y dimensions
            of the voxel grid, specified as (x_resolution, y_resolution). Only the
            first two components are used.
        interpolation (str or None, optional): Method for interpolating gaps in the CHM.
            Supported methods are "nearest", "linear", "cubic", or None. If None, no interpolation
            is performed. Defaults to "linear".
        interp_valid_region (bool): Whether to calculate a valid region mask using morphological operations for
            interpolation. If True, interpolation is only applied within the valid data region. If False (default),
            interpolation is applied to all NaN values. Ignored if `interpolation` is None.
        interp_clean_edges (bool): Whether to clean edge fringes of the interpolated CHM. Default is False.
            Ignored if `interpolation` is None. Only applied when at least one cell
            was selected for interpolation.

    Returns:
        tuple:
            - np.ndarray: 2D numpy array of shape (nx, ny) representing the CHM, with each value
                corresponding to the maximum height in that (X, Y) voxel. The Y axis is flipped
                before returning.
            - list: The spatial extent as [x_min, x_max, y_min, y_max], where the maxima are
                the outer edges of the grid (x_min + nx * x_resolution, y_min + ny * y_resolution).

    Raises:
        ValueError: If input array does not contain the required fields or contains no points.
        ValueError: If a resolution component is not > 0.
        ValueError: If `interpolation` is specified but not one of the supported methods.

    """
    if interpolation is not None and interpolation not in ("nearest", "linear", "cubic"):
        raise ValueError(
            f"Unsupported interpolation method '{interpolation}'. "
            "Choose from 'nearest', 'linear', 'cubic', or None."
        )

    x_resolution, y_resolution = voxel_resolution[:2]
    if x_resolution <= 0 or y_resolution <= 0:
        raise ValueError("voxel_resolution components must be > 0")

    try:
        x = np.asarray(arr['X'])
        y = np.asarray(arr['Y'])
        z = np.asarray(arr['HeightAboveGround'], dtype=float)
    except (KeyError, IndexError, ValueError) as exc:
        raise ValueError(
            "Input array must contain 'X', 'Y', and 'HeightAboveGround' fields."
        ) from exc

    if x.size == 0:
        raise ValueError("Input array contains no points.")

    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    # Keep at least one cell per axis so that a degenerate extent (a single
    # point, or all points sharing one coordinate) still yields a raster.
    nx = max(1, int(np.ceil((x_max - x_min) / x_resolution)))
    ny = max(1, int(np.ceil((y_max - y_min) / y_resolution)))

    chm = np.full((nx, ny), np.nan)

    x_bins = x_min + np.arange(nx + 1) * x_resolution
    y_bins = y_min + np.arange(ny + 1) * y_resolution

    x_indices = np.floor((x - x_min) / x_resolution).astype(int)
    y_indices = np.floor((y - y_min) / y_resolution).astype(int)

    # Points lying exactly on the upper grid edge belong to the last cell.
    np.minimum(x_indices, nx - 1, out=x_indices)
    np.minimum(y_indices, ny - 1, out=y_indices)

    # Unbuffered per-cell maximum. fmax ignores NaN, so the NaN-initialised
    # cells take the first height they see and keep the largest afterwards.
    np.fmax.at(chm, (x_indices, y_indices), z)

    if interpolation is not None:
        if interp_valid_region is True:
            valid_region_mask = _calc_valid_region_mask(chm)
            interp_mask = np.isnan(chm) & valid_region_mask
        else:
            interp_mask = np.isnan(chm)

        if np.any(interp_mask):
            # indexing='ij' makes the coordinate grids (nx, ny) like `chm`.
            # The default 'xy' returns (ny, nx) grids, which pairs cells with
            # the wrong cell-centre coordinates.
            x_grid, y_grid = np.meshgrid(
                (x_bins[:-1] + x_bins[1:]) / 2,
                (y_bins[:-1] + y_bins[1:]) / 2,
                indexing='ij'
            )

            valid_mask = ~np.isnan(chm)
            valid_values = chm[valid_mask]

            if valid_values.size > 0:
                chm[interp_mask] = griddata(
                    points=np.column_stack([x_grid[valid_mask], y_grid[valid_mask]]),
                    values=valid_values,
                    xi=np.column_stack([x_grid[interp_mask], y_grid[interp_mask]]),
                    method=interpolation
                )
            if interp_clean_edges:
                chm = _clean_edges(chm)

    chm = np.flip(chm, axis=1)
    extent = [x_min, x_min + nx * x_resolution, y_min, y_min + ny * y_resolution]

    return chm, extent

calculate_fhd(voxel_returns, voxel_height=1.0, min_height=0.0, max_height=None)

Calculate the Foliage Height Diversity (FHD) for a given set of voxel returns.

This function computes FHD by calculating the entropy of the voxel return proportions along the Z (height) axis, which represents the vertical diversity of canopy structure.

Parameters:

Name Type Description Default
voxel_returns ndarray

3D numpy array of shape (X, Y, Z) representing voxel returns, where X and Y are spatial dimensions and Z represents height bins (vertical layers).

required
voxel_height float

Height of each voxel in meters (> 0). Defaults to 1.0.

1.0
min_height float

Minimum height (in meters) to include in the entropy calculation. Defaults to 0.0 (use all heights by default).

0.0
max_height float or None

Maximum height (in meters) to include. If None, uses the full height of the voxel grid. Defaults to None.

None

Returns:

Type Description
ndarray

np.ndarray: 2D numpy array of shape (X, Y) with FHD values for each (X, Y) location. Areas with no voxel returns in the requested height range will have NaN values.

Source code in pyforestscan/calculate.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def calculate_fhd(voxel_returns,
                  voxel_height: float = 1.0,
                  min_height: float = 0.0,
                  max_height: float | None = None) -> np.ndarray:
    """
    Calculate the Foliage Height Diversity (FHD) for a given set of voxel returns.

    This function computes FHD by calculating the entropy of the voxel return proportions
    along the Z (height) axis, which represents the vertical diversity of canopy structure.

    Args:
        voxel_returns (np.ndarray): 3D numpy array of shape (X, Y, Z) representing voxel returns,
            where X and Y are spatial dimensions and Z represents height bins (vertical layers).
        voxel_height (float, optional): Height of each voxel in meters (> 0). Defaults to 1.0.
        min_height (float, optional): Minimum height (in meters) to include in the entropy calculation.
            Defaults to 0.0 (use all heights by default).
        max_height (float or None, optional): Maximum height (in meters) to include. If None, uses the full
            height of the voxel grid. Defaults to None.

    Returns:
        np.ndarray: 2D numpy array of shape (X, Y) with FHD values for each (X, Y) location.
            Areas with no voxel returns in the requested height range will have NaN values.
    """
    if voxel_height <= 0:
        raise ValueError(f"voxel_height must be > 0 metres (got {voxel_height})")

    effective_max_height = max_height if max_height is not None else voxel_returns.shape[2] * voxel_height
    if min_height >= effective_max_height:
        return np.full(voxel_returns.shape[:2], np.nan, dtype=float)

    start_idx = int(np.ceil(min_height / voxel_height))
    end_idx = int(np.floor(effective_max_height / voxel_height))
    if start_idx >= end_idx:
        return np.full(voxel_returns.shape[:2], np.nan, dtype=float)

    core_returns = voxel_returns[:, :, start_idx:end_idx]
    sum_counts = np.sum(core_returns, axis=2)

    with np.errstate(divide='ignore', invalid='ignore'):
        proportions = np.divide(
            core_returns,
            sum_counts[..., None],
            out=np.zeros_like(core_returns, dtype=float),
            where=sum_counts[..., None] != 0
        )

    fhd = entropy(proportions, axis=2)
    fhd[sum_counts == 0] = np.nan
    return fhd

calculate_pad(voxel_returns, voxel_height=1.0, beer_lambert_constant=1.0, drop_ground=True)

Calculate the Plant Area Density (PAD) using the Beer-Lambert Law.

Parameters:

Name Type Description Default
voxel_returns ndarray

3D numpy array of shape (X, Y, Z) representing the LiDAR returns in each voxel column.

required
voxel_height float

Height of each voxel. Defaults to 1.0.

1.0
beer_lambert_constant float

The Beer-Lambert constant used in the calculation. Defaults to 1.0.

1.0
drop_ground bool

If True, sets PAD values in the ground (lowest) voxel layer to NaN in the output. Defaults to True.

True

Returns:

Type Description
ndarray

np.ndarray: 3D numpy array containing PAD values for each voxel, same shape as voxel_returns. Columns that have zero returns across all Z are set to NaN.

Source code in pyforestscan/calculate.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def calculate_pad(voxel_returns,
                  voxel_height=1.0,
                  beer_lambert_constant=1.0,
                  drop_ground=True
                  ) -> np.ndarray:
    """
    Estimate Plant Area Density (PAD) per voxel with the Beer-Lambert Law.

    Each column is walked from its top layer downwards. For every voxel the
    number of returns at or below it ("in") and strictly below it ("out") is
    derived from a cumulative sum, and PAD is
    ``log(in / out) / (beer_lambert_constant * voxel_height)``.

    Args:
        voxel_returns (np.ndarray): 3D numpy array of shape (X, Y, Z) holding
            the LiDAR returns in each voxel column.
        voxel_height (float, optional): Height of each voxel; must be > 0.
            Defaults to 1.0.
        beer_lambert_constant (float, optional): The Beer-Lambert constant used
            in the denominator. Defaults to 1.0.
        drop_ground (bool, optional): If True, the lowest voxel layer (Z index 0)
            of the output is set to NaN. Defaults to True.

    Returns:
        np.ndarray: 3D numpy array of PAD values with the same shape as
            `voxel_returns`. Non-finite results are replaced by NaN, and columns
            whose returns sum to zero across Z are NaN throughout.

    Raises:
        ValueError: If `voxel_height` is not > 0.
    """
    if voxel_height <= 0:
        raise ValueError(
            f"voxel_height must be > 0 metres (got {voxel_height})"
        )

    # View each column from the top layer down.
    top_down = voxel_returns[:, :, ::-1]
    column_total = np.sum(top_down, axis=2, keepdims=True)

    # Returns remaining below each voxel, and returns present at its top face.
    leaving = column_total - np.cumsum(top_down, axis=2)
    entering = np.concatenate((column_total, leaving[:, :, :-1]), axis=2)

    with np.errstate(divide='ignore', invalid='ignore'):
        density = np.log(entering / leaving) / (beer_lambert_constant * voxel_height)
    # inf and NaN results (zero returns below or above a voxel) become NaN.
    np.copyto(density, np.nan, where=~np.isfinite(density))

    # Restore ground-up layer order.
    pad = density[:, :, ::-1]

    if drop_ground:
        pad[:, :, 0] = np.nan

    # Columns with no returns at any height carry no information.
    no_returns = np.sum(voxel_returns, axis=2) == 0
    pad[no_returns] = np.nan

    return pad

calculate_pai(pad, voxel_height, min_height=1.0, max_height=None)

Calculate Plant Area Index (PAI) from Plant Area Density (PAD) data by summing PAD values along the height (Z) axis.

Parameters:

Name Type Description Default
pad ndarray

3D numpy array representing Plant Area Density (PAD) values, shape (X, Y, Z).

required
voxel_height float

Height of each voxel in meters.

required
min_height float

Minimum height in meters for summing PAD values. Defaults to 1.0.

1.0
max_height float

Maximum height in meters for summing PAD values. If None, uses the full height of the input array. Defaults to None.

None

Returns:

Type Description
ndarray

np.ndarray: 2D numpy array of shape (X, Y) with PAI values for each (x, y) voxel column.

Notes
  • If the requested integration range is empty (e.g., min_height >= available maximum height), returns a zeros array (no canopy above the threshold), mirroring the behavior used by canopy cover.
Source code in pyforestscan/calculate.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def calculate_pai(pad,
                  voxel_height,
                  min_height=1.0,
                  max_height=None) -> np.ndarray:
    """
    Calculate Plant Area Index (PAI) from Plant Area Density (PAD) data by summing PAD values along the height (Z) axis.

    NaN voxels inside the integration range are ignored by the sum; a column
    is NaN in the output only when every voxel in the range is NaN.

    Args:
        pad (np.ndarray): 3D numpy array representing Plant Area Density (PAD) values, shape (X, Y, Z).
        voxel_height (float): Height of each voxel in meters (> 0).
        min_height (float, optional): Minimum height in meters for summing PAD values. Defaults to 1.0.
            Values below 0 are treated as 0.
        max_height (float, optional): Maximum height in meters for summing PAD values. If None, uses the full height of the input array. Defaults to None.

    Returns:
        np.ndarray: 2D numpy array of shape (X, Y) with PAI values for each (x, y) voxel column.

    Raises:
        ValueError: If voxel_height is not > 0.

    Notes:
        - If the requested integration range is empty (e.g., min_height >= available
          maximum height, or the range lies entirely above the grid), returns a
          zeros array (no canopy above the threshold), mirroring the behavior
          used by canopy cover.
    """
    if voxel_height <= 0:
        raise ValueError(f"voxel_height must be > 0 metres (got {voxel_height})")

    n_layers = pad.shape[2]
    effective_max_height = max_height if max_height is not None else n_layers * voxel_height

    # Empty integration range: return zeros (no canopy above threshold)
    if min_height >= effective_max_height:
        return np.zeros((pad.shape[0], pad.shape[1]), dtype=float)

    # Clamp to the grid: a negative start index would otherwise be read by the
    # slice below as "count from the top" and silently drop the lower layers.
    start_idx = max(0, int(np.ceil(min_height / voxel_height)))
    if max_height is None:
        # Use the layer count directly; (Z * h) / h is not guaranteed to
        # round back to exactly Z in floating point.
        end_idx = n_layers
    else:
        end_idx = min(n_layers, int(np.floor(effective_max_height / voxel_height)))

    # If rounding collapses the slice, or the range lies above the grid,
    # also treat as empty range
    if start_idx >= end_idx:
        return np.zeros((pad.shape[0], pad.shape[1]), dtype=float)

    core = pad[:, :, start_idx:end_idx]
    pai = np.nansum(core, axis=2) * voxel_height

    # If an entire column within the integration range is NaN, propagate NaN
    all_nan_mask = np.all(np.isnan(core), axis=2)
    pai[all_nan_mask] = np.nan
    return pai

calculate_point_density(voxel_returns, per_area=False, cell_area=None)

Calculate point density (or count) per (X, Y) voxel column by summing returns across Z.

Parameters:

Name Type Description Default
voxel_returns ndarray

3D numpy array of shape (X, Y, Z) representing voxel returns (counts).

required
per_area bool

If True, divide counts by cell_area to yield points per unit area. Defaults to False.

False
cell_area float or None

Area of a single (X, Y) cell in the same units as the coordinates (e.g., m^2). Required when per_area=True.

None

Returns:

Type Description
ndarray

np.ndarray: 2D array (X, Y) of point counts (or density if per_area=True).

Notes
  • For columns with no returns, the count is 0. This differs from metrics like FHD where no-data is NaN.
  • If you want density per m^2, set per_area=True and pass cell_area = dx * dy.
Source code in pyforestscan/calculate.py
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
def calculate_point_density(voxel_returns: np.ndarray,
                            per_area: bool = False,
                            cell_area: float | None = None) -> np.ndarray:
    """
    Calculate point density (or count) per (X, Y) voxel column by summing returns across Z.

    Args:
        voxel_returns (np.ndarray): 3D numpy array of shape (X, Y, Z) representing voxel returns (counts).
        per_area (bool, optional): If True, divide counts by ``cell_area`` to yield points per unit area. Defaults to False.
        cell_area (float or None, optional): Area of a single (X, Y) cell in the same units as the coordinates (e.g., m^2).
            Required when ``per_area=True``.

    Returns:
        np.ndarray: 2D array (X, Y) of point counts (or density if per_area=True).

    Notes:
        - For columns with no returns, the count is 0. This differs from metrics like FHD where no-data is NaN.
        - If you want density per m^2, set ``per_area=True`` and pass ``cell_area = dx * dy``.
    """
    counts = np.sum(voxel_returns, axis=2, dtype=float)
    if per_area:
        if cell_area is None or cell_area <= 0:
            raise ValueError("cell_area must be > 0 when per_area=True")
        return counts / float(cell_area)
    return counts

calculate_voxel_stat(arr, voxel_resolution, dimension, stat, z_index_range=None)

Compute a column-wise statistic for a given dimension over a 3-D voxel grid.

The function bins points into voxels with the same XY/Z sizing used by assign_voxels. For each (X, Y) column it filters points to the requested Z-index range, then evaluates a simple statistic (mean, min, max, etc.) on the provided dimension.

Parameters:

Name Type Description Default
arr ndarray

Structured array containing at least 'X', 'Y', and 'HeightAboveGround' fields, plus the provided dimension.

required
voxel_resolution tuple[float, float, float]

(dx, dy, dz) sizes in the same units as the coordinates and height-above-ground values. All components must be > 0.

required
dimension str

Dimension/field name to evaluate (e.g. 'Z', 'Intensity', 'HeightAboveGround'). The field must exist on arr.

required
stat str

Statistic to compute. Supported values (case-insensitive) are: {'mean', 'sum', 'count', 'min', 'max', 'median', 'std'}.

required
z_index_range Tuple[int, Optional[int]] | None

Inclusive-exclusive Z index bounds expressed in voxel indices (start, stop). Defaults to the full column when None. stop may be None to include the topmost voxels. For example, (0, 3) covers the first three voxels (indices 0, 1, 2).

None

Returns:

Type Description

tuple[np.ndarray, list]: (stat_array, extent)
  • stat_array: 2-D array shaped (nx, ny) with the requested statistic per column. Cells without points are NaN, except that 'count' yields 0, and 'sum' yields 0 whenever at least one point falls inside the requested Z range.
  • extent: [x_min, x_max, y_min, y_max] covering the raster footprint.

Source code in pyforestscan/calculate.py
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
def calculate_voxel_stat(
    arr,
    voxel_resolution: Tuple[float, float, float],
    dimension: str,
    stat: str,
    z_index_range: Optional[Tuple[int, Optional[int]]] = None,
):
    """
    Reduce one point dimension to a single value per (X, Y) voxel column.

    Points with a negative 'HeightAboveGround' are dropped. The remaining points are
    binned on a grid built the same way as in ``assign_voxels`` (X edges snapped down to a
    multiple of dx, Y edges snapped up to a multiple of dy and running downwards, Z edges
    starting at 0). Points whose Z voxel index lies in the requested range are then grouped
    by column and summarised with the chosen statistic.

    Args:
        arr (np.ndarray): Structured array with at least 'X', 'Y', and 'HeightAboveGround'
            fields, plus the field named by ``dimension``.
        voxel_resolution (tuple[float, float, float]): (dx, dy, dz) voxel sizes. All
            components must be > 0.
        dimension (str): Name of the field to summarise (e.g. 'Z', 'Intensity',
            'HeightAboveGround').
        stat (str): Statistic to compute, case-insensitive. One of
            {'mean', 'sum', 'count', 'min', 'max', 'median', 'std'}. 'std' is the
            population standard deviation (variance computed as E[v^2] - E[v]^2).
        z_index_range (Tuple[int, Optional[int]] | None): Inclusive-exclusive Z voxel index
            bounds `(start, stop)`. None selects the full column. ``start`` is clamped to 0,
            ``stop`` is clamped to the number of Z voxels, and ``stop=None`` means "up to the
            top". For example, `(0, 3)` covers voxel indices 0, 1, 2.

    Returns:
        tuple[np.ndarray, list]: (stat_array, extent)
            - stat_array: 2-D float array shaped (nx, ny), Y axis flipped to match the
              orientation used by ``assign_voxels``. Columns without points are NaN, except
              that 'count' gives 0, and 'sum' gives 0 whenever at least one point falls in
              the requested Z range (if no point does, 'sum' is NaN everywhere).
            - extent: [x_min, x_max, y_min, y_max] taken from the outermost bin edges.

    Raises:
        KeyError: If ``dimension`` or 'HeightAboveGround' is not a field of ``arr``.
        ValueError: If a resolution component is not > 0, ``stat`` is unsupported, no point
            has 'HeightAboveGround' >= 0, or ``z_index_range`` is malformed or empty.
    """
    field_names = arr.dtype.names
    if dimension not in field_names:
        raise KeyError(f"Dimension '{dimension}' not found in array fields")
    if 'HeightAboveGround' not in field_names:
        raise KeyError("Input array must include a 'HeightAboveGround' field")

    dx, dy, dz = voxel_resolution
    if any(step <= 0 for step in (dx, dy, dz)):
        raise ValueError("voxel_resolution components must be > 0")

    supported_stats = {'mean', 'sum', 'count', 'min', 'max', 'median', 'std'}
    key = stat.lower()
    if key not in supported_stats:
        raise ValueError(f"Unsupported statistic '{stat}'. "
                         f"Choose from {sorted(supported_stats)}")

    above_ground = arr[arr['HeightAboveGround'] >= 0]
    if above_ground.size == 0:
        raise ValueError("No points available (all HeightAboveGround < 0)")

    xs = above_ground['X']
    ys = above_ground['Y']
    heights = above_ground['HeightAboveGround']

    def _edges(first, stop, step):
        # Regular bin edges; fall back to one step-sized bin when arange
        # produces fewer than two edges.
        edges = np.arange(first, stop, step)
        if edges.size >= 2:
            return edges
        return np.array([first, first + step])

    x_edges = _edges(np.floor(xs.min() / dx) * dx, xs.max() + dx, dx)
    # Y edges are generated top-down, then reversed for digitize.
    y_edges_desc = _edges(np.ceil(ys.max() / dy) * dy, ys.min() - dy, -dy)
    y_edges = y_edges_desc[::-1]
    z_edges = _edges(0.0, heights.max() + dz, dz)

    nx = len(x_edges) - 1
    ny = len(y_edges) - 1
    nz = len(z_edges) - 1

    # Bin index per point, with points on the outermost edge pulled into the
    # last bin.
    col = np.clip(np.digitize(xs, x_edges) - 1, 0, nx - 1)
    row = np.clip(np.digitize(ys, y_edges) - 1, 0, ny - 1)
    layer = np.clip(np.digitize(heights, z_edges) - 1, 0, nz - 1)

    if z_index_range is None:
        z_start, z_stop = 0, nz
    else:
        if len(z_index_range) != 2:
            raise ValueError("z_index_range must be a (start, stop) tuple")
        z_start = max(0, int(z_index_range[0]))
        upper = z_index_range[1]
        z_stop = nz if upper is None else min(int(upper), nz)
        if z_start >= z_stop:
            raise ValueError("z_index_range start must be < stop")

    extent = [x_edges[0], x_edges[-1], y_edges_desc[-1], y_edges_desc[0]]

    in_range = (layer >= z_start) & (layer < z_stop)
    if not np.any(in_range):
        # Nothing to summarise: zeros for 'count', NaN for every other statistic.
        if key == 'count':
            return np.zeros((nx, ny), dtype=float), extent
        return np.full((nx, ny), np.nan, dtype=float), extent

    values = np.asarray(above_ground[dimension][in_range], dtype=float)

    # Flip Y so the output matches the assign_voxels orientation, then encode
    # each (x, y) column as one flat index.
    cell = col[in_range] * ny + ((ny - 1) - row[in_range])
    n_cells = nx * ny

    counts = np.bincount(cell, minlength=n_cells).astype(float)

    def _per_cell_total(weights):
        # Sum of `weights` per flat column index.
        return np.bincount(cell, weights=weights, minlength=n_cells)

    if key == 'count':
        data = counts
    elif key == 'sum':
        data = _per_cell_total(values)
    elif key == 'mean':
        totals = _per_cell_total(values)
        with np.errstate(invalid='ignore', divide='ignore'):
            data = totals / counts
        data[counts == 0] = np.nan
    elif key == 'std':
        totals = _per_cell_total(values)
        squares = _per_cell_total(values * values)
        with np.errstate(invalid='ignore', divide='ignore'):
            mean = totals / counts
            var = (squares / counts) - (mean ** 2)
        var[counts <= 0] = np.nan
        var[var < 0] = 0.0  # rounding can push a tiny variance below zero
        data = np.sqrt(var)
    elif key == 'min':
        data = np.full(n_cells, np.inf, dtype=float)
        np.minimum.at(data, cell, values)
        data[data == np.inf] = np.nan
    elif key == 'max':
        data = np.full(n_cells, -np.inf, dtype=float)
        np.maximum.at(data, cell, values)
        data[data == -np.inf] = np.nan
    elif key == 'median':
        data = np.full(n_cells, np.nan, dtype=float)
        # Stable sort groups the values of each column contiguously.
        order = np.argsort(cell, kind='mergesort')
        grouped_values = values[order]
        occupied, starts = np.unique(cell[order], return_index=True)
        for cell_id, chunk in zip(occupied, np.split(grouped_values, starts[1:])):
            data[cell_id] = np.median(chunk)
    else:
        raise AssertionError("Unhandled statistic path")

    grid = data.reshape(nx, ny)
    if key not in ('count', 'sum'):
        grid[counts.reshape(nx, ny) == 0] = np.nan

    return grid, extent

generate_dtm(ground_points, resolution=2.0)

Generates a Digital Terrain Model (DTM) raster from classified ground points.

Parameters:

Name Type Description Default
ground_points list

Point cloud arrays of classified ground points.

required
resolution float

Spatial resolution of the DTM in meters.

2.0

Returns:

Name Type Description
tuple Tuple[ndarray, List]

A tuple containing the DTM as a 2D NumPy array and the spatial extent [x_min, x_max, y_min, y_max].

Raises:

Type Description
ValueError

If no ground points are found for DTM generation.

ValueError

If point cloud data is missing 'X', 'Y', or 'Z' fields.

Source code in pyforestscan/calculate.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def generate_dtm(ground_points, resolution=2.0) -> Tuple[np.ndarray, List]:
    """
    Generates a Digital Terrain Model (DTM) raster from classified ground points.

    Each raster cell holds the lowest 'Z' value of the ground points that fall
    inside it; cells without points are NaN.

    Args:
        ground_points (list): Point cloud arrays of classified ground points. Each
            array must expose 'X', 'Y', and 'Z' fields.
        resolution (float): Spatial resolution of the DTM in meters (> 0).

    Returns:
        tuple: A tuple containing the DTM as a 2D NumPy array of shape (nx, ny), flipped
            along its second (Y) axis, and the spatial extent [x_min, x_max, y_min, y_max]
            of the raster footprint (the outermost cell edges, so that
            (x_max - x_min) / nx equals the resolution).

    Raises:
        ValueError: If no ground points are found for DTM generation.
        ValueError: If point cloud data is missing 'X', 'Y', or 'Z' fields.
        ValueError: If resolution is not > 0.
    """
    #todo: add parameter to allow interpolation of NA values.
    if resolution <= 0:
        raise ValueError(f"resolution must be > 0 (got {resolution})")

    arrays = list(ground_points)
    if not arrays:
        raise ValueError("No ground points found for DTM generation.")

    # Pull each field out of every array in one vectorised step instead of
    # iterating over individual points.
    try:
        x = np.concatenate([np.ravel(array['X']) for array in arrays])
        y = np.concatenate([np.ravel(array['Y']) for array in arrays])
        z = np.concatenate([np.ravel(array['Z']) for array in arrays])
    except ValueError as exc:
        raise ValueError("Ground point cloud data missing 'X', 'Y', or 'Z' fields.") from exc

    if x.size == 0:
        raise ValueError("No ground points found for DTM generation.")

    x_min, x_max = x.min(), x.max()
    y_min, y_max = y.min(), y.max()

    # np.arange yields a single edge when all points share a coordinate;
    # keep one resolution-sized cell in that case.
    x_bins = np.arange(x_min, x_max + resolution, resolution)
    if x_bins.size < 2:
        x_bins = np.array([x_min, x_min + resolution])
    y_bins = np.arange(y_min, y_max + resolution, resolution)
    if y_bins.size < 2:
        y_bins = np.array([y_min, y_min + resolution])

    nx = len(x_bins) - 1
    ny = len(y_bins) - 1

    # digitize puts points lying exactly on the last edge one past the final
    # cell; clip them back so they are not dropped.
    x_indices = np.clip(np.digitize(x, x_bins) - 1, 0, nx - 1)
    y_indices = np.clip(np.digitize(y, y_bins) - 1, 0, ny - 1)

    dtm = np.full((nx, ny), np.nan)

    # Unbuffered per-cell minimum. fmin ignores NaN, so the NaN-initialised
    # cells take the first elevation they see and keep the lowest afterwards.
    np.fmin.at(dtm, (x_indices, y_indices), np.asarray(z, dtype=float))

    dtm = np.fliplr(dtm)

    # Report the footprint actually covered by the cells, matching the other
    # raster-producing functions in this module.
    extent = [x_bins[0], x_bins[-1], y_bins[0], y_bins[-1]]

    return dtm, extent