Skip to content

Functions relying on transform break with partitioned, dask-backed elements (dask 2025) #1064

@ppm1337

Description

@ppm1337

Description

After upgrading to spatialdata 0.7.0 and dask 2025, plotting of points and other elements with dask dataframe as the underlying data structure appears to be broken.

The issue originates from the calls to transform that render_* functions perform:

transform(sdata["transcripts"], to_coordinate_system="global")

This raises "ValueError: The number of items in 'lengths' does not match the number of partitions." whenever sdata["transcripts"].npartitions > 1.

MRE

Since I am working with Xenium data, I used the xenium_rep1_io data for my MRE listed in your docs.

import spatialdata as sd
import spatialdata_plot

sdata = sd.read_zarr("temp/xenium/example_data/xenium_v1_human_lung/data.zarr")
feat = sdata["table"].var_names.tolist()[0]
(
    sdata
    .pl.render_images("he_image")
    .pl.render_points(
        "transcripts",
        table_name="table",
        color="feature_name",
        groups=feat,
        palette="red",
    )
    .pl.show()
)

Yields ValueError: The number of items in 'lengths' does not match the number of partitions. 2 != 4

Expand for stacktrace
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[41], line 11
      1 feat = sdata["table"].var_names.tolist()[0]
      2 (
      3     sdata.pl.render_images("he_image")
      4     .pl.render_points(
      5         "transcripts",
      6         table_name="table",
      7         color="feature_name",
      8         groups=feat,
      9         palette="red",
     10     )
---> 11     .pl.show()
     12 )

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata_plot/pl/basic.py:1163, in PlotAccessor.show(self, coordinate_systems, legend_fontsize, legend_fontweight, legend_loc, legend_fontoutline, na_in_legend, colorbar, colorbar_params, wspace, hspace, ncols, frameon, figsize, dpi, fig, title, share_extent, pad_extent, ax, return_ax, save)
1160 ax.set_title(t)
1161 ax.set_aspect("equal")
-> 1163 extent = get_extent(
1164 sdata,
1165 coordinate_system=cs,
1166 has_images=has_images and wants_images,
1167 has_labels=has_labels and wants_labels,
1168 has_points=has_points and wants_points,
1169 has_shapes=has_shapes and wants_shapes,
1170 elements=wanted_elements,
1171 )
1172 cs_x_min, cs_x_max = extent["x"]
1173 cs_y_min, cs_y_max = extent["y"]

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/functools.py:912, in singledispatch.<locals>.wrapper(*args, **kw)
908 if not args:
909 raise TypeError(f'{funcname} requires at least '
910 '1 positional argument')
--> 912 return dispatch(args[0].__class__)(*args, **kw)

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata/_core/data_extent.py:213, in _(e, coordinate_system, exact, has_images, has_labels, has_points, has_shapes, elements)
211 if coordinate_system in coordinate_systems:
212 if isinstance(element_obj, DaskDataFrame | GeoDataFrame):
--> 213 extent = get_extent(element_obj, coordinate_system=coordinate_system, exact=exact)
214 else:
215 extent = get_extent(element_obj, coordinate_system=coordinate_system)

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/functools.py:912, in singledispatch.<locals>.wrapper(*args, **kw)
908 if not args:
909 raise TypeError(f'{funcname} requires at least '
910 '1 positional argument')
--> 912 return dispatch(args[0].__class__)(*args, **kw)

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata/_core/data_extent.py:291, in _(e, coordinate_system, exact)
285 extent = _get_extent_of_points(e)
286 return _compute_extent_in_coordinate_system(
287 element=e,
288 coordinate_system=coordinate_system,
289 extent=extent,
290 )
--> 291 transformed = transform(e, to_coordinate_system=coordinate_system)
292 return _get_extent_of_points(transformed)

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/functools.py:912, in singledispatch..wrapper(*args, **kw)
908 if not args:
909 raise TypeError(f'{funcname} requires at least '
910 '1 positional argument')
--> 912 return dispatch(args[0].__class__)(*args, **kw)

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata/_core/operations/transform.py:443, in _(data, transformation, maintain_positioning, to_coordinate_system)
441 arrays = []
442 for ax in axes:
--> 443 arrays.append(data[ax].to_dask_array(lengths=True).reshape(-1, 1))
444 xdata = DataArray(da.concatenate(arrays, axis=1), coords={"points": range(len(data)), "dim": list(axes)})
445 xtransformed = transformation._transform_coordinates(xdata)

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/dask/dataframe/dask_expr/_collection.py:1422, in FrameBase.to_dask_array(self, lengths, meta, optimize, **optimize_kwargs)
1418 lengths = tuple(self.map_partitions(len).compute())
1420 arr = self.values
-> 1422 chunks = self._validate_chunks(arr, lengths)
1423 arr._chunks = chunks
1425 if meta is not None:

File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/dask/dataframe/dask_expr/_collection.py:2471, in FrameBase._validate_chunks(self, arr, lengths)
2468 lengths = tuple(lengths)
2470 if len(lengths) != self.npartitions:
-> 2471 raise ValueError(
2472 "The number of items in 'lengths' does not match the number of "
2473 f"partitions. {len(lengths)} != {self.npartitions}"
2474 )
2476 if self.ndim == 1:
2477 chunks = normalize_chunks((lengths,))

ValueError: The number of items in 'lengths' does not match the number of partitions. 2 != 4

Workaround

print(sdata["transcripts"].npartitions)
# 4

sdata["transcripts"] = sd.models.PointsModel.parse(
    sdata["transcripts"].repartition(npartitions=1).reset_index(drop=True),
    transformations={"global": sd.transformations.get_transformation(sdata["transcripts"])},
)

print(sdata["transcripts"].npartitions)
# 1

# works without error
(
    sdata
    .pl.render_images("he_image")
    .pl.render_points(
        "transcripts",
        table_name="table",
        color="feature_name",
        groups=feat,
        palette="red",
    )
    .pl.show()
)

Additional context

# works
sd.get_extent(sdata["transcripts"], coordinate_system="global", exact=False)
# raises ValueError
sd.get_extent(sdata["transcripts"], coordinate_system="global", exact=True)
# originating from
sd.transform(sdata["transcripts"], to_coordinate_system="global")

The call to reset_index(drop=True) in the workaround above is necessary to avoid ValueError: cannot reindex on an axis with duplicate labels.

This issue cannot be reproduced by partitioning the blobs dataset:

from spatialdata.datasets import blobs

sdata = blobs()

print(sdata["blobs_points"].npartitions)
# 1

sdata["blobs_points"] = sd.models.PointsModel.parse(
    sdata["blobs_points"].repartition(npartitions=4),
    transformations={"global": sd.transformations.get_transformation(sdata["blobs_points"])},
)

print(sdata["blobs_points"].npartitions)
# 4

# works
sd.transform(sdata["blobs_points"], to_coordinate_system="global")

This error occurs with

  • spatialdata 0.7.0 + dask 2025.12, dask 2025.9 or dask 2025.2.0

This error does not occur with

  • spatialdata 0.6.1 + dask 2024.11.2

Session Info

Click to view session information
-----
dask                2025.12.0
session_info        v1.0.1
spatialdata         0.7.0
spatialdata_plot    0.2.14
-----
Click to view modules imported as dependencies
PIL                         12.1.0
anndata                     0.12.9
annsel                      0.1.2
anyio                       NA
argcomplete                 NA
arrow                       1.4.0
asttokens                   NA
attr                        25.4.0
attrs                       25.4.0
babel                       2.18.0
certifi                     2026.01.04
cffi                        2.0.0
charset_normalizer          3.4.4
cloudpickle                 3.1.2
comm                        0.2.3
cuda                        12.9.4
cycler                      0.12.1
cython_runtime              NA
dask_image                  NA
datashader                  0.18.2
dateutil                    2.9.0.post0
debugpy                     1.8.20
decorator                   5.2.1
defusedxml                  0.7.1
donfig                      0.8.1.post1
executing                   2.2.1
fast_array_utils            NA
fastjsonschema              NA
fqdn                        NA
fsspec                      2026.2.0
geopandas                   1.1.2
google_crc32c               NA
h5py                        3.15.1
idna                        3.11
ipykernel                   7.1.0
ipywidgets                  8.1.8
isoduration                 NA
jaraco                      NA
jedi                        0.19.2
jinja2                      3.1.6
joblib                      1.5.3
json5                       0.13.0
jsonpointer                 3.0.0
jsonschema                  4.26.0
jsonschema_specifications   NA
jupyter_events              0.12.0
jupyter_server              2.17.0
jupyterlab_server           2.28.0
kiwisolver                  1.4.9
lark                        1.3.1
lazy_loader                 0.4
legacy_api_wrap             NA
llvmlite                    0.46.0
locket                      NA
markupsafe                  3.0.3
matplotlib                  3.10.8
matplotlib_inline           0.2.1
matplotlib_scalebar         0.9.0
more_itertools              10.8.0
mpl_toolkits                NA
msgpack                     1.1.2
multipledispatch            0.6.0
multiscale_spatial_image    2.0.3
narwhals                    2.16.0
natsort                     8.4.0
nbformat                    5.10.4
networkx                    3.6.1
numba                       0.63.1
numcodecs                   0.16.5
numexpr                     2.14.1
numpy                       2.3.5
ome_zarr                    NA
packaging                   26.0
pandas                      2.3.3
parso                       0.8.5
partd                       1.4.2
pathlib_abc                 NA
pkg_resources               NA
platformdirs                4.5.1
pooch                       v1.9.0
prometheus_client           NA
prompt_toolkit              3.0.52
psutil                      7.2.2
pure_eval                   0.2.3
pyarrow                     22.0.0
pycparser                   3.00
pyct                        0.6.0
pydev_ipython               NA
pydevconsole                NA
pydevd                      3.2.3
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pygments                    2.19.2
pyparsing                   3.3.2
pyproj                      3.7.2
pythonjsonlogger            NA
pytz                        2025.2
referencing                 NA
requests                    2.32.5
rfc3339_validator           0.1.4
rfc3986_validator           0.1.1
rfc3987_syntax              NA
rich                        NA
rpds                        NA
scanpy                      1.12
scipy                       1.16.0
send2trash                  NA
shapely                     2.1.2
six                         1.17.0
skimage                     0.26.0
sklearn                     1.8.0
spatial_image               1.2.3
stack_data                  0.6.3
testing                     NA
threadpoolctl               3.6.0
tlz                         1.1.0
toolz                       1.1.0
tornado                     6.5.4
tqdm                        4.67.3
traitlets                   5.14.3
typing_extensions           NA
upath                       0.3.9
uri_template                NA
urllib3                     2.6.3
vscode                      NA
wcwidth                     0.5.3
webcolors                   NA
websocket                   1.9.0
xarray                      2026.1.0
xarray_dataclass            3.0.0
xarray_schema               0.0.3
xrspatial                   0.5.2
yaml                        6.0.3
zarr                        3.1.5
zmq                         27.1.0
zoneinfo                    NA
-----
IPython             9.10.0
jupyter_client      8.8.0
jupyter_core        5.9.1
jupyterlab          4.5.3
notebook            7.5.3
-----
Python 3.12.12 | packaged by conda-forge | (main, Jan 26 2026, 23:51:32) [GCC 14.3.0]
Linux-6.8.0-90-generic-x86_64-with-glibc2.39
-----
Session information updated at 2026-02-06 13:16

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions