-
Notifications
You must be signed in to change notification settings - Fork 81
Description
Description
After upgrading to spatialdata 0.7.0 and dask 2025, plotting of points and other elements with dask dataframe as the underlying data structure appears to be broken.
The issue originates from the calls to transform that render_* functions perform:
transform(sdata["transcripts"], to_coordinate_system="global")
Results in ValueError: The number of items in 'lengths' does not match the number of partitions. if sdata["transcripts"].npartitions is > 1.
MRE
Since I am working with Xenium data, for my MRE I used the xenium_rep1_io dataset listed in your docs.
import spatialdata as sd
import spatialdata_plot
sdata = sd.read_zarr("temp/xenium/example_data/xenium_v1_human_lung/data.zarr")
feat = sdata["table"].var_names.tolist()[0]
(
sdata
.pl.render_images("he_image")
.pl.render_points(
"transcripts",
table_name="table",
color="feature_name",
groups=feat,
palette="red",
)
.pl.show()
)
This yields ValueError: The number of items in 'lengths' does not match the number of partitions. 2 != 4
Expand for stacktrace
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[41], line 11
1 feat = sdata["table"].var_names.tolist()[0]
2 (
3 sdata.pl.render_images("he_image")
4 .pl.render_points(
5 "transcripts",
6 table_name="table",
7 color="feature_name",
8 groups=feat,
9 palette="red",
10 )
---> 11 .pl.show()
12 )
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata_plot/pl/basic.py:1163, in PlotAccessor.show(self, coordinate_systems, legend_fontsize, legend_fontweight, legend_loc, legend_fontoutline, na_in_legend, colorbar, colorbar_params, wspace, hspace, ncols, frameon, figsize, dpi, fig, title, share_extent, pad_extent, ax, return_ax, save)
1160 ax.set_title(t)
1161 ax.set_aspect("equal")
-> 1163 extent = get_extent(
1164 sdata,
1165 coordinate_system=cs,
1166 has_images=has_images and wants_images,
1167 has_labels=has_labels and wants_labels,
1168 has_points=has_points and wants_points,
1169 has_shapes=has_shapes and wants_shapes,
1170 elements=wanted_elements,
1171 )
1172 cs_x_min, cs_x_max = extent["x"]
1173 cs_y_min, cs_y_max = extent["y"]
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/functools.py:912, in singledispatch..wrapper(*args, **kw)
908 if not args:
909 raise TypeError(f'{funcname} requires at least '
910 '1 positional argument')
--> 912 return dispatch(args[0].class)(*args, **kw)
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata/_core/data_extent.py:213, in _(e, coordinate_system, exact, has_images, has_labels, has_points, has_shapes, elements)
211 if coordinate_system in coordinate_systems:
212 if isinstance(element_obj, DaskDataFrame | GeoDataFrame):
--> 213 extent = get_extent(element_obj, coordinate_system=coordinate_system, exact=exact)
214 else:
215 extent = get_extent(element_obj, coordinate_system=coordinate_system)
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/functools.py:912, in singledispatch..wrapper(*args, **kw)
908 if not args:
909 raise TypeError(f'{funcname} requires at least '
910 '1 positional argument')
--> 912 return dispatch(args[0].class)(*args, **kw)
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata/_core/data_extent.py:291, in _(e, coordinate_system, exact)
285 extent = _get_extent_of_points(e)
286 return _compute_extent_in_coordinate_system(
287 element=e,
288 coordinate_system=coordinate_system,
289 extent=extent,
290 )
--> 291 transformed = transform(e, to_coordinate_system=coordinate_system)
292 return _get_extent_of_points(transformed)
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/functools.py:912, in singledispatch..wrapper(*args, **kw)
908 if not args:
909 raise TypeError(f'{funcname} requires at least '
910 '1 positional argument')
--> 912 return dispatch(args[0].class)(*args, **kw)
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/spatialdata/_core/operations/transform.py:443, in _(data, transformation, maintain_positioning, to_coordinate_system)
441 arrays = []
442 for ax in axes:
--> 443 arrays.append(data[ax].to_dask_array(lengths=True).reshape(-1, 1))
444 xdata = DataArray(da.concatenate(arrays, axis=1), coords={"points": range(len(data)), "dim": list(axes)})
445 xtransformed = transformation._transform_coordinates(xdata)
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/dask/dataframe/dask_expr/_collection.py:1422, in FrameBase.to_dask_array(self, lengths, meta, optimize, **optimize_kwargs)
1418 lengths = tuple(self.map_partitions(len).compute())
1420 arr = self.values
-> 1422 chunks = self._validate_chunks(arr, lengths)
1423 arr._chunks = chunks
1425 if meta is not None:
File ~/projects/ddr_st/ddr_st_xenium/lib/python3.12/site-packages/dask/dataframe/dask_expr/_collection.py:2471, in FrameBase._validate_chunks(self, arr, lengths)
2468 lengths = tuple(lengths)
2470 if len(lengths) != self.npartitions:
-> 2471 raise ValueError(
2472 "The number of items in 'lengths' does not match the number of "
2473 f"partitions. {len(lengths)} != {self.npartitions}"
2474 )
2476 if self.ndim == 1:
2477 chunks = normalize_chunks((lengths,))
ValueError: The number of items in 'lengths' does not match the number of partitions. 2 != 4
Workaround
print(sdata["transcripts"].npartitions)
# 4
sdata["transcripts"] = sd.models.PointsModel.parse(
sdata["transcripts"].repartition(npartitions=1).reset_index(drop=True),
transformations={"global": sd.transformations.get_transformation(sdata["transcripts"])},
)
print(sdata["transcripts"].npartitions)
# 1
# works without error
(
sdata
.pl.render_images("he_image")
.pl.render_points(
"transcripts",
table_name="table",
color="feature_name",
groups=feat,
palette="red",
)
.pl.show()
)
Additional context
# works
sd.get_extent(sdata["transcripts"], coordinate_system="global", exact=False)
# raises ValueError
sd.get_extent(sdata["transcripts"], coordinate_system="global", exact=True)
# originating from
sd.transform(sdata["transcripts"], to_coordinate_system="global")
Note that the call to reset_index(drop=True) is necessary in the workaround above to avoid ValueError: cannot reindex on an axis with duplicate labels.
This issue cannot be reproduced by partitioning the blobs dataset:
from spatialdata.datasets import blobs
sdata = blobs()
print(sdata["blobs_points"].npartitions)
# 1
sdata["blobs_points"] = sd.models.PointsModel.parse(
sdata["blobs_points"].repartition(npartitions=4),
transformations={"global": sd.transformations.get_transformation(sdata["blobs_points"])},
)
print(sdata["blobs_points"].npartitions)
# 4
# works
sd.transform(sdata["blobs_points"], to_coordinate_system="global")
This error occurs with
- spatialdata 0.7.0 + dask 2025.12, dask 2025.9 or dask 2025.2.0
This error does not occur with
- spatialdata 0.6.1 + dask 2024.11.2
Session Info
Click to view session information
----- dask 2025.12.0 session_info v1.0.1 spatialdata 0.7.0 spatialdata_plot 0.2.14 -----
Click to view modules imported as dependencies
PIL 12.1.0 anndata 0.12.9 annsel 0.1.2 anyio NA argcomplete NA arrow 1.4.0 asttokens NA attr 25.4.0 attrs 25.4.0 babel 2.18.0 certifi 2026.01.04 cffi 2.0.0 charset_normalizer 3.4.4 cloudpickle 3.1.2 comm 0.2.3 cuda 12.9.4 cycler 0.12.1 cython_runtime NA dask_image NA datashader 0.18.2 dateutil 2.9.0.post0 debugpy 1.8.20 decorator 5.2.1 defusedxml 0.7.1 donfig 0.8.1.post1 executing 2.2.1 fast_array_utils NA fastjsonschema NA fqdn NA fsspec 2026.2.0 geopandas 1.1.2 google_crc32c NA h5py 3.15.1 idna 3.11 ipykernel 7.1.0 ipywidgets 8.1.8 isoduration NA jaraco NA jedi 0.19.2 jinja2 3.1.6 joblib 1.5.3 json5 0.13.0 jsonpointer 3.0.0 jsonschema 4.26.0 jsonschema_specifications NA jupyter_events 0.12.0 jupyter_server 2.17.0 jupyterlab_server 2.28.0 kiwisolver 1.4.9 lark 1.3.1 lazy_loader 0.4 legacy_api_wrap NA llvmlite 0.46.0 locket NA markupsafe 3.0.3 matplotlib 3.10.8 matplotlib_inline 0.2.1 matplotlib_scalebar 0.9.0 more_itertools 10.8.0 mpl_toolkits NA msgpack 1.1.2 multipledispatch 0.6.0 multiscale_spatial_image 2.0.3 narwhals 2.16.0 natsort 8.4.0 nbformat 5.10.4 networkx 3.6.1 numba 0.63.1 numcodecs 0.16.5 numexpr 2.14.1 numpy 2.3.5 ome_zarr NA packaging 26.0 pandas 2.3.3 parso 0.8.5 partd 1.4.2 pathlib_abc NA pkg_resources NA platformdirs 4.5.1 pooch v1.9.0 prometheus_client NA prompt_toolkit 3.0.52 psutil 7.2.2 pure_eval 0.2.3 pyarrow 22.0.0 pycparser 3.00 pyct 0.6.0 pydev_ipython NA pydevconsole NA pydevd 3.2.3 pydevd_file_utils NA pydevd_plugins NA pydevd_tracing NA pygments 2.19.2 pyparsing 3.3.2 pyproj 3.7.2 pythonjsonlogger NA pytz 2025.2 referencing NA requests 2.32.5 rfc3339_validator 0.1.4 rfc3986_validator 0.1.1 rfc3987_syntax NA rich NA rpds NA scanpy 1.12 scipy 1.16.0 send2trash NA shapely 2.1.2 six 1.17.0 skimage 0.26.0 sklearn 1.8.0 spatial_image 1.2.3 stack_data 0.6.3 testing NA threadpoolctl 3.6.0 tlz 1.1.0 toolz 1.1.0 tornado 6.5.4 tqdm 4.67.3 traitlets 5.14.3 typing_extensions NA upath 0.3.9 uri_template NA urllib3 2.6.3 vscode NA wcwidth 0.5.3 
webcolors NA websocket 1.9.0 xarray 2026.1.0 xarray_dataclass 3.0.0 xarray_schema 0.0.3 xrspatial 0.5.2 yaml 6.0.3 zarr 3.1.5 zmq 27.1.0 zoneinfo NA
----- IPython 9.10.0 jupyter_client 8.8.0 jupyter_core 5.9.1 jupyterlab 4.5.3 notebook 7.5.3 ----- Python 3.12.12 | packaged by conda-forge | (main, Jan 26 2026, 23:51:32) [GCC 14.3.0] Linux-6.8.0-90-generic-x86_64-with-glibc2.39 ----- Session information updated at 2026-02-06 13:16