Merged
64 commits
dd38ad4
add attrs accesor
melonora Oct 23, 2025
4feb491
change deprecated Index access
melonora Oct 23, 2025
1c042ea
add accessor to init
melonora Oct 23, 2025
b733de2
remove query planning
melonora Oct 23, 2025
e53e215
additional changes to accessor
melonora Oct 25, 2025
3ae1d29
divisions is not settable anymore
melonora Oct 25, 2025
19684fb
add fixes
melonora Oct 27, 2025
51b733e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 27, 2025
88fe003
fix rasterize points
melonora Oct 28, 2025
d8b2cc4
fix rasterize points
melonora Oct 28, 2025
239f693
copy partitioned attrs
melonora Oct 28, 2025
afad6bd
fix mypy
melonora Oct 29, 2025
e017ca7
fix last mypy error
melonora Oct 29, 2025
d11655a
Apply suggestion from @melonora
melonora Oct 30, 2025
8253eb8
Apply suggestion from @melonora
melonora Oct 30, 2025
078469a
Apply suggestion from @melonora
melonora Oct 30, 2025
65839b4
deduplicate
melonora Oct 30, 2025
1a7bfbf
deduplicate
melonora Oct 30, 2025
a7a6018
Merge branch 'main' into dataframe_accessor
LucaMarconato Oct 31, 2025
f1fb487
.attrs is now always an accessor, never a dict
LucaMarconato Nov 2, 2025
d7d0b4d
simplify wrapper logic:
LucaMarconato Nov 2, 2025
66a6095
revert after loc/iloc indexer
melonora Nov 2, 2025
2c81509
clean-up, simplify accessor logic
LucaMarconato Nov 2, 2025
47670da
remove asserts
melonora Nov 2, 2025
8b18989
Merge branch 'dataframe_accessor' of https://github.com/melonora/spat…
LucaMarconato Nov 2, 2025
e3c8bc8
remove asserts
melonora Nov 2, 2025
868a5a2
remove asserts
melonora Nov 2, 2025
d43753c
simplify accessor logic by reducing number of classes
LucaMarconato Nov 2, 2025
f7e0caa
Merge branch 'dataframe_accessor' of https://github.com/melonora/spat…
LucaMarconato Nov 2, 2025
e0ab1d8
rename wrap_with_attrs
LucaMarconato Nov 2, 2025
4257183
remove comment
melonora Nov 2, 2025
b765da2
remove comment
melonora Nov 2, 2025
e49a580
wrapping methods for dd.Series
LucaMarconato Nov 2, 2025
48b38b9
Merge branch 'dataframe_accessor' of https://github.com/melonora/spat…
LucaMarconato Nov 2, 2025
ed6b457
add dask tests for accessor
LucaMarconato Nov 2, 2025
9763016
fix index.compute() attrs missing
LucaMarconato Nov 2, 2025
0415c89
change fix .attrs on index
melonora Nov 2, 2025
1541265
change fix .attrs on index
melonora Nov 2, 2025
dc40fde
wrap dd.Series.loc
LucaMarconato Nov 2, 2025
b5206aa
Merge branch 'dataframe_accessor' of https://github.com/melonora/spat…
LucaMarconato Nov 2, 2025
3770407
remove old code, add comments
melonora Nov 2, 2025
e2905e5
remove old code, add comments
melonora Nov 2, 2025
f46e225
move accesor code
melonora Nov 2, 2025
f76672e
change git workflow
melonora Nov 2, 2025
f68d55d
some fixes
melonora Nov 2, 2025
9f26549
remove old test code
melonora Nov 2, 2025
39dc8fb
test dask among os
melonora Nov 3, 2025
1a83439
fix
melonora Nov 3, 2025
18fdb70
fix
melonora Nov 3, 2025
d43cac1
fix
melonora Nov 3, 2025
a78c680
revert changes
melonora Nov 3, 2025
8d5251b
fix
melonora Nov 3, 2025
b9a228a
adjust
melonora Nov 3, 2025
7efabfe
adjust dask pin
melonora Nov 3, 2025
3ed65bd
adjust dask pin
melonora Nov 3, 2025
1824296
fix dask backing files and windows permissions
melonora Nov 3, 2025
42c2452
fix dask mixed graph problem
melonora Nov 3, 2025
93b48be
temporary fix indexing
melonora Nov 3, 2025
1813c84
fix rasterize
melonora Nov 4, 2025
50374bb
adjust github workflow
melonora Nov 4, 2025
fafede5
move 3.13 to include
melonora Nov 4, 2025
a06302d
make more concise
melonora Nov 4, 2025
990891a
Apply suggestion from @melonora
melonora Nov 4, 2025
72121d3
fix str representation
melonora Nov 22, 2025
34 changes: 20 additions & 14 deletions .github/workflows/test.yaml
@@ -4,7 +4,7 @@ on:
push:
branches: [main]
tags:
- "v*" # Push events to matching v*, i.e. v1.0, v20.15.10
- "v*"
pull_request:
branches: "*"

@@ -13,26 +13,24 @@ jobs:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash -e {0} # -e to fail on error
shell: bash -e {0}

strategy:
fail-fast: false
matrix:
python: ["3.11", "3.13"]
os: [ubuntu-latest]
include:
- os: macos-latest
python: "3.11"
- os: macos-latest
python: "3.12"
pip-flags: "--pre"
name: "Python 3.12 (pre-release)"
- os: windows-latest
python: "3.11"

- {os: windows-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: windows-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: ubuntu-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: ubuntu-latest, python: "3.13", dask-version: "latest", name: "Dask latest"}
- {os: macos-latest, python: "3.11", dask-version: "2025.2.0", name: "Dask 2025.2.0"}
- {os: macos-latest, python: "3.11", dask-version: "latest", name: "Dask latest"}
- {os: macos-latest, python: "3.12", pip-flags: "--pre", name: "Python 3.12 (pre-release)"}
env:
OS: ${{ matrix.os }}
PYTHON: ${{ matrix.python }}
DASK_VERSION: ${{ matrix.dask-version }}

steps:
- uses: actions/checkout@v2
@@ -42,7 +40,15 @@ jobs:
version: "latest"
python-version: ${{ matrix.python }}
- name: Install dependencies
run: "uv sync --extra test"
run: |
uv sync --extra test
if [[ -n "${DASK_VERSION}" ]]; then
if [[ "${DASK_VERSION}" == "latest" ]]; then
uv pip install --upgrade dask
else
uv pip install dask==${DASK_VERSION}
fi
fi
- name: Test
env:
MPLBACKEND: agg
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -12,6 +12,7 @@ repos:
rev: v3.5.3
hooks:
- id: prettier
exclude: ^.github/workflows/test.yaml
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
hooks:
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -25,7 +25,7 @@ dependencies = [
"anndata>=0.9.1",
"click",
"dask-image",
"dask>=2024.10.0,<=2024.11.2",
"dask>=2025.2.0",
"datashader",
"fsspec[s3,http]",
"geopandas>=0.14",
17 changes: 2 additions & 15 deletions src/spatialdata/__init__.py
@@ -1,20 +1,7 @@
import dask

dask.config.set({"dataframe.query-planning": False})
import dask.dataframe as dd

# Setting `dataframe.query-planning` to False is effective only if run before `dask.dataframe` is initialized. In
# the case in which the user had initilized `dask.dataframe` before, we would have DASK_EXPER_ENABLED set to `True`.
# Here we check that this does not happen.
if hasattr(dd, "DASK_EXPR_ENABLED") and dd.DASK_EXPR_ENABLED:
raise RuntimeError(
"Unsupported backend: dask-expr has been detected as the backend of dask.dataframe. Please "
"use:\nimport dask\ndask.config.set({'dataframe.query-planning': False})\nbefore importing "
"dask.dataframe to disable dask-expr. The support is being worked on, for more information please see"
"https://github.com/scverse/spatialdata/pull/570"
)
from importlib.metadata import version

import spatialdata.models._accessor # noqa: F401

__version__ = version("spatialdata")

__all__ = [
7 changes: 5 additions & 2 deletions src/spatialdata/_core/_deepcopy.py
@@ -94,9 +94,12 @@ def _(gdf: GeoDataFrame) -> GeoDataFrame:
@deepcopy.register(DaskDataFrame)
def _(df: DaskDataFrame) -> DaskDataFrame:
# bug: the parser may change the order of the columns
new_ddf = PointsModel.parse(df.compute().copy(deep=True))
compute_df = df.compute().copy(deep=True)
new_ddf = PointsModel.parse(compute_df)
# the problem is not .copy(deep=True), but the parser, which discards some metadata https://github.com/scverse/spatialdata/issues/503#issuecomment-2015275322
new_ddf.attrs = _deepcopy(df.attrs)
# We need to use the compute_df here as with deepcopy, df._attrs does not exist anymore.
# print(type(new_ddf.attrs))
new_ddf.attrs.update(_deepcopy(compute_df.attrs))
return new_ddf


10 changes: 9 additions & 1 deletion src/spatialdata/_core/operations/rasterize.py
@@ -653,20 +653,28 @@ def rasterize_shapes_points(

table_name = table_name if table_name is not None else "table"

index = False
if value_key is not None:
kwargs = {"sdata": sdata, "element_name": element_name} if element_name is not None else {"element": data}
data[VALUES_COLUMN] = get_values(value_key, table_name=table_name, **kwargs).iloc[:, 0] # type: ignore[arg-type, union-attr]
elif isinstance(data, GeoDataFrame) or isinstance(data, DaskDataFrame) and return_regions_as_labels is True:
value_key = VALUES_COLUMN
data[VALUES_COLUMN] = data.index.astype("category")
index = True
else:
value_key = VALUES_COLUMN
data[VALUES_COLUMN] = 1

label_index_to_category = None
if VALUES_COLUMN in data and data[VALUES_COLUMN].dtype == "category":
if isinstance(data, DaskDataFrame):
data[VALUES_COLUMN] = data[VALUES_COLUMN].cat.as_known()
# We have to do this because as_known() does not preserve the order anymore in latest dask versions
# TODO discuss whether we can always expect the index from before to be monotonically increasing, because
# then we don't have to check order.
if index:
data[VALUES_COLUMN] = data[VALUES_COLUMN].cat.set_categories(data.index, ordered=True)
Comment on lines +671 to +675
LucaMarconato (Member), Oct 31, 2025:

Do you think that we could report this to dask? Maybe it is an unintended change. Or was it more that the order was never guaranteed in the first place?

melonora (Collaborator, PR author):

I will discuss it during their community meeting. I would have to dive a bit deeper into the exact cause, but dask itself does not seem to define set_categories, so it appears to be delegated to the pandas DataFrame, which only operates per partition; I am not certain about that, though. I did not want to spend too much time on it for now, as the dask maintainers can point me in the right direction much quicker.

else:
data[VALUES_COLUMN] = data[VALUES_COLUMN].cat.as_known()
label_index_to_category = dict(enumerate(data[VALUES_COLUMN].cat.categories, start=1))

if return_single_channel is None:
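For reference, a minimal runnable sketch of the workaround discussed in the thread above: the categorical column is derived from the index, as_known() materializes the categories, and set_categories re-imposes the original index order. The toy frame and the "values" column name are illustrative stand-ins for the points element and VALUES_COLUMN.

```python
import dask.dataframe as dd
import pandas as pd

# Toy stand-in for a points element whose index should define the category order.
pdf = pd.DataFrame({"x": [0.0, 1.0, 2.0, 3.0]}, index=[3, 1, 2, 0])
ddf = dd.from_pandas(pdf, npartitions=2, sort=False)

# Derive a categorical column from the index, as rasterize_shapes_points does.
ddf["values"] = ddf.index.astype("category")

# as_known() materializes the categories, but recent dask releases may not keep
# the original order, so it is re-imposed explicitly from the index.
ddf["values"] = ddf["values"].cat.as_known()
ddf["values"] = ddf["values"].cat.set_categories(pdf.index, ordered=True)

print(list(ddf["values"].compute().cat.categories))  # expected: [3, 1, 2, 0]
```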
21 changes: 12 additions & 9 deletions src/spatialdata/_core/operations/transform.py
@@ -3,11 +3,12 @@
import itertools
import warnings
from functools import singledispatch
from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, cast

import dask.array as da
import dask_image.ndinterp
import numpy as np
import pandas as pd
from dask.array.core import Array as DaskArray
from dask.dataframe import DataFrame as DaskDataFrame
from geopandas import GeoDataFrame
@@ -432,18 +433,20 @@ def _(
xtransformed = transformation._transform_coordinates(xdata)
transformed = data.drop(columns=list(axes)).copy()
# dummy transformation that will be replaced by _adjust_transformation()
transformed.attrs[TRANSFORM_KEY] = {DEFAULT_COORDINATE_SYSTEM: Identity()}
# TODO: the following line, used in place of the line before, leads to an incorrect aggregation result. Look into
# this! Reported here: ...
# transformed.attrs = {TRANSFORM_KEY: {DEFAULT_COORDINATE_SYSTEM: Identity()}}
assert isinstance(transformed, DaskDataFrame)
default_cs = {DEFAULT_COORDINATE_SYSTEM: Identity()}
transformed.attrs[TRANSFORM_KEY] = default_cs

for ax in axes:
indices = xtransformed["dim"] == ax
new_ax = xtransformed[:, indices]
transformed[ax] = new_ax.data.flatten()
# TODO: discuss with dask team
# This is not nice, but otherwise there is a problem with the joint graph of new_ax and transformed, causing
# a getattr missing dependency of dependent from_dask_array.
new_col = pd.Series(new_ax.data.flatten().compute(), index=transformed.index)
transformed[ax] = new_col

old_transformations = cast(dict[str, Any], get_transformation(data, get_all=True))

old_transformations = get_transformation(data, get_all=True)
assert isinstance(old_transformations, dict)
_set_transformation_for_transformed_elements(
transformed,
old_transformations,
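A minimal sketch of the workaround in the hunk above, using toy stand-ins for transformed and new_ax: the dask array is computed into a pandas Series before being assigned as a column, so the dask-array graph and the dask-dataframe graph are never joined.

```python
import dask.array as da
import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({"z": [0.0, 1.0, 2.0, 3.0]})
transformed = dd.from_pandas(pdf, npartitions=2)  # stand-in for the transformed points
new_ax = da.arange(4, chunks=2, dtype=float)      # stand-in for the transformed coordinates

# Assigning `new_ax` directly would mix the array and dataframe task graphs, which
# triggered the missing-dependency error mentioned in the comment; computing first avoids it.
transformed["x"] = pd.Series(new_ax.compute(), index=pdf.index)

print(transformed.compute())
```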
16 changes: 13 additions & 3 deletions src/spatialdata/_core/query/spatial_query.py
@@ -672,14 +672,24 @@ def _(
max_coordinate=max_coordinate_intrinsic,
)

# assert that the number of bounding boxes is correct
assert len(in_intrinsic_bounding_box) == len(min_coordinate)
if not (len_df := len(in_intrinsic_bounding_box)) == (len_bb := len(min_coordinate)):
raise ValueError(f"Number of dataframes `{len_df}` is not equal to the number of bounding boxes `{len_bb}`.")
points_in_intrinsic_bounding_box: list[DaskDataFrame | None] = []
points_pd = points.compute()
attrs = points.attrs.copy()
for mask in in_intrinsic_bounding_box:
if mask.sum() == 0:
points_in_intrinsic_bounding_box.append(None)
else:
points_in_intrinsic_bounding_box.append(points.loc[mask])
# TODO there is a problem when mixing dask dataframe graph with dask array graph. Need to compute for now.
# we can't compute either mask or points as when we calculate either one of them
# test_query_points_multiple_partitions will fail as the mask will be used to index each partition.
# However, if we compute and then create the dask array again we get the mixed dask graph problem.
mask_np = mask.compute()
filtered_pd = points_pd[mask_np]
points_filtered = dd.from_pandas(filtered_pd, npartitions=points.npartitions)
points_filtered.attrs.update(attrs)
points_in_intrinsic_bounding_box.append(points_filtered)
if len(points_in_intrinsic_bounding_box) == 0:
return None

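A rough sketch of the mask-materialization pattern used above, with toy stand-ins for the points element and a bounding-box mask coming from a dask array. The real code additionally re-attaches the element metadata through the .attrs accessor introduced in this PR, which is omitted here.

```python
import dask.array as da
import dask.dataframe as dd
import pandas as pd

points = dd.from_pandas(pd.DataFrame({"x": [0.0, 1.5, 2.5, 4.0]}), npartitions=2)
mask = da.from_array([True, False, True, True], chunks=2)  # stand-in for in_intrinsic_bounding_box

# Indexing the dask dataframe with the dask-array mask would mix the two graph types,
# so both are materialized and the filtered frame is wrapped back into a dask dataframe.
points_pd = points.compute()
points_filtered = dd.from_pandas(points_pd[mask.compute()], npartitions=points.npartitions)

print(points_filtered.compute())
```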
7 changes: 2 additions & 5 deletions src/spatialdata/_core/spatialdata.py
@@ -13,8 +13,7 @@
import zarr
from anndata import AnnData
from dask.dataframe import DataFrame as DaskDataFrame
from dask.dataframe import read_parquet
from dask.delayed import Delayed
from dask.dataframe import Scalar, read_parquet
from geopandas import GeoDataFrame
from shapely import MultiPolygon, Polygon
from xarray import DataArray, DataTree
@@ -1985,9 +1984,7 @@ def h(s: str) -> str:
else:
shape_str = (
"("
+ ", ".join(
[(str(dim) if not isinstance(dim, Delayed) else "<Delayed>") for dim in v.shape]
)
+ ", ".join([(str(dim) if not isinstance(dim, Scalar) else "<Delayed>") for dim in v.shape])
+ ")"
)
descr += f"{h(attr + 'level1.1')}{k!r}: {descr_class} with shape: {shape_str} {dim_string}"
44 changes: 32 additions & 12 deletions src/spatialdata/_io/_utils.py
@@ -14,6 +14,7 @@

import zarr
from anndata import AnnData
from dask._task_spec import Task
from dask.array import Array as DaskArray
from dask.dataframe import DataFrame as DaskDataFrame
from geopandas import GeoDataFrame
@@ -301,6 +302,19 @@ def _get_backing_files(element: DaskArray | DaskDataFrame) -> list[str]:
return files


def _find_piece_dict(obj: dict[str, tuple[str | None]] | Task) -> dict[str, tuple[str | None | None]] | None:
"""Recursively search for dict containing the key 'piece' in Dask task specs containing the parquet file path."""
if isinstance(obj, dict):
if "piece" in obj:
return obj
elif hasattr(obj, "args"): # Handles dask._task_spec.* objects like Task and List
for v in obj.args:
result = _find_piece_dict(v)
if result is not None:
return result
return None


def _search_for_backing_files_recursively(subgraph: Any, files: list[str]) -> None:
# see the types allowed for the dask graph here: https://docs.dask.org/en/stable/spec.html

@@ -327,25 +341,31 @@ def _search_for_backing_files_recursively(subgraph: Any, files: list[str]) -> None:
path = getattr(v.store, "path", None) if getattr(v.store, "path", None) else v.store.root
files.append(str(UPath(path).resolve()))
elif name.startswith("read-parquet") or name.startswith("read_parquet"):
if hasattr(v, "creation_info"):
# https://github.com/dask/dask/blob/ff2488aec44d641696e0b7aa41ed9e995c710705/dask/dataframe/io/parquet/core.py#L625
t = v.creation_info["args"]
if not isinstance(t, tuple) or len(t) != 1:
raise ValueError(
f"Unable to parse the parquet file from the dask subgraph {subgraph}. Please "
f"report this bug."
)
parquet_file = t[0]
files.append(str(UPath(parquet_file).resolve()))
elif isinstance(v, tuple) and len(v) > 1 and isinstance(v[1], dict) and "piece" in v[1]:
# Here v is a read_parquet task with arguments and the only value is a dictionary.
if "piece" in v.args[0]:
# https://github.com/dask/dask/blob/ff2488aec44d641696e0b7aa41ed9e995c710705/dask/dataframe/io/parquet/core.py#L870
parquet_file, check0, check1 = v[1]["piece"]
parquet_file, check0, check1 = v.args[0]["piece"]
if not parquet_file.endswith(".parquet") or check0 is not None or check1 is not None:
raise ValueError(
f"Unable to parse the parquet file from the dask subgraph {subgraph}. Please "
f"report this bug."
)
files.append(os.path.realpath(parquet_file))
else:
# This occurs when for example points and images are mixed, the main task still starts with
# read_parquet, but the execution happens through a subgraph which we iterate over to get the
# actual read_parquet task.
for task in v.args[0].values():
# Recursively go through tasks, this is required because differences between dask versions.
piece_dict = _find_piece_dict(task)
if isinstance(piece_dict, dict) and "piece" in piece_dict:
parquet_file, check0, check1 = piece_dict["piece"] # type: ignore[misc]
if not parquet_file.endswith(".parquet") or check0 is not None or check1 is not None:
raise ValueError(
f"Unable to parse the parquet file from the dask subgraph {subgraph}. Please "
f"report this bug."
)
files.append(os.path.realpath(parquet_file))


def _backed_elements_contained_in_path(path: Path, object: SpatialData | SpatialElement | AnnData) -> list[bool]:
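A small usage sketch for the _find_piece_dict helper added above. A real graph value would be a dask._task_spec.Task; here duck-typed stand-ins with an args attribute are enough, since that is all the helper inspects.

```python
from types import SimpleNamespace

from spatialdata._io._utils import _find_piece_dict  # private helper added in this PR

# Nested stand-in mimicking a read-parquet task wrapping a subgraph.
piece = {"piece": ("/tmp/points.parquet", None, None)}
inner_task = SimpleNamespace(args=(piece,))
outer_task = SimpleNamespace(args=("unrelated-arg", inner_task))

found = _find_piece_dict(outer_task)
assert found is not None and found["piece"][0].endswith(".parquet")
```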
4 changes: 3 additions & 1 deletion src/spatialdata/_io/io_raster.py
@@ -347,7 +347,9 @@ def _write_raster_datatree(
compute=False,
)
# Compute all pyramid levels at once to allow Dask to optimize the computational graph.
da.compute(*dask_delayed)
# Optimize_graph is set to False for now as this causes permission denied errors when during atomic writes
# os.replace is called. These can also be alleviated by using 'single-threaded' scheduler.
da.compute(*dask_delayed, optimize_graph=False)

trans_group = group["labels"][element_name] if raster_type == "labels" else group
overwrite_coordinate_transformations_raster(
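For reference, a minimal illustration of the compute pattern in the hunk above: all delayed writes are computed in a single call with graph optimization disabled, which per the comment sidesteps the Windows permission errors around os.replace during atomic writes. The delayed objects here are trivial stand-ins for the per-scale zarr writes.

```python
import dask
import dask.array as da

# Trivial stand-ins for the delayed zarr writes collected by _write_raster_datatree.
dask_delayed = [dask.delayed(sum)([i, i + 1]) for i in range(3)]

# Single compute over all delayed objects, with graph optimization turned off.
results = da.compute(*dask_delayed, optimize_graph=False)
print(results)  # (1, 3, 5)
```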
2 changes: 1 addition & 1 deletion src/spatialdata/datasets.py
@@ -365,7 +365,7 @@ def blobs_annotating_element(name: BlobsTypes) -> SpatialData:
instance_id = get_element_instances(sdata[name]).tolist()
else:
index = sdata[name].index
instance_id = index.compute().tolist() if isinstance(index, dask.dataframe.core.Index) else index.tolist()
instance_id = index.compute().tolist() if isinstance(index, dask.dataframe.Index) else index.tolist()
n = len(instance_id)
new_table = AnnData(shape=(n, 0), obs={"region": pd.Categorical([name] * n), "instance_id": instance_id})
new_table = TableModel.parse(new_table, region=name, region_key="region", instance_key="instance_id")
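A quick check of the updated isinstance test above: the diff swaps dask.dataframe.core.Index for the public dask.dataframe.Index, which is the supported spelling with the dask versions pinned in this PR.

```python
import dask.dataframe as dd
import pandas as pd

ddf = dd.from_pandas(pd.DataFrame({"a": [1, 2, 3]}), npartitions=1)

index = ddf.index
# Same pattern as blobs_annotating_element, checked against the public Index class.
instance_id = index.compute().tolist() if isinstance(index, dd.Index) else index.tolist()
print(instance_id)  # [0, 1, 2]
```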