code · pull · Feb 3, 2026 · Feb 3, 2026 · Feb 3, 2026 · Feb 3, 2026
diff --git a/comfy/lora.py b/comfy/lora.py
@@ -332,6 +332,12 @@ def model_lora_keys_unet(model, key_map={}):
                 key_map["{}".format(key_lora)] = k
                 key_map["transformer.{}".format(key_lora)] = k
 
+    if isinstance(model, comfy.model_base.ACEStep15):
+        for k in sdk:
+            if k.startswith("diffusion_model.decoder.") and k.endswith(".weight"):
+                key_lora = k[len("diffusion_model.decoder."):-len(".weight")]
+                key_map["base_model.model.{}".format(key_lora)] = k  # Official base model loras
+
     return key_map
 
 

diff --git a/comfy/sd.py b/comfy/sd.py
@@ -554,6 +554,8 @@ def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None)
             elif "decoder.layers.1.layers.0.beta" in sd:
                 config = {}
                 param_key = None
+                self.upscale_ratio = 2048
+                self.downscale_ratio = 2048
                 if "decoder.layers.2.layers.1.weight_v" in sd:
                     param_key = "decoder.layers.2.layers.1.weight_v"
                 if "decoder.layers.2.layers.1.parametrizations.weight.original1" in sd:
@@ -562,15 +564,15 @@ def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None)
                     if sd[param_key].shape[-1] == 12:
                         config["strides"] = [2, 4, 4, 6, 10]
                         self.audio_sample_rate = 48000
+                        self.upscale_ratio = 1920
+                        self.downscale_ratio = 1920
 
                 self.first_stage_model = AudioOobleckVAE(**config)
                 self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype)
                 self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * 2048) * model_management.dtype_size(dtype)
                 self.latent_channels = 64
                 self.output_channels = 2
                 self.pad_channel_value = "replicate"
-                self.upscale_ratio = 2048
-                self.downscale_ratio = 2048
                 self.latent_dim = 1
                 self.process_output = lambda audio: audio
                 self.process_input = lambda audio: audio
@@ -870,7 +872,7 @@ def decode_tiled_(self, samples, tile_x=64, tile_y=64, overlap = 16):
             / 3.0)
         return output
 
-    def decode_tiled_1d(self, samples, tile_x=128, overlap=32):
+    def decode_tiled_1d(self, samples, tile_x=256, overlap=32):
         if samples.ndim == 3:
             decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float()
         else:

diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py
@@ -6,6 +6,7 @@
 
 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
+import comfy.ops
 import comfy.ldm.common_dit
 import comfy.clip_model
 
@@ -627,10 +628,10 @@ def forward(self, x, attention_mask=None, embeds=None, num_tokens=None, intermed
         mask = None
         if attention_mask is not None:
             mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1])
-            mask = mask.masked_fill(mask.to(torch.bool), float("-inf"))
+            mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min)
 
         if seq_len > 1:
-            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1)
+            causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min).triu_(1)
             if mask is not None:
                 mask += causal_mask
             else:
@@ -794,7 +795,27 @@ def __init__(self, config_dict, dtype, device, operations):
         self.dtype = dtype
 
     def logits(self, x):
-        return torch.nn.functional.linear(x[:, -1:], self.model.embed_tokens.weight.to(x), None)
+        """Project the last position of ``x`` onto the token-embedding weight.
+
+        Only ``x[:, -1:]`` is used and no bias is applied. When the embedding
+        module has ``comfy_cast_weights`` set, the weight is obtained through
+        ``comfy.ops.cast_bias_weight`` and handed back through
+        ``comfy.ops.uncast_bias_weight`` after the matmul; otherwise it is
+        converted with ``.to(x)``.
+        """
+        embed = self.model.embed_tokens
+        last_hidden = x[:, -1:]  # last index along dim 1, kept as a length-1 slice
+
+        if embed.comfy_cast_weights:
+            weight, _, offload_stream = comfy.ops.cast_bias_weight(embed, last_hidden, offloadable=True)
+        else:
+            weight, offload_stream = embed.weight.to(x), None
+
+        out = torch.nn.functional.linear(last_hidden, weight, None)
+
+        # Runs on both paths, as before; offload_stream stays None when no cast happened.
+        comfy.ops.uncast_bias_weight(embed, weight, None, offload_stream)
+        return out
 
 class Qwen3_4B(BaseLlama, torch.nn.Module):
     def __init__(self, config_dict, dtype, device, operations):

diff --git a/comfy_api/latest/__init__.py b/comfy_api/latest/__init__.py
@@ -7,7 +7,7 @@
 from comfy_api.internal.async_to_sync import create_sync_class
 from ._input import ImageInput, AudioInput, MaskInput, LatentInput, VideoInput
 from ._input_impl import VideoFromFile, VideoFromComponents
-from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL
+from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL, File3D
 from . import _io_public as io
 from . import _ui_public as ui
 from comfy_execution.utils import get_executing_context
@@ -105,6 +105,7 @@ class Types:
     VideoComponents = VideoComponents
     MESH = MESH
     VOXEL = VOXEL
+    File3D = File3D
 
 ComfyAPI = ComfyAPI_latest
 

diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py
@@ -27,7 +27,7 @@
 from comfy_api.internal import (_ComfyNodeInternal, _NodeOutputInternal, classproperty, copy_class, first_real_override, is_class,
     prune_dict, shallow_clone_class)
 from comfy_execution.graph_utils import ExecutionBlocker
-from ._util import MESH, VOXEL, SVG as _SVG
+from ._util import MESH, VOXEL, SVG as _SVG, File3D
 
 
 class FolderType(str, Enum):
@@ -667,6 +667,49 @@ class Voxel(ComfyTypeIO):
 class Mesh(ComfyTypeIO):
     Type = MESH
 
+
+@comfytype(io_type="FILE_3D")
+class File3DAny(ComfyTypeIO):
+    """General 3D file type - accepts any supported 3D format."""
+    Type = File3D  # value type is File3D (imported from ._util); this class body performs no format check
+
+
+@comfytype(io_type="FILE_3D_GLB")
+class File3DGLB(ComfyTypeIO):
+    """GLB format 3D file - binary glTF, best for web and cross-platform."""
+    Type = File3D  # same File3D value type as the other FILE_3D* types; the format is expressed only via io_type
+
+
+@comfytype(io_type="FILE_3D_GLTF")
+class File3DGLTF(ComfyTypeIO):
+    """GLTF format 3D file - JSON-based glTF with external resources."""
+    Type = File3D  # same File3D value type as the other FILE_3D* types; the format is expressed only via io_type
+
+
+@comfytype(io_type="FILE_3D_FBX")
+class File3DFBX(ComfyTypeIO):
+    """FBX format 3D file - best for game engines and animation."""
+    Type = File3D  # same File3D value type as the other FILE_3D* types; the format is expressed only via io_type
+
+
+@comfytype(io_type="FILE_3D_OBJ")
+class File3DOBJ(ComfyTypeIO):
+    """OBJ format 3D file - simple geometry format."""
+    Type = File3D  # same File3D value type as the other FILE_3D* types; the format is expressed only via io_type
+
+
+@comfytype(io_type="FILE_3D_STL")
+class File3DSTL(ComfyTypeIO):
+    """STL format 3D file - best for 3D printing."""
+    Type = File3D  # same File3D value type as the other FILE_3D* types; the format is expressed only via io_type
+
+
+@comfytype(io_type="FILE_3D_USDZ")
+class File3DUSDZ(ComfyTypeIO):
+    """USDZ format 3D file - Apple AR format."""
+    Type = File3D  # same File3D value type as the other FILE_3D* types; the format is expressed only via io_type
+
+
 @comfytype(io_type="HOOKS")
 class Hooks(ComfyTypeIO):
     if TYPE_CHECKING:
@@ -2037,6 +2080,13 @@ def as_dict(self) -> dict:
     "LossMap",
     "Voxel",
     "Mesh",
+    "File3DAny",
+    "File3DGLB",
+    "File3DGLTF",
+    "File3DFBX",
+    "File3DOBJ",
+    "File3DSTL",
+    "File3DUSDZ",
     "Hooks",
     "HookKeyframes",
     "TimestepsRange",

diff --git a/comfy_api/latest/_util/__init__.py b/comfy_api/latest/_util/__init__.py
@@ -1,5 +1,5 @@
 from .video_types import VideoContainer, VideoCodec, VideoComponents
-from .geometry_types import VOXEL, MESH
+from .geometry_types import VOXEL, MESH, File3D
 from .image_types import SVG
 
 __all__ = [
@@ -9,5 +9,6 @@
     "VideoComponents",
     "VOXEL",
     "MESH",
+    "File3D",
     "SVG",
 ]
diff --git a/comfy_api/latest/_util/geometry_types.py b/comfy_api/latest/_util/geometry_types.py
@@ -1,3 +1,8 @@
+import shutil
+from io import BytesIO
+from pathlib import Path
+from typing import IO
+
 import torch
 
 
@@ -10,3 +15,108 @@ class MESH:
     def __init__(self, vertices: torch.Tensor, faces: torch.Tensor):
         self.vertices = vertices
         self.faces = faces
+
+
+class File3D:
+    """A 3D model file backed by either a filesystem path or a binary stream.
+
+    A ``str`` source is treated as a path on disk; any other source is treated
+    as a readable binary stream (for example ``BytesIO``). The format is kept
+    without a leading dot and in lower case, e.g. ``"glb"``.
+    """
+
+    def __init__(self, source: str | IO[bytes], file_format: str = ""):
+        """Wrap *source* and record its format.
+
+        Args:
+            source: Path of a file on disk, or a readable binary stream.
+            file_format: Format name or extension; a leading dot and upper
+                case are accepted. When empty, the path suffix is used for
+                path sources and ``""`` for streams.
+        """
+        self._source = source
+        # Normalize like the ``format`` setter so ".GLB", "GLB" and "glb"
+        # are all stored as "glb".
+        self._format = self._normalize_format(file_format) or self._infer_format()
+
+    @staticmethod
+    def _normalize_format(value: str) -> str:
+        """Return *value* lower-cased without leading dots ("" when falsy)."""
+        return value.lstrip(".").lower() if value else ""
+
+    def _infer_format(self) -> str:
+        """Return the lower-cased path suffix for path sources, else ""."""
+        if isinstance(self._source, str):
+            return Path(self._source).suffix.lstrip(".").lower()
+        return ""
+
+    def _rewind(self) -> None:
+        """Seek a stream source back to offset 0 if it has ``seek``."""
+        if hasattr(self._source, "seek"):
+            self._source.seek(0)
+
+    @property
+    def format(self) -> str:
+        """Format name, lower-cased and without a leading dot; may be ""."""
+        return self._format
+
+    @format.setter
+    def format(self, value: str) -> None:
+        self._format = self._normalize_format(value)
+
+    @property
+    def is_disk_backed(self) -> bool:
+        """True when the source is a path string rather than a stream."""
+        return isinstance(self._source, str)
+
+    def get_source(self) -> str | IO[bytes]:
+        """Return the path, or the original stream rewound to its start."""
+        if isinstance(self._source, str):
+            return self._source
+        self._rewind()
+        return self._source
+
+    def get_data(self) -> BytesIO:
+        """Return the content as a ``BytesIO``.
+
+        Path sources and non-``BytesIO`` streams are read into a new buffer.
+        A ``BytesIO`` source is rewound and returned as-is (not copied), so
+        the caller shares it with this object.
+        """
+        if isinstance(self._source, str):
+            return BytesIO(Path(self._source).read_bytes())
+        self._rewind()
+        if isinstance(self._source, BytesIO):
+            return self._source
+        return BytesIO(self._source.read())
+
+    def save_to(self, path: str) -> str:
+        """Write the content to *path* and return the destination as a string.
+
+        Missing parent directories are created. A path source that already
+        resolves to *path* is left untouched.
+        """
+        dest = Path(path)
+        dest.parent.mkdir(parents=True, exist_ok=True)
+
+        if isinstance(self._source, str):
+            # Skip the copy when source and destination are the same file.
+            if Path(self._source).resolve() != dest.resolve():
+                shutil.copy2(self._source, dest)
+        else:
+            self._rewind()
+            with open(dest, "wb") as f:
+                f.write(self._source.read())
+        return str(dest)
+
+    def get_bytes(self) -> bytes:
+        """Return the whole content as ``bytes``."""
+        if isinstance(self._source, str):
+            return Path(self._source).read_bytes()
+        self._rewind()
+        return self._source.read()
+
+    def __repr__(self) -> str:
+        if isinstance(self._source, str):
+            return f"File3D(source={self._source!r}, format={self._format!r})"
+        return f"File3D(<stream>, format={self._format!r})"
diff --git a/comfy_api_nodes/apis/meshy.py b/comfy_api_nodes/apis/meshy.py
@@ -109,14 +109,19 @@ class MeshyTextureRequest(BaseModel):
 
 class MeshyModelsUrls(BaseModel):
     glb: str = Field("")
+    fbx: str = Field("")  # defaults to "" like glb; presumably the FBX download URL - confirm against the Meshy API
+    usdz: str = Field("")  # defaults to ""; presumably the USDZ download URL - confirm
+    obj: str = Field("")  # defaults to ""; presumably the OBJ download URL - confirm
 
 
 class MeshyRiggedModelsUrls(BaseModel):
     rigged_character_glb_url: str = Field("")
+    rigged_character_fbx_url: str = Field("")  # defaults to ""; presumably the FBX counterpart of the GLB URL - confirm
 
 
 class MeshyAnimatedModelsUrls(BaseModel):
     animation_glb_url: str = Field("")
+    animation_fbx_url: str = Field("")  # defaults to ""; presumably the FBX counterpart of the GLB URL - confirm
 
 
 class MeshyResultTextureUrls(BaseModel):