-
-
Notifications
You must be signed in to change notification settings - Fork 4.1k
Open
Labels
Description
docker run -d -e JUPYTER_PASSWORD="mypassword" \
-p 8888:8888 -p 2222:22 \
-v $(pwd)/work:/workspace/work \
--gpus all \
unsloth/unsloth
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.12.5: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.2.
   \\   /|    NVIDIA H200. Num GPUs = 1. Max memory: 139.812 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu128. CUDA: 9.0. CUDA Toolkit: 12.8. Triton: 3.5.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Traceback (most recent call last):
File "/workspace/work/train_oss.py", line 21, in <module>
model, tokenizer = FastLanguageModel.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/unsloth/models/loader.py", line 486, in from_pretrained
return FastModel.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/unsloth/models/loader.py", line 1154, in from_pretrained
model, tokenizer = FastBaseModel.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/unsloth/models/vision.py", line 661, in from_pretrained
model = auto_model.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 604, in from_pretrained
return model_class.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 288, in _wrapper
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5179, in from_pretrained
) = cls._load_pretrained_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5496, in _load_pretrained_model
model._initialize_missing_keys(checkpoint_keys, ignore_mismatched_sizes, is_quantized)
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 6068, in _initialize_missing_keys
self.initialize_weights()
File "/opt/conda/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 120, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3105, in initialize_weights
self.smart_apply(self._initialize_weights)
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3096, in smart_apply
module.smart_apply(module._initialize_weights)
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3098, in smart_apply
module.smart_apply(fn)
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3098, in smart_apply
module.smart_apply(fn)
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3098, in smart_apply
module.smart_apply(fn)
[Previous line repeated 1 more time]
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3099, in smart_apply
fn(self)
File "/opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py", line 3073, in _initialize_weights
self._init_weights(module)
File "/opt/conda/lib/python3.11/site-packages/transformers/models/gpt_oss/modeling_gpt_oss.py", line 430, in _init_weights
module.weight.data.normal_(mean=0.0, std=std)
^^^^^^^^^^^^^
File "/opt/conda/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1964, in __getattr__
raise AttributeError(
AttributeError: 'GptOssTopKRouter' object has no attribute 'weight'