# Start the SGLang server module (python3 -m sglang.launch_server) for the
# model stored at /root/windows/gpt-oss-120b, served under the name "sglang"
# and bound to 0.0.0.0:8099.
#
# NCCL_P2P_LEVEL and CUDA_VISIBLE_DEVICES are written as prefix assignments,
# so they are placed in the environment of this one command only and are not
# left set in the calling shell.
#
# NOTE(review): CUDA_VISIBLE_DEVICES lists indices 1-8 (eight entries, index 0
# omitted) to match --tensor-parallel-size 8. CUDA device indices are
# zero-based, so this presumably requires a host with at least nine GPUs --
# confirm that 1-8 (rather than 0-7) is intended.
#
# One setting per line, in the original order, to keep diffs readable.
NCCL_P2P_LEVEL=SYS \
CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7,8 \
python3 -m sglang.launch_server \
  --model-path /root/windows/gpt-oss-120b \
  --served-model-name sglang \
  --tensor-parallel-size 8 \
  --max-running-requests 20 \
  --trust-remote-code \
  --host 0.0.0.0 \
  --port 8099 \
  --context-length 131072 \
  --dtype auto \
  --quantization mxfp4 \
  --disable-hybrid-swa-memory \
  --mem-fraction-static 0.9 \
  --sleep-on-idle