apt-get update
apt-get install pciutils -y
curl -fsSL https://ollama.com/install.sh | sh
OLLAMA_MODELS=unsloth ollama serve &
OLLAMA_MODELS=unsloth ollama run hf.co/unsloth/DeepSeek-V3.1-GGUF:TQ1_0
./llama.cpp/llama-gguf-split --merge \
DeepSeek-V3.1-GGUF/DeepSeek-V3.1-UD-Q2_K_XL/DeepSeek-V3.1-UD-Q2_K_XL-00001-of-00006.gguf \
merged_file.gguf
OLLAMA_MODELS=unsloth ollama serve &
OLLAMA_MODELS=unsloth ollama run merged_file.gguf
apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-quantize llama-cli llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp
export LLAMA_CACHE="unsloth/DeepSeek-V3.1-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/DeepSeek-V3.1-GGUF:Q2_K_XL \
--cache-type-k q4_0 \
--threads -1 \
--n-gpu-layers 99 \
--prio 3 \
--temp 0.6 \
--top_p 0.95 \
--min_p 0.01 \
--ctx-size 16384 \
--seed 3407 \
-ot ".ffn_.*_exps.=CPU"
# !pip install huggingface_hub hf_transfer
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0" # Can sometimes rate limit, so set to 0 to disable
from huggingface_hub import snapshot_download
snapshot_download(
repo_id = "unsloth/DeepSeek-V3.1-GGUF",
local_dir = "unsloth/DeepSeek-V3.1-GGUF",
allow_patterns = ["*UD-Q2_K_XL*"], # Dynamic 1bit (168GB) Use "*UD-Q2_K_XL*" for Dynamic 2bit (251GB)
)
./llama.cpp/llama-cli \
--model unsloth/DeepSeek-V3.1-GGUF/UD-Q2_K_XL/DeepSeek-V3.1-UD-Q2_K_XL-00001-of-00006.gguf \
--cache-type-k q4_0 \
--jinja \
--threads -1 \
--n-gpu-layers 99 \
--temp 0.6 \
--top_p 0.95 \
--min_p 0.01 \
--ctx-size 16384 \
--seed 3407 \
-ot ".ffn_.*_exps.=CPU"
from huggingface_hub import snapshot_download
snapshot_download(
repo_id = "unsloth/DeepSeek-V3.1-GGUF",
local_dir = "unsloth/DeepSeek-V3.1-GGUF",
allow_patterns = ["*UD-TQ1_0*"], # Use "*UD-Q2_K_XL*" for Dynamic 2bit
)
terminate called after throwing an instance of 'std::runtime_error' what(): split method must have between 1 and 1 positional arguments and between 0 and 0 keyword arguments at row 3, column 1908