docker run -d --rm --gpus all --shm-size 1g -p 8080:80 -v /mnt:/data -e GPTQ_BITS=4 -e GPTQ_GROUPSIZE=32 -e REVISION=gptq-4bit-32g-actorder_True ghcr.io/huggingface/text-generation-inference:latest --model-id TheBloke/Upstage-Llama-2-70B-instruct-v2-GPTQ --max-batch-prefill-tokens=12000 --max-total-tokens=12000 --max-input-length=10000 --quantize=gptq --sharded=true --num-shard=$(lspci | grep NVIDIA | wc -l | tr -d '\n') --rope-factor=2
Error response from daemon: could not select device driver “” with capabilities: [[gpu]].