0. 配置环境变量
HF_ENDPOINT=https://hf-mirror.com
HF_HOME=/root/autodl-tmp/paul/tools/huggingface
1. 本机安装python 3.10, 并设置软件源
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
pip config set global.index-url https://mirrors.huaweicloud.com/repository/pypi/simple
2. 安装miniconda
3. 新建一个环境, 并激活
conda create -n quantization python=3.12
2. 本机安装pytorch2.5.1+cuda12.4
pip3 install torch torchvision torchaudio
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
3. clone llamafactory源码
git clone https://github.com/hiyouga/LLaMA-Factory
4. llamafactory本地安装依赖
pip install -e ".[vllm,gptq]"
5. 启动webui
6. 在页面中填入相关参数进行操作
]]>
量化大模型工具</title> http://www.aygfsteel.com/paulwong/archive/2025/01/15/451557.htmlpaulwong paulwong Wed, 15 Jan 2025 10:00:00 GMT http://www.aygfsteel.com/paulwong/archive/2025/01/15/451557.html http://www.aygfsteel.com/paulwong/comments/451557.html http://www.aygfsteel.com/paulwong/archive/2025/01/15/451557.html#Feedback 0 http://www.aygfsteel.com/paulwong/comments/commentRss/451557.html http://www.aygfsteel.com/paulwong/services/trackbacks/451557.html VLLM量化推理
安装此工具前需安装两个包:
sudo apt-get install cmake
sudo apt-get install pkg-config
配置huggingface镜像地址:
export HF_ENDPOINT=https://hf-mirror.com
下载代码库, 并安装python依赖
git clone https://github.com/ModelTC/llmc.git
cd llmc/
pip install -r requirements.txt
找到量化方法的配置文件, 并作修改
# llmc RTN W8A16 quantization recipe (configs/quantization/backend/vllm/rtn_w8a16.yml)
# NOTE(review): indentation reconstructed from the llmc config schema after the
# original formatting was lost in extraction; spurious spaces removed from paths.
base:
  seed: &seed 42          # global RNG seed, anchored for reuse elsewhere
model:
  type: Llama
  # Local HF snapshot of the fine-tuned model to quantize.
  path: /home/paul/.cache/huggingface/models/models--unsloth--llama-3-8b-Instruct-lawdata
  torch_dtype: auto
quant:
  method: RTN             # round-to-nearest, weight-only
  weight:
    bit: 8
    symmetric: True
    granularity: per_group
    group_size: 128
    need_pack: True       # pack weights for the vLLM backend
eval:
  eval_pos: [fake_quant]  # evaluate with simulated quantization
  name: wikitext2
  download: True
  path: /home/paul/paulwong/work/workspaces/llmc/dataset
  bs: 1
  seq_len: 2048
  inference_per_block: False
save:
  save_vllm: True         # export in a vLLM-loadable format
  save_path: /home/paul/.cache/huggingface/models/models--unsloth--llama-3-8b-Instruct-lawdata-quantization
找到run_llmc.sh, 并作修改
#!/bin/bash
# Launch an llmc quantization job under torchrun in the background.
# Output goes to ${task_name}.log; the job's PID(s) are saved to ${task_name}.pid.
# Stop it later with:  xargs kill < ${task_name}.pid   (escalate to -9 only if needed)

# export CUDA_VISIBLE_DEVICES=0,1
llmc=/home/paul/paulwong/work/workspaces/llmc
# Avoid a dangling ':' when PYTHONPATH is unset.
export PYTHONPATH="$llmc${PYTHONPATH:+:$PYTHONPATH}"

# Alternative recipe (AWQ weight-only):
# task_name=awq_w_only
# config=${llmc}/configs/quantization/methods/Awq/awq_w_only.yml
task_name=rtn_for_vllm
config=${llmc}/configs/quantization/backend/vllm/rtn_w8a16.yml

nnodes=1
nproc_per_node=1

# Print a random TCP port in 10000-60000 with no local listener.
# Loops until a free one is found; result is written to stdout.
find_unused_port() {
  local candidate
  while true; do
    candidate=$(shuf -i 10000-60000 -n 1)
    if ! ss -tuln | grep -q ":$candidate "; then
      echo "$candidate"
      return 0
    fi
  done
}

UNUSED_PORT=$(find_unused_port)
MASTER_ADDR=127.0.0.1
MASTER_PORT=$UNUSED_PORT
task_id=$UNUSED_PORT   # doubles as the rendezvous id and the llmc task id

# Detach from the terminal; keep running after logout.
nohup \
torchrun \
  --nnodes "$nnodes" \
  --nproc_per_node "$nproc_per_node" \
  --rdzv_id "$task_id" \
  --rdzv_backend c10d \
  --rdzv_endpoint "$MASTER_ADDR:$MASTER_PORT" \
  "${llmc}/llmc/llmc/__main__.py" --config "$config" --task_id "$task_id" \
  > "${task_name}.log" 2>&1 &

sleep 2  # give torchrun a moment to start before looking up its PID

# pgrep -f matches the full command line; safer than the ps|grep|grep chain,
# which can race against its own grep processes.
pgrep -f -- "__main__.py.*--task_id $task_id" > "${task_name}.pid"

# You can kill this program by
#   xargs kill < ${task_name}.pid
# (${task_name}.pid holds one PID per line; try plain TERM before kill -9)
执行量化操作
]]>