api_version: v1
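# Pipeline: fine-tune an LLM with QLoRA, optionally merge and quantize it with
# AWQ (served via Marlin kernels), evaluate the result, gate on a manual
# review, and publish the artifacts to the Hugging Face Hub, with e-mail
# notifications along the way.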
variables:
HF_TOKEN:
    description: Hugging Face token to access the model registry
required: true
default: {HUGGINGFACE_TOKEN}
BASE_MODEL_NAME:
description: Base LLM name to fine-tune
options:
- meta-llama/Meta-Llama-3.1-8B-Instruct
- mistralai/Mistral-Large-Instruct-2407
required: true
default: meta-llama/Meta-Llama-3.1-8B-Instruct
ADAPTER_NAME:
description: Repository name of QLoRA adapter to upload
required: true
default: {LORA_ADAPTER_NAME}
QUANTIZE_MODEL:
    description: Whether to quantize the trained model for production use
options:
- "yes"
- "no"
required: true
default: "yes"
QUANTIZED_MODEL_NAME:
description: Repository name of quantized model
required: true
default: {QUANTIZED_MODEL_NAME}
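# The {...}-style defaults above are placeholders; substitute your own values
# before running (e.g. a hypothetical adapter repository such as
# "your-org/llama-3.1-8b-qlora-adapter").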
steps:
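# Step 1: train a QLoRA adapter for the chosen base model on a GPU spot
# preset; everything written to /root/output is exported as a step artifact.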
- key: Fine-tune-with-QLoRA
title: Fine-tune with QLoRA
type: v1/run
description: Fine-tune model with QLoRA
spec:
input_variables:
BASE_MODEL_NAME:
type: pipeline_variable
value: BASE_MODEL_NAME
HF_TOKEN:
type: pipeline_variable
value: HF_TOKEN
run_spec:
name: Fine-tune with QLoRA
description: Fine-tune model with QLoRA
resources:
cluster: vessl-oci-sanjose
preset: gpu-l4-small-spot
image: quay.io/vessl-ai/cuda:12.4-r4
import:
/code/:
git:
url: github.com/vessl-ai/examples.git
ref: main
export:
/root/output/: vessl-artifact://
run: |-
cd /code/pipelines/pipelines-quickstart/llm-finetuning
pip install torch==2.2.0
pip install -r requirements.txt
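        # Checkpoints and the final adapter land under /root/output, which
        # the `export` section above publishes as a step artifact.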
python finetune.py \
--base-model-name $BASE_MODEL_NAME \
--checkpoint-path /root/output/checkpoints \
--output-model-name /root/output/adapter \
--train-epochs 4 \
--lora-rank 16
env:
BASE_MODEL_NAME:
source: inject
HF_TOKEN:
source: inject
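# Branch on the QUANTIZE_MODEL variable: "yes" routes to AWQ+Marlin
# quantization, "no" to evaluating the fine-tuned adapter directly.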
- key: Quantize-Model
title: Quantize Model?
type: v1/if
depends_on:
- Fine-tune-with-QLoRA
spec:
condition: ${QUANTIZE_MODEL} == yes
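# The adapter is uploaded to the Hub on either branch; a CPU preset suffices
# because this step only moves files.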
- key: Upload-Adapter
title: Upload Adapter
type: v1/run
depends_on:
- Fine-tune-with-QLoRA
spec:
input_variables:
ADAPTER_NAME:
type: pipeline_variable
value: ADAPTER_NAME
HF_TOKEN:
type: pipeline_variable
value: HF_TOKEN
volumes:
/model/:
source: pipeline-step
source_step:
step_key: Fine-tune-with-QLoRA
volume_claim_name: /root/output/
run_spec:
name: Upload Adapter
description: ""
resources:
cluster: vessl-oci-sanjose
preset: cpu-medium-spot
image: quay.io/vessl-ai/python:3.10-r18
import:
/model/: vessl-artifact://
run: |-
pip install -U "huggingface_hub[cli]"
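        # huggingface_hub picks up the injected HF_TOKEN environment variable
        # for authentication, so no explicit --token flag is needed here.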
huggingface-cli upload $ADAPTER_NAME /model/adapter .
env:
ADAPTER_NAME:
source: inject
HF_TOKEN:
source: inject
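# "yes" branch: merge the adapter into the base model, then quantize with AWQ
# for serving with Marlin kernels.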
- key: AWQ-Marlin-Quantization
title: AWQ+Marlin Quantization
type: v1/run
depends_on:
- Quantize-Model:yes
spec:
input_variables:
ADAPTER_NAME:
type: pipeline_variable
value: ADAPTER_NAME
BASE_MODEL_NAME:
type: pipeline_variable
value: BASE_MODEL_NAME
HF_TOKEN:
type: pipeline_variable
value: HF_TOKEN
run_spec:
name: AWQ+Marlin Quantization
description: ""
resources:
cluster: vessl-oci-sanjose
preset: gpu-l4-small-spot
image: quay.io/vessl-ai/cuda:12.4-r4
import:
/code/:
git:
url: github.com/vessl-ai/examples.git
ref: main
export:
/root/output/: vessl-artifact://
run: |-
cd /code/pipelines/pipelines-quickstart/llm-quantization
pip install torch==2.3.1
pip install -r requirements.txt
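        # Merge the adapter (referenced by name) into the base model, quantize,
        # and write the result to /root/output for export as an artifact.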
python merge_and_quantize.py \
--base-model-name $BASE_MODEL_NAME \
--adapter-name $ADAPTER_NAME \
--quantized-model-name /root/output
env:
ADAPTER_NAME:
source: inject
BASE_MODEL_NAME:
source: inject
HF_TOKEN:
source: inject
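# Smoke-test the quantized model on a couple of sample prompts; the artifact
# from the quantization step is mounted at /model.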
- key: Evaluate-Quantized-Model
title: Evaluate Quantized Model
type: v1/run
depends_on:
- AWQ-Marlin-Quantization
spec:
input_variables:
HF_TOKEN:
type: pipeline_variable
value: HF_TOKEN
QUANTIZED_MODEL_NAME:
type: pipeline_variable
value: QUANTIZED_MODEL_NAME
volumes:
/model/:
source: pipeline-step
source_step:
step_key: AWQ-Marlin-Quantization
volume_claim_name: /root/output/
run_spec:
name: Evaluate Quantized Model
description: ""
resources:
cluster: vessl-oci-sanjose
preset: gpu-l4-small-spot
image: quay.io/vessl-ai/cuda:12.4-r4
import:
/code/:
git:
url: github.com/vessl-ai/examples.git
ref: main
/model/: vessl-artifact://
run: |-
cd /code/pipelines/pipelines-quickstart/llm-finetuning-eval
pip install torch==2.3.1
pip install -r requirements.txt
pip install "git+https://github.com/IST-DASLab/marlin"
python evaluate.py \
--model-name /model \
--quantization awq \
--prompts "What is the capital of France?" "How does a transformer model work?"
env:
HF_TOKEN:
source: inject
QUANTIZED_MODEL_NAME:
source: inject
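# Human-in-the-loop gate: assignees review the evaluation logs and approve
# ("yes") or reject ("no") the model for production.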
- key: Accept-Evaluation
title: Accept Evaluation?
type: v1/manual_judgment
  description: Review the logs from the "Evaluate Quantized Model" or "Evaluate Original Model" step and decide whether to push the fine-tuned model to production.
depends_on:
- Evaluate-Quantized-Model
- Evaluate-Original-Model
spec:
assignee_email_addresses:
- sample_email@foo.bar
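# A rejected review fails the pipeline, which triggers the failure
# notification step further below.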
- key: Fail
title: Fail
type: v1/fail
depends_on:
- Accept-Evaluation:no
spec: {}
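# Approved quantized models are pushed to the Hub under QUANTIZED_MODEL_NAME.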
- key: Upload-Quantized-Model
title: Upload Quantized Model
type: v1/run
depends_on:
- Evaluated-Quantized-Model:yes
spec:
input_variables:
HF_TOKEN:
type: pipeline_variable
value: HF_TOKEN
QUANTIZED_MODEL_NAME:
type: pipeline_variable
value: QUANTIZED_MODEL_NAME
volumes:
/model/:
source: pipeline-step
source_step:
step_key: AWQ-Marlin-Quantization
volume_claim_name: /root/output/
run_spec:
name: Upload Quantized Model
description: ""
resources:
cluster: vessl-oci-sanjose
preset: cpu-medium-spot
image: quay.io/vessl-ai/python:3.10-r18
import:
/model/: vessl-artifact://
run: |-
pip install -U "huggingface_hub[cli]"
huggingface-cli upload $QUANTIZED_MODEL_NAME /model .
env:
HF_TOKEN:
source: inject
QUANTIZED_MODEL_NAME:
source: inject
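# Success notification for the fine-tuning pipeline.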
- key: Notify-Fine-tuning-Completion
title: Notify Fine-tuning Completion
type: v1/notification
depends_on:
- Evaluated-Quantized-Model:no
- Upload-Quantized-Model
spec:
email_addresses:
- sample_email@foo.bar
    email_subject: '[llm-finetuning] Fine-tuning job has finished'
email_contents: '😉 Fine-tuning has successfully completed. Please check the logs!'
- key: Notify-Fine-tuning-has-failed
title: Notify Fine-tuning has failed
type: v1/notification
depends_on:
- Fail
spec:
email_addresses:
- sample_email@foo.bar
email_subject: '[llm-finetuning] Fine-tuning has failed'
email_contents: '😱 Fine-tuning has failed. Please check the logs.'
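# "no" branch: evaluate the fine-tuned adapter as-is, without quantization.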
- key: Evaluate-Original-Model
title: Evaluate Original Model
type: v1/run
depends_on:
- Quantize-Model:no
spec:
input_variables:
ADAPTER_NAME:
type: pipeline_variable
value: ADAPTER_NAME
HF_TOKEN:
type: pipeline_variable
value: HF_TOKEN
run_spec:
name: Evaluate Original Model
description: ""
resources:
cluster: vessl-oci-sanjose
preset: gpu-l4-small-spot
image: quay.io/vessl-ai/cuda:12.4-r4
import:
/code/:
git:
url: github.com/vessl-ai/examples.git
ref: main
run: |-
cd /code/pipelines/pipelines-quickstart/llm-finetuning-eval
pip install torch==2.3.1
pip install -r requirements.txt
python evaluate.py \
--model-name $ADAPTER_NAME \
--prompts "What is the capital of France?" "How does a transformer model work?"
env:
ADAPTER_NAME:
source: inject
HF_TOKEN:
source: inject
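# After approval, check which branch was taken: only the quantized path
# continues to the upload step.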
- key: Evaluated-Quantized-Model
  title: Quantized Model Evaluated?
type: v1/if
depends_on:
- Accept-Evaluation:yes
spec:
condition: ${QUANTIZE_MODEL} == yes