Skip to content

Commit

Permalink
LoRA Adapters for vLLM & support for s3, gs, oss for pulling adapters…
Browse files Browse the repository at this point in the history
… and models (to cache) from buckets (#304)

* Add `.spec.adapters`
* Support requesting adapters using the pattern: `{"model":
"<model>_<adapter>", ... }`
* Load LoRA adapters into running vLLM containers
* Support updating LoRA adapters without needing to restart vLLM
* Rewrite `.model` to use adapter in chat request body when proxying to
vLLM
* Add adapters to model list
* Add support for `s3://`, `gs://`, `oss://` urls (for adapters and
cache loading)
* Add new cloud credentials to support new urls
* Update docs
* Update Model validation

NOTE:
* Was unable to test `oss://` URLs (had issues opening an account).

FOLLOWUP:
* Need to add adapter e2e tests (have not found a small enough model
with adapters for use in kind cluster)
* Need to update chart values.yaml to include GH-actions-built image for
model loader after merge!!!

Fixes #132, #303
  • Loading branch information
nstogner authored Nov 24, 2024
1 parent 499a6ed commit b12f811
Show file tree
Hide file tree
Showing 81 changed files with 2,168 additions and 391 deletions.
8 changes: 7 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
# Ignore build and test binaries.
bin/
charts/
components/
docs/
manifests/
examples/
manifests/
proposals/
test/
tmp/
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Build and Push huggingface-model-loader Docker image
name: Build and Push kubeai-model-loader Docker image
on:
push:
branches:
Expand All @@ -12,10 +12,10 @@ on:
# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
REGISTRY: ghcr.io
IMAGE_NAME: substratusai/huggingface-model-loader
IMAGE_NAME: substratusai/kubeai-model-loader

jobs:
huggingface-model-loader:
kubeai-model-loader:
runs-on: ubuntu-latest
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
Expand Down Expand Up @@ -52,7 +52,7 @@ jobs:
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: ./components/huggingface-model-loader
context: ./components/model-loader
platforms: linux/amd64,linux/arm64
push: ${{ github.event_name == 'push' }}
tags: ${{ steps.meta.outputs.tags }}
Expand Down
8 changes: 8 additions & 0 deletions api/v1/metadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ const (
// PVCModelAnnotation returns the key "models.kubeai.org/<modelName>".
//
// NOTE(review): judging by the name, the key is used as a per-model
// annotation on a PVC — confirm against callers.
func PVCModelAnnotation(modelName string) string {
	const prefix = "models.kubeai.org/"
	return prefix + modelName
}

// PodAdapterLabelPrefix is the prefix shared by every key returned from
// PodAdapterLabel.
const PodAdapterLabelPrefix = "adapter.kubeai.org/"

// PodAdapterLabel returns the label key for the given adapter ID, formed by
// appending adapterID to PodAdapterLabelPrefix.
func PodAdapterLabel(adapterID string) string {
	return PodAdapterLabelPrefix + adapterID
}
43 changes: 36 additions & 7 deletions api/v1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,34 @@ import (
)

// ModelSpec defines the desired state of Model.
// +kubebuilder:validation:XValidation:rule="!has(self.cacheProfile) || self.url.startsWith(\"hf://\")", message="cacheProfile is only supported with a huggingface url (\"hf://...\") at the moment."
// +kubebuilder:validation:XValidation:rule="!has(self.cacheProfile) || self.url.startsWith(\"hf://\") || self.url.startsWith(\"s3://\") || self.url.startsWith(\"gs://\") || self.url.startsWith(\"oss://\")", message="cacheProfile is only supported with urls of format \"hf://...\", \"s3://...\", \"gs://...\", or \"oss://...\" at the moment."
// +kubebuilder:validation:XValidation:rule="!self.url.startsWith(\"s3://\") || has(self.cacheProfile)", message="urls of format \"s3://...\" only supported when using a cacheProfile"
// +kubebuilder:validation:XValidation:rule="!self.url.startsWith(\"gs://\") || has(self.cacheProfile)", message="urls of format \"gs://...\" only supported when using a cacheProfile"
// +kubebuilder:validation:XValidation:rule="!self.url.startsWith(\"oss://\") || has(self.cacheProfile)", message="urls of format \"oss://...\" only supported when using a cacheProfile"
// +kubebuilder:validation:XValidation:rule="!has(self.maxReplicas) || self.minReplicas <= self.maxReplicas", message="minReplicas should be less than or equal to maxReplicas."
// +kubebuilder:validation:XValidation:rule="!has(self.adapters) || self.engine == \"VLLM\"", message="adapters only supported with VLLM engine."
type ModelSpec struct {
// URL of the model to be served.
// Currently only the following formats are supported:
// For VLLM & FasterWhisper engines: "hf://<model-repo>/<model-name>"
// For OLlama engine: "ollama://<model>
// Currently the following formats are supported:
//
// For VLLM, FasterWhisper, Infinity engines:
//
// "hf://<repo>/<model>"
// "gs://<bucket>/<path>" (only with cacheProfile)
// "oss://<bucket>/<path>" (only with cacheProfile)
// "s3://<bucket>/<path>" (only with cacheProfile)
//
// For OLlama engine:
//
// "ollama://<model>"
//
// +kubebuilder:validation:Required
// +kubebuilder:validation:XValidation:rule="self == oldSelf", message="url is immutable."
// +kubebuilder:validation:XValidation:rule="self.startsWith(\"hf://\") || self.startsWith(\"ollama://\")", message="url must start with \"hf://\" or \"ollama://\" and not be empty."
// +kubebuilder:validation:XValidation:rule="self.startsWith(\"hf://\") || self.startsWith(\"ollama://\") || self.startsWith(\"s3://\") || self.startsWith(\"gs://\") || self.startsWith(\"oss://\")", message="url must start with \"hf://\", \"ollama://\", \"s3://\", \"gs://\", or \"oss://\" and not be empty."
URL string `json:"url"`

Adapters []Adapter `json:"adapters,omitempty"`

// Features that the model supports.
// Dictates the APIs that are available for the model.
Features []ModelFeature `json:"features"`
Expand Down Expand Up @@ -118,6 +134,16 @@ const (
InfinityEngine = "Infinity"
)

// Adapter pairs a name with the URL that the adapter is loaded from.
// Adapters are only accepted when the Model's engine is VLLM (enforced by a
// validation rule on ModelSpec).
type Adapter struct {
// Name must be a lowercase string with no spaces.
// Only lowercase letters, digits and hyphens are accepted, up to 63 characters.
// +kubebuilder:validation:Required
// +kubebuilder:validation:Pattern=^[a-z0-9-]+$
// +kubebuilder:validation:MaxLength=63
Name string `json:"name"`
// URL is the location of the adapter.
// It must start with "hf://", "s3://", "gs://", or "oss://".
// +kubebuilder:validation:XValidation:rule="self.startsWith(\"hf://\") || self.startsWith(\"s3://\") || self.startsWith(\"gs://\") || self.startsWith(\"oss://\")", message="adapter url must start with \"hf://\", \"s3://\", \"gs://\", or \"oss://\"."
URL string `json:"url"`
}

// ModelStatus defines the observed state of Model.
type ModelStatus struct {
Replicas ModelStatusReplicas `json:"replicas,omitempty"`
Expand All @@ -133,11 +159,14 @@ type ModelStatusCache struct {
Loaded bool `json:"loaded"`
}

// NOTE: Model name length should be limited to allow for the model name to be used in
// the names of the resources created by the controller.

// Model resources define the ML models that will be served by KubeAI.
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:subresource:scale:specpath=.spec.replicas,statuspath=.status.replicas.all

// Model resources define the ML models that will be served by KubeAI.
// +kubebuilder:validation:XValidation:rule="size(self.metadata.name) <= 40", message="name must not exceed 40 characters."
type Model struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
Expand Down
20 changes: 20 additions & 0 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 54 additions & 0 deletions charts/kubeai/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,60 @@ Create the name of the service account to use for model pods
{{- end }}
{{- end }}

{{/*
Resolve the name of the alibaba secret to use.

When .Values.secrets.alibaba.create is true, the name is
.Values.secrets.alibaba.name if it is set, otherwise "<kubeai.fullname>-alibaba".
When it is false, .Values.secrets.alibaba.name is required and is returned
as-is; rendering fails if it is empty.
*/}}
{{- define "kubeai.alibabaSecretName" -}}
{{- if .Values.secrets.alibaba.create -}}
{{- if .Values.secrets.alibaba.name -}}
{{- .Values.secrets.alibaba.name -}}
{{- else }}
{{- (include "kubeai.fullname" .)}}-alibaba
{{- end}}
{{- else }}
{{- if not .Values.secrets.alibaba.name -}}
{{ fail "if secrets.alibaba.create is false, secrets.alibaba.name is required" }}
{{- end }}
{{- .Values.secrets.alibaba.name }}
{{- end }}
{{- end }}

{{/*
Resolve the name of the aws secret to use.

When .Values.secrets.aws.create is true, the name is
.Values.secrets.aws.name if it is set, otherwise "<kubeai.fullname>-aws".
When it is false, .Values.secrets.aws.name is required and is returned
as-is; rendering fails if it is empty.
*/}}
{{- define "kubeai.awsSecretName" -}}
{{- if .Values.secrets.aws.create -}}
{{- if .Values.secrets.aws.name -}}
{{- .Values.secrets.aws.name -}}
{{- else }}
{{- (include "kubeai.fullname" .)}}-aws
{{- end}}
{{- else }}
{{- if not .Values.secrets.aws.name -}}
{{ fail "if secrets.aws.create is false, secrets.aws.name is required" }}
{{- end }}
{{- .Values.secrets.aws.name }}
{{- end }}
{{- end }}

{{/*
Resolve the name of the gcp secret to use.

When .Values.secrets.gcp.create is true, the name is
.Values.secrets.gcp.name if it is set, otherwise "<kubeai.fullname>-gcp".
When it is false, .Values.secrets.gcp.name is required and is returned
as-is; rendering fails if it is empty.
*/}}
{{- define "kubeai.gcpSecretName" -}}
{{- if .Values.secrets.gcp.create -}}
{{- if .Values.secrets.gcp.name -}}
{{- .Values.secrets.gcp.name -}}
{{- else }}
{{- (include "kubeai.fullname" .)}}-gcp
{{- end}}
{{- else }}
{{- if not .Values.secrets.gcp.name -}}
{{ fail "if secrets.gcp.create is false, secrets.gcp.name is required" }}
{{- end }}
{{- .Values.secrets.gcp.name }}
{{- end }}
{{- end }}

{{/*
Create the name of the huggingface secret to use
*/}}
Expand Down
11 changes: 11 additions & 0 deletions charts/kubeai/templates/aws-secret.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{{/*
Secret carrying the AWS access key pair taken from .Values.secrets.aws.
It is rendered only when secrets.aws.create is true AND both accessKeyId and
secretAccessKey are non-empty; otherwise this template produces no output.
Values are base64-encoded with b64enc because they are placed under `data`.
*/}}
{{- if and .Values.secrets.aws.create (and (not (empty .Values.secrets.aws.accessKeyId)) (not (empty .Values.secrets.aws.secretAccessKey))) }}
apiVersion: v1
kind: Secret
metadata:
name: {{ include "kubeai.awsSecretName" . }}
labels:
{{- include "kubeai.labels" . | nindent 4 }}
data:
accessKeyId: {{ .Values.secrets.aws.accessKeyId | b64enc }}
secretAccessKey: {{ .Values.secrets.aws.secretAccessKey | b64enc }}
{{- end }}
7 changes: 5 additions & 2 deletions charts/kubeai/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,18 @@ metadata:
data:
system.yaml: |
secretNames:
alibaba: {{ include "kubeai.alibabaSecretName" . }}
aws: {{ include "kubeai.awsSecretName" . }}
gcp: {{ include "kubeai.gcpSecretName" . }}
huggingface: {{ include "kubeai.huggingfaceSecretName" . }}
resourceProfiles:
{{- .Values.resourceProfiles | toYaml | nindent 6 }}
cacheProfiles:
{{- .Values.cacheProfiles | toYaml | nindent 6 }}
modelServers:
{{- .Values.modelServers | toYaml | nindent 6 }}
modelLoaders:
{{- .Values.modelLoaders | toYaml | nindent 6 }}
modelLoading:
{{- .Values.modelLoading | toYaml | nindent 6 }}
modelRollouts:
{{- .Values.modelRollouts | toYaml | nindent 6 }}
modelServerPods:
Expand Down
63 changes: 55 additions & 8 deletions charts/kubeai/templates/crds/kubeai.org_models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ spec:
spec:
description: ModelSpec defines the desired state of Model.
properties:
adapters:
items:
properties:
name:
description: Name must be a lowercase string with no spaces.
maxLength: 63
pattern: ^[a-z0-9-]+$
type: string
url:
type: string
x-kubernetes-validations:
- message: adapter url must start with "hf://", "s3://", "gs://",
or "oss://".
rule: self.startsWith("hf://") || self.startsWith("s3://")
|| self.startsWith("gs://") || self.startsWith("oss://")
required:
- name
- url
type: object
type: array
args:
description: Args to be added to the server process.
items:
Expand Down Expand Up @@ -138,15 +158,30 @@ spec:
url:
description: |-
URL of the model to be served.
Currently only the following formats are supported:
For VLLM & FasterWhisper engines: "hf://<model-repo>/<model-name>"
For OLlama engine: "ollama://<model>
Currently the following formats are supported:
For VLLM, FasterWhisper, Infinity engines:
"hf://<repo>/<model>"
"gs://<bucket>/<path>" (only with cacheProfile)
"oss://<bucket>/<path>" (only with cacheProfile)
"s3://<bucket>/<path>" (only with cacheProfile)
For OLlama engine:
"ollama://<model>"
type: string
x-kubernetes-validations:
- message: url is immutable.
rule: self == oldSelf
- message: url must start with "hf://" or "ollama://" and not be empty.
rule: self.startsWith("hf://") || self.startsWith("ollama://")
- message: url must start with "hf://", "ollama://", "s3://", "gs://",
or "oss://" and not be empty.
rule: self.startsWith("hf://") || self.startsWith("ollama://") ||
self.startsWith("s3://") || self.startsWith("gs://") || self.startsWith("oss://")
required:
- engine
- features
Expand All @@ -155,11 +190,20 @@ spec:
- url
type: object
x-kubernetes-validations:
- message: cacheProfile is only supported with a huggingface url ("hf://...")
at the moment.
rule: '!has(self.cacheProfile) || self.url.startsWith("hf://")'
- message: cacheProfile is only supported with urls of format "hf://...",
"s3://...", "gs://...", or "oss://..." at the moment.
rule: '!has(self.cacheProfile) || self.url.startsWith("hf://") || self.url.startsWith("s3://")
|| self.url.startsWith("gs://") || self.url.startsWith("oss://")'
- message: urls of format "s3://..." only supported when using a cacheProfile
rule: '!self.url.startsWith("s3://") || has(self.cacheProfile)'
- message: urls of format "gs://..." only supported when using a cacheProfile
rule: '!self.url.startsWith("gs://") || has(self.cacheProfile)'
- message: urls of format "oss://..." only supported when using a cacheProfile
rule: '!self.url.startsWith("oss://") || has(self.cacheProfile)'
- message: minReplicas should be less than or equal to maxReplicas.
rule: '!has(self.maxReplicas) || self.minReplicas <= self.maxReplicas'
- message: adapters only supported with VLLM engine.
rule: '!has(self.adapters) || self.engine == "VLLM"'
status:
description: ModelStatus defines the observed state of Model.
properties:
Expand All @@ -184,6 +228,9 @@ spec:
type: object
type: object
type: object
x-kubernetes-validations:
- message: name must not exceed 40 characters.
rule: size(self.metadata.name) <= 40
served: true
storage: true
subresources:
Expand Down
File renamed without changes.
6 changes: 6 additions & 0 deletions charts/kubeai/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ rules:
- get
- patch
- update
# Allows exec into pods (core API group, pods/exec subresource).
# Least privilege: exec is authorized by `create` (POST, SPDY transport) or
# `get` (GET, WebSocket transport); no other verb applies to pods/exec, so
# the former wildcard "*" granted nothing useful beyond these two.
# NOTE(review): presumably needed to load adapters into running model
# containers — confirm against the controller code.
- apiGroups:
  - ""
  resources:
  - pods/exec
  verbs:
  - create
  - get
- apiGroups:
- kubeai.org
resources:
Expand Down
Loading

0 comments on commit b12f811

Please sign in to comment.