diff --git a/charts/models/templates/models.yaml b/charts/models/templates/models.yaml index b945ac15..b57067f0 100644 --- a/charts/models/templates/models.yaml +++ b/charts/models/templates/models.yaml @@ -7,7 +7,9 @@ metadata: name: {{ $name }} spec: features: {{ $model.features }} - owner: {{ $model.owner }} + {{- with $model.owner }} + owner: {{ . }} + {{- end }} url: {{ $model.url }} {{- with $model.adapters }} adapters: diff --git a/charts/models/values.yaml b/charts/models/values.yaml index eae2da1f..f83c86f4 100644 --- a/charts/models/values.yaml +++ b/charts/models/values.yaml @@ -27,7 +27,6 @@ catalog: e5-mistral-7b-instruct-cpu: enabled: false features: ["TextEmbedding"] - owner: intfloat url: "hf://intfloat/e5-mistral-7b-instruct" engine: VLLM # TODO: Adjust - the memory associated with this request is too low. @@ -38,14 +37,12 @@ catalog: gemma2-2b-cpu: enabled: false features: ["TextGeneration"] - owner: google url: "ollama://gemma2:2b" engine: OLlama resourceProfile: cpu:2 gemma-2b-it-tpu: enabled: false features: ["TextGeneration"] - owner: google url: "hf://google/gemma-2b-it" engine: VLLM resourceProfile: google-tpu-v5e-1x1:1 @@ -54,7 +51,6 @@ catalog: # gemma2-9b-it-fp8-tpu: # enabled: false # features: ["TextGeneration"] - # owner: neuralmagic # # vLLM logs: "ValueError: fp8 quantization is currently not supported in TPU Backend." # #url: "hf://neuralmagic/gemma-2-9b-it-FP8" # engine: VLLM @@ -64,7 +60,6 @@ catalog: # gemma2-9b-it-int8-tpu: # enabled: false # features: ["TextGeneration"] - # owner: neuralmagic # # vLLM logs: "ValueError: compressed-tensors quantization is currently not supported in TPU Backend." # #url: "hf://neuralmagic/gemma-2-9b-it-quantized.w8a8" # #url: "hf://neuralmagic/gemma-2-9b-it-quantized.w8a16" @@ -76,7 +71,6 @@ catalog: llama-3.1-8b-instruct-cpu: enabled: false features: ["TextGeneration"] - owner: "meta-llama" url: "hf://meta-llama/Meta-Llama-3.1-8B-Instruct" engine: VLLM resourceProfile: cpu:6 @@ -88,7 +82,6 @@ catalog: llama-3.1-8b-instruct-tpu: enabled: false features: ["TextGeneration"] - owner: meta-llama url: "hf://meta-llama/Meta-Llama-3.1-8B-Instruct" engine: VLLM resourceProfile: google-tpu-v5e-2x2:4 @@ -120,7 +113,6 @@ catalog: llama-3.1-8b-instruct-fp8-l4: enabled: false features: ["TextGeneration"] - owner: "neuralmagic" url: "hf://neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8" engine: VLLM resourceProfile: nvidia-gpu-l4:1 @@ -242,14 +234,12 @@ catalog: nomic-embed-text-cpu: enabled: false features: ["TextEmbedding"] - owner: nomic url: "ollama://nomic-embed-text" engine: OLlama resourceProfile: cpu:1 bge-embed-text-cpu: enabled: false features: ["TextEmbedding"] - owner: baai url: "hf://BAAI/bge-small-en-v1.5" engine: Infinity resourceProfile: cpu:1 @@ -257,7 +247,6 @@ catalog: opt-125m-cpu: enabled: false features: ["TextGeneration"] - owner: facebook url: "hf://facebook/opt-125m" engine: VLLM # TODO: Adjust - the memory associated with this request is too low. @@ -265,7 +254,6 @@ catalog: opt-125m-l4: enabled: false features: ["TextGeneration"] - owner: facebook url: "hf://facebook/opt-125m" engine: VLLM resourceProfile: nvidia-gpu-l4:1 @@ -299,14 +287,12 @@ catalog: qwen2-500m-cpu: enabled: false features: ["TextGeneration"] - owner: alibaba url: "ollama://qwen2:0.5b" engine: OLlama resourceProfile: cpu:1 faster-whisper-medium-en-cpu: enabled: false features: ["SpeechToText"] - owner: Systran url: "hf://Systran/faster-whisper-medium.en" engine: FasterWhisper resourceProfile: cpu:1 diff --git a/manifests/models/bge-embed-text-cpu.yaml b/manifests/models/bge-embed-text-cpu.yaml index 34324ac8..8399543a 100644 --- a/manifests/models/bge-embed-text-cpu.yaml +++ b/manifests/models/bge-embed-text-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: bge-embed-text-cpu spec: features: [TextEmbedding] - owner: baai url: hf://BAAI/bge-small-en-v1.5 engine: Infinity resourceProfile: cpu:1 diff --git a/manifests/models/e5-mistral-7b-instruct-cpu.yaml b/manifests/models/e5-mistral-7b-instruct-cpu.yaml index 42205a84..d7098cc0 100644 --- a/manifests/models/e5-mistral-7b-instruct-cpu.yaml +++ b/manifests/models/e5-mistral-7b-instruct-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: e5-mistral-7b-instruct-cpu spec: features: [TextEmbedding] - owner: intfloat url: hf://intfloat/e5-mistral-7b-instruct engine: VLLM args: diff --git a/manifests/models/faster-whisper-medium-en-cpu.yaml b/manifests/models/faster-whisper-medium-en-cpu.yaml index 8dc61e8f..296acd37 100644 --- a/manifests/models/faster-whisper-medium-en-cpu.yaml +++ b/manifests/models/faster-whisper-medium-en-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: faster-whisper-medium-en-cpu spec: features: [SpeechToText] - owner: Systran url: hf://Systran/faster-whisper-medium.en engine: FasterWhisper resourceProfile: cpu:1 diff --git a/manifests/models/gemma-2b-it-tpu.yaml b/manifests/models/gemma-2b-it-tpu.yaml index 3a933b7e..2ac1db2c 100644 --- a/manifests/models/gemma-2b-it-tpu.yaml +++ b/manifests/models/gemma-2b-it-tpu.yaml @@ -5,7 +5,6 @@ metadata: name: gemma-2b-it-tpu spec: features: [TextGeneration] - owner: google url: hf://google/gemma-2b-it engine: VLLM args: diff --git a/manifests/models/gemma2-2b-cpu.yaml b/manifests/models/gemma2-2b-cpu.yaml index 45ce227c..17d0f706 100644 --- a/manifests/models/gemma2-2b-cpu.yaml +++ b/manifests/models/gemma2-2b-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: gemma2-2b-cpu spec: features: [TextGeneration] - owner: google url: ollama://gemma2:2b engine: OLlama resourceProfile: cpu:2 diff --git a/manifests/models/llama-3.1-405b-instruct-fp8-a100-80b.yaml b/manifests/models/llama-3.1-405b-instruct-fp8-a100-80b.yaml index da2940e1..62c4b84c 100644 --- a/manifests/models/llama-3.1-405b-instruct-fp8-a100-80b.yaml +++ b/manifests/models/llama-3.1-405b-instruct-fp8-a100-80b.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-405b-instruct-fp8-a100-80b spec: features: [TextGeneration] - owner: url: hf://neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8 engine: VLLM args: diff --git a/manifests/models/llama-3.1-405b-instruct-fp8-h100.yaml b/manifests/models/llama-3.1-405b-instruct-fp8-h100.yaml index d2e056ae..ad4a0384 100644 --- a/manifests/models/llama-3.1-405b-instruct-fp8-h100.yaml +++ b/manifests/models/llama-3.1-405b-instruct-fp8-h100.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-405b-instruct-fp8-h100 spec: features: [TextGeneration] - owner: url: hf://neuralmagic/Meta-Llama-3.1-405B-Instruct-FP8 engine: VLLM args: diff --git a/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml b/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml index dd2ef872..5d853488 100644 --- a/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml +++ b/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-70b-instruct-awq-int4-gh200 spec: features: [TextGeneration] - owner: url: hf://hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4 engine: VLLM args: diff --git a/manifests/models/llama-3.1-70b-instruct-fp8-gh200.yaml b/manifests/models/llama-3.1-70b-instruct-fp8-gh200.yaml index 6e690e3a..f81e4231 100644 --- a/manifests/models/llama-3.1-70b-instruct-fp8-gh200.yaml +++ b/manifests/models/llama-3.1-70b-instruct-fp8-gh200.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-70b-instruct-fp8-gh200 spec: features: [TextGeneration] - owner: url: hf://neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8 engine: VLLM args: diff --git a/manifests/models/llama-3.1-70b-instruct-fp8-h100.yaml b/manifests/models/llama-3.1-70b-instruct-fp8-h100.yaml index 4c308bbd..686855aa 100644 --- a/manifests/models/llama-3.1-70b-instruct-fp8-h100.yaml +++ b/manifests/models/llama-3.1-70b-instruct-fp8-h100.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-70b-instruct-fp8-h100 spec: features: [TextGeneration] - owner: url: hf://neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8 engine: VLLM args: diff --git a/manifests/models/llama-3.1-70b-instruct-fp8-l4.yaml b/manifests/models/llama-3.1-70b-instruct-fp8-l4.yaml index b258c5ec..a45d1bbb 100644 --- a/manifests/models/llama-3.1-70b-instruct-fp8-l4.yaml +++ b/manifests/models/llama-3.1-70b-instruct-fp8-l4.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-70b-instruct-fp8-l4 spec: features: [TextGeneration] - owner: url: hf://neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8 engine: VLLM args: diff --git a/manifests/models/llama-3.1-8b-instruct-cpu.yaml b/manifests/models/llama-3.1-8b-instruct-cpu.yaml index 0e5103dc..6ef3a6ca 100644 --- a/manifests/models/llama-3.1-8b-instruct-cpu.yaml +++ b/manifests/models/llama-3.1-8b-instruct-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-8b-instruct-cpu spec: features: [TextGeneration] - owner: meta-llama url: hf://meta-llama/Meta-Llama-3.1-8B-Instruct engine: VLLM args: diff --git a/manifests/models/llama-3.1-8b-instruct-fp8-l4.yaml b/manifests/models/llama-3.1-8b-instruct-fp8-l4.yaml index 9b5d9533..432297a6 100644 --- a/manifests/models/llama-3.1-8b-instruct-fp8-l4.yaml +++ b/manifests/models/llama-3.1-8b-instruct-fp8-l4.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-8b-instruct-fp8-l4 spec: features: [TextGeneration] - owner: neuralmagic url: hf://neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8 engine: VLLM args: diff --git a/manifests/models/llama-3.1-8b-instruct-tpu.yaml b/manifests/models/llama-3.1-8b-instruct-tpu.yaml index 9efee364..d72314fd 100644 --- a/manifests/models/llama-3.1-8b-instruct-tpu.yaml +++ b/manifests/models/llama-3.1-8b-instruct-tpu.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.1-8b-instruct-tpu spec: features: [TextGeneration] - owner: meta-llama url: hf://meta-llama/Meta-Llama-3.1-8B-Instruct engine: VLLM args: diff --git a/manifests/models/llama-3.2-11b-vision-instruct-l4.yaml b/manifests/models/llama-3.2-11b-vision-instruct-l4.yaml index 339c6986..b66251a3 100644 --- a/manifests/models/llama-3.2-11b-vision-instruct-l4.yaml +++ b/manifests/models/llama-3.2-11b-vision-instruct-l4.yaml @@ -5,7 +5,6 @@ metadata: name: llama-3.2-11b-vision-instruct-l4 spec: features: [TextGeneration] - owner: url: hf://neuralmagic/Llama-3.2-11B-Vision-Instruct-FP8-dynamic engine: VLLM args: diff --git a/manifests/models/nomic-embed-text-cpu.yaml b/manifests/models/nomic-embed-text-cpu.yaml index 3c901a4c..9d3d4b81 100644 --- a/manifests/models/nomic-embed-text-cpu.yaml +++ b/manifests/models/nomic-embed-text-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: nomic-embed-text-cpu spec: features: [TextEmbedding] - owner: nomic url: ollama://nomic-embed-text engine: OLlama resourceProfile: cpu:1 diff --git a/manifests/models/opt-125m-cpu.yaml b/manifests/models/opt-125m-cpu.yaml index 9fb87e57..e49c50f2 100644 --- a/manifests/models/opt-125m-cpu.yaml +++ b/manifests/models/opt-125m-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: opt-125m-cpu spec: features: [TextGeneration] - owner: facebook url: hf://facebook/opt-125m engine: VLLM resourceProfile: cpu:1 diff --git a/manifests/models/opt-125m-l4.yaml b/manifests/models/opt-125m-l4.yaml index d2ee5d43..fa7d6e16 100644 --- a/manifests/models/opt-125m-l4.yaml +++ b/manifests/models/opt-125m-l4.yaml @@ -5,7 +5,6 @@ metadata: name: opt-125m-l4 spec: features: [TextGeneration] - owner: facebook url: hf://facebook/opt-125m engine: VLLM resourceProfile: nvidia-gpu-l4:1 diff --git a/manifests/models/qwen2-500m-cpu.yaml b/manifests/models/qwen2-500m-cpu.yaml index ee7f7b25..fbae035c 100644 --- a/manifests/models/qwen2-500m-cpu.yaml +++ b/manifests/models/qwen2-500m-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: qwen2-500m-cpu spec: features: [TextGeneration] - owner: alibaba url: ollama://qwen2:0.5b engine: OLlama resourceProfile: cpu:1 diff --git a/manifests/models/qwen2.5-7b-cpu.yaml b/manifests/models/qwen2.5-7b-cpu.yaml index 9680a206..989809d5 100644 --- a/manifests/models/qwen2.5-7b-cpu.yaml +++ b/manifests/models/qwen2.5-7b-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: qwen2.5-7b-cpu spec: features: [TextGeneration] - owner: url: ollama://qwen2.5:7b engine: OLlama resourceProfile: cpu:2 diff --git a/manifests/models/qwen2.5-coder-1.5b-cpu.yaml b/manifests/models/qwen2.5-coder-1.5b-cpu.yaml index 2c99feb3..b1c8f2a1 100644 --- a/manifests/models/qwen2.5-coder-1.5b-cpu.yaml +++ b/manifests/models/qwen2.5-coder-1.5b-cpu.yaml @@ -5,7 +5,6 @@ metadata: name: qwen2.5-coder-1.5b-cpu spec: features: [TextGeneration] - owner: url: ollama://qwen2.5-coder:1.5b engine: OLlama resourceProfile: cpu:1 diff --git a/manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml b/manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml index a29989a6..6212e80d 100644 --- a/manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml +++ b/manifests/models/qwen2.5-coder-1.5b-rtx4070-8gb.yaml @@ -5,7 +5,6 @@ metadata: name: qwen2.5-coder-1.5b-rtx4070-8gb spec: features: [TextGeneration] - owner: url: hf://Qwen/Qwen2.5-Coder-1.5B-Instruct engine: VLLM args: