Merge pull request #209 from DefangLabs/ollama
Ollama
Showing 27 changed files with 7,453 additions and 4 deletions.
@@ -0,0 +1 @@
FROM mcr.microsoft.com/devcontainers/python:3.12-bookworm
@@ -0,0 +1,11 @@
{
  "build": {
    "dockerfile": "Dockerfile",
    "context": ".."
  },
  "features": {
    "ghcr.io/defanglabs/devcontainer-feature/defang-cli:1.0.4": {},
    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
    "ghcr.io/devcontainers/features/node:1": {}
  }
}
@@ -0,0 +1,20 @@
name: Deploy

on:
  push:
    branches:
      - main

jobs:
  deploy:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      id-token: write

    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Deploy
        uses: DefangLabs/[email protected]
@@ -0,0 +1,47 @@
# Ollama

[![1-click-deploy](https://defang.io/deploy-with-defang.png)](https://portal.defang.dev/redirect?url=https%3A%2F%2Fgithub.com%2Fnew%3Ftemplate_name%3Dsample-ollama-template%26template_owner%3DDefangSamples)

This sample demonstrates how to deploy [Ollama](https://ollama.com/) with Defang, along with a Next.js frontend using the [AI SDK](https://www.npmjs.com/package/ai) for smooth streaming conversations. By default it runs a very small model (`llama3.2:1b`) which can perform well with just a CPU, but we've included lines that you can uncomment in the compose file to enable GPU support and run a larger model like `gemma:7b`. If you want to deploy to a GPU-powered instance, you will need to use your own AWS account with [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc).
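As a rough illustration of the streaming setup described above (not part of this diff; the route path and wiring are assumptions, and the sample's actual `ui` code uses the AI SDK, so it may differ), a Next.js route handler could proxy chat requests to the Ollama service like this:

```typescript
// app/api/chat/route.ts -- hypothetical route, shown only for illustration.
// Proxies chat requests to Ollama using the OLLAMA_ENDPOINT and LOAD_MODEL
// variables defined in the compose file.
export async function POST(req: Request): Promise<Response> {
  const { messages } = await req.json();

  // Ollama's /api/chat endpoint streams newline-delimited JSON chunks.
  const upstream = await fetch(`${process.env.OLLAMA_ENDPOINT}/api/chat`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      model: process.env.LOAD_MODEL ?? "llama3.2:1b",
      messages,
      stream: true,
    }),
  });

  // Forward the raw byte stream so the client (for example the AI SDK's
  // useChat hook, or a hand-rolled reader) can render tokens as they arrive.
  return new Response(upstream.body, {
    headers: { "Content-Type": "application/x-ndjson" },
  });
}
```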
## Prerequisites

1. Download the [Defang CLI](https://github.com/DefangLabs/defang)
2. (Optional) If you are using [Defang BYOC](https://docs.defang.io/docs/concepts/defang-byoc), make sure you are authenticated with your AWS account
3. (Optional, for local development) [Docker CLI](https://docs.docker.com/engine/install/)

## Development

To run the application locally, you can use the following command:

```bash
docker compose -f compose.dev.yaml up
```
## Deployment

> [!NOTE]
> Download the [Defang CLI](https://github.com/DefangLabs/defang)

### Defang Playground

Deploy your application to the Defang Playground by opening your terminal and typing `defang up`.

**Keep in mind that the Playground does not support GPU instances.**

### BYOC (AWS)

If you want to deploy to your own cloud account, you can use Defang BYOC:

1. [Authenticate your AWS account](https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-configure.html) and make sure you have properly set your environment variables, such as `AWS_PROFILE`, `AWS_REGION`, `AWS_ACCESS_KEY_ID`, and `AWS_SECRET_ACCESS_KEY`.
2. Run `defang up` in a terminal that has access to your AWS environment variables (a minimal sketch follows below).
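For example (the profile and region values here are placeholders, not taken from the sample):

```bash
# Placeholder values; substitute your own AWS profile/region or access key pair.
export AWS_PROFILE=my-profile
export AWS_REGION=us-west-2

# Run from the project root so Defang picks up compose.yaml.
defang up
```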
---

Title: Ollama

Short Description: Ollama is a tool that lets you easily run large language models.

Tags: AI, LLM, ML, Llama, Mistral, Next.js, AI SDK

Languages: TypeScript
@@ -0,0 +1,15 @@
services:
  ollama:
    extends:
      file: compose.yaml
      service: ollama

  ui:
    extends:
      file: compose.yaml
      service: ui
    volumes:
      - type: bind
        source: ./ui
        target: /app
    command: ["npm", "run", "dev"]
@@ -0,0 +1,52 @@
services:
  ollama:
    # Uncomment to add your own domain
    # domainname: example.com
    build:
      context: ./ollama
      dockerfile: Dockerfile
    shm_size: "16gb"
    ports:
      - target: 8000
        mode: host
    deploy:
      resources:
        reservations:
          cpus: '2.0'
          memory: 8192M
          # Uncomment the next two lines to enable GPU support, for example to use gemma:7b
          # NOTE: this is only supported in [BYOC](https://docs.defang.io/docs/concepts/defang-byoc)
          # devices:
          #   - capabilities: ["gpu"]
    # By default we load llama3.2:1b because it can run efficiently on a CPU, but you can select
    # a different model by setting the LOAD_MODEL environment variable. Check the [list](https://ollama.com/library)
    # for more models. For example, to load gemma:7b, set the LOAD_MODEL environment variable to gemma:7b below.
    environment:
      - LOAD_MODEL=llama3.2:1b
      # - LOAD_MODEL=gemma:7b
    healthcheck:
      # wget or curl required for healthchecks on services with a published port
      # this gets parsed by Defang and provided to the load balancers as well
      test: [ "CMD", "curl", "-s", "http://localhost:8000/" ]

  ui:
    build:
      context: ui
      dockerfile: Dockerfile
    ports:
      - mode: ingress
        target: 3000
        published: 3000
    deploy:
      resources:
        reservations:
          memory: 256M
    healthcheck:
      # wget or curl required for healthchecks on services with a published port
      # this gets parsed by Defang and provided to the load balancers as well
      test: [ "CMD", "curl", "-s", "http://localhost:3000/" ]
    environment:
      - OLLAMA_ENDPOINT=http://ollama:8000
      # Make sure LOAD_MODEL is the same as in the ollama service
      - LOAD_MODEL=llama3.2:1b
      # - LOAD_MODEL=gemma:7b
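For reference, once the GPU lines above are uncommented and `gemma:7b` is selected, the relevant fragment of the `ollama` service would look roughly like this (GPU reservations are only honored with Defang BYOC, as the comments note):

```yaml
    deploy:
      resources:
        reservations:
          cpus: '2.0'
          memory: 8192M
          devices:
            - capabilities: ["gpu"]
    environment:
      - LOAD_MODEL=gemma:7b
```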
@@ -0,0 +1,21 @@
FROM debian:bookworm-slim

# install curl
RUN apt-get update && apt-get install -y curl && apt-get clean

# Define build-time arguments with default values
ARG OLLAMA_HOST=0.0.0.0:8000
ARG OLLAMA_MODELS=/usr/share/ollama/.ollama/models

# Use the build-time arguments to set environment variables
ENV OLLAMA_HOST=${OLLAMA_HOST}
ENV OLLAMA_MODELS=${OLLAMA_MODELS}
ENV LOAD_MODEL=llama3.2:1b

RUN curl -fsSL https://ollama.com/install.sh | sh

WORKDIR /app

COPY ./run.sh ./run.sh

CMD ["./run.sh"]
@@ -0,0 +1,18 @@
#!/bin/bash

nohup ollama serve >ollama.log 2>&1 &
time curl --retry 5 --retry-connrefused --retry-delay 0 -sf http://localhost:8000

echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
echo "@ Loading model $LOAD_MODEL @"
echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"

# if $LOAD_MODEL is not set, crash the container
if [ -z "$LOAD_MODEL" ]; then
  echo "LOAD_MODEL is not set. Exiting..."
  exit 1
fi

ollama pull "$LOAD_MODEL"

tail -f ollama.log
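Once the ollama service is up (for example via `docker compose -f compose.dev.yaml up`), a quick way to sanity-check the loaded model is a direct, non-streaming request to Ollama's generate API on the port configured by `OLLAMA_HOST`; the prompt below is just an example:

```bash
curl -s http://localhost:8000/api/generate \
  -d '{"model": "llama3.2:1b", "prompt": "Say hello in one sentence.", "stream": false}'
```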
@@ -0,0 +1,2 @@
node_modules
.next
@@ -0,0 +1,3 @@
{
  "extends": "next/core-web-vitals"
}
@@ -0,0 +1,36 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.js
.yarn/install-state.gz

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# local env files
.env*.local

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts
@@ -0,0 +1,17 @@
FROM node:bookworm-slim

RUN apt-get update && apt-get install -y curl && apt-get clean

WORKDIR /app

COPY package.json package-lock.json ./

RUN npm ci

COPY . .

RUN npm run build

EXPOSE 3000

CMD [ "npm", "run", "start" ]
@@ -0,0 +1,4 @@
/** @type {import('next').NextConfig} */
const nextConfig = {};

export default nextConfig;