Skip to content

Commit

Permalink
Replace Gzip with PGzip (#266)
Browse files Browse the repository at this point in the history
* Replace Gzip with optimized PGzip. Add concurrency option.

* Add shortened timeout for 'dc down' too.

* Add NaturalNumberZero to allow zero.

* Add test for concurrency=0

* Rename to GZIP_PARALLELISM

* Fix block size. Fix compression level. Fix CI.

* Refactor compression writer fetching. Renamed WholeNumber
  • Loading branch information
MaxJa4 authored Sep 3, 2023
1 parent 1e39ac4 commit 336c5be
Show file tree
Hide file tree
Showing 8 changed files with 114 additions and 17 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@ You can populate below template according to your requirements and use it as you

# BACKUP_COMPRESSION="gz"

# Parallelism level for "gz" (Gzip) compression.
# Defines how many blocks of data are concurrently processed.
# Higher values result in faster compression. No effect on decompression
# Default = 1. Setting this to 0 will use all available threads.

# GZIP_PARALLELISM=1

# The name of the backup file including the extension.
# Format verbs will be replaced as in `strftime`. Omitting them
# will result in the same filename for every backup run, which means previous
Expand Down
53 changes: 37 additions & 16 deletions cmd/backup/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,20 @@ package main

import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"os"
"path"
"path/filepath"
"runtime"
"strings"

"github.com/klauspost/pgzip"

"github.com/klauspost/compress/zstd"
)

func createArchive(files []string, inputFilePath, outputFilePath string, compression string) error {
func createArchive(files []string, inputFilePath, outputFilePath string, compression string, compressionConcurrency int) error {
inputFilePath = stripTrailingSlashes(inputFilePath)
inputFilePath, outputFilePath, err := makeAbsolute(inputFilePath, outputFilePath)
if err != nil {
Expand All @@ -29,7 +31,7 @@ func createArchive(files []string, inputFilePath, outputFilePath string, compres
return fmt.Errorf("createArchive: error creating output file path: %w", err)
}

if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression); err != nil {
if err := compress(files, outputFilePath, filepath.Dir(inputFilePath), compression, compressionConcurrency); err != nil {
return fmt.Errorf("createArchive: error creating archive: %w", err)
}

Expand All @@ -53,26 +55,17 @@ func makeAbsolute(inputFilePath, outputFilePath string) (string, string, error)
return inputFilePath, outputFilePath, err
}

func compress(paths []string, outFilePath, subPath string, algo string) error {
func compress(paths []string, outFilePath, subPath string, algo string, concurrency int) error {
file, err := os.Create(outFilePath)
var compressWriter io.WriteCloser
if err != nil {
return fmt.Errorf("compress: error creating out file: %w", err)
}

prefix := path.Dir(outFilePath)
switch algo {
case "gz":
compressWriter = gzip.NewWriter(file)
case "zst":
compressWriter, err = zstd.NewWriter(file)
if err != nil {
return fmt.Errorf("compress: zstd error: %w", err)
}
default:
return fmt.Errorf("compress: unsupported compression algorithm: %s", algo)
compressWriter, err := getCompressionWriter(file, algo, concurrency)
if err != nil {
return fmt.Errorf("compress: error getting compression writer: %w", err)
}

tarWriter := tar.NewWriter(compressWriter)

for _, p := range paths {
Expand All @@ -99,6 +92,34 @@ func compress(paths []string, outFilePath, subPath string, algo string) error {
return nil
}

func getCompressionWriter(file *os.File, algo string, concurrency int) (io.WriteCloser, error) {
switch algo {
case "gz":
w, err := pgzip.NewWriterLevel(file, 5)
if err != nil {
return nil, fmt.Errorf("getCompressionWriter: gzip error: %w", err)
}

if concurrency == 0 {
concurrency = runtime.GOMAXPROCS(0)
}

if err := w.SetConcurrency(1<<20, concurrency); err != nil {
return nil, fmt.Errorf("getCompressionWriter: error setting concurrency: %w", err)
}

return w, nil
case "zst":
compressWriter, err := zstd.NewWriter(file)
if err != nil {
return nil, fmt.Errorf("getCompressionWriter: zstd error: %w", err)
}
return compressWriter, nil
default:
return nil, fmt.Errorf("getCompressionWriter: unsupported compression algorithm: %s", algo)
}
}

func writeTarball(path string, tarWriter *tar.Writer, prefix string) error {
fileInfo, err := os.Lstat(path)
if err != nil {
Expand Down
21 changes: 21 additions & 0 deletions cmd/backup/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type Config struct {
AwsIamRoleEndpoint string `split_words:"true"`
AwsPartSize int64 `split_words:"true"`
BackupCompression CompressionType `split_words:"true" default:"gz"`
GzipParallelism WholeNumber `split_words:"true" default:"1"`
BackupSources string `split_words:"true" default:"/backup"`
BackupFilename string `split_words:"true" default:"backup-%Y-%m-%dT%H-%M-%S.{{ .Extension }}"`
BackupFilenameExpand bool `split_words:"true"`
Expand Down Expand Up @@ -131,6 +132,7 @@ func (r *RegexpDecoder) Decode(v string) error {
return nil
}

// NaturalNumber is a type that can be used to decode a positive, non-zero natural number
type NaturalNumber int

func (n *NaturalNumber) Decode(v string) error {
Expand All @@ -148,3 +150,22 @@ func (n *NaturalNumber) Decode(v string) error {
func (n *NaturalNumber) Int() int {
return int(*n)
}

// WholeNumber is a type that can be used to decode a positive whole number, including zero
type WholeNumber int

func (n *WholeNumber) Decode(v string) error {
asInt, err := strconv.Atoi(v)
if err != nil {
return fmt.Errorf("config: error converting %s to int", v)
}
if asInt < 0 {
return fmt.Errorf("config: expected a whole, positive number, including zero. Got %d", asInt)
}
*n = WholeNumber(asInt)
return nil
}

func (n *WholeNumber) Int() int {
return int(*n)
}
2 changes: 1 addition & 1 deletion cmd/backup/script.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,7 +503,7 @@ func (s *script) createArchive() error {
return fmt.Errorf("createArchive: error walking filesystem tree: %w", err)
}

if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String()); err != nil {
if err := createArchive(filesEligibleForBackup, backupSources, tarFile, s.c.BackupCompression.String(), s.c.GzipParallelism.Int()); err != nil {
return fmt.Errorf("createArchive: error compressing backup folder: %w", err)
}

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ require (
github.com/google/uuid v1.3.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/cpuid/v2 v2.2.5 // indirect
github.com/klauspost/pgzip v1.2.6
github.com/kr/fs v0.1.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,8 @@ github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQs
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg=
github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/fs v0.1.0 h1:Jskdu9ieNAYnjxsi0LbQp1ulIKZV1LAFgK1tWhpZgl8=
Expand Down
42 changes: 42 additions & 0 deletions test/pgzip/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/sh

set -e

cd $(dirname $0)
. ../util.sh
current_test=$(basename $(pwd))

docker network create test_network
docker volume create app_data

LOCAL_DIR=$(mktemp -d)

docker run -d -q \
--name offen \
--network test_network \
-v app_data:/var/opt/offen/ \
offen/offen:latest

sleep 5

docker run --rm -q \
--network test_network \
-v app_data:/backup/app_data \
-v $LOCAL_DIR:/archive \
-v /var/run/docker.sock:/var/run/docker.sock \
--env BACKUP_COMPRESSION=gz \
--env GZIP_PARALLELISM=0 \
--env BACKUP_FILENAME='test.{{ .Extension }}' \
--entrypoint backup \
offen/docker-volume-backup:${TEST_VERSION:-canary}

tmp_dir=$(mktemp -d)
tar -xvf "$LOCAL_DIR/test.tar.gz" -C $tmp_dir
if [ ! -f "$tmp_dir/backup/app_data/offen.db" ]; then
fail "Could not find expected file in untared archive."
fi
pass "Found relevant files in untared local backup."

# This test does not stop containers during backup. This is happening on
# purpose in order to cover this setup as well.
expect_running_containers "1"
3 changes: 3 additions & 0 deletions test/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ docker() {
up)
shift
command docker compose up --timeout 3 "$@";;
down)
shift
command docker compose down --timeout 3 "$@";;
*)
command docker compose "$@";;
esac
Expand Down

0 comments on commit 336c5be

Please sign in to comment.