Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/stretchr/testify v1.11.1
go.uber.org/mock v0.6.0
golang.org/x/net v0.49.0
gopkg.in/yaml.v3 v3.0.1
oras.land/oras-go/v2 v2.6.0
)

Expand All @@ -25,5 +26,4 @@ require (
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
google.golang.org/protobuf v1.36.10 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
135 changes: 135 additions & 0 deletions oci/skills/gzip.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
// SPDX-License-Identifier: Apache-2.0

package skills

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
	"math"
	"time"
)

// gzipOSUnknown is the gzip header OS byte that RFC 1952 reserves for
// "unknown". Writing it instead of a host-specific value keeps archives
// identical across platforms.
const gzipOSUnknown = 0xFF

// GzipOptions configures reproducible gzip compression.
//
// The zero value is usable: Compress substitutes gzip.BestCompression for a
// zero Level and the Unix epoch for a zero Epoch.
type GzipOptions struct {
	// Level is the compression level (defaults to gzip.BestCompression).
	// Compress treats 0 as "unset", so gzip.NoCompression (which is 0)
	// cannot be selected through this field.
	Level int

	// Epoch is the modification time to use in the gzip header.
	// If zero, uses Unix epoch (1970-01-01) for reproducibility.
	Epoch time.Time
}

// DefaultGzipOptions returns the settings used for reproducible archives:
// gzip.BestCompression as the level and the Unix epoch (in UTC) as the
// header modification time.
func DefaultGzipOptions() GzipOptions {
	var opts GzipOptions
	opts.Level = gzip.BestCompression
	opts.Epoch = time.Unix(0, 0).UTC()
	return opts
}

// Compress gzips data into a freshly allocated byte slice, pinning every
// gzip header field so that identical input and options give identical
// output:
//   - ModTime is opts.Epoch, or the Unix epoch when opts.Epoch is zero.
//   - Name and Comment are empty.
//   - OS is 255 (unknown) for cross-platform consistency.
//
// A zero opts.Level is treated as unset and replaced by
// gzip.BestCompression. An error is returned when compress/gzip rejects the
// level or when writing or finalizing the stream fails.
func Compress(data []byte, opts GzipOptions) ([]byte, error) {
	level := opts.Level
	if level == 0 {
		level = gzip.BestCompression
	}

	// Fall back to the Unix epoch when the caller did not pick a timestamp.
	modTime := opts.Epoch
	if modTime.IsZero() {
		modTime = time.Unix(0, 0).UTC()
	}

	out := new(bytes.Buffer)
	zw, err := gzip.NewWriterLevel(out, level)
	if err != nil {
		return nil, fmt.Errorf("creating gzip writer: %w", err)
	}

	// Replace the whole header at once: fields not listed here (Name,
	// Comment) stay at their empty zero values.
	zw.Header = gzip.Header{
		ModTime: modTime,
		OS:      gzipOSUnknown,
	}

	_, err = zw.Write(data)
	if err != nil {
		return nil, fmt.Errorf("writing gzip data: %w", err)
	}

	// Close flushes the remaining compressed data and writes the trailer.
	err = zw.Close()
	if err != nil {
		return nil, fmt.Errorf("closing gzip writer: %w", err)
	}

	return out.Bytes(), nil
}

// MaxDecompressedSize is the default cap on decompressed output: 100 MiB.
// Bounding the output guards against decompression bombs.
const MaxDecompressedSize = 100 << 20

// Decompress decompresses gzip data.
func Decompress(data []byte) ([]byte, error) {
return DecompressWithLimit(data, MaxDecompressedSize)
}

// DecompressWithLimit decompresses gzip data, returning an error if the
// decompressed output is larger than maxSize bytes.
//
// maxSize must not be negative. Output of exactly maxSize bytes is accepted.
// An error is also returned when data is not a valid gzip stream or when
// reading the stream fails.
func DecompressWithLimit(data []byte, maxSize int64) ([]byte, error) {
	if maxSize < 0 {
		return nil, fmt.Errorf("invalid maximum size %d: must not be negative", maxSize)
	}

	gr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("creating gzip reader: %w", err)
	}
	// The Close error is deliberately ignored: read failures are already
	// reported by io.ReadAll below.
	defer func() { _ = gr.Close() }()

	// Read one byte past the limit so oversized output can be told apart
	// from output that is exactly maxSize long. Saturate instead of
	// overflowing: maxSize+1 would wrap negative at math.MaxInt64, which
	// makes io.LimitReader report EOF immediately and silently yields empty
	// output.
	readLimit := maxSize
	if readLimit < math.MaxInt64 {
		readLimit++
	}

	result, err := io.ReadAll(io.LimitReader(gr, readLimit))
	if err != nil {
		return nil, fmt.Errorf("reading gzip data: %w", err)
	}

	if int64(len(result)) > maxSize {
		return nil, fmt.Errorf("decompressed data exceeds maximum size of %d bytes", maxSize)
	}

	return result, nil
}

// CompressTar builds a .tar.gz archive from files by passing them to
// CreateTar with tarOpts and gzipping the resulting bytes with Compress and
// gzipOpts. A failure in either stage is returned wrapped with the stage
// that produced it.
func CompressTar(files []FileEntry, tarOpts TarOptions, gzipOpts GzipOptions) ([]byte, error) {
	archive, tarErr := CreateTar(files, tarOpts)
	if tarErr != nil {
		return nil, fmt.Errorf("creating tar: %w", tarErr)
	}

	compressed, gzErr := Compress(archive, gzipOpts)
	if gzErr != nil {
		return nil, fmt.Errorf("compressing tar: %w", gzErr)
	}

	return compressed, nil
}

// DecompressTar reads the files out of a .tar.gz archive: the data is
// inflated with Decompress and the result is handed to ExtractTar. A failure
// in either stage is returned wrapped with the stage that produced it.
func DecompressTar(data []byte) ([]FileEntry, error) {
	archive, gzErr := Decompress(data)
	if gzErr != nil {
		return nil, fmt.Errorf("decompressing gzip: %w", gzErr)
	}

	entries, tarErr := ExtractTar(archive)
	if tarErr != nil {
		return nil, fmt.Errorf("extracting tar: %w", tarErr)
	}

	return entries, nil
}
190 changes: 190 additions & 0 deletions oci/skills/gzip_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
// SPDX-License-Identifier: Apache-2.0

package skills

import (
"bytes"
"compress/gzip"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestCompress_Reproducible checks that two Compress calls with the same
// input and the default options return byte-identical output.
func TestCompress_Reproducible(t *testing.T) {
	t.Parallel()

	payload := []byte("test data for compression")
	defaults := DefaultGzipOptions()

	first, err := Compress(payload, defaults)
	require.NoError(t, err)

	second, err := Compress(payload, defaults)
	require.NoError(t, err)

	assert.Equal(t, first, second, "Compress should produce identical output for same input")
}

func TestCompress_HeaderFieldsForReproducibility(t *testing.T) {
t.Parallel()

data := []byte("test data")
epoch := time.Unix(1234567890, 0).UTC()
opts := GzipOptions{
Level: gzip.BestCompression,
Epoch: epoch,
}

compressed, err := Compress(data, opts)
require.NoError(t, err)

gr, err := gzip.NewReader(bytes.NewReader(compressed))
require.NoError(t, err)
defer gr.Close()

assert.True(t, gr.ModTime.Equal(epoch), "ModTime should match epoch")
assert.Empty(t, gr.Name, "Name should be empty")
assert.Empty(t, gr.Comment, "Comment should be empty")
assert.Equal(t, byte(gzipOSUnknown), gr.OS, "OS should be 255 (unknown)")
}

func TestCompress_DifferentEpochs(t *testing.T) {
t.Parallel()

data := []byte("test data")

tests := []struct {
name string
epoch1 time.Time
epoch2 time.Time
wantEqual bool
}{
{
name: "same epoch produces same output",
epoch1: time.Unix(1609459200, 0).UTC(),
epoch2: time.Unix(1609459200, 0).UTC(),
wantEqual: true,
},
{
name: "different epochs produce different output",
epoch1: time.Unix(0, 0).UTC(),
epoch2: time.Unix(1000000, 0).UTC(),
wantEqual: false,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

opts1 := GzipOptions{Level: gzip.BestCompression, Epoch: tt.epoch1}
opts2 := GzipOptions{Level: gzip.BestCompression, Epoch: tt.epoch2}

gz1, err := Compress(data, opts1)
require.NoError(t, err)

gz2, err := Compress(data, opts2)
require.NoError(t, err)

if tt.wantEqual {
assert.Equal(t, gz1, gz2)
} else {
assert.NotEqual(t, gz1, gz2)
}
})
}
}

// TestCompress_SameEpochAlwaysReproducible compresses the same input five
// times with a fixed epoch and checks every later result against the first.
func TestCompress_SameEpochAlwaysReproducible(t *testing.T) {
	t.Parallel()

	const runs = 5

	payload := []byte("test data for reproducibility check")
	opts := GzipOptions{
		Level: gzip.BestCompression,
		Epoch: time.Unix(1609459200, 0).UTC(),
	}

	// Collect every output first, then compare them in a second pass.
	outputs := make([][]byte, 0, runs)
	for i := 0; i < runs; i++ {
		out, err := Compress(payload, opts)
		require.NoError(t, err)
		outputs = append(outputs, out)
	}

	baseline := outputs[0]
	for i, out := range outputs[1:] {
		assert.Equal(t, baseline, out, "iteration %d should match", i+1)
	}
}

func TestCompressDecompress_RoundTrip(t *testing.T) {
t.Parallel()

original := []byte("test data for round trip")
opts := DefaultGzipOptions()

compressed, err := Compress(original, opts)
require.NoError(t, err)

decompressed, err := Decompress(compressed)
require.NoError(t, err)

assert.Equal(t, original, decompressed)
}

// TestDecompressWithLimit_RejectsOversized checks that DecompressWithLimit
// fails when the decompressed payload is larger than the requested limit.
func TestDecompressWithLimit_RejectsOversized(t *testing.T) {
	t.Parallel()

	// 1024 bytes of payload against a 100-byte limit.
	data := bytes.Repeat([]byte("x"), 1024)
	compressed, err := Compress(data, DefaultGzipOptions())
	require.NoError(t, err)

	result, err := DecompressWithLimit(compressed, 100)

	// require (not assert) so a nil error stops the test with a clear
	// failure. ErrorContains checks both that an error was returned and what
	// it says, without calling err.Error() on a possibly nil error.
	require.ErrorContains(t, err, "exceeds maximum size")
	assert.Nil(t, result, "no data should be returned when the limit is exceeded")
}

// TestCompressTar_Reproducible checks that CompressTar returns identical
// bytes when called twice with the same files and default options.
func TestCompressTar_Reproducible(t *testing.T) {
	t.Parallel()

	// The entries are deliberately listed out of alphabetical order.
	entries := []FileEntry{
		{Path: "b.txt", Content: []byte("content b")},
		{Path: "a.txt", Content: []byte("content a")},
	}

	tarDefaults, gzipDefaults := DefaultTarOptions(), DefaultGzipOptions()

	first, err := CompressTar(entries, tarDefaults, gzipDefaults)
	require.NoError(t, err)

	second, err := CompressTar(entries, tarDefaults, gzipDefaults)
	require.NoError(t, err)

	assert.Equal(t, first, second, "CompressTar should produce identical output")
}

// TestCompressTar_RoundTrip checks that DecompressTar returns the same
// paths and contents, in the same order, that were given to CompressTar.
func TestCompressTar_RoundTrip(t *testing.T) {
	t.Parallel()

	want := []FileEntry{
		{Path: "a.txt", Content: []byte("content a")},
		{Path: "dir/b.txt", Content: []byte("content b")},
	}

	archive, err := CompressTar(want, DefaultTarOptions(), DefaultGzipOptions())
	require.NoError(t, err)

	got, err := DecompressTar(archive)
	require.NoError(t, err)

	// The length check makes the index-based comparison below safe.
	require.Len(t, got, len(want))
	for i, expected := range want {
		actual := got[i]
		assert.Equal(t, expected.Path, actual.Path)
		assert.Equal(t, expected.Content, actual.Content)
	}
}
Loading
Loading