Replace the MD5 hashing of images with xxHash - hugo - [fork] hugo port for 9front
 (HTM) git clone git@git.drkhsh.at/hugo.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Submodules
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit d5eda13cb2e57998210b66e080dc96e95b38e5f0
 (DIR) parent 8b5d796989cf0798ee61003159ba8b332675bdf2
 (HTM) Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
       Date:   Tue, 30 Jul 2024 12:52:54 +0200
       
       Replace the MD5 hashing of images with xxHash
       
       Note that we only use this for change detection.
       
       The previous implementation invoked `MD5FromReaderFast`, which created an MD5 hash from eight 64-byte chunks of the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective:
       
       ```
       name          old time/op    new time/op    delta
       HashImage-10    9.45µs ±21%   10.89µs ± 1%     ~     (p=0.343 n=4+4)
       
       name          old alloc/op   new alloc/op   delta
       HashImage-10      144B ± 0%        8B ± 0%  -94.44%  (p=0.029 n=4+4)
       
       name          old allocs/op  new allocs/op  delta
       HashImage-10      4.00 ± 0%      1.00 ± 0%  -75.00%  (p=0.029 n=4+4)
       ```
       
       Diffstat:
         A common/hashing/hashing.go           |      86 ++++++++++++++++++++++++++++++
         A common/hashing/hashing_test.go      |      79 +++++++++++++++++++++++++++++++
         M helpers/general.go                  |      13 +++----------
         M resources/image.go                  |       2 +-
         M resources/resource.go               |      13 +++++++------
         M tpl/hash/hash.go                    |      12 ++----------
         M tpl/tplimpl/template_ast_transform… |       4 ++--
       
       7 files changed, 180 insertions(+), 29 deletions(-)
       ---
 (DIR) diff --git a/common/hashing/hashing.go b/common/hashing/hashing.go
       @@ -0,0 +1,86 @@
       +// Copyright 2024 The Hugo Authors. All rights reserved.
       +//
       +// Licensed under the Apache License, Version 2.0 (the "License");
       +// you may not use this file except in compliance with the License.
       +// You may obtain a copy of the License at
       +// http://www.apache.org/licenses/LICENSE-2.0
       +//
       +// Unless required by applicable law or agreed to in writing, software
       +// distributed under the License is distributed on an "AS IS" BASIS,
       +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       +// See the License for the specific language governing permissions and
       +// limitations under the License.
       +
       +// Package hashing provides common hashing utilities.
       +package hashing
       +
       +import (
       +        "encoding/hex"
       +        "io"
       +        "sync"
       +
       +        "github.com/cespare/xxhash/v2"
       +)
       +
       +// XXHashFromReader streams r through a pooled xxHash digest and returns the
       +// 64-bit hash together with the byte count reported by io.Copy.
       +// If the copy fails, both numeric results are zero and the error is returned.
       +func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) {
       +        hasher := getXxHashReadFrom()
       +        // Reset the hasher and hand it back to the pool once the sum is taken.
       +        defer putXxHashReadFrom(hasher)
       +
       +        n, copyErr := io.Copy(hasher, r)
       +        if copyErr != nil {
       +                return 0, 0, copyErr
       +        }
       +
       +        return hasher.Sum64(), n, nil
       +}
       +
       +// XXHashFromString calculates the 64-bit xxHash for the given string.
       +//
       +// It uses the one-shot xxhash.Sum64String helper, so no Digest is allocated
       +// per call. The returned error is always nil; it is kept in the signature
       +// so existing callers continue to compile.
       +func XXHashFromString(s string) (uint64, error) {
       +        return xxhash.Sum64String(s), nil
       +}
       +
       +// XxHashFromStringHexEncoded hashes f with xxHash and returns the digest
       +// bytes produced by Sum as a lowercase hex string.
       +func XxHashFromStringHexEncoded(f string) string {
       +        digest := xxhash.New()
       +        digest.WriteString(f)
       +        return hex.EncodeToString(digest.Sum(nil))
       +}
       +
       +// xxhashReadFrom pairs an xxHash digest with a reusable scratch buffer so
       +// that ReadFrom can feed a reader into the digest in chunks.
       +type xxhashReadFrom struct {
       +        // Digest is embedded, so its methods (Write, Sum64, Reset, ...) are
       +        // promoted onto xxhashReadFrom.
       +        *xxhash.Digest
       +        // buff is the scratch space each Read call in ReadFrom fills.
       +        buff []byte
       +}
       +
       +func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) {
       +        for {
       +                n, err := r.Read(x.buff)
       +                if n > 0 {
       +                        x.Digest.Write(x.buff[:n])
       +                }
       +                if err != nil {
       +                        if err == io.EOF {
       +                                err = nil
       +                        }
       +                        return int64(n), err
       +                }
       +        }
       +}
       +
       +// xXhashReadFromPool recycles xxhashReadFrom values. Each new value gets a
       +// fresh digest and its own 48 KiB read buffer.
       +var xXhashReadFromPool = sync.Pool{
       +        New: func() any {
       +                const buffSize = 48 * 1024
       +                return &xxhashReadFrom{
       +                        Digest: xxhash.New(),
       +                        buff:   make([]byte, buffSize),
       +                }
       +        },
       +}
       +
       +// getXxHashReadFrom takes a hasher from xXhashReadFromPool.
       +// Return it with putXxHashReadFrom when done.
       +func getXxHashReadFrom() *xxhashReadFrom {
       +        pooled := xXhashReadFromPool.Get()
       +        return pooled.(*xxhashReadFrom)
       +}
       +
       +// putXxHashReadFrom resets h's digest and returns h to xXhashReadFromPool.
       +func putXxHashReadFrom(h *xxhashReadFrom) {
       +        // Reset before pooling so the next user starts from a clean state.
       +        h.Digest.Reset()
       +        xXhashReadFromPool.Put(h)
       +}
 (DIR) diff --git a/common/hashing/hashing_test.go b/common/hashing/hashing_test.go
       @@ -0,0 +1,79 @@
       +// Copyright 2024 The Hugo Authors. All rights reserved.
       +//
       +// Licensed under the Apache License, Version 2.0 (the "License");
       +// you may not use this file except in compliance with the License.
       +// You may obtain a copy of the License at
       +// http://www.apache.org/licenses/LICENSE-2.0
       +//
       +// Unless required by applicable law or agreed to in writing, software
       +// distributed under the License is distributed on an "AS IS" BASIS,
       +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       +// See the License for the specific language governing permissions and
       +// limitations under the License.
       +
       +package hashing
       +
       +import (
       +        "strings"
       +        "testing"
       +
       +        "github.com/cespare/xxhash/v2"
       +        qt "github.com/frankban/quicktest"
       +)
       +
       +func TestXxHashFromReader(t *testing.T) {
       +        c := qt.New(t)
       +        s := "Hello World"
       +        r := strings.NewReader(s)
       +        got, size, err := XXHashFromReader(r)
       +        c.Assert(err, qt.IsNil)
       +        c.Assert(size, qt.Equals, int64(len(s)))
       +        c.Assert(got, qt.Equals, uint64(7148569436472236994))
       +}
       +
       +// TestXxHashFromString checks XXHashFromString against a known hash value.
       +func TestXxHashFromString(t *testing.T) {
       +        c := qt.New(t)
       +
       +        sum, err := XXHashFromString("Hello World")
       +
       +        c.Assert(err, qt.IsNil)
       +        c.Assert(sum, qt.Equals, uint64(7148569436472236994))
       +}
       +
       +func TestXxHashFromStringHexEncoded(t *testing.T) {
       +        c := qt.New(t)
       +        s := "The quick brown fox jumps over the lazy dog"
       +        got := XxHashFromStringHexEncoded(s)
       +        // Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog
       +        c.Assert(got, qt.Equals, "0b242d361fda71bc")
       +}
       +
       +// BenchmarkXXHashFromReader measures XXHashFromReader on a small in-memory
       +// reader that is rewound after every iteration.
       +func BenchmarkXXHashFromReader(b *testing.B) {
       +        src := strings.NewReader("Hello World")
       +        b.ResetTimer()
       +        for n := 0; n < b.N; n++ {
       +                XXHashFromReader(src)
       +                // Rewind so the next iteration hashes the full input again.
       +                src.Seek(0, 0)
       +        }
       +}
       +
       +func BenchmarkXXHashFromString(b *testing.B) {
       +        s := "Hello World"
       +        b.ResetTimer()
       +        for i := 0; i < b.N; i++ {
       +                XXHashFromString(s)
       +        }
       +}
       +
       +func BenchmarkXXHashFromStringHexEncoded(b *testing.B) {
       +        s := "The quick brown fox jumps over the lazy dog"
       +        b.ResetTimer()
       +        for i := 0; i < b.N; i++ {
       +                XxHashFromStringHexEncoded(s)
       +        }
       +}
       +
       +// xxHashFromString returns the 64-bit xxHash of f using a fresh Digest.
       +// NOTE(review): not called by any test or benchmark visible in this file.
       +func xxHashFromString(f string) uint64 {
       +        digest := xxhash.New()
       +        digest.WriteString(f)
       +        sum := digest.Sum64()
       +        return sum
       +}
 (DIR) diff --git a/helpers/general.go b/helpers/general.go
       @@ -27,12 +27,11 @@ import (
                "unicode"
                "unicode/utf8"
        
       -        "github.com/cespare/xxhash/v2"
       +        bp "github.com/gohugoio/hugo/bufferpool"
       +
                "github.com/spf13/afero"
        
                "github.com/jdkato/prose/transform"
       -
       -        bp "github.com/gohugoio/hugo/bufferpool"
        )
        
        // FilePathSeparator as defined by os.Separator.
       @@ -258,13 +257,7 @@ func SliceToLower(s []string) []string {
                return l
        }
        
       -// XxHashString takes a string and returns its xxHash hash.
       -func XxHashString(f string) string {
       -        h := xxhash.New()
       -        h.WriteString(f)
       -        hash := h.Sum(nil)
       -        return hex.EncodeToString(hash)
       -}
       +// XXHashFromReader creates a xxHash hash from the given reader.
        
        // MD5String takes a string and returns its MD5 hash.
        func MD5String(f string) string {
 (DIR) diff --git a/resources/image.go b/resources/image.go
       @@ -493,7 +493,7 @@ func (i *imageResource) relTargetPathFromConfig(conf images.ImageConfig) interna
                }
        
                h := i.hash()
       -        idStr := fmt.Sprintf("_hu%s_%d", h, i.size())
       +        idStr := fmt.Sprintf("_hu%d_%d", h, i.size())
        
                // Do not change for no good reason.
                const md5Threshold = 100
 (DIR) diff --git a/resources/resource.go b/resources/resource.go
       @@ -26,6 +26,7 @@ import (
                "github.com/gohugoio/hugo/identity"
                "github.com/gohugoio/hugo/resources/internal"
        
       +        "github.com/gohugoio/hugo/common/hashing"
                "github.com/gohugoio/hugo/common/herrors"
                "github.com/gohugoio/hugo/common/paths"
        
       @@ -307,7 +308,7 @@ type fileInfo interface {
        }
        
        type hashProvider interface {
       -        hash() string
       +        hash() uint64
        }
        
        var _ resource.StaleInfo = (*StaleValue[any])(nil)
       @@ -403,7 +404,7 @@ func (l *genericResource) size() int64 {
                return l.h.size
        }
        
       -func (l *genericResource) hash() string {
       +func (l *genericResource) hash() uint64 {
                if err := l.h.init(l); err != nil {
                        panic(err)
                }
       @@ -628,7 +629,7 @@ type targetPather interface {
        }
        
        type resourceHash struct {
       -        value    string
       +        value    uint64
                size     int64
                initOnce sync.Once
        }
       @@ -636,7 +637,7 @@ type resourceHash struct {
        func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
                var initErr error
                r.initOnce.Do(func() {
       -                var hash string
       +                var hash uint64
                        var size int64
                        f, err := l.ReadSeekCloser()
                        if err != nil {
       @@ -656,6 +657,6 @@ func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error {
                return initErr
        }
        
       -func hashImage(r io.ReadSeeker) (string, int64, error) {
       -        return helpers.MD5FromReaderFast(r)
       +func hashImage(r io.ReadSeeker) (uint64, int64, error) {
       +        return hashing.XXHashFromReader(r)
        }
 (DIR) diff --git a/tpl/hash/hash.go b/tpl/hash/hash.go
       @@ -16,10 +16,9 @@ package hash
        
        import (
                "context"
       -        "encoding/hex"
                "hash/fnv"
        
       -        "github.com/cespare/xxhash/v2"
       +        "github.com/gohugoio/hugo/common/hashing"
                "github.com/gohugoio/hugo/deps"
                "github.com/gohugoio/hugo/tpl/internal"
                "github.com/spf13/cast"
       @@ -51,14 +50,7 @@ func (ns *Namespace) XxHash(v any) (string, error) {
                        return "", err
                }
        
       -        hasher := xxhash.New()
       -
       -        _, err = hasher.WriteString(conv)
       -        if err != nil {
       -                return "", err
       -        }
       -        hash := hasher.Sum(nil)
       -        return hex.EncodeToString(hash), nil
       +        return hashing.XxHashFromStringHexEncoded(conv), nil
        }
        
        const name = "hash"
 (DIR) diff --git a/tpl/tplimpl/template_ast_transformers.go b/tpl/tplimpl/template_ast_transformers.go
       @@ -18,12 +18,12 @@ import (
                "fmt"
                "strings"
        
       -        "github.com/gohugoio/hugo/helpers"
                htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate"
                texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate"
        
                "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse"
        
       +        "github.com/gohugoio/hugo/common/hashing"
                "github.com/gohugoio/hugo/common/maps"
                "github.com/gohugoio/hugo/tpl"
                "github.com/mitchellh/mapstructure"
       @@ -254,7 +254,7 @@ func (c *templateContext) handleDefer(withNode *parse.WithNode) {
                        c.err = errors.New("resources.PostProcess cannot be used in a deferred template")
                        return
                }
       -        innerHash := helpers.XxHashString(s)
       +        innerHash := hashing.XxHashFromStringHexEncoded(s)
                deferredID := tpl.HugoDeferredTemplatePrefix + innerHash
        
                c.deferNodes[deferredID] = inner