Improve shortcode indentation handling - hugo - [fork] hugo port for 9front
 (HTM) git clone git@git.drkhsh.at/hugo.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Submodules
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit d2cfaede5be420c7d8b701d97b98bc61b87e46d5
 (DIR) parent 322d19a81fedbf423a047bdf286499d2e25d14be
 (HTM) Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
       Date:   Sat, 28 May 2022 13:18:50 +0200
       
       Improve shortcode indentation handling
       
       * Record the leading whitespace (tabs, spaces) before the shortcode when parsing the page.
       * Apply that indentation to each line after the first in the rendered result of shortcodes without inner content (for shortcodes with inner content, the user is expected to indent that content themselves).
       
       Fixes #9946
       
       Diffstat:
         M common/text/transform.go            |      14 ++++++++++++++
         M common/text/transform_test.go       |      18 ++++++++++++++++++
         M hugolib/shortcode.go                |      27 +++++++++++++++++++++++++++
         M hugolib/shortcode_test.go           |      73 +++++++++++++++++++++++++++++++
         M parser/pageparser/item.go           |      12 +++++++++++-
         M parser/pageparser/itemtype_string.… |      31 +++++++++++++++++++++++++++++--
         M parser/pageparser/pagelexer.go      |      29 ++++++++++++++++++++++++++++-
         M parser/pageparser/pageparser.go     |       5 +++++
         M parser/pageparser/pageparser_short… |       3 +++
       
       9 files changed, 208 insertions(+), 4 deletions(-)
       ---
 (DIR) diff --git a/common/text/transform.go b/common/text/transform.go
       @@ -61,3 +61,17 @@ func Puts(s string) string {
                }
                return s + "\n"
        }
       +
       +// VisitLinesAfter calls the given function for each line, including newlines, in the given string.
       +func VisitLinesAfter(s string, fn func(line string)) {
       +        high := strings.Index(s, "\n")
       +        for high != -1 {
       +                fn(s[:high+1])
       +                s = s[high+1:]
       +                high = strings.Index(s, "\n")
       +        }
       +
       +        if s != "" {
       +                fn(s)
       +        }
       +}
 (DIR) diff --git a/common/text/transform_test.go b/common/text/transform_test.go
       @@ -41,3 +41,21 @@ func TestPuts(t *testing.T) {
                c.Assert(Puts("\nA\n"), qt.Equals, "\nA\n")
                c.Assert(Puts(""), qt.Equals, "")
        }
       +
       +func TestVisitLinesAfter(t *testing.T) {
       +        const lines = `line 1
       +line 2
       +
       +line 3`
       +
       +        var collected []string
       +
       +        VisitLinesAfter(lines, func(s string) {
       +                collected = append(collected, s)
       +        })
       +
       +        c := qt.New(t)
       +
       +        c.Assert(collected, qt.DeepEquals, []string{"line 1\n", "line 2\n", "\n", "line 3"})
       +
       +}
 (DIR) diff --git a/hugolib/shortcode.go b/hugolib/shortcode.go
       @@ -170,6 +170,8 @@ type shortcode struct {
                ordinal   int
                err       error
        
       +        indentation string // indentation from source.
       +
                info   tpl.Info       // One of the output formats (arbitrary)
                templs []tpl.Template // All output formats
        
       @@ -398,6 +400,22 @@ func renderShortcode(
                        return "", false, fe
                }
        
       +        if len(sc.inner) == 0 && len(sc.indentation) > 0 {
       +                b := bp.GetBuffer()
       +                i := 0
       +                text.VisitLinesAfter(result, func(line string) {
       +                        // The first line is correctly indented.
       +                        if i > 0 {
       +                                b.WriteString(sc.indentation)
       +                        }
       +                        i++
       +                        b.WriteString(line)
       +                })
       +
       +                result = b.String()
       +                bp.PutBuffer(b)
       +        }
       +
                return result, hasVariants, err
        }
        
       @@ -447,6 +465,15 @@ func (s *shortcodeHandler) extractShortcode(ordinal, level int, pt *pageparser.I
                }
                sc := &shortcode{ordinal: ordinal}
        
       +        // Back up one to identify any indentation.
       +        if pt.Pos() > 0 {
       +                pt.Backup()
       +                item := pt.Next()
       +                if item.IsIndentation() {
       +                        sc.indentation = string(item.Val)
       +                }
       +        }
       +
                cnt := 0
                nestedOrdinal := 0
                nextLevel := level + 1
 (DIR) diff --git a/hugolib/shortcode_test.go b/hugolib/shortcode_test.go
       @@ -942,3 +942,76 @@ title: "p1"
                `)
        
        }
       +
       +func TestShortcodePreserveIndentation(t *testing.T) {
       +        t.Parallel()
       +
       +        files := `
       +-- config.toml --
       +-- content/p1.md --
       +---
       +title: "p1"
       +---
       +
       +## List With Indented Shortcodes
       +
       +1. List 1
       +    {{% mark1 %}}
       +        1. Item Mark1 1
       +        1. Item Mark1 2
       +        {{% mark2 %}}
       +        {{% /mark1 %}}
       +-- layouts/shortcodes/mark1.md --
       +{{ .Inner }}
       +-- layouts/shortcodes/mark2.md --
       +1. Item Mark2 1
       +1. Item Mark2 2
       +   1. Item Mark2 2-1
       +1. Item Mark2 3
       +-- layouts/_default/single.html --
       +{{ .Content }}
       +`
       +
       +        b := NewIntegrationTestBuilder(
       +                IntegrationTestConfig{
       +                        T:           t,
       +                        TxtarString: files,
       +                        Running:     true,
       +                },
       +        ).Build()
       +
       +        b.AssertFileContent("public/p1/index.html", "<ol>\n<li>\n<p>List 1</p>\n<ol>\n<li>Item Mark1 1</li>\n<li>Item Mark1 2</li>\n<li>Item Mark2 1</li>\n<li>Item Mark2 2\n<ol>\n<li>Item Mark2 2-1</li>\n</ol>\n</li>\n<li>Item Mark2 3</li>\n</ol>\n</li>\n</ol>")
       +
       +}
       +
       +func TestShortcodeCodeblockIndent(t *testing.T) {
       +        t.Parallel()
       +
       +        files := `
       +-- config.toml --
       +-- content/p1.md --
       +---
       +title: "p1"
       +---
       +
       +## Code block
       +
       +    {{% code %}}
       +
       +-- layouts/shortcodes/code.md --
       +echo "foo";
       +-- layouts/_default/single.html --
       +{{ .Content }}
       +`
       +
       +        b := NewIntegrationTestBuilder(
       +                IntegrationTestConfig{
       +                        T:           t,
       +                        TxtarString: files,
       +                        Running:     true,
       +                },
       +        ).Build()
       +
       +        b.AssertFileContent("public/p1/index.html", "<pre><code>echo &quot;foo&quot;;\n</code></pre>")
       +
       +}
 (DIR) diff --git a/parser/pageparser/item.go b/parser/pageparser/item.go
       @@ -18,6 +18,8 @@ import (
                "fmt"
                "regexp"
                "strconv"
       +
       +        "github.com/yuin/goldmark/util"
        )
        
        type Item struct {
       @@ -64,7 +66,11 @@ func (i Item) ValTyped() any {
        }
        
        func (i Item) IsText() bool {
       -        return i.Type == tText
       +        return i.Type == tText || i.Type == tIndentation
       +}
       +
       +func (i Item) IsIndentation() bool {
       +        return i.Type == tIndentation
        }
        
        func (i Item) IsNonWhitespace() bool {
       @@ -125,6 +131,8 @@ func (i Item) String() string {
                        return "EOF"
                case i.Type == tError:
                        return string(i.Val)
       +        case i.Type == tIndentation:
       +                return fmt.Sprintf("%s:[%s]", i.Type, util.VisualizeSpaces(i.Val))
                case i.Type > tKeywordMarker:
                        return fmt.Sprintf("<%s>", i.Val)
                case len(i.Val) > 50:
       @@ -159,6 +167,8 @@ const (
                tScParam
                tScParamVal
        
       +        tIndentation
       +
                tText // plain text
        
                // preserved for later - keywords come after this
 (DIR) diff --git a/parser/pageparser/itemtype_string.go b/parser/pageparser/itemtype_string.go
       @@ -4,9 +4,36 @@ package pageparser
        
        import "strconv"
        
       -const _ItemType_name = "tErrortEOFTypeHTMLStartTypeLeadSummaryDividerTypeFrontMatterYAMLTypeFrontMatterTOMLTypeFrontMatterJSONTypeFrontMatterORGTypeEmojiTypeIgnoretLeftDelimScNoMarkuptRightDelimScNoMarkuptLeftDelimScWithMarkuptRightDelimScWithMarkuptScClosetScNametScNameInlinetScParamtScParamValtTexttKeywordMarker"
       +func _() {
       +        // An "invalid array index" compiler error signifies that the constant values have changed.
       +        // Re-run the stringer command to generate them again.
       +        var x [1]struct{}
       +        _ = x[tError-0]
       +        _ = x[tEOF-1]
       +        _ = x[TypeLeadSummaryDivider-2]
       +        _ = x[TypeFrontMatterYAML-3]
       +        _ = x[TypeFrontMatterTOML-4]
       +        _ = x[TypeFrontMatterJSON-5]
       +        _ = x[TypeFrontMatterORG-6]
       +        _ = x[TypeEmoji-7]
       +        _ = x[TypeIgnore-8]
       +        _ = x[tLeftDelimScNoMarkup-9]
       +        _ = x[tRightDelimScNoMarkup-10]
       +        _ = x[tLeftDelimScWithMarkup-11]
       +        _ = x[tRightDelimScWithMarkup-12]
       +        _ = x[tScClose-13]
       +        _ = x[tScName-14]
       +        _ = x[tScNameInline-15]
       +        _ = x[tScParam-16]
       +        _ = x[tScParamVal-17]
       +        _ = x[tIndentation-18]
       +        _ = x[tText-19]
       +        _ = x[tKeywordMarker-20]
       +}
       +
       +const _ItemType_name = "tErrortEOFTypeLeadSummaryDividerTypeFrontMatterYAMLTypeFrontMatterTOMLTypeFrontMatterJSONTypeFrontMatterORGTypeEmojiTypeIgnoretLeftDelimScNoMarkuptRightDelimScNoMarkuptLeftDelimScWithMarkuptRightDelimScWithMarkuptScClosetScNametScNameInlinetScParamtScParamValtIndentationtTexttKeywordMarker"
        
       -var _ItemType_index = [...]uint16{0, 6, 10, 23, 45, 64, 83, 102, 120, 129, 139, 159, 180, 202, 225, 233, 240, 253, 261, 272, 277, 291}
       +var _ItemType_index = [...]uint16{0, 6, 10, 32, 51, 70, 89, 107, 116, 126, 146, 167, 189, 212, 220, 227, 240, 248, 259, 271, 276, 290}
        
        func (i ItemType) String() string {
                if i < 0 || i >= ItemType(len(_ItemType_index)-1) {
 (DIR) diff --git a/parser/pageparser/pagelexer.go b/parser/pageparser/pagelexer.go
       @@ -120,6 +120,7 @@ func (l *pageLexer) next() rune {
                runeValue, runeWidth := utf8.DecodeRune(l.input[l.pos:])
                l.width = runeWidth
                l.pos += l.width
       +
                return runeValue
        }
        
       @@ -137,8 +138,34 @@ func (l *pageLexer) backup() {
        
        // sends an item back to the client.
        func (l *pageLexer) emit(t ItemType) {
       +        defer func() {
       +                l.start = l.pos
       +        }()
       +
       +        if t == tText {
       +                // Identify any trailing whitespace/indentation.
       +                // We currently only care about the last one.
       +                for i := l.pos - 1; i >= l.start; i-- {
       +                        b := l.input[i]
       +                        if b != ' ' && b != '\t' && b != '\r' && b != '\n' {
       +                                break
       +                        }
       +                        if i == l.start && b != '\n' {
       +                                l.items = append(l.items, Item{tIndentation, l.start, l.input[l.start:l.pos], false})
       +                                return
       +                        } else if b == '\n' && i < l.pos-1 {
       +                                l.items = append(l.items, Item{t, l.start, l.input[l.start : i+1], false})
       +                                l.items = append(l.items, Item{tIndentation, i + 1, l.input[i+1 : l.pos], false})
       +                                return
       +                        } else if b == '\n' && i == l.pos-1 {
       +                                break
       +                        }
       +
       +                }
       +        }
       +
                l.items = append(l.items, Item{t, l.start, l.input[l.start:l.pos], false})
       -        l.start = l.pos
       +
        }
        
        // sends a string item back to the client.
 (DIR) diff --git a/parser/pageparser/pageparser.go b/parser/pageparser/pageparser.go
       @@ -149,6 +149,11 @@ func (t *Iterator) Backup() {
                t.lastPos--
        }
        
       +// Pos returns the current position in the input.
       +func (t *Iterator) Pos() int {
       +        return t.lastPos
       +}
       +
        // check for non-error and non-EOF types coming next
        func (t *Iterator) IsValueNext() bool {
                i := t.Peek()
 (DIR) diff --git a/parser/pageparser/pageparser_shortcode_test.go b/parser/pageparser/pageparser_shortcode_test.go
       @@ -51,6 +51,9 @@ var shortCodeLexerTests = []lexerTest{
        
                {"simple with markup", `{{% sc1 %}}`, []Item{tstLeftMD, tstSC1, tstRightMD, tstEOF}},
                {"with spaces", `{{<     sc1     >}}`, []Item{tstLeftNoMD, tstSC1, tstRightNoMD, tstEOF}},
       +        {"indented on new line", "Hello\n    {{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
       +        {"indented on new line tab", "Hello\n\t{{% sc1 %}}", []Item{nti(tText, "Hello\n"), nti(tIndentation, "\t"), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
       +        {"indented on first line", "    {{% sc1 %}}", []Item{nti(tIndentation, "    "), tstLeftMD, tstSC1, tstRightMD, tstEOF}},
                {"mismatched rightDelim", `{{< sc1 %}}`, []Item{
                        tstLeftNoMD, tstSC1,
                        nti(tError, "unrecognized character in shortcode action: U+0025 '%'. Note: Parameters with non-alphanumeric args must be quoted"),