helpers: Consolidate MakeSegment vs MakePathSanitized - hugo - [fork] hugo port for 9front
 (HTM) git clone git@git.drkhsh.at/hugo.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Submodules
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit e421696d02bfb8764ae57238e211ce0e85e9782e
 (DIR) parent 4b4af2c52e658d516dd4bfaf59fef4f054dabec3
 (HTM) Author: Bjørn Erik Pedersen <bjorn.erik.pedersen@gmail.com>
       Date:   Wed,  3 Oct 2018 10:14:45 +0200
       
       helpers: Consolidate MakeSegment vs MakePathSanitized
       
       In short:
       
       * Avoid double tolower in MakeSegment
       * Use MakePathSanitized for taxonomies in pageToPermalinkTitle; this matches what MakeSegment does.
       * Move the "double hyphen and space" logic into UnicodeSanitize
       
       The last bullet may be slightly breaking for those who no longer get the "--" in some URLs, but we need to reduce the amount of URL-handling logic.
       
       See #4926
       
       Diffstat:
         M helpers/path.go                     |      38 ++++++++++++-------------------
         M helpers/path_test.go                |       3 ++-
         M hugolib/permalinks.go               |       4 ++--
       
       3 files changed, 19 insertions(+), 26 deletions(-)
       ---
 (DIR) diff --git a/helpers/path.go b/helpers/path.go
       @@ -81,26 +81,8 @@ var segmentReplacer = strings.NewReplacer("/", "-", "#", "-")
        // segment.  MakeSegment is similar to MakePath but disallows the '/' and
        // '#' characters because of their reserved meaning in URIs.
        func (p *PathSpec) MakeSegment(s string) string {
       -        s = p.MakePathSanitized(strings.Trim(segmentReplacer.Replace(s), "- "))
       +        return p.MakePathSanitized(segmentReplacer.Replace(s))
        
       -        var pos int
       -        var last byte
       -        b := make([]byte, len(s))
       -
       -        for i := 0; i < len(s); i++ {
       -                // consolidate dashes
       -                if s[i] == '-' && last == '-' {
       -                        continue
       -                }
       -
       -                b[pos], last = s[i], s[i]
       -                pos++
       -        }
       -
       -        if p.DisablePathToLower {
       -                return string(b[:pos])
       -        }
       -        return strings.ToLower(string(b[:pos]))
        }
        
        // MakePath takes a string with any characters and replace it
       @@ -109,7 +91,7 @@ func (p *PathSpec) MakeSegment(s string) string {
        // whilst preserving the original casing of the string.
        // E.g. Social Media -> Social-Media
        func (p *PathSpec) MakePath(s string) string {
       -        return p.UnicodeSanitize(strings.Replace(strings.TrimSpace(s), " ", "-", -1))
       +        return p.UnicodeSanitize(s)
        }
        
        // MakePathSanitized creates a Unicode-sanitized string, with the spaces replaced
       @@ -148,15 +130,25 @@ func ishex(c rune) bool {
        // a predefined set of special Unicode characters.
        // If RemovePathAccents configuration flag is enabled, Unicode accents
        // are also removed.
       +// Spaces will be replaced with a single hyphen, and sequential hyphens will be reduced to one.
        func (p *PathSpec) UnicodeSanitize(s string) string {
                source := []rune(s)
                target := make([]rune, 0, len(source))
       +        var prependHyphen bool
        
                for i, r := range source {
       -                if r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2]) {
       -                        target = append(target, r)
       -                } else if unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r) || r == '.' || r == '/' || r == '\\' || r == '_' || r == '-' || r == '#' || r == '+' || r == '~' {
       +                isAllowed := r == '.' || r == '/' || r == '\\' || r == '_' || r == '#' || r == '+' || r == '~'
       +                isAllowed = isAllowed || unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r)
       +                isAllowed = isAllowed || (r == '%' && i+2 < len(source) && ishex(source[i+1]) && ishex(source[i+2]))
       +
       +                if isAllowed {
       +                        if prependHyphen {
       +                                target = append(target, '-')
       +                                prependHyphen = false
       +                        }
                                target = append(target, r)
       +                } else if len(target) > 0 && (r == '-' || unicode.IsSpace(r)) {
       +                        prependHyphen = true
                        }
                }
        
 (DIR) diff --git a/helpers/path_test.go b/helpers/path_test.go
       @@ -51,7 +51,8 @@ func TestMakeSegment(t *testing.T) {
                        {"Your #1 Fan", "your-1-fan"},
                        {"Red & Blue", "red-blue"},
                        {"double//slash", "double-slash"},
       -                {"My // Taxonomy", "my-taxonomy"},
       +                {"triple///slash", "triple-slash"},
       +                {"-my/way-", "my-way"},
                }
        
                for _, test := range tests {
 (DIR) diff --git a/hugolib/permalinks.go b/hugolib/permalinks.go
       @@ -152,10 +152,10 @@ func pageToPermalinkDate(p *Page, dateField string) (string, error) {
        
        // pageToPermalinkTitle returns the URL-safe form of the title
        func pageToPermalinkTitle(p *Page, _ string) (string, error) {
       -        if p.Kind == "taxonomy" {
       +        if p.Kind == KindTaxonomy {
                        // Taxonomies are allowed to have '/' characters, so don't normalize
                        // them with MakeSegment.
       -                return p.s.PathSpec.URLize(p.title), nil
       +                return p.s.PathSpec.MakePathSanitized(p.title), nil
                }
        
                return p.s.PathSpec.MakeSegment(p.title), nil