// Copyright 2023 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Patterns for ServeMux routing. package http import ( "errors" "fmt" "net/url" "strings" "unicode" ) // A pattern is something that can be matched against an HTTP request. // It has an optional method, an optional host, and a path. type pattern struct { str string // original string method string host string // The representation of a path differs from the surface syntax, which // simplifies most algorithms. // // Paths ending in '/' are represented with an anonymous "..." wildcard. // For example, the path "a/" is represented as a literal segment "a" followed // by a segment with multi==true. // // Paths ending in "{$}" are represented with the literal segment "/". // For example, the path "a/{$}" is represented as a literal segment "a" followed // by a literal segment "/". segments []segment loc string // source location of registering call, for helpful messages } func (p *pattern) String() string { return p.str } func (p *pattern) lastSegment() segment { return p.segments[len(p.segments)-1] } // A segment is a pattern piece that matches one or more path segments, or // a trailing slash. // // If wild is false, it matches a literal segment, or, if s == "/", a trailing slash. // Examples: // // "a" => segment{s: "a"} // "/{$}" => segment{s: "/"} // // If wild is true and multi is false, it matches a single path segment. // Example: // // "{x}" => segment{s: "x", wild: true} // // If both wild and multi are true, it matches all remaining path segments. // Example: // // "{rest...}" => segment{s: "rest", wild: true, multi: true} type segment struct { s string // literal or wildcard name or "/" for "/{$}". wild bool multi bool // "..." wildcard } // parsePattern parses a string into a Pattern. // The string's syntax is // // [METHOD] [HOST]/[PATH] // // where: // - METHOD is an HTTP method // - HOST is a hostname // - PATH consists of slash-separated segments, where each segment is either // a literal or a wildcard of the form "{name}", "{name...}", or "{$}". // // METHOD, HOST and PATH are all optional; that is, the string can be "/". // If METHOD is present, it must be followed by a single space. // Wildcard names must be valid Go identifiers. // The "{$}" and "{name...}" wildcard must occur at the end of PATH. // PATH may end with a '/'. // Wildcard names in a path must be distinct. func parsePattern(s string) (_ *pattern, err error) { if len(s) == 0 { return nil, errors.New("empty pattern") } off := 0 // offset into string defer func() { if err != nil { err = fmt.Errorf("at offset %d: %w", off, err) } }() method, rest, found := strings.Cut(s, " ") if !found { rest = method method = "" } if method != "" && !validMethod(method) { return nil, fmt.Errorf("invalid method %q", method) } p := &pattern{str: s, method: method} if found { off = len(method) + 1 } i := strings.IndexByte(rest, '/') if i < 0 { return nil, errors.New("host/path missing /") } p.host = rest[:i] rest = rest[i:] if j := strings.IndexByte(p.host, '{'); j >= 0 { off += j return nil, errors.New("host contains '{' (missing initial '/'?)") } // At this point, rest is the path. off += i // An unclean path with a method that is not CONNECT can never match, // because paths are cleaned before matching. if method != "" && method != "CONNECT" && rest != cleanPath(rest) { return nil, errors.New("non-CONNECT pattern with unclean path can never match") } seenNames := map[string]bool{} // remember wildcard names to catch dups for len(rest) > 0 { // Invariant: rest[0] == '/'. rest = rest[1:] off = len(s) - len(rest) if len(rest) == 0 { // Trailing slash. p.segments = append(p.segments, segment{wild: true, multi: true}) break } i := strings.IndexByte(rest, '/') if i < 0 { i = len(rest) } var seg string seg, rest = rest[:i], rest[i:] if i := strings.IndexByte(seg, '{'); i < 0 { // Literal. seg = pathUnescape(seg) p.segments = append(p.segments, segment{s: seg}) } else { // Wildcard. if i != 0 { return nil, errors.New("bad wildcard segment (must start with '{')") } if seg[len(seg)-1] != '}' { return nil, errors.New("bad wildcard segment (must end with '}')") } name := seg[1 : len(seg)-1] if name == "$" { if len(rest) != 0 { return nil, errors.New("{$} not at end") } p.segments = append(p.segments, segment{s: "/"}) break } name, multi := strings.CutSuffix(name, "...") if multi && len(rest) != 0 { return nil, errors.New("{...} wildcard not at end") } if name == "" { return nil, errors.New("empty wildcard") } if !isValidWildcardName(name) { return nil, fmt.Errorf("bad wildcard name %q", name) } if seenNames[name] { return nil, fmt.Errorf("duplicate wildcard name %q", name) } seenNames[name] = true p.segments = append(p.segments, segment{s: name, wild: true, multi: multi}) } } return p, nil } func isValidWildcardName(s string) bool { if s == "" { return false } // Valid Go identifier. for i, c := range s { if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) { return false } } return true } func pathUnescape(path string) string { u, err := url.PathUnescape(path) if err != nil { // Invalidly escaped path; use the original return path } return u } // relationship is a relationship between two patterns, p1 and p2. type relationship string const ( equivalent relationship = "equivalent" // both match the same requests moreGeneral relationship = "moreGeneral" // p1 matches everything p2 does & more moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more disjoint relationship = "disjoint" // there is no request that both match overlaps relationship = "overlaps" // there is a request that both match, but neither is more specific ) // conflictsWith reports whether p1 conflicts with p2, that is, whether // there is a request that both match but where neither is higher precedence // than the other. // // Precedence is defined by two rules: // 1. Patterns with a host win over patterns without a host. // 2. Patterns whose method and path is more specific win. One pattern is more // specific than another if the second matches all the (method, path) pairs // of the first and more. // // If rule 1 doesn't apply, then two patterns conflict if their relationship // is either equivalence (they match the same set of requests) or overlap // (they both match some requests, but neither is more specific than the other). func (p1 *pattern) conflictsWith(p2 *pattern) bool { if p1.host != p2.host { // Either one host is empty and the other isn't, in which case the // one with the host wins by rule 1, or neither host is empty // and they differ, so they won't match the same paths. return false } rel := p1.comparePathsAndMethods(p2) return rel == equivalent || rel == overlaps } func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship { mrel := p1.compareMethods(p2) // Optimization: avoid a call to comparePaths. if mrel == disjoint { return disjoint } prel := p1.comparePaths(p2) return combineRelationships(mrel, prel) } // compareMethods determines the relationship between the method // part of patterns p1 and p2. // // A method can either be empty, "GET", or something else. // The empty string matches any method, so it is the most general. // "GET" matches both GET and HEAD. // Anything else matches only itself. func (p1 *pattern) compareMethods(p2 *pattern) relationship { if p1.method == p2.method { return equivalent } if p1.method == "" { // p1 matches any method, but p2 does not, so p1 is more general. return moreGeneral } if p2.method == "" { return moreSpecific } if p1.method == "GET" && p2.method == "HEAD" { // p1 matches GET and HEAD; p2 matches only HEAD. return moreGeneral } if p2.method == "GET" && p1.method == "HEAD" { return moreSpecific } return disjoint } // comparePaths determines the relationship between the path // part of two patterns. func (p1 *pattern) comparePaths(p2 *pattern) relationship { // Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it // can only match paths with the same number of segments. if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi { return disjoint } // Consider corresponding segments in the two path patterns. var segs1, segs2 []segment rel := equivalent for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] { rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0])) if rel == disjoint { return rel } } // We've reached the end of the corresponding segments of the patterns. // If they have the same number of segments, then we've already determined // their relationship. if len(segs1) == 0 && len(segs2) == 0 { return rel } // Otherwise, the only way they could fail to be disjoint is if the shorter // pattern ends in a multi. In that case, that multi is more general // than the remainder of the longer pattern, so combine those two relationships. if len(segs1) < len(segs2) && p1.lastSegment().multi { return combineRelationships(rel, moreGeneral) } if len(segs2) < len(segs1) && p2.lastSegment().multi { return combineRelationships(rel, moreSpecific) } return disjoint } // compareSegments determines the relationship between two segments. func compareSegments(s1, s2 segment) relationship { if s1.multi && s2.multi { return equivalent } if s1.multi { return moreGeneral } if s2.multi { return moreSpecific } if s1.wild && s2.wild { return equivalent } if s1.wild { if s2.s == "/" { // A single wildcard doesn't match a trailing slash. return disjoint } return moreGeneral } if s2.wild { if s1.s == "/" { return disjoint } return moreSpecific } // Both literals. if s1.s == s2.s { return equivalent } return disjoint } // combineRelationships determines the overall relationship of two patterns // given the relationships of a partition of the patterns into two parts. // // For example, if p1 is more general than p2 in one way but equivalent // in the other, then it is more general overall. // // Or if p1 is more general in one way and more specific in the other, then // they overlap. func combineRelationships(r1, r2 relationship) relationship { switch r1 { case equivalent: return r2 case disjoint: return disjoint case overlaps: if r2 == disjoint { return disjoint } return overlaps case moreGeneral, moreSpecific: switch r2 { case equivalent: return r1 case inverseRelationship(r1): return overlaps default: return r2 } default: panic(fmt.Sprintf("unknown relationship %q", r1)) } } // If p1 has relationship `r` to p2, then // p2 has inverseRelationship(r) to p1. func inverseRelationship(r relationship) relationship { switch r { case moreSpecific: return moreGeneral case moreGeneral: return moreSpecific default: return r } } // isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard. func isLitOrSingle(seg segment) bool { if seg.wild { return !seg.multi } return seg.s != "/" } // describeConflict returns an explanation of why two patterns conflict. func describeConflict(p1, p2 *pattern) string { mrel := p1.compareMethods(p2) prel := p1.comparePaths(p2) rel := combineRelationships(mrel, prel) if rel == equivalent { return fmt.Sprintf("%s matches the same requests as %s", p1, p2) } if rel != overlaps { panic("describeConflict called with non-conflicting patterns") } if prel == overlaps { return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q. But neither is more specific than the other. %[1]s matches %[4]q, but %[2]s doesn't. %[2]s matches %[5]q, but %[1]s doesn't.`, p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1)) } if mrel == moreGeneral && prel == moreSpecific { return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2) } if mrel == moreSpecific && prel == moreGeneral { return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2) } return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel) } // writeMatchingPath writes to b a path that matches the segments. func writeMatchingPath(b *strings.Builder, segs []segment) { for _, s := range segs { writeSegment(b, s) } } func writeSegment(b *strings.Builder, s segment) { b.WriteByte('/') if !s.multi && s.s != "/" { b.WriteString(s.s) } } // commonPath returns a path that both p1 and p2 match. // It assumes there is such a path. func commonPath(p1, p2 *pattern) string { var b strings.Builder var segs1, segs2 []segment for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] { if s1 := segs1[0]; s1.wild { writeSegment(&b, segs2[0]) } else { writeSegment(&b, s1) } } if len(segs1) > 0 { writeMatchingPath(&b, segs1) } else if len(segs2) > 0 { writeMatchingPath(&b, segs2) } return b.String() } // differencePath returns a path that p1 matches and p2 doesn't. // It assumes there is such a path. func differencePath(p1, p2 *pattern) string { var b strings.Builder var segs1, segs2 []segment for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] { s1 := segs1[0] s2 := segs2[0] if s1.multi && s2.multi { // From here the patterns match the same paths, so we must have found a difference earlier. b.WriteByte('/') return b.String() } if s1.multi && !s2.multi { // s1 ends in a "..." wildcard but s2 does not. // A trailing slash will distinguish them, unless s2 ends in "{$}", // in which case any segment will do; prefer the wildcard name if // it has one. b.WriteByte('/') if s2.s == "/" { if s1.s != "" { b.WriteString(s1.s) } else { b.WriteString("x") } } return b.String() } if !s1.multi && s2.multi { writeSegment(&b, s1) } else if s1.wild && s2.wild { // Both patterns will match whatever we put here; use // the first wildcard name. writeSegment(&b, s1) } else if s1.wild && !s2.wild { // s1 is a wildcard, s2 is a literal. // Any segment other than s2.s will work. // Prefer the wildcard name, but if it's the same as the literal, // tweak the literal. if s1.s != s2.s { writeSegment(&b, s1) } else { b.WriteByte('/') b.WriteString(s2.s + "x") } } else if !s1.wild && s2.wild { writeSegment(&b, s1) } else { // Both are literals. A precondition of this function is that the // patterns overlap, so they must be the same literal. Use it. if s1.s != s2.s { panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s)) } writeSegment(&b, s1) } } if len(segs1) > 0 { // p1 is longer than p2, and p2 does not end in a multi. // Anything that matches the rest of p1 will do. writeMatchingPath(&b, segs1) } else if len(segs2) > 0 { writeMatchingPath(&b, segs2) } return b.String() }