forked from gitea/gitea
Refactor markup rendering to accept general "protocol:" prefix (#29276)
Follow #29024

Major changes:

* refactor validLinksPattern to fullURLPattern and add comments, now it accepts "protocol:" prefix
* rename `IsLink*` to `IsFullURL*`, and remove unnecessary "mailto:" check
* fix some comments (by the way)
* rename EmojiShortCodeRegex -> emojiShortCodeRegex (by the way)
This commit is contained in:
parent
4e536edaea
commit
6130522aa8
|
@ -53,38 +53,38 @@ var (
|
||||||
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
|
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
|
||||||
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
|
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
|
||||||
|
|
||||||
// anySHA1Pattern splits url containing SHA into parts
|
// anyHashPattern splits url containing SHA into parts
|
||||||
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
|
anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
|
||||||
|
|
||||||
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
|
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
|
||||||
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
|
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
|
||||||
|
|
||||||
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
|
// fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..."
|
||||||
|
fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`)
|
||||||
|
|
||||||
// While this email regex is definitely not perfect and I'm sure you can come up
|
// emailRegex is definitely not perfect with edge cases,
|
||||||
// with edge cases, it is still accepted by the CommonMark specification, as
|
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
|
||||||
// well as the HTML5 spec:
|
|
||||||
// http://spec.commonmark.org/0.28/#email-address
|
// http://spec.commonmark.org/0.28/#email-address
|
||||||
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
|
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
|
||||||
emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
|
emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
|
||||||
|
|
||||||
// blackfriday extensions create IDs like fn:user-content-footnote
|
// blackfridayExtRegex is for blackfriday extensions, which create IDs like fn:user-content-footnote
|
||||||
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
|
blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
|
||||||
|
|
||||||
// EmojiShortCodeRegex find emoji by alias like :smile:
|
// emojiShortCodeRegex finds emoji by alias like :smile:
|
||||||
EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
|
emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
|
||||||
)
|
)
|
||||||
|
|
||||||
// CSS class for action keywords (e.g. "closes: #1")
|
// CSS class for action keywords (e.g. "closes: #1")
|
||||||
const keywordClass = "issue-keyword"
|
const keywordClass = "issue-keyword"
|
||||||
|
|
||||||
// IsLink reports whether link fits valid format.
|
// IsFullURLBytes reports whether link fits valid format.
|
||||||
func IsLink(link []byte) bool {
|
func IsFullURLBytes(link []byte) bool {
|
||||||
return validLinksPattern.Match(link)
|
return fullURLPattern.Match(link)
|
||||||
}
|
}
|
||||||
|
|
||||||
func IsLinkStr(link string) bool {
|
func IsFullURLString(link string) bool {
|
||||||
return validLinksPattern.MatchString(link)
|
return fullURLPattern.MatchString(link)
|
||||||
}
|
}
|
||||||
|
|
||||||
// regexp for full links to issues/pulls
|
// regexp for full links to issues/pulls
|
||||||
|
@ -399,7 +399,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) {
|
||||||
if attr.Key != "src" {
|
if attr.Key != "src" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
|
if len(attr.Val) > 0 && !IsFullURLString(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
|
||||||
attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val)
|
attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val)
|
||||||
}
|
}
|
||||||
attr.Val = camoHandleLink(attr.Val)
|
attr.Val = camoHandleLink(attr.Val)
|
||||||
|
@ -650,7 +650,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
|
if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
|
||||||
// There is no equal in this argument; this is a mandatory arg
|
// There is no equal in this argument; this is a mandatory arg
|
||||||
if props["name"] == "" {
|
if props["name"] == "" {
|
||||||
if IsLinkStr(v) {
|
if IsFullURLString(v) {
|
||||||
// If we clearly see it is a link, we save it so
|
// If we clearly see it is a link, we save it so
|
||||||
|
|
||||||
// But first we need to ensure, that if both mandatory args provided
|
// But first we need to ensure, that if both mandatory args provided
|
||||||
|
@ -725,7 +725,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
DataAtom: atom.A,
|
DataAtom: atom.A,
|
||||||
}
|
}
|
||||||
childNode.Parent = linkNode
|
childNode.Parent = linkNode
|
||||||
absoluteLink := IsLinkStr(link)
|
absoluteLink := IsFullURLString(link)
|
||||||
if !absoluteLink {
|
if !absoluteLink {
|
||||||
if image {
|
if image {
|
||||||
link = strings.ReplaceAll(link, " ", "+")
|
link = strings.ReplaceAll(link, " ", "+")
|
||||||
|
@ -1059,7 +1059,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
|
||||||
start := 0
|
start := 0
|
||||||
next := node.NextSibling
|
next := node.NextSibling
|
||||||
for node != nil && node != next && start < len(node.Data) {
|
for node != nil && node != next && start < len(node.Data) {
|
||||||
m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
|
m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -204,6 +204,15 @@ func TestRender_links(t *testing.T) {
|
||||||
test(
|
test(
|
||||||
"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download",
|
"magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download",
|
||||||
`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download</a></p>`)
|
`<p><a href="magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download" rel="nofollow">magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download</a></p>`)
|
||||||
|
test(
|
||||||
|
`[link](https://example.com)`,
|
||||||
|
`<p><a href="https://example.com" rel="nofollow">link</a></p>`)
|
||||||
|
test(
|
||||||
|
`[link](mailto:test@example.com)`,
|
||||||
|
`<p><a href="mailto:test@example.com" rel="nofollow">link</a></p>`)
|
||||||
|
test(
|
||||||
|
`[link](javascript:xss)`,
|
||||||
|
`<p>link</p>`)
|
||||||
|
|
||||||
// Test that should *not* be turned into URL
|
// Test that should *not* be turned into URL
|
||||||
test(
|
test(
|
||||||
|
@ -673,3 +682,9 @@ func TestIssue18471(t *testing.T) {
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
|
assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsFullURL(t *testing.T) {
|
||||||
|
assert.True(t, markup.IsFullURLString("https://example.com"))
|
||||||
|
assert.True(t, markup.IsFullURLString("mailto:test@example.com"))
|
||||||
|
assert.False(t, markup.IsFullURLString("/foo:bar"))
|
||||||
|
}
|
||||||
|
|
|
@ -26,8 +26,6 @@ import (
|
||||||
"github.com/yuin/goldmark/util"
|
"github.com/yuin/goldmark/util"
|
||||||
)
|
)
|
||||||
|
|
||||||
var byteMailto = []byte("mailto:")
|
|
||||||
|
|
||||||
// ASTTransformer is a default transformer of the goldmark tree.
|
// ASTTransformer is a default transformer of the goldmark tree.
|
||||||
type ASTTransformer struct{}
|
type ASTTransformer struct{}
|
||||||
|
|
||||||
|
@ -84,7 +82,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
|
||||||
// 2. If they're not wrapped with a link they need a link wrapper
|
// 2. If they're not wrapped with a link they need a link wrapper
|
||||||
|
|
||||||
// Check if the destination is a real link
|
// Check if the destination is a real link
|
||||||
if len(v.Destination) > 0 && !markup.IsLink(v.Destination) {
|
if len(v.Destination) > 0 && !markup.IsFullURLBytes(v.Destination) {
|
||||||
v.Destination = []byte(giteautil.URLJoin(
|
v.Destination = []byte(giteautil.URLJoin(
|
||||||
ctx.Links.ResolveMediaLink(ctx.IsWiki),
|
ctx.Links.ResolveMediaLink(ctx.IsWiki),
|
||||||
strings.TrimLeft(string(v.Destination), "/"),
|
strings.TrimLeft(string(v.Destination), "/"),
|
||||||
|
@ -130,23 +128,17 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
|
||||||
case *ast.Link:
|
case *ast.Link:
|
||||||
// Links need their href to munged to be a real value
|
// Links need their href to munged to be a real value
|
||||||
link := v.Destination
|
link := v.Destination
|
||||||
if len(link) > 0 && !markup.IsLink(link) &&
|
isAnchorFragment := len(link) > 0 && link[0] == '#'
|
||||||
link[0] != '#' && !bytes.HasPrefix(link, byteMailto) {
|
if !isAnchorFragment && !markup.IsFullURLBytes(link) {
|
||||||
// special case: this is not a link, a hash link or a mailto:, so it's a
|
base := ctx.Links.Base
|
||||||
// relative URL
|
|
||||||
|
|
||||||
var base string
|
|
||||||
if ctx.IsWiki {
|
if ctx.IsWiki {
|
||||||
base = ctx.Links.WikiLink()
|
base = ctx.Links.WikiLink()
|
||||||
} else if ctx.Links.HasBranchInfo() {
|
} else if ctx.Links.HasBranchInfo() {
|
||||||
base = ctx.Links.SrcLink()
|
base = ctx.Links.SrcLink()
|
||||||
} else {
|
|
||||||
base = ctx.Links.Base
|
|
||||||
}
|
}
|
||||||
|
|
||||||
link = []byte(giteautil.URLJoin(base, string(link)))
|
link = []byte(giteautil.URLJoin(base, string(link)))
|
||||||
}
|
}
|
||||||
if len(link) > 0 && link[0] == '#' {
|
if isAnchorFragment {
|
||||||
link = []byte("#user-content-" + string(link)[1:])
|
link = []byte("#user-content-" + string(link)[1:])
|
||||||
}
|
}
|
||||||
v.Destination = link
|
v.Destination = link
|
||||||
|
|
|
@ -136,8 +136,7 @@ type Writer struct {
|
||||||
func (r *Writer) resolveLink(kind, link string) string {
|
func (r *Writer) resolveLink(kind, link string) string {
|
||||||
link = strings.TrimPrefix(link, "file:")
|
link = strings.TrimPrefix(link, "file:")
|
||||||
if !strings.HasPrefix(link, "#") && // not a URL fragment
|
if !strings.HasPrefix(link, "#") && // not a URL fragment
|
||||||
!markup.IsLinkStr(link) && // not an absolute URL
|
!markup.IsFullURLString(link) {
|
||||||
!strings.HasPrefix(link, "mailto:") {
|
|
||||||
if kind == "regular" {
|
if kind == "regular" {
|
||||||
// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]"
|
// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]"
|
||||||
// so we need to try to guess the link kind again here
|
// so we need to try to guess the link kind again here
|
||||||
|
|
Loading…
Reference in New Issue