forked from gitea/gitea
134 lines
3.2 KiB
Go
134 lines
3.2 KiB
Go
// Copyright 2019 The Gitea Authors. All rights reserved.
|
|
// Use of this source code is governed by a MIT-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package mdstripper
|
|
|
|
import (
|
|
"bytes"
|
|
"sync"
|
|
|
|
"io"
|
|
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/markup/common"
|
|
|
|
"github.com/yuin/goldmark"
|
|
"github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/extension"
|
|
"github.com/yuin/goldmark/parser"
|
|
"github.com/yuin/goldmark/renderer"
|
|
"github.com/yuin/goldmark/renderer/html"
|
|
"github.com/yuin/goldmark/text"
|
|
)
|
|
|
|
// stripRenderer collects the plain-text content and the link targets of a
// parsed markdown document. Text is written to the io.Writer handed to
// Render, while links are accumulated on the renderer itself.
type stripRenderer struct {
	// links holds every link target recorded by processLink, in the order
	// the links were visited.
	links []string
	// empty stays true until processString has written its first chunk of
	// text; while it is true doubleSpace writes nothing, so the output never
	// starts with a separator.
	empty bool
}
|
|
|
|
func (r *stripRenderer) Render(w io.Writer, source []byte, doc ast.Node) error {
|
|
return ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
|
|
if !entering {
|
|
return ast.WalkContinue, nil
|
|
}
|
|
switch v := n.(type) {
|
|
case *ast.Text:
|
|
if !v.IsRaw() {
|
|
_, prevSibIsText := n.PreviousSibling().(*ast.Text)
|
|
coalesce := prevSibIsText
|
|
r.processString(
|
|
w,
|
|
v.Text(source),
|
|
coalesce)
|
|
if v.SoftLineBreak() {
|
|
r.doubleSpace(w)
|
|
}
|
|
}
|
|
return ast.WalkContinue, nil
|
|
case *ast.Link:
|
|
r.processLink(w, v.Destination)
|
|
return ast.WalkSkipChildren, nil
|
|
case *ast.AutoLink:
|
|
r.processLink(w, v.URL(source))
|
|
return ast.WalkSkipChildren, nil
|
|
}
|
|
return ast.WalkContinue, nil
|
|
})
|
|
}
|
|
|
|
func (r *stripRenderer) doubleSpace(w io.Writer) {
|
|
if !r.empty {
|
|
_, _ = w.Write([]byte{'\n'})
|
|
}
|
|
}
|
|
|
|
func (r *stripRenderer) processString(w io.Writer, text []byte, coalesce bool) {
|
|
// Always break-up words
|
|
if !coalesce {
|
|
r.doubleSpace(w)
|
|
}
|
|
_, _ = w.Write(text)
|
|
r.empty = false
|
|
}
|
|
|
|
func (r *stripRenderer) processLink(w io.Writer, link []byte) {
|
|
// Links are processed out of band
|
|
r.links = append(r.links, string(link))
|
|
}
|
|
|
|
// GetLinks returns the list of link data collected while parsing
|
|
func (r *stripRenderer) GetLinks() []string {
|
|
return r.links
|
|
}
|
|
|
|
// AddOptions adds given option to this renderer.
|
|
func (r *stripRenderer) AddOptions(...renderer.Option) {
|
|
// no-op
|
|
}
|
|
|
|
// StripMarkdown parses markdown content by removing all markup and code blocks
|
|
// in order to extract links and other references
|
|
func StripMarkdown(rawBytes []byte) (string, []string) {
|
|
buf, links := StripMarkdownBytes(rawBytes)
|
|
return string(buf), links
|
|
}
|
|
|
|
var stripParser parser.Parser
|
|
var once = sync.Once{}
|
|
|
|
// StripMarkdownBytes parses markdown content by removing all markup and code blocks
|
|
// in order to extract links and other references
|
|
func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
|
|
once.Do(func() {
|
|
gdMarkdown := goldmark.New(
|
|
goldmark.WithExtensions(extension.Table,
|
|
extension.Strikethrough,
|
|
extension.TaskList,
|
|
extension.DefinitionList,
|
|
common.FootnoteExtension,
|
|
common.Linkify,
|
|
),
|
|
goldmark.WithParserOptions(
|
|
parser.WithAttribute(),
|
|
parser.WithAutoHeadingID(),
|
|
),
|
|
goldmark.WithRendererOptions(
|
|
html.WithUnsafe(),
|
|
),
|
|
)
|
|
stripParser = gdMarkdown.Parser()
|
|
})
|
|
stripper := &stripRenderer{
|
|
links: make([]string, 0, 10),
|
|
empty: true,
|
|
}
|
|
reader := text.NewReader(rawBytes)
|
|
doc := stripParser.Parse(reader)
|
|
var buf bytes.Buffer
|
|
if err := stripper.Render(&buf, rawBytes, doc); err != nil {
|
|
log.Error("Unable to strip: %v", err)
|
|
}
|
|
return buf.Bytes(), stripper.GetLinks()
|
|
}
|