From b24e8d38af21fc1857b6aa66351627e3b1761608 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sun, 6 Mar 2022 16:41:54 +0800 Subject: [PATCH] Support ignore all santize for external renderer (#18984) * Support ignore all santize for external renderer * Update docs * Apply suggestions from code review Co-authored-by: silverwind * Fix doc Co-authored-by: silverwind Co-authored-by: 6543 <6543@obermui.de> --- custom/conf/app.example.ini | 2 ++ .../doc/advanced/config-cheat-sheet.en-us.md | 4 +-- .../doc/advanced/config-cheat-sheet.zh-cn.md | 27 ++++++++++++++ modules/markup/csv/csv.go | 5 +++ modules/markup/external/external.go | 5 +++ modules/markup/markdown/markdown.go | 5 +++ modules/markup/orgmode/orgmode.go | 5 +++ modules/markup/renderer.go | 36 +++++++++++++------ modules/setting/markup.go | 14 ++++---- 9 files changed, 84 insertions(+), 19 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index 42d17567159e..ad58e6bda36d 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -2125,6 +2125,8 @@ PATH = ;RENDER_COMMAND = "asciidoc --out-file=- -" ;; Don't pass the file on STDIN, pass the filename as argument instead. ;IS_INPUT_FILE = false +;; Don't filter html tags and attributes if true +;DISABLE_SANITIZER = false ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md index 59b8fc31f0d8..70bc2ee8293e 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md +++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md @@ -1003,13 +1003,13 @@ IS_INPUT_FILE = false command. Multiple extensions needs a comma as splitter. - RENDER\_COMMAND: External command to render all matching extensions. - IS\_INPUT\_FILE: **false** Input is not a standard input but a file param followed `RENDER_COMMAND`. 
+- DISABLE_SANITIZER: **false** Don't filter html tags and attributes if true. Don't change this to true unless you know what that means. Two special environment variables are passed to the render command: - `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links. - `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths. - -Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. +If `DISABLE_SANITIZER` is false, Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc. ```ini [markup.sanitizer.TeX] diff --git a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md index 7db7fe705afe..600e54a85e54 100644 --- a/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md +++ b/docs/content/doc/advanced/config-cheat-sheet.zh-cn.md @@ -318,6 +318,33 @@ IS_INPUT_FILE = false - FILE_EXTENSIONS: 关联的文档的扩展名,多个扩展名用都好分隔。 - RENDER_COMMAND: 工具的命令行命令及参数。 - IS_INPUT_FILE: 输入方式是最后一个参数为文件路径还是从标准输入读取。 +- DISABLE_SANITIZER: **false** 如果为 true 则不过滤 HTML 标签和属性。除非你知道这意味着什么,否则不要设置为 true。 + +以下两个环境变量将会被传递给渲染命令: + +- `GITEA_PREFIX_SRC`:包含当前的`src`路径的URL前缀,可以被用于链接的前缀。 +- `GITEA_PREFIX_RAW`:包含当前的`raw`路径的URL前缀,可以被用于图片的前缀。 + +如果 `DISABLE_SANITIZER` 为 false,则 Gitea 支持自定义渲染 HTML 的净化策略。以下例子将用 pandoc 支持 KaTeX 输出。 + +```ini +[markup.sanitizer.TeX] +; Pandoc renders TeX segments as <span>s with the "math" class, optionally +; with "inline" or "display" classes depending on context. 
+ELEMENT = span +ALLOW_ATTR = class +REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+ +ALLOW_DATA_URI_IMAGES = true +``` + +- `ELEMENT`: 将要被应用到该策略的 HTML 元素,不能为空。 +- `ALLOW_ATTR`: 将要被应用到该策略的属性,不能为空。 +- `REGEXP`: 正则表达式,用来匹配属性的内容。如果为空,则跟属性内容无关。 +- `ALLOW_DATA_URI_IMAGES`: **false** 允许 data uri 图片 (`<img src="data:image/png;base64,..."/>`)。 + +多个净化规则可以被同时定义,只要section名称最后一位不重复即可。如: `[markup.sanitizer.TeX-2]`。 +为了针对一种渲染类型进行一个特殊的净化策略,必须使用形如 `[markup.sanitizer.asciidoc.rule-1]` 的方式来命名 section。 +如果此规则没有匹配到任何渲染类型,它将会被应用到所有的渲染类型。 ## Time (`time`) diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index de32c57a64f1..17c3fe6f4f25 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -46,6 +46,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { } } +// SanitizerDisabled disabled sanitize if return true +func (Renderer) SanitizerDisabled() bool { + return false +} + func writeField(w io.Writer, element, class, field string) error { if _, err := io.WriteString(w, "<"); err != nil { return err diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 3acb601067df..4fdd4315bc3e 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -54,6 +54,11 @@ func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return p.MarkupSanitizerRules } +// SanitizerDisabled disabled sanitize if return true +func (p *Renderer) SanitizerDisabled() bool { + return p.DisableSanitizer +} + func envMark(envName string) string { if runtime.GOOS == "windows" { return "%" + envName + "%" diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index b45b9c8b8ae6..320c2f7f8278 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -221,6 +221,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } +// SanitizerDisabled disabled sanitize if return true +func (Renderer) 
SanitizerDisabled() bool { + return false +} + // Render implements markup.Renderer func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { return render(ctx, input, output) diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 8aa5f45ee244..2f394b992b22 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -47,6 +47,11 @@ func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{} } +// SanitizerDisabled disabled sanitize if return true +func (Renderer) SanitizerDisabled() bool { + return false +} + // Render renders orgmode rawbytes to HTML func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { htmlWriter := org.NewHTMLWriter() diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index 0ac0daaea966..cf8b9bace70b 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -81,6 +81,7 @@ type Renderer interface { Extensions() []string NeedPostProcess() bool SanitizerRules() []setting.MarkupSanitizerRule + SanitizerDisabled() bool Render(ctx *RenderContext, input io.Reader, output io.Writer) error } @@ -127,6 +128,12 @@ func RenderString(ctx *RenderContext, content string) (string, error) { return buf.String(), nil } +type nopCloser struct { + io.Writer +} + +func (nopCloser) Close() error { return nil } + func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { var wg sync.WaitGroup var err error @@ -136,18 +143,25 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr _ = pw.Close() }() - pr2, pw2 := io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() + var pr2 io.ReadCloser + var pw2 io.WriteCloser - wg.Add(1) - go func() { - err = SanitizeReader(pr2, renderer.Name(), output) - _ = pr2.Close() - wg.Done() - }() + if !renderer.SanitizerDisabled() { + pr2, pw2 = io.Pipe() + 
defer func() { + _ = pr2.Close() + _ = pw2.Close() + }() + + wg.Add(1) + go func() { + err = SanitizeReader(pr2, renderer.Name(), output) + _ = pr2.Close() + wg.Done() + }() + } else { + pw2 = nopCloser{output} + } wg.Add(1) go func() { diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 09b86b9b1a40..5fb6af683833 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -29,6 +29,7 @@ type MarkupRenderer struct { IsInputFile bool NeedPostProcess bool MarkupSanitizerRules []MarkupSanitizerRule + DisableSanitizer bool } // MarkupSanitizerRule defines the policy for whitelisting attributes on @@ -144,11 +145,12 @@ func newMarkupRenderer(name string, sec *ini.Section) { } ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{ - Enabled: sec.Key("ENABLED").MustBool(false), - MarkupName: name, - FileExtensions: exts, - Command: command, - IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), - NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true), + Enabled: sec.Key("ENABLED").MustBool(false), + MarkupName: name, + FileExtensions: exts, + Command: command, + IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), + NeedPostProcess: sec.Key("NEED_POSTPROCESS").MustBool(true), + DisableSanitizer: sec.Key("DISABLE_SANITIZER").MustBool(false), }) }