From 127eb439d74ed3f5992221ccc7db1d031c3177dd Mon Sep 17 00:00:00 2001 From: James Cracknell Date: Sun, 8 Mar 2015 22:14:50 -0600 Subject: [PATCH] Exclude HTML tags from Markdown post-processing HTML tags are no longer processed for special links, etc Contents of , and
 are not processed for special links
Processing for special links is done after Markdown conversion
---
 modules/base/markdown.go | 56 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/modules/base/markdown.go b/modules/base/markdown.go
index 4a7adc8a4864..aeb0aa7d5deb 100644
--- a/modules/base/markdown.go
+++ b/modules/base/markdown.go
@@ -7,6 +7,7 @@ package base
 import (
 	"bytes"
 	"fmt"
+	"io"
 	"net/http"
 	"path"
 	"path/filepath"
@@ -16,6 +17,8 @@ import (
 	"github.com/russross/blackfriday"
 
 	"github.com/gogits/gogs/modules/setting"
+
+	"golang.org/x/net/html"
 )
 
 func isletter(c byte) bool {
@@ -217,12 +220,57 @@ func RenderRawMarkdown(body []byte, urlPrefix string) []byte {
 }
 
 func RenderMarkdown(rawBytes []byte, urlPrefix string) []byte {
-	body := RenderSpecialLink(rawBytes, urlPrefix)
-	body = RenderRawMarkdown(body, urlPrefix)
-	body = Sanitizer.SanitizeBytes(body)
-	return body
+	result := RenderRawMarkdown(rawBytes, urlPrefix)
+	result = PostProcessMarkdown(result, urlPrefix)
+	result = Sanitizer.SanitizeBytes(result)
+	return result
 }
 
 func RenderMarkdownString(raw, urlPrefix string) string {
 	return string(RenderMarkdown([]byte(raw), urlPrefix))
 }
+
+func PostProcessMarkdown(rawHtml []byte, urlPrefix string) []byte {
+	var buf bytes.Buffer
+	tokenizer := html.NewTokenizer(bytes.NewReader(rawHtml))
+	for html.ErrorToken != tokenizer.Next() {
+
+			// A parse error has occurred, so return the original input unmodified
+			return rawHtml
+
+		token := tokenizer.Token()
+		switch token.Type {
+			case html.TextToken:
+				text := []byte(token.String())
+				text = RenderSpecialLink(text, urlPrefix)
+
+				buf.Write(text)
+
+			case html.StartTagToken:
+				buf.WriteString(token.String())
+
+				tagName := token.Data
+				// If this is an excluded tag, we skip processing all output until a close tag is encountered
+				if strings.EqualFold("a", tagName) || strings.EqualFold("code", tagName) || strings.EqualFold("pre", tagName) {
+					for html.ErrorToken != tokenizer.Next() {
+						token = tokenizer.Token()
+						// Copy the token to the output verbatim
+						buf.WriteString(token.String())
+						// If this is the close tag, we are done
+						if html.EndTagToken == token.Type && strings.EqualFold(tagName, token.Data) { break }
+					}
+				}
+
+			default:
+				buf.WriteString(token.String())
+		}
+	}
+
+	if io.EOF == tokenizer.Err() {
+		return buf.Bytes()
+	}
+
+	// If we are not at the end of the input, then some other parsing error has occurred, so return
+	// the input verbatim.
+	return rawHtml
+}