Fix several render issues (#14986)
* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
parent 044cd4d016
commit ed31ddc29a
					 6 changed files with 211 additions and 61 deletions
				
			
		|  | @ -30,6 +30,9 @@ var ( | |||
| 	// aliasMap provides a map of the alias to its emoji data.
 | ||||
| 	aliasMap map[string]int | ||||
| 
 | ||||
| 	// emptyReplacer is the string replacer for emoji codes.
 | ||||
| 	emptyReplacer *strings.Replacer | ||||
| 
 | ||||
| 	// codeReplacer is the string replacer for emoji codes.
 | ||||
| 	codeReplacer *strings.Replacer | ||||
| 
 | ||||
|  | @ -49,6 +52,7 @@ func loadMap() { | |||
| 
 | ||||
| 		// process emoji codes and aliases
 | ||||
| 		codePairs := make([]string, 0) | ||||
| 		emptyPairs := make([]string, 0) | ||||
| 		aliasPairs := make([]string, 0) | ||||
| 
 | ||||
| 		// sort from largest to small so we match combined emoji first
 | ||||
|  | @ -64,6 +68,7 @@ func loadMap() { | |||
| 			// setup codes
 | ||||
| 			codeMap[e.Emoji] = i | ||||
| 			codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":") | ||||
| 			emptyPairs = append(emptyPairs, e.Emoji, e.Emoji) | ||||
| 
 | ||||
| 			// setup aliases
 | ||||
| 			for _, a := range e.Aliases { | ||||
|  | @ -77,6 +82,7 @@ func loadMap() { | |||
| 		} | ||||
| 
 | ||||
| 		// create replacers
 | ||||
| 		emptyReplacer = strings.NewReplacer(emptyPairs...) | ||||
| 		codeReplacer = strings.NewReplacer(codePairs...) | ||||
| 		aliasReplacer = strings.NewReplacer(aliasPairs...) | ||||
| 	}) | ||||
|  | @ -127,38 +133,53 @@ func ReplaceAliases(s string) string { | |||
| 	return aliasReplacer.Replace(s) | ||||
| } | ||||
| 
 | ||||
| type rememberSecondWriteWriter struct { | ||||
| 	pos        int | ||||
| 	idx        int | ||||
| 	end        int | ||||
| 	writecount int | ||||
| } | ||||
| 
 | ||||
| func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) { | ||||
| 	n.writecount++ | ||||
| 	if n.writecount == 2 { | ||||
| 		n.idx = n.pos | ||||
| 		n.end = n.pos + len(p) | ||||
| 	} | ||||
| 	n.pos += len(p) | ||||
| 	return len(p), nil | ||||
| } | ||||
| 
 | ||||
| func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) { | ||||
| 	n.writecount++ | ||||
| 	if n.writecount == 2 { | ||||
| 		n.idx = n.pos | ||||
| 		n.end = n.pos + len(s) | ||||
| 	} | ||||
| 	n.pos += len(s) | ||||
| 	return len(s), nil | ||||
| } | ||||
| 
 | ||||
| // FindEmojiSubmatchIndex returns index pair of longest emoji in a string
 | ||||
| func FindEmojiSubmatchIndex(s string) []int { | ||||
| 	loadMap() | ||||
| 	found := make(map[int]int) | ||||
| 	keys := make([]int, 0) | ||||
| 	secondWriteWriter := rememberSecondWriteWriter{} | ||||
| 
 | ||||
| 	//see if there are any emoji in string before looking for position of specific ones
 | ||||
| 	//no performance difference when there is a match but 10x faster when there are not
 | ||||
| 	if s == ReplaceCodes(s) { | ||||
| 	// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
 | ||||
| 	// we can be lazy here.
 | ||||
| 	//
 | ||||
| 	// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
 | ||||
| 	// submatch is simply the second thing that is written to WriteString in the writer.
 | ||||
| 	//
 | ||||
| 	// Therefore we can simply take the index of the second write as our first emoji
 | ||||
| 	//
 | ||||
| 	// FIXME: just copy the trie implementation from strings.NewReplacer
 | ||||
| 	_, _ = emptyReplacer.WriteString(&secondWriteWriter, s) | ||||
| 
 | ||||
| 	// if we wrote less than twice then we never "replaced"
 | ||||
| 	if secondWriteWriter.writecount < 2 { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	// get index of first emoji occurrence while also checking for longest combination
 | ||||
| 	for j := range GemojiData { | ||||
| 		i := strings.Index(s, GemojiData[j].Emoji) | ||||
| 		if i != -1 { | ||||
| 			if _, ok := found[i]; !ok { | ||||
| 				if len(keys) == 0 || i < keys[0] { | ||||
| 					found[i] = j | ||||
| 					keys = []int{i} | ||||
| 				} | ||||
| 				if i == 0 { | ||||
| 					break | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if len(keys) > 0 { | ||||
| 		index := keys[0] | ||||
| 		return []int{index, index + len(GemojiData[found[index]].Emoji)} | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| 	return []int{secondWriteWriter.idx, secondWriteWriter.end} | ||||
| } | ||||
|  |  | |||
|  | @ -8,6 +8,8 @@ package emoji | |||
| import ( | ||||
| 	"reflect" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/assert" | ||||
| ) | ||||
| 
 | ||||
| func TestDumpInfo(t *testing.T) { | ||||
|  | @ -65,3 +67,34 @@ func TestReplacers(t *testing.T) { | |||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestFindEmojiSubmatchIndex(t *testing.T) { | ||||
| 	type testcase struct { | ||||
| 		teststring string | ||||
| 		expected   []int | ||||
| 	} | ||||
| 
 | ||||
| 	testcases := []testcase{ | ||||
| 		{ | ||||
| 			"\U0001f44d", | ||||
| 			[]int{0, len("\U0001f44d")}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"\U0001f44d +1 \U0001f44d \U0001f37a", | ||||
| 			[]int{0, 4}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			" \U0001f44d", | ||||
| 			[]int{1, 1 + len("\U0001f44d")}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			string([]byte{'\u0001'}) + "\U0001f44d", | ||||
| 			[]int{1, 1 + len("\U0001f44d")}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, kase := range testcases { | ||||
| 		actual := FindEmojiSubmatchIndex(kase.teststring) | ||||
| 		assert.Equal(t, kase.expected, actual) | ||||
| 	} | ||||
| } | ||||
|  |  | |||
|  | @ -313,41 +313,27 @@ func RenderEmoji( | |||
| 	return ctx.postProcess(rawHTML) | ||||
| } | ||||
| 
 | ||||
| var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`) | ||||
| var nulCleaner = strings.NewReplacer("\000", "") | ||||
| 
 | ||||
| func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) { | ||||
| 	if ctx.procs == nil { | ||||
| 		ctx.procs = defaultProcessors | ||||
| 	} | ||||
| 
 | ||||
| 	// give a generous extra 50 bytes
 | ||||
| 	res := make([]byte, 0, len(rawHTML)+50) | ||||
| 
 | ||||
| 	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50)) | ||||
| 	// prepend "<html><body>"
 | ||||
| 	res = append(res, "<html><body>"...) | ||||
| 	_, _ = res.WriteString("<html><body>") | ||||
| 
 | ||||
| 	// Strip out nuls - they're always invalid
 | ||||
| 	start := bytes.IndexByte(rawHTML, '\000') | ||||
| 	if start >= 0 { | ||||
| 		res = append(res, rawHTML[:start]...) | ||||
| 		start++ | ||||
| 		for start < len(rawHTML) { | ||||
| 			end := bytes.IndexByte(rawHTML[start:], '\000') | ||||
| 			if end < 0 { | ||||
| 				res = append(res, rawHTML[start:]...) | ||||
| 				break | ||||
| 			} else if end > 0 { | ||||
| 				res = append(res, rawHTML[start:start+end]...) | ||||
| 			} | ||||
| 			start += end + 1 | ||||
| 		} | ||||
| 	} else { | ||||
| 		res = append(res, rawHTML...) | ||||
| 	} | ||||
| 	_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("<$1")))) | ||||
| 
 | ||||
| 	// close the tags
 | ||||
| 	res = append(res, "</body></html>"...) | ||||
| 	_, _ = res.WriteString("</body></html>") | ||||
| 
 | ||||
| 	// parse the HTML
 | ||||
| 	nodes, err := html.ParseFragment(bytes.NewReader(res), nil) | ||||
| 	nodes, err := html.ParseFragment(res, nil) | ||||
| 	if err != nil { | ||||
| 		return nil, &postProcessError{"invalid HTML", err} | ||||
| 	} | ||||
|  | @ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) { | |||
| 	// Create buffer in which the data will be placed again. We know that the
 | ||||
| 	// length will be at least that of res; to spare a few alloc+copy, we
 | ||||
| 	// reuse res, resetting its length to 0.
 | ||||
| 	buf := bytes.NewBuffer(res[:0]) | ||||
| 	res.Reset() | ||||
| 	// Render everything to buf.
 | ||||
| 	for _, node := range nodes { | ||||
| 		err = html.Render(buf, node) | ||||
| 		err = html.Render(res, node) | ||||
| 		if err != nil { | ||||
| 			return nil, &postProcessError{"error rendering processed HTML", err} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// Everything done successfully, return parsed data.
 | ||||
| 	return buf.Bytes(), nil | ||||
| 	return res.Bytes(), nil | ||||
| } | ||||
| 
 | ||||
| func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) { | ||||
|  |  | |||
|  | @ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa | |||
| 					header.ID = util.BytesToReadOnlyString(id.([]byte)) | ||||
| 				} | ||||
| 				toc = append(toc, header) | ||||
| 			} else { | ||||
| 				for _, attr := range v.Attributes() { | ||||
| 					if _, ok := attr.Value.([]byte); !ok { | ||||
| 						v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value))) | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
| 		case *ast.Image: | ||||
| 			// Images need two things:
 | ||||
|  |  | |||
|  | @ -6,7 +6,8 @@ | |||
| package markdown | ||||
| 
 | ||||
| import ( | ||||
| 	"bytes" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 
 | ||||
|  | @ -18,7 +19,7 @@ import ( | |||
| 
 | ||||
| 	chromahtml "github.com/alecthomas/chroma/formatters/html" | ||||
| 	"github.com/yuin/goldmark" | ||||
| 	"github.com/yuin/goldmark-highlighting" | ||||
| 	highlighting "github.com/yuin/goldmark-highlighting" | ||||
| 	meta "github.com/yuin/goldmark-meta" | ||||
| 	"github.com/yuin/goldmark/extension" | ||||
| 	"github.com/yuin/goldmark/parser" | ||||
|  | @ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey() | |||
| var isWikiKey = parser.NewContextKey() | ||||
| var renderMetasKey = parser.NewContextKey() | ||||
| 
 | ||||
| type closesWithError interface { | ||||
| 	io.WriteCloser | ||||
| 	CloseWithError(err error) error | ||||
| } | ||||
| 
 | ||||
| type limitWriter struct { | ||||
| 	w     closesWithError | ||||
| 	sum   int64 | ||||
| 	limit int64 | ||||
| } | ||||
| 
 | ||||
| // Write implements the standard Write interface:
 | ||||
| func (l *limitWriter) Write(data []byte) (int, error) { | ||||
| 	leftToWrite := l.limit - l.sum | ||||
| 	if leftToWrite < int64(len(data)) { | ||||
| 		n, err := l.w.Write(data[:leftToWrite]) | ||||
| 		l.sum += int64(n) | ||||
| 		if err != nil { | ||||
| 			return n, err | ||||
| 		} | ||||
| 		_ = l.w.Close() | ||||
| 		return n, fmt.Errorf("Rendered content too large - truncating render") | ||||
| 	} | ||||
| 	n, err := l.w.Write(data) | ||||
| 	l.sum += int64(n) | ||||
| 	return n, err | ||||
| } | ||||
| 
 | ||||
| // Close closes the writer
 | ||||
| func (l *limitWriter) Close() error { | ||||
| 	return l.w.Close() | ||||
| } | ||||
| 
 | ||||
| // CloseWithError closes the writer
 | ||||
| func (l *limitWriter) CloseWithError(err error) error { | ||||
| 	return l.w.CloseWithError(err) | ||||
| } | ||||
| 
 | ||||
| // NewGiteaParseContext creates a parser.Context with the gitea context set
 | ||||
| func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context { | ||||
| 	pc := parser.NewContext(parser.WithIDs(newPrefixedIDs())) | ||||
|  | @ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool | |||
| 	return pc | ||||
| } | ||||
| 
 | ||||
| // render renders Markdown to HTML without handling special links.
 | ||||
| func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte { | ||||
| // actualRender renders Markdown to HTML without handling special links.
 | ||||
| func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte { | ||||
| 	once.Do(func() { | ||||
| 		converter = goldmark.New( | ||||
| 			goldmark.WithExtensions(extension.Table, | ||||
|  | @ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown | |||
| 
 | ||||
| 	}) | ||||
| 
 | ||||
| 	pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown) | ||||
| 	var buf bytes.Buffer | ||||
| 	if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil { | ||||
| 		log.Error("Unable to render: %v", err) | ||||
| 	rd, wr := io.Pipe() | ||||
| 	defer func() { | ||||
| 		_ = rd.Close() | ||||
| 		_ = wr.Close() | ||||
| 	}() | ||||
| 
 | ||||
| 	lw := &limitWriter{ | ||||
| 		w:     wr, | ||||
| 		limit: setting.UI.MaxDisplayFileSize * 3, | ||||
| 	} | ||||
| 	return markup.SanitizeReader(&buf).Bytes() | ||||
| 
 | ||||
| 	// FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
 | ||||
| 	go func() { | ||||
| 		defer func() { | ||||
| 			err := recover() | ||||
| 			if err == nil { | ||||
| 				return | ||||
| 			} | ||||
| 
 | ||||
| 			log.Warn("Unable to render markdown due to panic in goldmark: %v", err) | ||||
| 			if log.IsDebug() { | ||||
| 				log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2))) | ||||
| 			} | ||||
| 			_ = lw.CloseWithError(fmt.Errorf("%v", err)) | ||||
| 		}() | ||||
| 
 | ||||
| 		pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown) | ||||
| 		if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil { | ||||
| 			log.Error("Unable to render: %v", err) | ||||
| 			_ = lw.CloseWithError(err) | ||||
| 			return | ||||
| 		} | ||||
| 		_ = lw.Close() | ||||
| 	}() | ||||
| 	return markup.SanitizeReader(rd).Bytes() | ||||
| } | ||||
| 
 | ||||
| func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) { | ||||
| 	defer func() { | ||||
| 		err := recover() | ||||
| 		if err == nil { | ||||
| 			return | ||||
| 		} | ||||
| 
 | ||||
| 		log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes") | ||||
| 		if log.IsDebug() { | ||||
| 			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2))) | ||||
| 		} | ||||
| 		ret = markup.SanitizeBytes(body) | ||||
| 	}() | ||||
| 	return actualRender(body, urlPrefix, metas, wikiMarkdown) | ||||
| } | ||||
| 
 | ||||
| var ( | ||||
|  |  | |||
|  | @ -309,6 +309,25 @@ func TestRender_RenderParagraphs(t *testing.T) { | |||
| 	test(t, "A\n\n\nB\nC\n", 2) | ||||
| } | ||||
| 
 | ||||
| func TestMarkdownRenderRaw(t *testing.T) { | ||||
| 	testcases := [][]byte{ | ||||
| 		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
 | ||||
| 			0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60, | ||||
| 			0x5b, | ||||
| 		}, | ||||
| 		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
 | ||||
| 			0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60, | ||||
| 		}, | ||||
| 		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
 | ||||
| 			0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, testcase := range testcases { | ||||
| 		_ = RenderRaw(testcase, "", false) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func TestRenderSiblingImages_Issue12925(t *testing.T) { | ||||
| 	testcase := ` | ||||
|  | ||||
|  | @ -318,4 +337,5 @@ func TestRenderSiblingImages_Issue12925(t *testing.T) { | |||
| ` | ||||
| 	res := string(RenderRaw([]byte(testcase), "", false)) | ||||
| 	assert.Equal(t, expected, res) | ||||
| 
 | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue