gitea/vendor/github.com/yuin/goldmark/parser/html_block.go

package parser

import (
	"bytes"
	"regexp"
	"strings"

	"github.com/yuin/goldmark/ast"
	"github.com/yuin/goldmark/text"
	"github.com/yuin/goldmark/util"
)

// allowedBlockTags is the set of lower-case tag names for which Open creates
// a type 6 HTML block. Callers in this file only test for key presence, and
// every stored value is true.
var allowedBlockTags = func() map[string]bool {
	names := []string{
		"address", "article", "aside",
		"base", "basefont", "blockquote", "body",
		"caption", "center", "col", "colgroup",
		"dd", "details", "dialog", "dir", "div", "dl", "dt",
		"fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
		"h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
		"iframe",
		"legend", "li", "link",
		"main", "menu", "menuitem", "meta",
		"nav", "noframes",
		"ol", "optgroup", "option",
		"p", "param",
		"section", "source", "summary",
		"table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
		"ul",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()

// Opening patterns and closing markers for HTML block types 1 to 6. Every
// opening pattern tolerates up to three leading spaces. The numbering
// presumably follows the CommonMark HTML block start conditions.
var (
	// Type 1: a <script>, <pre> or <style> open tag (case-insensitive); the
	// block ends on a line containing the matching kind of closing tag.
	htmlBlockType1OpenRegexp  = regexp.MustCompile(`(?i)^[ ]{0,3}<(script|pre|style)(?:\s.*|>.*|/>.*|)\n?$`)
	htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.*</(?:script|pre|style)>.*`)

	// Type 2: opened by "<!--", closed by "-->".
	htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
	htmlBlockType2Close      = []byte("-->")

	// Type 3: opened by "<?", closed by "?>".
	htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
	htmlBlockType3Close      = []byte("?>")

	// Type 4: opened by "<!" followed by upper-case ASCII letters, closed by ">".
	htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`)
	htmlBlockType4Close      = []byte(">")

	// Type 5: opened by "<![CDATA[", closed by "]]>".
	htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
	htmlBlockType5Close      = []byte("]]>")

	// Type 6: an open or closing tag; submatch 1 captures the tag name, which
	// the caller checks against allowedBlockTags.
	htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.*|>.*|/>.*|)\n?$`)
)

// htmlBlockType7Regexp matches a line that consists of exactly one complete
// open or closing tag, preceded by at most three spaces and followed only by
// whitespace. Submatch 1 is the "/" of a closing tag (absent for an open
// tag), submatch 2 is the tag name and submatch 3 is the attribute text.
//
// The tag terminator is the non-capturing group "(?:>|/>)". It used to be
// spelled "(:?>|/>)", which is a capturing group that also accepts a stray
// ':' before '>', so input such as "<a:>" was recognised as a tag.
var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `*)(?:>|/>)\s*\n?$`)

// htmlBlockParser recognises raw HTML blocks. It has no fields, so a single
// value can be shared by every caller.
type htmlBlockParser struct{}

// defaultHTMLBlockParser is the shared instance handed out by
// NewHTMLBlockParser.
var defaultHTMLBlockParser = new(htmlBlockParser)

// NewHTMLBlockParser returns a BlockParser that can parse HTML blocks.
// It does not allocate: every call hands back the same package-level
// defaultHTMLBlockParser value.
func NewHTMLBlockParser() BlockParser {
	return defaultHTMLBlockParser
}

// Trigger returns the bytes this parser declares as its triggers: the single
// byte '<'. A fresh slice is returned on every call.
func (b *htmlBlockParser) Trigger() []byte {
	return []byte("<")
}

// Open tries to start an HTML block on the line the reader currently peeks.
//
// Nothing is opened unless the byte at the context's block offset is '<'.
// The line is tested against the opening patterns of block types 1 to 5 in
// that order. If none match, it is tested against the type 7 pattern (one
// complete tag alone on the line), and as a last resort against the looser
// type 6 pattern. When a block is created, the peeked segment becomes its
// first line and the reader is advanced by one less than the segment length.
//
// The returned state is always NoChildren; the node is nil when no HTML
// block starts here.
func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {
	line, segment := reader.PeekLine()
	last := pc.LastOpenedBlock().Node
	if pos := pc.BlockOffset(); pos < 0 || line[pos] != '<' {
		return nil, NoChildren
	}

	var block *ast.HTMLBlock
	switch {
	case htmlBlockType1OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType1)
	case htmlBlockType2OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType2)
	case htmlBlockType3OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType3)
	case htmlBlockType4OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType4)
	case htmlBlockType5OpenRegexp.Match(line):
		block = ast.NewHTMLBlock(ast.HTMLBlockType5)
	default:
		if m := htmlBlockType7Regexp.FindSubmatchIndex(line); m != nil {
			// Submatch 1 is the optional "/", 2 the tag name, 3 the attributes.
			closing := m[2] > -1 && string(line[m[2]:m[3]]) == "/"
			withAttrs := m[6] != m[7]
			name := strings.ToLower(string(line[m[4]:m[5]]))
			_, known := allowedBlockTags[name]
			switch {
			case known:
				// A complete tag with a block-level name is a type 6 block.
				block = ast.NewHTMLBlock(ast.HTMLBlockType6)
			case name == "script" || name == "style" || name == "pre":
				// These names never start a type 7 block.
			case ast.IsParagraph(last):
				// Type 7 cannot interrupt a paragraph.
			case closing && withAttrs:
				// A closing tag carrying attributes is rejected.
			default:
				block = ast.NewHTMLBlock(ast.HTMLBlockType7)
			}
		}
	}

	// Fall back to the type 6 pattern, which also accepts incomplete tags.
	if block == nil {
		if m := htmlBlockType6Regexp.FindSubmatchIndex(line); m != nil {
			name := strings.ToLower(string(line[m[2]:m[3]]))
			if _, known := allowedBlockTags[name]; known {
				block = ast.NewHTMLBlock(ast.HTMLBlockType6)
			}
		}
	}

	if block == nil {
		// Return an untyped nil so the ast.Node interface value itself is nil.
		return nil, NoChildren
	}
	reader.Advance(segment.Len() - 1)
	block.Lines().Append(segment)
	return block, NoChildren
}

// Continue decides whether the line the reader currently peeks still belongs
// to the HTML block held in node, which must be an *ast.HTMLBlock.
//
// Types 1 to 5 end on a line containing their closing marker. If the block
// so far has exactly one line and that line already contains the marker,
// Close is returned without touching the current line. Otherwise, if the
// current line contains the marker, it is stored as the block's ClosureLine,
// the reader is advanced and Close is returned. Types 6 and 7 end at a blank
// line. In every other case the line is appended to the block and
// Continue | NoChildren is returned.
func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {
	block := node.(*ast.HTMLBlock)
	lines := block.Lines()
	line, segment := reader.PeekLine()

	switch kind := block.HTMLBlockType; kind {
	case ast.HTMLBlockType1:
		if lines.Len() == 1 {
			opening := lines.At(0)
			if htmlBlockType1CloseRegexp.Match(opening.Value(reader.Source())) {
				return Close
			}
		}
		if htmlBlockType1CloseRegexp.Match(line) {
			block.ClosureLine = segment
			reader.Advance(segment.Len() - 1)
			return Close
		}

	case ast.HTMLBlockType2, ast.HTMLBlockType3, ast.HTMLBlockType4, ast.HTMLBlockType5:
		// These four types differ only in the literal marker that ends them.
		var terminator []byte
		switch kind {
		case ast.HTMLBlockType2:
			terminator = htmlBlockType2Close
		case ast.HTMLBlockType3:
			terminator = htmlBlockType3Close
		case ast.HTMLBlockType4:
			terminator = htmlBlockType4Close
		default:
			terminator = htmlBlockType5Close
		}

		if lines.Len() == 1 {
			opening := lines.At(0)
			if bytes.Contains(opening.Value(reader.Source()), terminator) {
				return Close
			}
		}
		if bytes.Contains(line, terminator) {
			block.ClosureLine = segment
			reader.Advance(segment.Len() - 1)
			return Close
		}

	case ast.HTMLBlockType6, ast.HTMLBlockType7:
		if util.IsBlank(line) {
			return Close
		}
	}

	// The line is part of the block: record it and move past it.
	node.Lines().Append(segment)
	reader.Advance(segment.Len() - 1)
	return Continue | NoChildren
}

// Close is a no-op for HTML blocks: it leaves node, reader and pc untouched.
func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {
	// nothing to do
}

// CanInterruptParagraph always reports true. Open applies the finer rule
// itself: it declines to start a type 7 block when the last opened block is
// a paragraph.
func (b *htmlBlockParser) CanInterruptParagraph() bool {
	return true
}

// CanAcceptIndentedLine always reports false.
// NOTE(review): presumably this tells the block-parsing driver not to offer
// indented lines to this parser; confirm against the BlockParser interface.
func (b *htmlBlockParser) CanAcceptIndentedLine() bool {
	return false
}
Change markdown rendering from blackfriday to goldmark (#9533) * Move to goldmark Markdown rendering moved from blackfriday to the goldmark. Multiple subtle changes required to the goldmark extensions to keep current rendering and defaults. Can go further with goldmark linkify and have this work within markdown rendering making the link processor unnecessary. Need to think about how to go about allowing extensions - at present it seems that these would be hard to do without recompilation. * linter fixes Co-authored-by: Lauris BH <lauris@nix.lv> 2019-12-31 01:53:28 +00:00			`package parser`

			`import (`
			`"bytes"`
			`"regexp"`
			`"strings"`

			`"github.com/yuin/goldmark/ast"`
			`"github.com/yuin/goldmark/text"`
			`"github.com/yuin/goldmark/util"`
			`)`

			`var allowedBlockTags = map[string]bool{`
			`"address": true,`
			`"article": true,`
			`"aside": true,`
			`"base": true,`
			`"basefont": true,`
			`"blockquote": true,`
			`"body": true,`
			`"caption": true,`
			`"center": true,`
			`"col": true,`
			`"colgroup": true,`
			`"dd": true,`
			`"details": true,`
			`"dialog": true,`
			`"dir": true,`
			`"div": true,`
			`"dl": true,`
			`"dt": true,`
			`"fieldset": true,`
			`"figcaption": true,`
			`"figure": true,`
			`"footer": true,`
			`"form": true,`
			`"frame": true,`
			`"frameset": true,`
			`"h1": true,`
			`"h2": true,`
			`"h3": true,`
			`"h4": true,`
			`"h5": true,`
			`"h6": true,`
			`"head": true,`
			`"header": true,`
			`"hr": true,`
			`"html": true,`
			`"iframe": true,`
			`"legend": true,`
			`"li": true,`
			`"link": true,`
			`"main": true,`
			`"menu": true,`
			`"menuitem": true,`
			`"meta": true,`
			`"nav": true,`
			`"noframes": true,`
			`"ol": true,`
			`"optgroup": true,`
			`"option": true,`
			`"p": true,`
			`"param": true,`
			`"section": true,`
			`"source": true,`
			`"summary": true,`
			`"table": true,`
			`"tbody": true,`
			`"td": true,`
			`"tfoot": true,`
			`"th": true,`
			`"thead": true,`
			`"title": true,`
			`"tr": true,`
			`"track": true,`
			`"ul": true,`
			`}`

			var htmlBlockType1OpenRegexp = regexp.MustCompile(`(?i)^[ ]{0,3}<(script\|pre\|style)(?:\s.\|>.\|/>.*\|)\n?$`)
			var htmlBlockType1CloseRegexp = regexp.MustCompile(`(?i)^.</(?:script\|pre\|style)>.`)

			var htmlBlockType2OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<!\-\-`)
			`var htmlBlockType2Close = []byte{'-', '-', '>'}`

			var htmlBlockType3OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\?`)
			`var htmlBlockType3Close = []byte{'?', '>'}`

			var htmlBlockType4OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<![A-Z]+.*\n?$`)
			`var htmlBlockType4Close = []byte{'>'}`

			var htmlBlockType5OpenRegexp = regexp.MustCompile(`^[ ]{0,3}<\!\[CDATA\[`)
			`var htmlBlockType5Close = []byte{']', ']', '>'}`

			var htmlBlockType6Regexp = regexp.MustCompile(`^[ ]{0,3}</?([a-zA-Z0-9]+)(?:\s.\|>.\|/>.*\|)\n?$`)

			var htmlBlockType7Regexp = regexp.MustCompile(`^[ ]{0,3}<(/)?([a-zA-Z0-9]+)(` + attributePattern + `)(:?>\|/>)\s\n?$`)

			`type htmlBlockParser struct {`
			`}`

			`var defaultHTMLBlockParser = &htmlBlockParser{}`

			`// NewHTMLBlockParser return a new BlockParser that can parse html`
			`// blocks.`
			`func NewHTMLBlockParser() BlockParser {`
			`return defaultHTMLBlockParser`
			`}`

			`func (b *htmlBlockParser) Trigger() []byte {`
			`return []byte{'<'}`
			`}`

			`func (b *htmlBlockParser) Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State) {`
			`var node *ast.HTMLBlock`
			`line, segment := reader.PeekLine()`
			`last := pc.LastOpenedBlock().Node`
			`if pos := pc.BlockOffset(); pos < 0 \|\| line[pos] != '<' {`
			`return nil, NoChildren`
			`}`

			`if m := htmlBlockType1OpenRegexp.FindSubmatchIndex(line); m != nil {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType1)`
			`} else if htmlBlockType2OpenRegexp.Match(line) {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType2)`
			`} else if htmlBlockType3OpenRegexp.Match(line) {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType3)`
			`} else if htmlBlockType4OpenRegexp.Match(line) {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType4)`
			`} else if htmlBlockType5OpenRegexp.Match(line) {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType5)`
			`} else if match := htmlBlockType7Regexp.FindSubmatchIndex(line); match != nil {`
			`isCloseTag := match[2] > -1 && bytes.Equal(line[match[2]:match[3]], []byte("/"))`
			`hasAttr := match[6] != match[7]`
			`tagName := strings.ToLower(string(line[match[4]:match[5]]))`
			`_, ok := allowedBlockTags[tagName]`
			`if ok {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType6)`
			`} else if tagName != "script" && tagName != "style" && tagName != "pre" && !ast.IsParagraph(last) && !(isCloseTag && hasAttr) { // type 7 can not interrupt paragraph`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType7)`
			`}`
			`}`
			`if node == nil {`
			`if match := htmlBlockType6Regexp.FindSubmatchIndex(line); match != nil {`
			`tagName := string(line[match[2]:match[3]])`
			`_, ok := allowedBlockTags[strings.ToLower(tagName)]`
			`if ok {`
			`node = ast.NewHTMLBlock(ast.HTMLBlockType6)`
			`}`
			`}`
			`}`
			`if node != nil {`
			`reader.Advance(segment.Len() - 1)`
			`node.Lines().Append(segment)`
			`return node, NoChildren`
			`}`
			`return nil, NoChildren`
			`}`

			`func (b *htmlBlockParser) Continue(node ast.Node, reader text.Reader, pc Context) State {`
			`htmlBlock := node.(*ast.HTMLBlock)`
			`lines := htmlBlock.Lines()`
			`line, segment := reader.PeekLine()`
			`var closurePattern []byte`

			`switch htmlBlock.HTMLBlockType {`
			`case ast.HTMLBlockType1:`
			`if lines.Len() == 1 {`
			`firstLine := lines.At(0)`
			`if htmlBlockType1CloseRegexp.Match(firstLine.Value(reader.Source())) {`
			`return Close`
			`}`
			`}`
			`if htmlBlockType1CloseRegexp.Match(line) {`
			`htmlBlock.ClosureLine = segment`
			`reader.Advance(segment.Len() - 1)`
			`return Close`
			`}`
			`case ast.HTMLBlockType2:`
			`closurePattern = htmlBlockType2Close`
			`fallthrough`
			`case ast.HTMLBlockType3:`
			`if closurePattern == nil {`
			`closurePattern = htmlBlockType3Close`
			`}`
			`fallthrough`
			`case ast.HTMLBlockType4:`
			`if closurePattern == nil {`
			`closurePattern = htmlBlockType4Close`
			`}`
			`fallthrough`
			`case ast.HTMLBlockType5:`
			`if closurePattern == nil {`
			`closurePattern = htmlBlockType5Close`
			`}`

			`if lines.Len() == 1 {`
			`firstLine := lines.At(0)`
			`if bytes.Contains(firstLine.Value(reader.Source()), closurePattern) {`
			`return Close`
			`}`
			`}`
			`if bytes.Contains(line, closurePattern) {`
			`htmlBlock.ClosureLine = segment`
			`reader.Advance(segment.Len() - 1)`
			`return Close`
			`}`

			`case ast.HTMLBlockType6, ast.HTMLBlockType7:`
			`if util.IsBlank(line) {`
			`return Close`
			`}`
			`}`
			`node.Lines().Append(segment)`
			`reader.Advance(segment.Len() - 1)`
			`return Continue \| NoChildren`
			`}`

			`func (b *htmlBlockParser) Close(node ast.Node, reader text.Reader, pc Context) {`
			`// nothing to do`
			`}`

			`func (b *htmlBlockParser) CanInterruptParagraph() bool {`
			`return true`
			`}`

			`func (b *htmlBlockParser) CanAcceptIndentedLine() bool {`
			`return false`
			`}`