diff --git a/go.mod b/go.mod index 2f81c9b16..5d22b8274 100644 --- a/go.mod +++ b/go.mod @@ -79,11 +79,9 @@ require ( github.com/prometheus/procfs v0.0.4 // indirect github.com/quasoft/websspi v1.0.0 github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001 // indirect - github.com/russross/blackfriday/v2 v2.0.1 github.com/satori/go.uuid v1.2.0 github.com/sergi/go-diff v1.0.0 github.com/shurcooL/httpfs v0.0.0-20190527155220-6a4d4a70508b // indirect - github.com/shurcooL/sanitized_anchor_name v1.0.0 // indirect github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd github.com/steveyen/gtreap v0.0.0-20150807155958-0abe01ef9be2 // indirect github.com/stretchr/testify v1.4.0 @@ -95,6 +93,7 @@ require ( github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141 github.com/urfave/cli v1.20.0 github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 + github.com/yuin/goldmark v1.1.19 go.etcd.io/bbolt v1.3.3 // indirect golang.org/x/crypto v0.0.0-20191227163750-53104e6ec876 golang.org/x/net v0.0.0-20191101175033-0deb6923b6d9 diff --git a/go.sum b/go.sum index a6f65167f..247630d47 100644 --- a/go.sum +++ b/go.sum @@ -462,16 +462,12 @@ github.com/remyoudompheng/bigfft v0.0.0-20190321074620-2f0d2b0e0001/go.mod h1:qq github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/russross/blackfriday v1.5.2 h1:HyvC0ARfnZBqnXwABFeSZHpKvJHJJfPz81GNueLj0oo= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= -github.com/russross/blackfriday/v2 v2.0.1 h1:lPqVAte+HuHNfhJ/0LC98ESWRz8afy9tM/0RK8m9o+Q= -github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ= 
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shurcooL/httpfs v0.0.0-20190527155220-6a4d4a70508b h1:4kg1wyftSKxLtnPAvcRWakIPpokB9w780/KwrNLnfPA= github.com/shurcooL/httpfs v0.0.0-20190527155220-6a4d4a70508b/go.mod h1:ZY1cvUeJuFPAdZ/B6v7RHavJWZn2YPVFQ1OSXhCGOkg= -github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= -github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd h1:ug7PpSOB5RBPK1Kg6qskGBoP3Vnj/aNYFTznWvlkGo0= github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd/go.mod h1:TrYk7fJVaAttu97ZZKrO9UbRa8izdowaMIZcxYMbVaw= github.com/siddontang/go v0.0.0-20180604090527-bdc77568d726/go.mod h1:3yhqj7WBBfRhbBlzyOC3gUxftwsU0u8gqevxwIHQpMw= @@ -550,6 +546,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 h1:HsIQ6yAjfjQ3IxPGrTusxp6Qxn92gNVq2x5CbvQvx3w= github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53/go.mod h1:f6elajwZV+xceiaqgRL090YzLEDGSbqr3poGL3ZgXYo= +github.com/yuin/goldmark v1.1.19 h1:0s2/60x0XsFCXHeFut+F3azDVAAyIMyUfJRbRexiTYs= +github.com/yuin/goldmark v1.1.19/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs= github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0= go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= diff --git a/modules/markup/common/footnote.go b/modules/markup/common/footnote.go new file mode 100644 index 000000000..ad4cd7f2e --- /dev/null +++ b/modules/markup/common/footnote.go @@ -0,0 +1,507 @@ +// Copyright 2019 Yusuke Inuzuka +// Copyright 2019 The Gitea 
Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+// Most of what follows is a subtly changed version of github.com/yuin/goldmark/extension/footnote.go
+
+package common
+
+import (
+	"bytes"
+	"fmt"
+	"os"
+	"strconv"
+	"unicode"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/renderer"
+	"github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/text"
+	"github.com/yuin/goldmark/util"
+)
+
+// CleanValue will clean a value to make it safe to be an id.
+// This function is quite different from the original goldmark function
+// and more closely matches the output from the shurcooL sanitizer.
+// In particular Unicode letters and numbers are a lot more than a-zA-Z0-9...
+//
+// The input is trimmed of surrounding white space, every Unicode letter or
+// number is kept (lower-cased), and each run of any other characters that
+// sits between two kept characters is collapsed into a single '-'.
+// Leading and trailing runs are dropped, so "  Hello, World! " becomes
+// "hello-world". The result is empty when the input contains no letter or
+// number at all.
+func CleanValue(value []byte) []byte {
+	value = bytes.TrimSpace(value)
+	rs := bytes.Runes(value)
+	result := make([]rune, 0, len(rs))
+	// needsDash records that at least one rejected rune was seen since the
+	// last kept rune; the dash is only emitted once another kept rune follows.
+	needsDash := false
+	for _, r := range rs {
+		switch {
+		case unicode.IsLetter(r) || unicode.IsNumber(r):
+			// len(result) > 0 suppresses a dash at the very start of the id.
+			if needsDash && len(result) > 0 {
+				result = append(result, '-')
+			}
+			needsDash = false
+			result = append(result, unicode.ToLower(r))
+		default:
+			needsDash = true
+		}
+	}
+	return []byte(string(result))
+}
+
+// Most of what follows is a subtly changed version of github.com/yuin/goldmark/extension/footnote.go
+
+// A FootnoteLink struct represents a link to a footnote of Markdown
+// (PHP Markdown Extra) text.
+type FootnoteLink struct {
+	ast.BaseInline
+	Index int
+	Name  []byte
+}
+
+// Dump implements Node.Dump.
+func (n *FootnoteLink) Dump(source []byte, level int) {
+	m := map[string]string{}
+	m["Index"] = fmt.Sprintf("%v", n.Index)
+	// Name is a []byte: convert it explicitly, because %v would print the
+	// slice as a list of decimal byte values instead of readable text.
+	m["Name"] = string(n.Name)
+	ast.DumpHelper(n, source, level, m, nil)
+}
+
+// KindFootnoteLink is a NodeKind of the FootnoteLink node.
+var KindFootnoteLink = ast.NewNodeKind("GiteaFootnoteLink")
+
+// Kind implements Node.Kind.
+func (n *FootnoteLink) Kind() ast.NodeKind {
+	return KindFootnoteLink
+}
+
+// NewFootnoteLink returns a new FootnoteLink node.
+func NewFootnoteLink(index int, name []byte) *FootnoteLink {
+	return &FootnoteLink{
+		Index: index,
+		Name:  name,
+	}
+}
+
+// A FootnoteBackLink struct represents the back link that the footnote AST
+// transformer appends to the end of every referenced footnote of Markdown
+// (PHP Markdown Extra) text. Index and Name are copied from that footnote.
+type FootnoteBackLink struct {
+	ast.BaseInline
+	Index int
+	Name  []byte
+}
+
+// Dump implements Node.Dump.
+func (n *FootnoteBackLink) Dump(source []byte, level int) {
+	m := map[string]string{}
+	m["Index"] = fmt.Sprintf("%v", n.Index)
+	// Name is a []byte: convert it explicitly, because %v would print the
+	// slice as a list of decimal byte values instead of readable text.
+	m["Name"] = string(n.Name)
+	ast.DumpHelper(n, source, level, m, nil)
+}
+
+// KindFootnoteBackLink is a NodeKind of the FootnoteBackLink node.
+var KindFootnoteBackLink = ast.NewNodeKind("GiteaFootnoteBackLink")
+
+// Kind implements Node.Kind.
+func (n *FootnoteBackLink) Kind() ast.NodeKind {
+	return KindFootnoteBackLink
+}
+
+// NewFootnoteBackLink returns a new FootnoteBackLink node.
+func NewFootnoteBackLink(index int, name []byte) *FootnoteBackLink {
+	return &FootnoteBackLink{
+		Index: index,
+		Name:  name,
+	}
+}
+
+// A Footnote struct represents a footnote of Markdown
+// (PHP Markdown Extra) text.
+//
+// Ref is the label as written in the source, Name is the identifier used for
+// the footnote (initially equal to Ref) and Index is its position among the
+// referenced footnotes, or a negative value while it is unreferenced.
+type Footnote struct {
+	ast.BaseBlock
+	Ref   []byte
+	Index int
+	Name  []byte
+}
+
+// Dump implements Node.Dump.
+func (n *Footnote) Dump(source []byte, level int) {
+	m := map[string]string{}
+	m["Index"] = fmt.Sprintf("%v", n.Index)
+	// Ref and Name are both []byte and are dumped the same way, as text.
+	m["Ref"] = string(n.Ref)
+	m["Name"] = string(n.Name)
+	ast.DumpHelper(n, source, level, m, nil)
+}
+
+// KindFootnote is a NodeKind of the Footnote node.
+var KindFootnote = ast.NewNodeKind("GiteaFootnote")
+
+// Kind implements Node.Kind.
+func (n *Footnote) Kind() ast.NodeKind {
+	return KindFootnote
+}
+
+// NewFootnote returns a new Footnote node.
+func NewFootnote(ref []byte) *Footnote {
+	// The label doubles as the initial Name; Index -1 marks the footnote as
+	// not referenced yet.
+	fn := new(Footnote)
+	fn.Ref = ref
+	fn.Index = -1
+	fn.Name = ref
+	return fn
+}
+
+// FootnoteList is the block node that collects the Footnote nodes of a
+// Markdown (PHP Markdown Extra) text.
+type FootnoteList struct {
+	ast.BaseBlock
+	Count int
+}
+
+// Dump implements Node.Dump and reports the Count field.
+func (n *FootnoteList) Dump(source []byte, level int) {
+	fields := map[string]string{
+		"Count": fmt.Sprintf("%v", n.Count),
+	}
+	ast.DumpHelper(n, source, level, fields, nil)
+}
+
+// KindFootnoteList is the NodeKind reported by FootnoteList nodes.
+var KindFootnoteList = ast.NewNodeKind("GiteaFootnoteList")
+
+// Kind implements Node.Kind.
+func (n *FootnoteList) Kind() ast.NodeKind {
+	return KindFootnoteList
+}
+
+// NewFootnoteList returns an empty FootnoteList whose Count is zero.
+func NewFootnoteList() *FootnoteList {
+	return new(FootnoteList)
+}
+
+// footnoteListKey is the parser context key under which the FootnoteList
+// of the document being parsed is stored.
+var footnoteListKey = parser.NewContextKey()
+
+// footnoteBlockParser carries no state; its methods implement the block
+// parser for footnote definitions.
+type footnoteBlockParser struct{}
+
+// defaultFootnoteBlockParser is the shared instance handed out by
+// NewFootnoteBlockParser.
+var defaultFootnoteBlockParser = new(footnoteBlockParser)
+
+// NewFootnoteBlockParser returns a new parser.BlockParser that can parse
+// footnotes of the Markdown(PHP Markdown Extra) text.
+func NewFootnoteBlockParser() parser.BlockParser {
+	return defaultFootnoteBlockParser
+}
+
+// Trigger returns the bytes that make the parser run this block parser:
+// a footnote definition always starts with '['.
+func (b *footnoteBlockParser) Trigger() []byte {
+	return []byte{'['}
+}
+
+// Open tries to start a footnote definition on the current line.
+//
+// The line is accepted when the block offset points at "[^", a closing ']'
+// is found by util.FindClosure, that ']' is immediately followed by ':' and
+// the label between "[^" and ']' is not blank. In that case a new Footnote
+// carrying the label is returned; otherwise nil is returned and nothing is
+// consumed. The returned state is NoChildren when the position after the
+// colon is at or beyond the end of the line and HasChildren otherwise.
+func (b *footnoteBlockParser) Open(parent ast.Node, reader text.Reader, pc parser.Context) (ast.Node, parser.State) {
+	line, segment := reader.PeekLine()
+	pos := pc.BlockOffset()
+	if pos < 0 || line[pos] != '[' {
+		return nil, parser.NoChildren
+	}
+	pos++
+	if pos > len(line)-1 || line[pos] != '^' {
+		return nil, parser.NoChildren
+	}
+	// open is the index of the first label byte, closes the index of the
+	// matching ']' and next the index right after it (expected to be ':').
+	open := pos + 1
+	closes := 0
+	closure := util.FindClosure(line[pos+1:], '[', ']', false, false)
+	closes = pos + 1 + closure
+	next := closes + 1
+	if closure > -1 {
+		if next >= len(line) || line[next] != ':' {
+			return nil, parser.NoChildren
+		}
+	} else {
+		return nil, parser.NoChildren
+	}
+	// NOTE(review): the segment padding is subtracted when mapping line
+	// offsets back to source offsets - presumably because PeekLine exposes
+	// padding as extra leading bytes; confirm against goldmark's text.Reader.
+	padding := segment.Padding
+	label := reader.Value(text.NewSegment(segment.Start+open-padding, segment.Start+closes-padding))
+	if util.IsBlank(label) {
+		return nil, parser.NoChildren
+	}
+	item := NewFootnote(label)
+
+	pos = next + 1 - padding
+	if pos >= len(line) {
+		reader.Advance(pos)
+		return item, parser.NoChildren
+	}
+	reader.AdvanceAndSetPadding(pos, padding)
+	return item, parser.HasChildren
+}
+
+// Continue decides whether the next line still belongs to the open footnote.
+//
+// A blank line keeps the footnote open. Any other line keeps it open only if
+// util.IndentPosition (called with a width of 4) reports a non-negative child
+// position; otherwise the footnote is closed.
+func (b *footnoteBlockParser) Continue(node ast.Node, reader text.Reader, pc parser.Context) parser.State {
+	line, _ := reader.PeekLine()
+	if util.IsBlank(line) {
+		return parser.Continue | parser.HasChildren
+	}
+	childpos, padding := util.IndentPosition(line, reader.LineOffset(), 4)
+	if childpos < 0 {
+		return parser.Close
+	}
+	reader.AdvanceAndSetPadding(childpos, padding)
+	return parser.Continue | parser.HasChildren
+}
+
+// Close moves the finished footnote into the FootnoteList kept in the parser
+// context. When no list exists yet, one is created, stored in the context and
+// inserted into the tree in front of this footnote.
+func (b *footnoteBlockParser) Close(node ast.Node, reader text.Reader, pc parser.Context) {
+	var list *FootnoteList
+	if tlist := pc.Get(footnoteListKey); tlist != nil {
+		list = tlist.(*FootnoteList)
+	} else {
+		list = NewFootnoteList()
+		pc.Set(footnoteListKey, list)
+		node.Parent().InsertBefore(node.Parent(), node, list)
+	}
+	// Detach the footnote from where it was written and collect it in the list.
+	node.Parent().RemoveChild(node.Parent(), node)
+	list.AppendChild(list, node)
+}
+
+// CanInterruptParagraph reports that a footnote definition may interrupt a paragraph.
+func (b *footnoteBlockParser) CanInterruptParagraph() bool {
+	return true
+}
+
+// CanAcceptIndentedLine reports that a footnote definition cannot start on an indented line.
+func (b *footnoteBlockParser) CanAcceptIndentedLine() bool {
+	return false
+}
+
+// footnoteParser carries no state; its methods implement the inline parser
+// for footnote references.
+type footnoteParser struct {
+}
+
+// defaultFootnoteParser is the shared instance returned by NewFootnoteParser.
+var defaultFootnoteParser = &footnoteParser{}
+
+// NewFootnoteParser returns a new parser.InlineParser that can parse
+// footnote links of the Markdown(PHP Markdown Extra) text.
+func NewFootnoteParser() parser.InlineParser {
+	return defaultFootnoteParser
+}
+
+// Trigger returns the bytes that make the parser run this inline parser.
+func (s *footnoteParser) Trigger() []byte {
+	// The footnote syntax probably conflicts with the image syntax,
+	// so this parser needs to be triggered by '!' as well.
+	return []byte{'!', '['}
+}
+
+// Parse turns a footnote reference ("[^label]", optionally preceded by '!')
+// into a FootnoteLink.
+//
+// nil is returned when the text is not a reference, when no FootnoteList is
+// stored in the parser context, or when no Footnote in the list has a Ref
+// equal to the label. The first time a footnote is referenced it receives the
+// next Index (list.Count is incremented) and a Name generated by pc.IDs()
+// from CleanValue(Name), or from the index itself when that value is empty.
+func (s *footnoteParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
+	line, segment := block.PeekLine()
+	// pos skips the '[' trigger, and a leading '!' when present.
+	pos := 1
+	if len(line) > 0 && line[0] == '!' {
+		pos++
+	}
+	if pos >= len(line) || line[pos] != '^' {
+		return nil
+	}
+	pos++
+	if pos >= len(line) {
+		return nil
+	}
+	open := pos
+	closure := util.FindClosure(line[pos:], '[', ']', false, false)
+	if closure < 0 {
+		return nil
+	}
+	closes := pos + closure
+	value := block.Value(text.NewSegment(segment.Start+open, segment.Start+closes))
+	// NOTE(review): the reader is advanced before the lookups below can still
+	// return nil - presumably goldmark restores the position when an inline
+	// parser returns nil; confirm.
+	block.Advance(closes + 1)
+
+	var list *FootnoteList
+	if tlist := pc.Get(footnoteListKey); tlist != nil {
+		list = tlist.(*FootnoteList)
+	}
+	if list == nil {
+		return nil
+	}
+	index := 0
+	name := []byte{}
+	for def := list.FirstChild(); def != nil; def = def.NextSibling() {
+		d := def.(*Footnote)
+		if bytes.Equal(d.Ref, value) {
+			// A negative Index means this is the first reference to the footnote.
+			if d.Index < 0 {
+				list.Count++
+				d.Index = list.Count
+				val := CleanValue(d.Name)
+				if len(val) == 0 {
+					val = []byte(strconv.Itoa(d.Index))
+				}
+				d.Name = pc.IDs().Generate(val, KindFootnote)
+			}
+			index = d.Index
+			name = d.Name
+			break
+		}
+	}
+	// index is still 0 when no definition matched the label.
+	if index == 0 {
+		return nil
+	}
+
+	return NewFootnoteLink(index, name)
+}
+
+// footnoteASTTransformer carries no state; its Transform method finalises the
+// footnote list after parsing.
+type footnoteASTTransformer struct {
+}
+
+// defaultFootnoteASTTransformer is the shared instance returned by
+// NewFootnoteASTTransformer.
+var defaultFootnoteASTTransformer = &footnoteASTTransformer{}
+
+// NewFootnoteASTTransformer returns a new parser.ASTTransformer that
+// inserts the footnote list at the end of the document.
+func NewFootnoteASTTransformer() parser.ASTTransformer {
+	return defaultFootnoteASTTransformer
+}
+
+// Transform finalises the FootnoteList stored in the parser context, if any.
+//
+// Footnotes that were never referenced (negative Index) are removed; every
+// other footnote gets a FootnoteBackLink appended to its last child when that
+// child is a paragraph, or to the footnote itself otherwise. The remaining
+// footnotes are sorted by Index. The list is removed from the tree when its
+// Count is not positive and appended to the document otherwise. The context
+// entry is cleared in either case.
+func (a *footnoteASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
+	var list *FootnoteList
+	if tlist := pc.Get(footnoteListKey); tlist != nil {
+		list = tlist.(*FootnoteList)
+	} else {
+		return
+	}
+	pc.Set(footnoteListKey, nil)
+	for footnote := list.FirstChild(); footnote != nil; {
+		var container ast.Node = footnote
+		// Fetch the sibling first: the footnote may be removed from the list below.
+		next := footnote.NextSibling()
+		if fc := container.LastChild(); fc != nil && ast.IsParagraph(fc) {
+			container = fc
+		}
+		footnoteNode := footnote.(*Footnote)
+		index := footnoteNode.Index
+		name := footnoteNode.Name
+		if index < 0 {
+			list.RemoveChild(list, footnote)
+		} else {
+			container.AppendChild(container, NewFootnoteBackLink(index, name))
+		}
+		footnote = next
+	}
+	// The comparison never returns 0; footnotes with equal Index are reported
+	// as "greater".
+	list.SortChildren(func(n1, n2 ast.Node) int {
+		if n1.(*Footnote).Index < n2.(*Footnote).Index {
+			return -1
+		}
+		return 1
+	})
+	if list.Count <= 0 {
+		list.Parent().RemoveChild(list.Parent(), list)
+		return
+	}
+
+	// NOTE(review): the list already has a parent at this point - presumably
+	// AppendChild re-parents it to the end of the document; confirm.
+	node.AppendChild(node, list)
+}
+
+// FootnoteHTMLRenderer is a renderer.NodeRenderer implementation that
+// renders FootnoteLink nodes.
+type FootnoteHTMLRenderer struct {
+	html.Config
+}
+
+// NewFootnoteHTMLRenderer returns a new FootnoteHTMLRenderer.
+// Each given option is applied to the renderer's html.Config.
+func NewFootnoteHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
+	r := &FootnoteHTMLRenderer{
+		Config: html.NewConfig(),
+	}
+	for _, opt := range opts {
+		opt.SetHTMLOption(&r.Config)
+	}
+	return r
+}
+
+// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs.
+func (r *FootnoteHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(KindFootnoteLink, r.renderFootnoteLink) + reg.Register(KindFootnoteBackLink, r.renderFootnoteBackLink) + reg.Register(KindFootnote, r.renderFootnote) + reg.Register(KindFootnoteList, r.renderFootnoteList) +} + +func (r *FootnoteHTMLRenderer) renderFootnoteLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + n := node.(*FootnoteLink) + n.Dump(source, 0) + is := strconv.Itoa(n.Index) + _, _ = w.WriteString(``) + _, _ = w.WriteString(is) + _, _ = w.WriteString(``) + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnoteBackLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if entering { + n := node.(*FootnoteBackLink) + fmt.Fprintf(os.Stdout, "source:\n%s\n", string(n.Text(source))) + _, _ = w.WriteString(` `) + _, _ = w.WriteString("↩︎") + _, _ = w.WriteString(``) + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnote(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + n := node.(*Footnote) + if entering { + fmt.Fprintf(os.Stdout, "source:\n%s\n", string(n.Text(source))) + _, _ = w.WriteString(`
  • \n") + } else { + _, _ = w.WriteString("
  • \n") + } + return ast.WalkContinue, nil +} + +func (r *FootnoteHTMLRenderer) renderFootnoteList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + tag := "div" + if entering { + _, _ = w.WriteString("<") + _, _ = w.WriteString(tag) + _, _ = w.WriteString(` class="footnotes" role="doc-endnotes"`) + if node.Attributes() != nil { + html.RenderAttributes(w, node, html.GlobalAttributeFilter) + } + _ = w.WriteByte('>') + if r.Config.XHTML { + _, _ = w.WriteString("\n
    \n") + } else { + _, _ = w.WriteString("\n
    \n") + } + _, _ = w.WriteString("
      \n") + } else { + _, _ = w.WriteString("
    \n") + _, _ = w.WriteString("\n") + } + return ast.WalkContinue, nil +} + +type footnoteExtension struct{} + +// FootnoteExtension represents the Gitea Footnote +var FootnoteExtension = &footnoteExtension{} + +// Extend extends the markdown converter with the Gitea Footnote parser +func (e *footnoteExtension) Extend(m goldmark.Markdown) { + m.Parser().AddOptions( + parser.WithBlockParsers( + util.Prioritized(NewFootnoteBlockParser(), 999), + ), + parser.WithInlineParsers( + util.Prioritized(NewFootnoteParser(), 101), + ), + parser.WithASTTransformers( + util.Prioritized(NewFootnoteASTTransformer(), 999), + ), + ) + m.Renderer().AddOptions(renderer.WithNodeRenderers( + util.Prioritized(NewFootnoteHTMLRenderer(), 500), + )) +} diff --git a/modules/markup/common/html.go b/modules/markup/common/html.go new file mode 100644 index 000000000..3a47686f1 --- /dev/null +++ b/modules/markup/common/html.go @@ -0,0 +1,19 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package common + +import ( + "mvdan.cc/xurls/v2" +) + +var ( + // NOTE: All below regex matching do not perform any extra validation. + // Thus a link is produced even if the linked entity does not exist. + // While fast, this is also incorrect and lead to false positives. + // TODO: fix invalid linking issue + + // LinkRegex is a regexp matching a valid link + LinkRegex, _ = xurls.StrictMatchingScheme("https?://") +) diff --git a/modules/markup/common/linkify.go b/modules/markup/common/linkify.go new file mode 100644 index 000000000..6ae70fba3 --- /dev/null +++ b/modules/markup/common/linkify.go @@ -0,0 +1,156 @@ +// Copyright 2019 Yusuke Inuzuka +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+
+// Most of this file is a subtly changed version of github.com/yuin/goldmark/extension/linkify.go
+
+package common
+
+import (
+	"bytes"
+	"regexp"
+
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+	"github.com/yuin/goldmark/util"
+)
+
+// wwwURLRegxp matches a scheme-less URL that starts with "www." at the
+// beginning of the input.
+var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
+
+// linkifyParser carries no state; its methods implement the inline parser
+// that turns bare URLs and e-mail addresses into auto links.
+type linkifyParser struct {
+}
+
+// defaultLinkifyParser is the shared instance returned by NewLinkifyParser.
+var defaultLinkifyParser = &linkifyParser{}
+
+// NewLinkifyParser returns a new InlineParser that can parse
+// text that seems like a URL.
+func NewLinkifyParser() parser.InlineParser {
+	return defaultLinkifyParser
+}
+
+// Trigger returns the bytes that make the parser run this inline parser.
+func (s *linkifyParser) Trigger() []byte {
+	// ' ' indicates any white spaces and a line head
+	return []byte{' ', '*', '_', '~', '('}
+}
+
+var protoHTTP = []byte("http:")
+var protoHTTPS = []byte("https:")
+var protoFTP = []byte("ftp:")
+var domainWWW = []byte("www.")
+
+// Parse recognises a URL or an e-mail address at the current position and
+// returns it as an auto link, or returns nil when there is none.
+//
+// A single leading trigger character (' ', '*', '_', '~' or '(') is not part
+// of the link; it is kept as ordinary text. A URL must start right at the
+// (possibly advanced) position: either one matched by LinkRegex after an
+// "http:", "https:" or "ftp:" prefix, or a "www." URL, for which the link
+// protocol is set to "http". A trailing '.', unbalanced trailing ')' and a
+// trailing "&entity;" are excluded from the link. Everything else is tried as
+// an e-mail address.
+func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
+	if pc.IsInLinkLabel() {
+		return nil
+	}
+	line, segment := block.PeekLine()
+	// Nothing to inspect; also protects the line[0] access below.
+	if len(line) == 0 {
+		return nil
+	}
+	consumes := 0
+	start := segment.Start
+	c := line[0]
+	// advance if current position is not a line head.
+	if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' {
+		consumes++
+		start++
+		line = line[1:]
+	}
+
+	var m []int
+	var protocol []byte
+	var typ ast.AutoLinkType = ast.AutoLinkURL
+	if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
+		m = LinkRegex.FindSubmatchIndex(line)
+		// LinkRegex is not anchored, so it may match a URL further down the
+		// line. The code below builds the link from offset 0, therefore only
+		// a match that starts at the current position may be used; anything
+		// else would pull the preceding text into the link.
+		if m != nil && m[0] != 0 {
+			m = nil
+		}
+	}
+	if m == nil && bytes.HasPrefix(line, domainWWW) {
+		m = wwwURLRegxp.FindSubmatchIndex(line)
+		protocol = []byte("http")
+	}
+	if m != nil {
+		lastChar := line[m[1]-1]
+		if lastChar == '.' {
+			// A trailing dot is sentence punctuation, not part of the URL.
+			m[1]--
+		} else if lastChar == ')' {
+			// Drop as many trailing ')' as there are unbalanced closing
+			// parentheses inside the match.
+			closing := 0
+			for i := m[1] - 1; i >= m[0]; i-- {
+				if line[i] == ')' {
+					closing++
+				} else if line[i] == '(' {
+					closing--
+				}
+			}
+			if closing > 0 {
+				m[1] -= closing
+			}
+		} else if lastChar == ';' {
+			// Walk back over the alphanumeric run before the ';'. If it is
+			// preceded by '&' the tail looks like an entity reference and is
+			// cut off, so the match ends right before the '&'.
+			i := m[1] - 2
+			for ; i >= m[0]; i-- {
+				if util.IsAlphaNumeric(line[i]) {
+					continue
+				}
+				break
+			}
+			if i >= m[0] && i != m[1]-2 && line[i] == '&' {
+				m[1] = i
+			}
+		}
+	}
+	if m == nil {
+		if len(line) > 0 && util.IsPunct(line[0]) {
+			return nil
+		}
+		typ = ast.AutoLinkEmail
+		stop := util.FindEmailIndex(line)
+		if stop < 0 {
+			return nil
+		}
+		at := bytes.IndexByte(line, '@')
+		// Guard the slice expression below against an unexpected '@' position.
+		if at < 0 || at > stop-1 {
+			return nil
+		}
+		m = []int{0, stop, at, stop - 1}
+		// The part after '@' must contain a dot to look like a domain.
+		if bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
+			return nil
+		}
+		lastChar := line[m[1]-1]
+		if lastChar == '.' {
+			m[1]--
+		}
+		if m[1] < len(line) {
+			nextChar := line[m[1]]
+			if nextChar == '-' || nextChar == '_' {
+				return nil
+			}
+		}
+	}
+	// m is always set here: either a URL matched or the e-mail branch filled it in.
+	if consumes != 0 {
+		// Keep the skipped trigger character as plain text.
+		s := segment.WithStop(segment.Start + 1)
+		ast.MergeOrAppendTextSegment(parent, s)
+	}
+	consumes += m[1]
+	block.Advance(consumes)
+	n := ast.NewTextSegment(text.NewSegment(start, start+m[1]))
+	link := ast.NewAutoLink(typ, n)
+	link.Protocol = protocol
+	return link
+}
+
+// CloseBlock is part of the inline parser interface; this parser keeps no
+// per-block state.
+func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
+	// nothing to do
+}
+
+type linkify struct {
+}
+
+// Linkify is an extension that allows you to parse text that seems like a URL.
+var Linkify = &linkify{} + +func (e *linkify) Extend(m goldmark.Markdown) { + m.Parser().AddOptions( + parser.WithInlineParsers( + util.Prioritized(NewLinkifyParser(), 999), + ), + ) +} diff --git a/modules/markup/html.go b/modules/markup/html.go index b10da40fc..2c6773bce 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -15,6 +15,7 @@ import ( "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/references" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" @@ -57,8 +58,6 @@ var ( // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))") - linkRegex, _ = xurls.StrictMatchingScheme("https?://") - // blackfriday extensions create IDs like fn:user-content-footnote blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) ) @@ -118,7 +117,7 @@ func CustomLinkURLSchemes(schemes []string) { } withAuth = append(withAuth, s) } - linkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) + common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) } // IsSameDomain checks if given url string has the same hostname as current Gitea instance @@ -509,6 +508,12 @@ func shortLinkProcessorFull(ctx *postProcessCtx, node *html.Node, noLink bool) { (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { const lenQuote = len("‘") val = val[lenQuote : len(val)-lenQuote] + } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || + (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { + val = val[1 : len(val)-1] + } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { + const lenQuote = len("‘") + 
val = val[1 : len(val)-lenQuote] } props[key] = val } @@ -803,7 +808,7 @@ func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) { // linkProcessor creates links for any HTTP or HTTPS URL not captured by // markdown. func linkProcessor(ctx *postProcessCtx, node *html.Node) { - m := linkRegex.FindStringIndex(node.Data) + m := common.LinkRegex.FindStringIndex(node.Data) if m == nil { return } @@ -832,7 +837,7 @@ func genDefaultLinkProcessor(defaultLink string) processor { // descriptionLinkProcessor creates links for DescriptionHTML func descriptionLinkProcessor(ctx *postProcessCtx, node *html.Node) { - m := linkRegex.FindStringIndex(node.Data) + m := common.LinkRegex.FindStringIndex(node.Data) if m == nil { return } diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 07747e97e..91ef320b4 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -323,6 +323,6 @@ func TestRender_ShortLinks(t *testing.T) { `

    `) test( "

    [[foobar]]

    ", - `

    [[foobar]]

    `, - `

    [[foobar]]

    `) + `

    [[foobar]]

    `, + `

    [[foobar]]

    `) } diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go new file mode 100644 index 000000000..2a2a9dce6 --- /dev/null +++ b/modules/markup/markdown/goldmark.go @@ -0,0 +1,178 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package markdown + +import ( + "bytes" + "fmt" + "strings" + + "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/common" + giteautil "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark/ast" + east "github.com/yuin/goldmark/extension/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +var byteMailto = []byte("mailto:") + +// GiteaASTTransformer is a default transformer of the goldmark tree. +type GiteaASTTransformer struct{} + +// Transform transforms the given AST tree. +func (g *GiteaASTTransformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + _ = ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + switch v := n.(type) { + case *ast.Image: + // Images need two things: + // + // 1. Their src needs to munged to be a real value + // 2. 
If they're not wrapped with a link they need a link wrapper + + // Check if the destination is a real link + link := v.Destination + if len(link) > 0 && !markup.IsLink(link) { + prefix := pc.Get(urlPrefixKey).(string) + if pc.Get(isWikiKey).(bool) { + prefix = giteautil.URLJoin(prefix, "wiki", "raw") + } + prefix = strings.Replace(prefix, "/src/", "/media/", 1) + + lnk := string(link) + lnk = giteautil.URLJoin(prefix, lnk) + lnk = strings.Replace(lnk, " ", "+", -1) + link = []byte(lnk) + } + v.Destination = link + + parent := n.Parent() + // Create a link around image only if parent is not already a link + if _, ok := parent.(*ast.Link); !ok && parent != nil { + wrap := ast.NewLink() + wrap.Destination = link + wrap.Title = v.Title + parent.ReplaceChild(parent, n, wrap) + wrap.AppendChild(wrap, n) + } + case *ast.Link: + // Links need their href to munged to be a real value + link := v.Destination + if len(link) > 0 && !markup.IsLink(link) && + link[0] != '#' && !bytes.HasPrefix(link, byteMailto) { + // special case: this is not a link, a hash link or a mailto:, so it's a + // relative URL + lnk := string(link) + if pc.Get(isWikiKey).(bool) { + lnk = giteautil.URLJoin("wiki", lnk) + } + link = []byte(giteautil.URLJoin(pc.Get(urlPrefixKey).(string), lnk)) + } + v.Destination = link + } + return ast.WalkContinue, nil + }) +} + +type prefixedIDs struct { + values map[string]bool +} + +// Generate generates a new element id. +func (p *prefixedIDs) Generate(value []byte, kind ast.NodeKind) []byte { + dft := []byte("id") + if kind == ast.KindHeading { + dft = []byte("heading") + } + return p.GenerateWithDefault(value, dft) +} + +// Generate generates a new element id. +func (p *prefixedIDs) GenerateWithDefault(value []byte, dft []byte) []byte { + result := common.CleanValue(value) + if len(result) == 0 { + result = dft + } + if !bytes.HasPrefix(result, []byte("user-content-")) { + result = append([]byte("user-content-"), result...) 
+ } + if _, ok := p.values[util.BytesToReadOnlyString(result)]; !ok { + p.values[util.BytesToReadOnlyString(result)] = true + return result + } + for i := 1; ; i++ { + newResult := fmt.Sprintf("%s-%d", result, i) + if _, ok := p.values[newResult]; !ok { + p.values[newResult] = true + return []byte(newResult) + } + } +} + +// Put puts a given element id to the used ids table. +func (p *prefixedIDs) Put(value []byte) { + p.values[util.BytesToReadOnlyString(value)] = true +} + +func newPrefixedIDs() *prefixedIDs { + return &prefixedIDs{ + values: map[string]bool{}, + } +} + +// NewTaskCheckBoxHTMLRenderer creates a TaskCheckBoxHTMLRenderer to render tasklists +// in the gitea form. +func NewTaskCheckBoxHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { + r := &TaskCheckBoxHTMLRenderer{ + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// TaskCheckBoxHTMLRenderer is a renderer.NodeRenderer implementation that +// renders checkboxes in list items. +// Overrides the default goldmark one to present the gitea format +type TaskCheckBoxHTMLRenderer struct { + html.Config +} + +// RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. 
+func (r *TaskCheckBoxHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(east.KindTaskCheckBox, r.renderTaskCheckBox) +} + +func (r *TaskCheckBoxHTMLRenderer) renderTaskCheckBox(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + n := node.(*east.TaskCheckBox) + + end := ">" + if r.XHTML { + end = " />" + } + var err error + if n.IsChecked { + _, err = w.WriteString(``) + } else { + _, err = w.WriteString(``) + } + if err != nil { + return ast.WalkStop, err + } + return ast.WalkContinue, nil +} diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index f1e44a8fb..5230fca4d 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -7,161 +7,83 @@ package markdown import ( "bytes" - "io" - "strings" + "sync" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" + giteautil "code.gitea.io/gitea/modules/util" - "github.com/russross/blackfriday/v2" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" ) -// Renderer is a extended version of underlying render object. 
-type Renderer struct { - blackfriday.Renderer - URLPrefix string - IsWiki bool +var converter goldmark.Markdown +var once = sync.Once{} + +var urlPrefixKey = parser.NewContextKey() +var isWikiKey = parser.NewContextKey() + +// NewGiteaParseContext creates a parser.Context with the gitea context set +func NewGiteaParseContext(urlPrefix string, isWiki bool) parser.Context { + pc := parser.NewContext(parser.WithIDs(newPrefixedIDs())) + pc.Set(urlPrefixKey, urlPrefix) + pc.Set(isWikiKey, isWiki) + return pc } -var byteMailto = []byte("mailto:") - -var htmlEscaper = [256][]byte{ - '&': []byte("&"), - '<': []byte("<"), - '>': []byte(">"), - '"': []byte("""), -} - -func escapeHTML(w io.Writer, s []byte) { - var start, end int - for end < len(s) { - escSeq := htmlEscaper[s[end]] - if escSeq != nil { - _, _ = w.Write(s[start:end]) - _, _ = w.Write(escSeq) - start = end + 1 - } - end++ - } - if start < len(s) && end <= len(s) { - _, _ = w.Write(s[start:end]) - } -} - -// RenderNode is a default renderer of a single node of a syntax tree. For -// block nodes it will be called twice: first time with entering=true, second -// time with entering=false, so that it could know when it's working on an open -// tag and when on close. It writes the result to w. -// -// The return value is a way to tell the calling walker to adjust its walk -// pattern: e.g. it can terminate the traversal by returning Terminate. Or it -// can ask the walker to skip a subtree of this node by returning SkipChildren. -// The typical behavior is to return GoToNext, which asks for the usual -// traversal to the next node. 
-func (r *Renderer) RenderNode(w io.Writer, node *blackfriday.Node, entering bool) blackfriday.WalkStatus { - switch node.Type { - case blackfriday.Image: - prefix := r.URLPrefix - if r.IsWiki { - prefix = util.URLJoin(prefix, "wiki", "raw") - } - prefix = strings.Replace(prefix, "/src/", "/media/", 1) - link := node.LinkData.Destination - if len(link) > 0 && !markup.IsLink(link) { - lnk := string(link) - lnk = util.URLJoin(prefix, lnk) - lnk = strings.Replace(lnk, " ", "+", -1) - link = []byte(lnk) - } - node.LinkData.Destination = link - // Render link around image only if parent is not link already - if node.Parent != nil && node.Parent.Type != blackfriday.Link { - if entering { - _, _ = w.Write([]byte(``)) - return r.Renderer.RenderNode(w, node, entering) - } - s := r.Renderer.RenderNode(w, node, entering) - _, _ = w.Write([]byte(``)) - return s - } - return r.Renderer.RenderNode(w, node, entering) - case blackfriday.Link: - // special case: this is not a link, a hash link or a mailto:, so it's a - // relative URL - link := node.LinkData.Destination - if len(link) > 0 && !markup.IsLink(link) && - link[0] != '#' && !bytes.HasPrefix(link, byteMailto) && - node.LinkData.Footnote == nil { - lnk := string(link) - if r.IsWiki { - lnk = util.URLJoin("wiki", lnk) - } - link = []byte(util.URLJoin(r.URLPrefix, lnk)) - } - node.LinkData.Destination = link - return r.Renderer.RenderNode(w, node, entering) - case blackfriday.Text: - isListItem := false - for n := node.Parent; n != nil; n = n.Parent { - if n.Type == blackfriday.Item { - isListItem = true - break - } - } - if isListItem { - text := node.Literal - switch { - case bytes.HasPrefix(text, []byte("[ ] ")): - _, _ = w.Write([]byte(``)) - text = text[3:] - case bytes.HasPrefix(text, []byte("[x] ")): - _, _ = w.Write([]byte(``)) - text = text[3:] - } - node.Literal = text - } - } - return r.Renderer.RenderNode(w, node, entering) -} - -const ( - blackfridayExtensions = 0 | - blackfriday.NoIntraEmphasis | - 
blackfriday.Tables | - blackfriday.FencedCode | - blackfriday.Strikethrough | - blackfriday.NoEmptyLineBeforeBlock | - blackfriday.DefinitionLists | - blackfriday.Footnotes | - blackfriday.HeadingIDs | - blackfriday.AutoHeadingIDs - blackfridayHTMLFlags = 0 | - blackfriday.Smartypants -) - // RenderRaw renders Markdown to HTML without handling special links. func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte { - renderer := &Renderer{ - Renderer: blackfriday.NewHTMLRenderer(blackfriday.HTMLRendererParameters{ - Flags: blackfridayHTMLFlags, - FootnoteAnchorPrefix: "user-content-", - HeadingIDPrefix: "user-content-", - }), - URLPrefix: urlPrefix, - IsWiki: wikiMarkdown, + once.Do(func() { + converter = goldmark.New( + goldmark.WithExtensions(extension.Table, + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + extension.NewTypographer( + extension.WithTypographicSubstitutions(extension.TypographicSubstitutions{ + extension.EnDash: nil, + extension.EmDash: nil, + }), + ), + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + parser.WithASTTransformers( + util.Prioritized(&GiteaASTTransformer{}, 10000), + ), + ), + goldmark.WithRendererOptions( + html.WithUnsafe(), + ), + ) + + // Override the original Tasklist renderer! 
+ converter.Renderer().AddOptions( + renderer.WithNodeRenderers( + util.Prioritized(NewTaskCheckBoxHTMLRenderer(), 1000), + ), + ) + + if setting.Markdown.EnableHardLineBreak { + converter.Renderer().AddOptions(html.WithHardWraps()) + } + }) + + pc := NewGiteaParseContext(urlPrefix, wikiMarkdown) + var buf bytes.Buffer + if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil { + log.Error("Unable to render: %v", err) } - exts := blackfridayExtensions - if setting.Markdown.EnableHardLineBreak { - exts |= blackfriday.HardLineBreak - } - - // Need to normalize EOL to UNIX LF to have consistent results in rendering - body = blackfriday.Run(util.NormalizeEOL(body), blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts)) - return markup.SanitizeBytes(body) + return markup.SanitizeReader(&buf).Bytes() } var ( @@ -174,8 +96,7 @@ func init() { } // Parser implements markup.Parser -type Parser struct { -} +type Parser struct{} // Name implements markup.Parser func (Parser) Name() string { diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index e3156a657..53772ee44 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -98,16 +98,12 @@ func TestRender_Images(t *testing.T) { func testAnswers(baseURLContent, baseURLImages string) []string { return []string{ `

    Wiki! Enjoy :)

    - -

    See commit 65f1bf27bc

    -

    Ideas and codes

    - `, `

    What is Wine Staging?

    -

    Wine Staging on website wine-staging.com.

    - -

    Here are some links to the most important topics. You can find the full list of pages at the sidebar.

    - @@ -131,7 +123,6 @@ func testAnswers(baseURLContent, baseURLImages string) []string { - @@ -141,20 +132,15 @@ func testAnswers(baseURLContent, baseURLImages string) []string {
    Installation
    images/icon-usage.png
    `, `

    Excelsior JET allows you to create native executables for Windows, Linux and Mac OS X.

    -
    1. Package your libGDX application images/1.png
    2. Perform a test run by hitting the Run! button. images/2.png
    -

    More tests

    -

    (from https://www.markdownguide.org/extended-syntax/)

    -

    Definition list

    -
    First Term
    This is the definition of the first term.
    @@ -162,27 +148,21 @@ func testAnswers(baseURLContent, baseURLImages string) []string {
    This is one definition of the second term.
    This is another definition of the second term.
    -

    Footnotes

    -

    Here is a simple footnote,1 and here is a longer one.2

    -
    -
    -
      -
    1. This is the first footnote.
    2. - -
    3. Here is one with multiple paragraphs and code.

      - +
    4. +

      This is the first footnote. ↩︎

      +
    5. +
    6. +

      Here is one with multiple paragraphs and code.

      Indent paragraphs to include them in the footnote.

      -

      { my code }

      - -

      Add as many paragraphs as you like.

    7. +

      Add as many paragraphs as you like. ↩︎

      +
    -
    `, } @@ -299,15 +279,15 @@ func TestRender_RenderParagraphs(t *testing.T) { test := func(t *testing.T, str string, cnt int) { unix := []byte(str) res := string(RenderRaw(unix, "", false)) - assert.Equal(t, strings.Count(res, "