parent
74433c91bf
commit
15c7738b3a
2
go.mod
2
go.mod
|
@ -96,7 +96,7 @@ require (
|
|||
github.com/unknwon/paginater v0.0.0-20151104151617-7748a72e0141
|
||||
github.com/urfave/cli v1.20.0
|
||||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53
|
||||
github.com/yuin/goldmark v1.1.19
|
||||
github.com/yuin/goldmark v1.1.23
|
||||
go.etcd.io/bbolt v1.3.3 // indirect
|
||||
golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d
|
||||
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa
|
||||
|
|
4
go.sum
4
go.sum
|
@ -574,8 +574,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q
|
|||
github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q=
|
||||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53 h1:HsIQ6yAjfjQ3IxPGrTusxp6Qxn92gNVq2x5CbvQvx3w=
|
||||
github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53/go.mod h1:f6elajwZV+xceiaqgRL090YzLEDGSbqr3poGL3ZgXYo=
|
||||
github.com/yuin/goldmark v1.1.19 h1:0s2/60x0XsFCXHeFut+F3azDVAAyIMyUfJRbRexiTYs=
|
||||
github.com/yuin/goldmark v1.1.19/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.1.23 h1:eTodJ8hwEUvwXhb9qxQNuL/q1d+xMQClrXR4mdvV7gs=
|
||||
github.com/yuin/goldmark v1.1.23/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/ziutek/mymysql v1.5.4 h1:GB0qdRGsTwQSBVYuVShFBKaXSnSnYYC2d9knnE1LHFs=
|
||||
github.com/ziutek/mymysql v1.5.4/go.mod h1:LMSpPZ6DbqWFxNCHW77HeMg9I646SAhApZ/wKdgO/C0=
|
||||
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
|
||||
|
|
|
@ -6,48 +6,48 @@ goldmark
|
|||
[![https://coveralls.io/github/yuin/goldmark](https://coveralls.io/repos/github/yuin/goldmark/badge.svg?branch=master)](https://coveralls.io/github/yuin/goldmark)
|
||||
[![https://goreportcard.com/report/github.com/yuin/goldmark](https://goreportcard.com/badge/github.com/yuin/goldmark)](https://goreportcard.com/report/github.com/yuin/goldmark)
|
||||
|
||||
> A Markdown parser written in Go. Easy to extend, standard compliant, well structured.
|
||||
> A Markdown parser written in Go. Easy to extend, standards-compliant, well-structured.
|
||||
|
||||
goldmark is compliant with CommonMark 0.29.
|
||||
|
||||
Motivation
|
||||
----------------------
|
||||
I need a Markdown parser for Go that meets following conditions:
|
||||
I needed a Markdown parser for Go that satisfies the following requirements:
|
||||
|
||||
- Easy to extend.
|
||||
- Markdown is poor in document expressions compared with other light markup languages like reStructuredText.
|
||||
- Markdown is poor in document expressions compared to other light markup languages such as reStructuredText.
|
||||
- We have extensions to the Markdown syntax, e.g. PHP Markdown Extra, GitHub Flavored Markdown.
|
||||
- Standard compliant.
|
||||
- Standards-compliant.
|
||||
- Markdown has many dialects.
|
||||
- GitHub Flavored Markdown is widely used and it is based on CommonMark aside from whether CommonMark is good specification or not.
|
||||
- CommonMark is too complicated and hard to implement.
|
||||
- Well structured.
|
||||
- AST based, and preserves source position of nodes.
|
||||
- GitHub-Flavored Markdown is widely used and is based upon CommonMark, effectively mooting the question of whether or not CommonMark is an ideal specification.
|
||||
- CommonMark is complicated and hard to implement.
|
||||
- Well-structured.
|
||||
- AST-based; preserves source position of nodes.
|
||||
- Written in pure Go.
|
||||
|
||||
[golang-commonmark](https://gitlab.com/golang-commonmark/markdown) may be a good choice, but it seems to be a copy of [markdown-it](https://github.com/markdown-it).
|
||||
|
||||
[blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely used implementation, but it is not CommonMark compliant and cannot be extended from outside of the package since its AST uses structs instead of interfaces.
|
||||
[blackfriday.v2](https://github.com/russross/blackfriday/tree/v2) is a fast and widely-used implementation, but is not CommonMark-compliant and cannot be extended from outside of the package, since its AST uses structs instead of interfaces.
|
||||
|
||||
Furthermore, its behavior differs from other implementations in some cases, especially regarding lists: ([Deep nested lists don't output correctly #329](https://github.com/russross/blackfriday/issues/329), [List block cannot have a second line #244](https://github.com/russross/blackfriday/issues/244), etc).
|
||||
Furthermore, its behavior differs from other implementations in some cases, especially regarding lists: [Deep nested lists don't output correctly #329](https://github.com/russross/blackfriday/issues/329), [List block cannot have a second line #244](https://github.com/russross/blackfriday/issues/244), etc.
|
||||
|
||||
This behavior sometimes causes problems. If you migrate your Markdown text to blackfriday-based wikis from GitHub, many lists will immediately be broken.
|
||||
This behavior sometimes causes problems. If you migrate your Markdown text from GitHub to blackfriday-based wikis, many lists will immediately be broken.
|
||||
|
||||
As mentioned above, CommonMark is too complicated and hard to implement, so Markdown parsers based on CommonMark barely exist.
|
||||
As mentioned above, CommonMark is complicated and hard to implement, so Markdown parsers based on CommonMark are few and far between.
|
||||
|
||||
Features
|
||||
----------------------
|
||||
|
||||
- **Standard compliant.** goldmark gets full compliance with the latest CommonMark spec.
|
||||
- **Standards-compliant.** goldmark is fully compliant with the latest [CommonMark](https://commonmark.org/) specification.
|
||||
- **Extensible.** Do you want to add a `@username` mention syntax to Markdown?
|
||||
You can easily do it in goldmark. You can add your AST nodes,
|
||||
parsers for block level elements, parsers for inline level elements,
|
||||
transformers for paragraphs, transformers for whole AST structure, and
|
||||
You can easily do so in goldmark. You can add your AST nodes,
|
||||
parsers for block-level elements, parsers for inline-level elements,
|
||||
transformers for paragraphs, transformers for the whole AST structure, and
|
||||
renderers.
|
||||
- **Performance.** goldmark performs pretty much equally to cmark,
|
||||
- **Performance.** goldmark's performance is on par with that of cmark,
|
||||
the CommonMark reference implementation written in C.
|
||||
- **Robust.** goldmark is tested with [go-fuzz](https://github.com/dvyukov/go-fuzz), a fuzz testing tool.
|
||||
- **Builtin extensions.** goldmark ships with common extensions like tables, strikethrough,
|
||||
- **Built-in extensions.** goldmark ships with common extensions like tables, strikethrough,
|
||||
task lists, and definition lists.
|
||||
- **Depends only on standard libraries.**
|
||||
|
||||
|
@ -62,7 +62,7 @@ Usage
|
|||
----------------------
|
||||
Import packages:
|
||||
|
||||
```
|
||||
```go
|
||||
import (
|
||||
"bytes"
|
||||
"github.com/yuin/goldmark"
|
||||
|
@ -70,7 +70,7 @@ import (
|
|||
```
|
||||
|
||||
|
||||
Convert Markdown documents with the CommonMark compliant mode:
|
||||
Convert Markdown documents with the CommonMark-compliant mode:
|
||||
|
||||
```go
|
||||
var buf bytes.Buffer
|
||||
|
@ -128,6 +128,14 @@ if err := md.Convert(source, &buf); err != nil {
|
|||
}
|
||||
```
|
||||
|
||||
| Functional option | Type | Description |
|
||||
| ----------------- | ---- | ----------- |
|
||||
| `goldmark.WithParser` | `parser.Parser` | This option must be passed before `goldmark.WithParserOptions` and `goldmark.WithExtensions` |
|
||||
| `goldmark.WithRenderer` | `renderer.Renderer` | This option must be passed before `goldmark.WithRendererOptions` and `goldmark.WithExtensions` |
|
||||
| `goldmark.WithParserOptions` | `...parser.Option` | |
|
||||
| `goldmark.WithRendererOptions` | `...renderer.Option` | |
|
||||
| `goldmark.WithExtensions` | `...goldmark.Extender` | |
|
||||
|
||||
Parser and Renderer options
|
||||
------------------------------
|
||||
|
||||
|
@ -149,7 +157,7 @@ Parser and Renderer options
|
|||
| `html.WithWriter` | `html.Writer` | `html.Writer` for writing contents to an `io.Writer`. |
|
||||
| `html.WithHardWraps` | `-` | Render newlines as `<br>`.|
|
||||
| `html.WithXHTML` | `-` | Render as XHTML. |
|
||||
| `html.WithUnsafe` | `-` | By default, goldmark does not render raw HTML and potentially dangerous links. With this option, goldmark renders these contents as written. |
|
||||
| `html.WithUnsafe` | `-` | By default, goldmark does not render raw HTML or potentially dangerous links. With this option, goldmark renders such content as written. |
|
||||
|
||||
### Built-in extensions
|
||||
|
||||
|
@ -164,7 +172,7 @@ Parser and Renderer options
|
|||
- `extension.GFM`
|
||||
- This extension enables Table, Strikethrough, Linkify and TaskList.
|
||||
- This extension does not filter tags defined in [6.11: Disallowed Raw HTML (extension)](https://github.github.com/gfm/#disallowed-raw-html-extension-).
|
||||
If you need to filter HTML tags, see [Security](#security)
|
||||
If you need to filter HTML tags, see [Security](#security).
|
||||
- `extension.DefinitionList`
|
||||
- [PHP Markdown Extra: Definition lists](https://michelf.ca/projects/php-markdown/extra/#def-list)
|
||||
- `extension.Footnote`
|
||||
|
@ -173,7 +181,7 @@ Parser and Renderer options
|
|||
- This extension substitutes punctuations with typographic entities like [smartypants](https://daringfireball.net/projects/smartypants/).
|
||||
|
||||
### Attributes
|
||||
`parser.WithAttribute` option allows you to define attributes on some elements.
|
||||
The `parser.WithAttribute` option allows you to define attributes on some elements.
|
||||
|
||||
Currently only headings support attributes.
|
||||
|
||||
|
@ -197,7 +205,7 @@ heading {#id .className attrName=attrValue}
|
|||
|
||||
### Typographer extension
|
||||
|
||||
Typographer extension translates plain ASCII punctuation characters into typographic punctuation HTML entities.
|
||||
The Typographer extension translates plain ASCII punctuation characters into typographic-punctuation HTML entities.
|
||||
|
||||
Default substitutions are:
|
||||
|
||||
|
@ -211,7 +219,7 @@ Default substitutions are:
|
|||
| `<<` | `«` |
|
||||
| `>>` | `»` |
|
||||
|
||||
You can overwrite the substitutions by `extensions.WithTypographicSubstitutions`.
|
||||
You can override the default substitutions via `extensions.WithTypographicSubstitutions`:
|
||||
|
||||
```go
|
||||
markdown := goldmark.New(
|
||||
|
@ -226,10 +234,50 @@ markdown := goldmark.New(
|
|||
)
|
||||
```
|
||||
|
||||
### Linkify extension
|
||||
|
||||
The Linkify extension implements [Autolinks(extension)](https://github.github.com/gfm/#autolinks-extension-), as
|
||||
defined in [GitHub Flavored Markdown Spec](https://github.github.com/gfm/).
|
||||
|
||||
Since the spec does not define details about URLs, there are numerous ambiguous cases.
|
||||
|
||||
You can override autolinking patterns via options.
|
||||
|
||||
| Functional option | Type | Description |
|
||||
| ----------------- | ---- | ----------- |
|
||||
| `extension.WithLinkifyAllowedProtocols` | `[][]byte` | List of allowed protocols such as `[][]byte{ []byte("http:") }` |
|
||||
| `extension.WithLinkifyURLRegexp` | `*regexp.Regexp` | Regexp that defines URLs, including protocols |
|
||||
| `extension.WithLinkifyWWWRegexp` | `*regexp.Regexp` | Regexp that defines URLs starting with `www.`. This pattern corresponds to [the extended www autolink](https://github.github.com/gfm/#extended-www-autolink) |
|
||||
| `extension.WithLinkifyEmailRegexp` | `*regexp.Regexp` | Regexp that defines email addresses |
|
||||
|
||||
Example, using [xurls](https://github.com/mvdan/xurls):
|
||||
|
||||
```go
|
||||
import "mvdan.cc/xurls/v2"
|
||||
|
||||
markdown := goldmark.New(
|
||||
goldmark.WithRendererOptions(
|
||||
html.WithXHTML(),
|
||||
html.WithUnsafe(),
|
||||
),
|
||||
goldmark.WithExtensions(
|
||||
extension.NewLinkify(
|
||||
extension.WithLinkifyAllowedProtocols([][]byte{
|
||||
[]byte("http:"),
|
||||
[]byte("https:"),
|
||||
}),
|
||||
extension.WithLinkifyURLRegexp(
|
||||
xurls.Strict(),
|
||||
),
|
||||
),
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
Security
|
||||
--------------------
|
||||
By default, goldmark does not render raw HTML and potentially dangerous URLs.
|
||||
If you need to gain more control over untrusted contents, it is recommended to
|
||||
By default, goldmark does not render raw HTML or potentially-dangerous URLs.
|
||||
If you need to gain more control over untrusted contents, it is recommended that you
|
||||
use an HTML sanitizer such as [bluemonday](https://github.com/microcosm-cc/bluemonday).
|
||||
|
||||
Benchmark
|
||||
|
@ -238,11 +286,10 @@ You can run this benchmark in the `_benchmark` directory.
|
|||
|
||||
### against other golang libraries
|
||||
|
||||
blackfriday v2 seems to be fastest, but it is not CommonMark compliant, so the performance of
|
||||
blackfriday v2 cannot simply be compared with that of the other CommonMark compliant libraries.
|
||||
blackfriday v2 seems to be the fastest, but as it is not CommonMark-compliant, its performance cannot be directly compared to that of the CommonMark-compliant libraries.
|
||||
|
||||
Though goldmark builds clean extensible AST structure and get full compliance with
|
||||
CommonMark, it is reasonably fast and has lower memory consumption.
|
||||
goldmark, meanwhile, builds a clean, extensible AST structure, achieves full compliance with
|
||||
CommonMark, and consumes less memory, all while being reasonably fast.
|
||||
|
||||
```
|
||||
goos: darwin
|
||||
|
@ -268,21 +315,21 @@ iteration: 50
|
|||
average: 0.0040964230 sec
|
||||
```
|
||||
|
||||
As you can see, goldmark performs pretty much equally to cmark.
|
||||
As you can see, goldmark's performance is on par with cmark's.
|
||||
|
||||
Extensions
|
||||
--------------------
|
||||
|
||||
- [goldmark-meta](https://github.com/yuin/goldmark-meta): A YAML metadata
|
||||
extension for the goldmark Markdown parser.
|
||||
- [goldmark-highlighting](https://github.com/yuin/goldmark-highlighting): A Syntax highlighting extension
|
||||
- [goldmark-highlighting](https://github.com/yuin/goldmark-highlighting): A syntax-highlighting extension
|
||||
for the goldmark markdown parser.
|
||||
- [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for goldmark markdown parser
|
||||
- [goldmark-mathjax](https://github.com/litao91/goldmark-mathjax): Mathjax support for the goldmark markdown parser
|
||||
|
||||
goldmark internal(for extension developers)
|
||||
----------------------------------------------
|
||||
### Overview
|
||||
goldmark's Markdown processing is outlined as a bellow diagram.
|
||||
goldmark's Markdown processing is outlined in the diagram below.
|
||||
|
||||
```
|
||||
<Markdown in []byte, parser.Context>
|
||||
|
@ -313,10 +360,11 @@ goldmark's Markdown processing is outlined as a bellow diagram.
|
|||
### Parsing
|
||||
Markdown documents are read through `text.Reader` interface.
|
||||
|
||||
AST nodes do not have concrete text. AST nodes have segment information of the documents. It is represented by `text.Segment` .
|
||||
AST nodes do not have concrete text. AST nodes have segment information of the documents, represented by `text.Segment` .
|
||||
|
||||
`text.Segment` has 3 attributes: `Start`, `End`, `Padding` .
|
||||
|
||||
(TBC)
|
||||
|
||||
**TODO**
|
||||
|
||||
|
|
|
@ -236,10 +236,12 @@ func (n *BaseNode) RemoveChild(self, v Node) {
|
|||
|
||||
// RemoveChildren implements Node.RemoveChildren .
|
||||
func (n *BaseNode) RemoveChildren(self Node) {
|
||||
for c := n.firstChild; c != nil; c = c.NextSibling() {
|
||||
for c := n.firstChild; c != nil; {
|
||||
c.SetParent(nil)
|
||||
c.SetPreviousSibling(nil)
|
||||
next := c.NextSibling()
|
||||
c.SetNextSibling(nil)
|
||||
c = next
|
||||
}
|
||||
n.firstChild = nil
|
||||
n.lastChild = nil
|
||||
|
@ -466,20 +468,25 @@ type Walker func(n Node, entering bool) (WalkStatus, error)
|
|||
|
||||
// Walk walks an AST tree by the depth first search algorithm.
|
||||
func Walk(n Node, walker Walker) error {
|
||||
_, err := walkHelper(n, walker)
|
||||
return err
|
||||
}
|
||||
|
||||
func walkHelper(n Node, walker Walker) (WalkStatus, error) {
|
||||
status, err := walker(n, true)
|
||||
if err != nil || status == WalkStop {
|
||||
return err
|
||||
return status, err
|
||||
}
|
||||
if status != WalkSkipChildren {
|
||||
for c := n.FirstChild(); c != nil; c = c.NextSibling() {
|
||||
if err = Walk(c, walker); err != nil {
|
||||
return err
|
||||
if st, err := walkHelper(c, walker); err != nil || st == WalkStop {
|
||||
return WalkStop, err
|
||||
}
|
||||
}
|
||||
}
|
||||
status, err = walker(n, false)
|
||||
if err != nil || status == WalkStop {
|
||||
return err
|
||||
return WalkStop, err
|
||||
}
|
||||
return nil
|
||||
return WalkContinue, nil
|
||||
}
|
||||
|
|
|
@ -303,11 +303,11 @@ func NewBlockquote() *Blockquote {
|
|||
}
|
||||
}
|
||||
|
||||
// A List structr represents a list of Markdown text.
|
||||
// A List struct represents a list of Markdown text.
|
||||
type List struct {
|
||||
BaseBlock
|
||||
|
||||
// Marker is a markar character like '-', '+', ')' and '.'.
|
||||
// Marker is a marker character like '-', '+', ')' and '.'.
|
||||
Marker byte
|
||||
|
||||
// IsTight is true if this list is a 'tight' list.
|
||||
|
@ -364,7 +364,7 @@ func NewList(marker byte) *List {
|
|||
type ListItem struct {
|
||||
BaseBlock
|
||||
|
||||
// Offset is an offset potision of this item.
|
||||
// Offset is an offset position of this item.
|
||||
Offset int
|
||||
}
|
||||
|
||||
|
|
|
@ -170,7 +170,7 @@ func NewText() *Text {
|
|||
}
|
||||
}
|
||||
|
||||
// NewTextSegment returns a new Text node with the given source potision.
|
||||
// NewTextSegment returns a new Text node with the given source position.
|
||||
func NewTextSegment(v textm.Segment) *Text {
|
||||
return &Text{
|
||||
BaseInline: BaseInline{},
|
||||
|
@ -467,7 +467,7 @@ type AutoLink struct {
|
|||
// Inline implements Inline.Inline.
|
||||
func (n *AutoLink) Inline() {}
|
||||
|
||||
// Dump implenets Node.Dump
|
||||
// Dump implements Node.Dump
|
||||
func (n *AutoLink) Dump(source []byte, level int) {
|
||||
segment := n.value.Segment
|
||||
m := map[string]string{
|
||||
|
|
|
@ -11,7 +11,7 @@ type TaskCheckBox struct {
|
|||
IsChecked bool
|
||||
}
|
||||
|
||||
// Dump impelemtns Node.Dump.
|
||||
// Dump implements Node.Dump.
|
||||
func (n *TaskCheckBox) Dump(source []byte, level int) {
|
||||
m := map[string]string{
|
||||
"Checked": fmt.Sprintf("%v", n.IsChecked),
|
||||
|
|
|
@ -2,27 +2,153 @@ package extension
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"regexp"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
"github.com/yuin/goldmark/text"
|
||||
"github.com/yuin/goldmark/util"
|
||||
"regexp"
|
||||
)
|
||||
|
||||
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
|
||||
var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
|
||||
|
||||
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
|
||||
var urlRegexp = regexp.MustCompile(`^(?:http|https|ftp):\/\/(?:www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]+(?:(?:/|[#?])[-a-zA-Z0-9@:%_+.~#$!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
|
||||
|
||||
type linkifyParser struct {
|
||||
// A LinkifyConfig struct is a data structure that holds configuration of the
|
||||
// Linkify extension.
|
||||
type LinkifyConfig struct {
|
||||
AllowedProtocols [][]byte
|
||||
URLRegexp *regexp.Regexp
|
||||
WWWRegexp *regexp.Regexp
|
||||
EmailRegexp *regexp.Regexp
|
||||
}
|
||||
|
||||
var defaultLinkifyParser = &linkifyParser{}
|
||||
const optLinkifyAllowedProtocols parser.OptionName = "LinkifyAllowedProtocols"
|
||||
const optLinkifyURLRegexp parser.OptionName = "LinkifyURLRegexp"
|
||||
const optLinkifyWWWRegexp parser.OptionName = "LinkifyWWWRegexp"
|
||||
const optLinkifyEmailRegexp parser.OptionName = "LinkifyEmailRegexp"
|
||||
|
||||
// SetOption implements SetOptioner.
|
||||
func (c *LinkifyConfig) SetOption(name parser.OptionName, value interface{}) {
|
||||
switch name {
|
||||
case optLinkifyAllowedProtocols:
|
||||
c.AllowedProtocols = value.([][]byte)
|
||||
case optLinkifyURLRegexp:
|
||||
c.URLRegexp = value.(*regexp.Regexp)
|
||||
case optLinkifyWWWRegexp:
|
||||
c.WWWRegexp = value.(*regexp.Regexp)
|
||||
case optLinkifyEmailRegexp:
|
||||
c.EmailRegexp = value.(*regexp.Regexp)
|
||||
}
|
||||
}
|
||||
|
||||
// A LinkifyOption interface sets options for the Linkify extension.
|
||||
type LinkifyOption interface {
|
||||
parser.Option
|
||||
SetLinkifyOption(*LinkifyConfig)
|
||||
}
|
||||
|
||||
type withLinkifyAllowedProtocols struct {
|
||||
value [][]byte
|
||||
}
|
||||
|
||||
func (o *withLinkifyAllowedProtocols) SetParserOption(c *parser.Config) {
|
||||
c.Options[optLinkifyAllowedProtocols] = o.value
|
||||
}
|
||||
|
||||
func (o *withLinkifyAllowedProtocols) SetLinkifyOption(p *LinkifyConfig) {
|
||||
p.AllowedProtocols = o.value
|
||||
}
|
||||
|
||||
// WithLinkifyAllowedProtocols is a functional option that specifies the allowed
|
||||
// protocols in autolinks. Each protocol must end with ':' like
|
||||
// 'http:' .
|
||||
func WithLinkifyAllowedProtocols(value [][]byte) LinkifyOption {
|
||||
return &withLinkifyAllowedProtocols{
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
type withLinkifyURLRegexp struct {
|
||||
value *regexp.Regexp
|
||||
}
|
||||
|
||||
func (o *withLinkifyURLRegexp) SetParserOption(c *parser.Config) {
|
||||
c.Options[optLinkifyURLRegexp] = o.value
|
||||
}
|
||||
|
||||
func (o *withLinkifyURLRegexp) SetLinkifyOption(p *LinkifyConfig) {
|
||||
p.URLRegexp = o.value
|
||||
}
|
||||
|
||||
// WithLinkifyURLRegexp is a functional option that specifies
|
||||
// a pattern of the URL including a protocol.
|
||||
func WithLinkifyURLRegexp(value *regexp.Regexp) LinkifyOption {
|
||||
return &withLinkifyURLRegexp{
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
// WithLinkifyWWWRegexp is a functional option that specifies
|
||||
// a pattern of the URL without a protocol.
|
||||
// This pattern must start with 'www.' .
|
||||
type withLinkifyWWWRegexp struct {
|
||||
value *regexp.Regexp
|
||||
}
|
||||
|
||||
func (o *withLinkifyWWWRegexp) SetParserOption(c *parser.Config) {
|
||||
c.Options[optLinkifyWWWRegexp] = o.value
|
||||
}
|
||||
|
||||
func (o *withLinkifyWWWRegexp) SetLinkifyOption(p *LinkifyConfig) {
|
||||
p.WWWRegexp = o.value
|
||||
}
|
||||
|
||||
func WithLinkifyWWWRegexp(value *regexp.Regexp) LinkifyOption {
|
||||
return &withLinkifyWWWRegexp{
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
// WithLinkifyEmailRegexp is a functional option that specifies
|
||||
// a pattern of the email address.
|
||||
type withLinkifyEmailRegexp struct {
|
||||
value *regexp.Regexp
|
||||
}
|
||||
|
||||
func (o *withLinkifyEmailRegexp) SetParserOption(c *parser.Config) {
|
||||
c.Options[optLinkifyEmailRegexp] = o.value
|
||||
}
|
||||
|
||||
func (o *withLinkifyEmailRegexp) SetLinkifyOption(p *LinkifyConfig) {
|
||||
p.EmailRegexp = o.value
|
||||
}
|
||||
|
||||
func WithLinkifyEmailRegexp(value *regexp.Regexp) LinkifyOption {
|
||||
return &withLinkifyEmailRegexp{
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
type linkifyParser struct {
|
||||
LinkifyConfig
|
||||
}
|
||||
|
||||
// NewLinkifyParser returns a new InlineParser that can parse
|
||||
// text that seems like a URL.
|
||||
func NewLinkifyParser() parser.InlineParser {
|
||||
return defaultLinkifyParser
|
||||
func NewLinkifyParser(opts ...LinkifyOption) parser.InlineParser {
|
||||
p := &linkifyParser{
|
||||
LinkifyConfig: LinkifyConfig{
|
||||
AllowedProtocols: nil,
|
||||
URLRegexp: urlRegexp,
|
||||
WWWRegexp: wwwURLRegxp,
|
||||
},
|
||||
}
|
||||
for _, o := range opts {
|
||||
o.SetLinkifyOption(&p.LinkifyConfig)
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s *linkifyParser) Trigger() []byte {
|
||||
|
@ -53,14 +179,26 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
|
|||
var m []int
|
||||
var protocol []byte
|
||||
var typ ast.AutoLinkType = ast.AutoLinkURL
|
||||
if s.LinkifyConfig.AllowedProtocols == nil {
|
||||
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
|
||||
m = urlRegexp.FindSubmatchIndex(line)
|
||||
m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
|
||||
}
|
||||
} else {
|
||||
for _, prefix := range s.LinkifyConfig.AllowedProtocols {
|
||||
if bytes.HasPrefix(line, prefix) {
|
||||
m = s.LinkifyConfig.URLRegexp.FindSubmatchIndex(line)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if m == nil && bytes.HasPrefix(line, domainWWW) {
|
||||
m = wwwURLRegxp.FindSubmatchIndex(line)
|
||||
m = s.LinkifyConfig.WWWRegexp.FindSubmatchIndex(line)
|
||||
protocol = []byte("http")
|
||||
}
|
||||
if m != nil {
|
||||
if m != nil && m[0] != 0 {
|
||||
m = nil
|
||||
}
|
||||
if m != nil && m[0] == 0 {
|
||||
lastChar := line[m[1]-1]
|
||||
if lastChar == '.' {
|
||||
m[1]--
|
||||
|
@ -96,7 +234,15 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
|
|||
return nil
|
||||
}
|
||||
typ = ast.AutoLinkEmail
|
||||
stop := util.FindEmailIndex(line)
|
||||
stop := -1
|
||||
if s.LinkifyConfig.EmailRegexp == nil {
|
||||
stop = util.FindEmailIndex(line)
|
||||
} else {
|
||||
m := s.LinkifyConfig.EmailRegexp.FindSubmatchIndex(line)
|
||||
if m != nil && m[0] == 0 {
|
||||
stop = m[1]
|
||||
}
|
||||
}
|
||||
if stop < 0 {
|
||||
return nil
|
||||
}
|
||||
|
@ -136,15 +282,22 @@ func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
|
|||
}
|
||||
|
||||
type linkify struct {
|
||||
options []LinkifyOption
|
||||
}
|
||||
|
||||
// Linkify is an extension that allows you to parse text that seems like a URL.
|
||||
var Linkify = &linkify{}
|
||||
|
||||
func NewLinkify(opts ...LinkifyOption) goldmark.Extender {
|
||||
return &linkify{
|
||||
options: opts,
|
||||
}
|
||||
}
|
||||
|
||||
func (e *linkify) Extend(m goldmark.Markdown) {
|
||||
m.Parser().AddOptions(
|
||||
parser.WithInlineParsers(
|
||||
util.Prioritized(NewLinkifyParser(), 999),
|
||||
util.Prioritized(NewLinkifyParser(e.options...), 999),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ type tableParagraphTransformer struct {
|
|||
var defaultTableParagraphTransformer = &tableParagraphTransformer{}
|
||||
|
||||
// NewTableParagraphTransformer returns a new ParagraphTransformer
|
||||
// that can transform pargraphs into tables.
|
||||
// that can transform paragraphs into tables.
|
||||
func NewTableParagraphTransformer() parser.ParagraphTransformer {
|
||||
return defaultTableParagraphTransformer
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package extension
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
"github.com/yuin/goldmark"
|
||||
gast "github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/parser"
|
||||
|
@ -31,6 +33,8 @@ const (
|
|||
LeftAngleQuote
|
||||
// RightAngleQuote is >>
|
||||
RightAngleQuote
|
||||
// Apostrophe is '
|
||||
Apostrophe
|
||||
|
||||
typographicPunctuationMax
|
||||
)
|
||||
|
@ -52,6 +56,7 @@ func newDefaultSubstitutions() [][]byte {
|
|||
replacements[Ellipsis] = []byte("…")
|
||||
replacements[LeftAngleQuote] = []byte("«")
|
||||
replacements[RightAngleQuote] = []byte("»")
|
||||
replacements[Apostrophe] = []byte("’")
|
||||
|
||||
return replacements
|
||||
}
|
||||
|
@ -189,6 +194,26 @@ func (s *typographerParser) Parse(parent gast.Node, block text.Reader, pc parser
|
|||
return nil
|
||||
}
|
||||
if c == '\'' {
|
||||
if s.Substitutions[Apostrophe] != nil {
|
||||
// Handle decade abbreviations such as '90s
|
||||
if d.CanOpen && !d.CanClose && len(line) > 3 && util.IsNumeric(line[1]) && util.IsNumeric(line[2]) && line[3] == 's' {
|
||||
after := util.ToRune(line, 4)
|
||||
if len(line) == 3 || unicode.IsSpace(after) || unicode.IsPunct(after) {
|
||||
node := gast.NewString(s.Substitutions[Apostrophe])
|
||||
node.SetCode(true)
|
||||
block.Advance(1)
|
||||
return node
|
||||
}
|
||||
}
|
||||
// Convert normal apostrophes. This is probably more flexible than necessary but
|
||||
// converts any apostrophe in between two alphanumerics.
|
||||
if len(line) > 1 && (unicode.IsDigit(before) || unicode.IsLetter(before)) && (util.IsAlphaNumeric(line[1])) {
|
||||
node := gast.NewString(s.Substitutions[Apostrophe])
|
||||
node.SetCode(true)
|
||||
block.Advance(1)
|
||||
return node
|
||||
}
|
||||
}
|
||||
if s.Substitutions[LeftSingleQuote] != nil && d.CanOpen && !d.CanClose {
|
||||
node := gast.NewString(s.Substitutions[LeftSingleQuote])
|
||||
node.SetCode(true)
|
||||
|
@ -228,10 +253,10 @@ type typographer struct {
|
|||
options []TypographerOption
|
||||
}
|
||||
|
||||
// Typographer is an extension that repalace punctuations with typographic entities.
|
||||
// Typographer is an extension that replaces punctuations with typographic entities.
|
||||
var Typographer = &typographer{}
|
||||
|
||||
// NewTypographer returns a new Entender that repalace punctuations with typographic entities.
|
||||
// NewTypographer returns a new Extender that replaces punctuations with typographic entities.
|
||||
func NewTypographer(opts ...TypographerOption) goldmark.Extender {
|
||||
return &typographer{
|
||||
options: opts,
|
||||
|
|
|
@ -11,7 +11,7 @@ import (
|
|||
)
|
||||
|
||||
// A DelimiterProcessor interface provides a set of functions about
|
||||
// Deliiter nodes.
|
||||
// Delimiter nodes.
|
||||
type DelimiterProcessor interface {
|
||||
// IsDelimiter returns true if given character is a delimiter, otherwise false.
|
||||
IsDelimiter(byte) bool
|
||||
|
@ -38,7 +38,7 @@ type Delimiter struct {
|
|||
// See https://spec.commonmark.org/0.29/#can-open-emphasis for details.
|
||||
CanClose bool
|
||||
|
||||
// Length is a remaining length of this delmiter.
|
||||
// Length is a remaining length of this delimiter.
|
||||
Length int
|
||||
|
||||
// OriginalLength is the original length of this delimiter.
|
||||
|
|
|
@ -147,11 +147,6 @@ func (s *linkParser) Parse(parent ast.Node, block text.Reader, pc Context) ast.N
|
|||
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
|
||||
return nil
|
||||
}
|
||||
labelValue := block.Value(text.NewSegment(last.Segment.Start+1, segment.Start))
|
||||
if util.IsBlank(labelValue) && !last.IsImage {
|
||||
ast.MergeOrReplaceTextSegment(last.Parent(), last, last.Segment)
|
||||
return nil
|
||||
}
|
||||
|
||||
c := block.Peek()
|
||||
l, pos := block.Position()
|
||||
|
@ -351,14 +346,31 @@ func parseLinkTitle(block text.Reader) ([]byte, bool) {
|
|||
if opener == '(' {
|
||||
closer = ')'
|
||||
}
|
||||
savedLine, savedPosition := block.Position()
|
||||
var title []byte
|
||||
for i := 0; ; i++ {
|
||||
line, _ := block.PeekLine()
|
||||
pos := util.FindClosure(line[1:], opener, closer, false, true)
|
||||
if pos < 0 {
|
||||
if line == nil {
|
||||
block.SetPosition(savedLine, savedPosition)
|
||||
return nil, false
|
||||
}
|
||||
pos += 2 // opener + closer
|
||||
offset := 0
|
||||
if i == 0 {
|
||||
offset = 1
|
||||
}
|
||||
pos := util.FindClosure(line[offset:], opener, closer, false, true)
|
||||
if pos < 0 {
|
||||
title = append(title, line[offset:]...)
|
||||
block.AdvanceLine()
|
||||
continue
|
||||
}
|
||||
pos += offset + 1 // 1: closer
|
||||
block.Advance(pos)
|
||||
return line[1 : pos-1], true
|
||||
if i == 0 { // avoid allocating new slice
|
||||
return line[offset : pos-1], true
|
||||
}
|
||||
return append(title, line[offset:pos-1]...), true
|
||||
}
|
||||
}
|
||||
|
||||
func (s *linkParser) CloseBlock(parent ast.Node, block text.Reader, pc Context) {
|
||||
|
|
|
@ -459,7 +459,7 @@ type Parser interface {
|
|||
// Parse parses the given Markdown text into AST nodes.
|
||||
Parse(reader text.Reader, opts ...ParseOption) ast.Node
|
||||
|
||||
// AddOption adds the given option to thie parser.
|
||||
// AddOptions adds the given options to this parser.
|
||||
AddOptions(...Option)
|
||||
}
|
||||
|
||||
|
@ -505,7 +505,7 @@ type BlockParser interface {
|
|||
// Close will be called when the parser returns Close.
|
||||
Close(node ast.Node, reader text.Reader, pc Context)
|
||||
|
||||
// CanInterruptParagraph returns true if the parser can interrupt pargraphs,
|
||||
// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
|
||||
// otherwise false.
|
||||
CanInterruptParagraph() bool
|
||||
|
||||
|
|
|
@ -660,13 +660,13 @@ func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter)
|
|||
}
|
||||
}
|
||||
|
||||
// A Writer interface wirtes textual contents to a writer.
|
||||
// A Writer interface writes textual contents to a writer.
|
||||
type Writer interface {
|
||||
// Write writes the given source to writer with resolving references and unescaping
|
||||
// backslash escaped characters.
|
||||
Write(writer util.BufWriter, source []byte)
|
||||
|
||||
// RawWrite wirtes the given source to writer without resolving references and
|
||||
// RawWrite writes the given source to writer without resolving references and
|
||||
// unescaping backslash escaped characters.
|
||||
RawWrite(writer util.BufWriter, source []byte)
|
||||
}
|
||||
|
|
|
@ -7,7 +7,7 @@ import (
|
|||
|
||||
var space = []byte(" ")
|
||||
|
||||
// A Segment struct holds information about source potisions.
|
||||
// A Segment struct holds information about source positions.
|
||||
type Segment struct {
|
||||
// Start is a start position of the segment.
|
||||
Start int
|
||||
|
@ -197,7 +197,7 @@ func (s *Segments) Sliced(lo, hi int) []Segment {
|
|||
return s.values[lo:hi]
|
||||
}
|
||||
|
||||
// Clear delete all element of the collction.
|
||||
// Clear deletes all elements of the collection.
|
||||
func (s *Segments) Clear() {
|
||||
s.values = nil
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -8,7 +8,6 @@ import (
|
|||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
|
@ -55,7 +54,7 @@ func (b *CopyOnWriteBuffer) IsCopied() bool {
|
|||
return b.copied
|
||||
}
|
||||
|
||||
// IsEscapedPunctuation returns true if caracter at a given index i
|
||||
// IsEscapedPunctuation returns true if character at a given index i
|
||||
// is an escaped punctuation, otherwise false.
|
||||
func IsEscapedPunctuation(source []byte, i int) bool {
|
||||
return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
|
||||
|
@ -229,7 +228,7 @@ func IndentWidth(bs []byte, currentPos int) (width, pos int) {
|
|||
return
|
||||
}
|
||||
|
||||
// FirstNonSpacePosition returns a potisoin line that is a first nonspace
|
||||
// FirstNonSpacePosition returns the position in a line of the first nonspace
|
||||
// character.
|
||||
func FirstNonSpacePosition(bs []byte) int {
|
||||
i := 0
|
||||
|
@ -387,6 +386,52 @@ func TrimRightSpace(source []byte) []byte {
|
|||
return TrimRight(source, spaces)
|
||||
}
|
||||
|
||||
// DoFullUnicodeCaseFolding performs full Unicode case folding on the given bytes.
|
||||
func DoFullUnicodeCaseFolding(v []byte) []byte {
|
||||
var rbuf []byte
|
||||
cob := NewCopyOnWriteBuffer(v)
|
||||
n := 0
|
||||
for i := 0; i < len(v); i++ {
|
||||
c := v[i]
|
||||
if c < 0xb5 {
|
||||
if c >= 0x41 && c <= 0x5a {
|
||||
// A-Z to a-z
|
||||
cob.Write(v[n:i])
|
||||
cob.WriteByte(c + 32)
|
||||
n = i + 1
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if !utf8.RuneStart(c) {
|
||||
continue
|
||||
}
|
||||
r, length := utf8.DecodeRune(v[i:])
|
||||
if r == utf8.RuneError {
|
||||
continue
|
||||
}
|
||||
folded, ok := unicodeCaseFoldings[r]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
cob.Write(v[n:i])
|
||||
if rbuf == nil {
|
||||
rbuf = make([]byte, 4)
|
||||
}
|
||||
for _, f := range folded {
|
||||
l := utf8.EncodeRune(rbuf, f)
|
||||
cob.Write(rbuf[:l])
|
||||
}
|
||||
i += length - 1
|
||||
n = i + 1
|
||||
}
|
||||
if cob.IsCopied() {
|
||||
cob.Write(v[n:])
|
||||
}
|
||||
return cob.Bytes()
|
||||
}
|
||||
|
||||
// ReplaceSpaces replaces sequence of spaces with the given repl.
|
||||
func ReplaceSpaces(source []byte, repl byte) []byte {
|
||||
var ret []byte
|
||||
|
@ -439,13 +484,14 @@ func ToValidRune(v rune) rune {
|
|||
return v
|
||||
}
|
||||
|
||||
// ToLinkReference convert given bytes into a valid link reference string.
|
||||
// ToLinkReference trims leading and trailing spaces and convert into lower
|
||||
// ToLinkReference converts given bytes into a valid link reference string.
|
||||
// ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower
|
||||
// case and replaces spaces with a single space character.
|
||||
func ToLinkReference(v []byte) string {
|
||||
v = TrimLeftSpace(v)
|
||||
v = TrimRightSpace(v)
|
||||
return strings.ToLower(string(ReplaceSpaces(v, ' ')))
|
||||
v = DoFullUnicodeCaseFolding(v)
|
||||
return string(ReplaceSpaces(v, ' '))
|
||||
}
|
||||
|
||||
var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("""), nil, nil, nil, []byte("&"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("<"), nil, []byte(">"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}
|
||||
|
@ -589,7 +635,7 @@ var htmlSpace = []byte("%20")
|
|||
// 2. resolve numeric references
|
||||
// 3. resolve entity references
|
||||
//
|
||||
// URL encoded values (%xx) are keeped as is.
|
||||
// URL encoded values (%xx) are kept as is.
|
||||
func URLEscape(v []byte, resolveReference bool) []byte {
|
||||
if resolveReference {
|
||||
v = UnescapePunctuations(v)
|
||||
|
|
|
@ -450,7 +450,7 @@ github.com/willf/bitset
|
|||
github.com/xanzy/ssh-agent
|
||||
# github.com/yohcop/openid-go v0.0.0-20160914080427-2c050d2dae53
|
||||
github.com/yohcop/openid-go
|
||||
# github.com/yuin/goldmark v1.1.19
|
||||
# github.com/yuin/goldmark v1.1.23
|
||||
github.com/yuin/goldmark
|
||||
github.com/yuin/goldmark/ast
|
||||
github.com/yuin/goldmark/extension
|
||||
|
|
Loading…
Reference in New Issue