Replace linkRegex with xurls library (#6261)
* Replace linkRegex with xurls library

  Rather than maintaining a complicated regex to match URLs for autolinking, Gitea can use this existing Go library, which takes care of the matching with very little code change to Gitea itself. After spending a while trying to find the perfect regex for all cases, this library still works better, as it is more flexible than a single regex ever will be.

  This will also fix the following issues: #5844, #3095, #3381.

  This passes all our current tests, and I've added new ones mentioned in those issues as well.

* Use xurls.StrictMatchingScheme instead of xurls.Strict

  This is much faster, and we only care about https? links to preserve existing behavior.

release/v1.15
parent
01bd1fcd33
commit
f2de5dc8c8
|
@ -725,6 +725,14 @@
|
||||||
pruneopts = "NUT"
|
pruneopts = "NUT"
|
||||||
revision = "02ccfbfaf0cc627aa3aec8ef7ed5cfeec5b43f63"
|
revision = "02ccfbfaf0cc627aa3aec8ef7ed5cfeec5b43f63"
|
||||||
|
|
||||||
|
[[projects]]
|
||||||
|
digest = "1:63953ffb90bbc880c612d576fcfd973a5904277d25ec9e2d8d5719bf67969662"
|
||||||
|
name = "github.com/mvdan/xurls"
|
||||||
|
packages = ["."]
|
||||||
|
pruneopts = "NUT"
|
||||||
|
revision = "e52e821cbfe8fe163ff6f8628ab5869b11fc05af"
|
||||||
|
version = "v2.0.0"
|
||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
digest = "1:2be1d891535ce3d6d2a3db9087f07415e909744e9eff1a30f8f0b2519df60ae6"
|
digest = "1:2be1d891535ce3d6d2a3db9087f07415e909744e9eff1a30f8f0b2519df60ae6"
|
||||||
name = "github.com/nfnt/resize"
|
name = "github.com/nfnt/resize"
|
||||||
|
@ -1293,6 +1301,7 @@
|
||||||
"github.com/mcuadros/go-version",
|
"github.com/mcuadros/go-version",
|
||||||
"github.com/microcosm-cc/bluemonday",
|
"github.com/microcosm-cc/bluemonday",
|
||||||
"github.com/msteinert/pam",
|
"github.com/msteinert/pam",
|
||||||
|
"github.com/mvdan/xurls",
|
||||||
"github.com/nfnt/resize",
|
"github.com/nfnt/resize",
|
||||||
"github.com/pquerna/otp",
|
"github.com/pquerna/otp",
|
||||||
"github.com/pquerna/otp/totp",
|
"github.com/pquerna/otp/totp",
|
||||||
|
|
|
@ -113,3 +113,7 @@ ignored = ["google.golang.org/appengine*"]
|
||||||
[[constraint]]
|
[[constraint]]
|
||||||
name = "github.com/prometheus/client_golang"
|
name = "github.com/prometheus/client_golang"
|
||||||
version = "0.9.0"
|
version = "0.9.0"
|
||||||
|
|
||||||
|
[[constraint]]
|
||||||
|
name = "github.com/mvdan/xurls"
|
||||||
|
version = "2.0.0"
|
||||||
|
|
|
@ -17,6 +17,7 @@ import (
|
||||||
"code.gitea.io/gitea/modules/util"
|
"code.gitea.io/gitea/modules/util"
|
||||||
|
|
||||||
"github.com/Unknwon/com"
|
"github.com/Unknwon/com"
|
||||||
|
"github.com/mvdan/xurls"
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
"golang.org/x/net/html/atom"
|
"golang.org/x/net/html/atom"
|
||||||
)
|
)
|
||||||
|
@ -64,9 +65,7 @@ var (
|
||||||
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
|
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
|
||||||
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
|
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
|
||||||
|
|
||||||
// matches http/https links. used for autlinking those. partly modified from
|
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
|
||||||
// the original present in autolink.js
|
|
||||||
linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// regexp for full links to issues/pulls
|
// regexp for full links to issues/pulls
|
||||||
|
|
|
@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) {
|
||||||
test(
|
test(
|
||||||
"http://142.42.1.1/",
|
"http://142.42.1.1/",
|
||||||
`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`)
|
`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`)
|
||||||
|
test(
|
||||||
|
"https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd",
|
||||||
|
`<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`)
|
||||||
|
test(
|
||||||
|
"https://en.wikipedia.org/wiki/URL_(disambiguation)",
|
||||||
|
`<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`)
|
||||||
|
test(
|
||||||
|
"https://foo_bar.example.com/",
|
||||||
|
`<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`)
|
||||||
|
|
||||||
// Test that should *not* be turned into URL
|
// Test that should *not* be turned into URL
|
||||||
test(
|
test(
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
Copyright (c) 2015, Daniel Martí. All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following disclaimer
|
||||||
|
in the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
* Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived from
|
||||||
|
this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1,299 @@
|
||||||
|
// Generated by schemesgen
|
||||||
|
|
||||||
|
package xurls
|
||||||
|
|
||||||
|
// Schemes is a sorted list of all IANA assigned schemes.
|
||||||
|
//
|
||||||
|
// Source:
|
||||||
|
// https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv
|
||||||
|
var Schemes = []string{
|
||||||
|
`aaa`,
|
||||||
|
`aaas`,
|
||||||
|
`about`,
|
||||||
|
`acap`,
|
||||||
|
`acct`,
|
||||||
|
`acr`,
|
||||||
|
`adiumxtra`,
|
||||||
|
`afp`,
|
||||||
|
`afs`,
|
||||||
|
`aim`,
|
||||||
|
`appdata`,
|
||||||
|
`apt`,
|
||||||
|
`attachment`,
|
||||||
|
`aw`,
|
||||||
|
`barion`,
|
||||||
|
`beshare`,
|
||||||
|
`bitcoin`,
|
||||||
|
`bitcoincash`,
|
||||||
|
`blob`,
|
||||||
|
`bolo`,
|
||||||
|
`browserext`,
|
||||||
|
`callto`,
|
||||||
|
`cap`,
|
||||||
|
`chrome`,
|
||||||
|
`chrome-extension`,
|
||||||
|
`cid`,
|
||||||
|
`coap`,
|
||||||
|
`coap+tcp`,
|
||||||
|
`coap+ws`,
|
||||||
|
`coaps`,
|
||||||
|
`coaps+tcp`,
|
||||||
|
`coaps+ws`,
|
||||||
|
`com-eventbrite-attendee`,
|
||||||
|
`content`,
|
||||||
|
`conti`,
|
||||||
|
`crid`,
|
||||||
|
`cvs`,
|
||||||
|
`data`,
|
||||||
|
`dav`,
|
||||||
|
`diaspora`,
|
||||||
|
`dict`,
|
||||||
|
`did`,
|
||||||
|
`dis`,
|
||||||
|
`dlna-playcontainer`,
|
||||||
|
`dlna-playsingle`,
|
||||||
|
`dns`,
|
||||||
|
`dntp`,
|
||||||
|
`dtn`,
|
||||||
|
`dvb`,
|
||||||
|
`ed2k`,
|
||||||
|
`elsi`,
|
||||||
|
`example`,
|
||||||
|
`facetime`,
|
||||||
|
`fax`,
|
||||||
|
`feed`,
|
||||||
|
`feedready`,
|
||||||
|
`file`,
|
||||||
|
`filesystem`,
|
||||||
|
`finger`,
|
||||||
|
`fish`,
|
||||||
|
`ftp`,
|
||||||
|
`geo`,
|
||||||
|
`gg`,
|
||||||
|
`git`,
|
||||||
|
`gizmoproject`,
|
||||||
|
`go`,
|
||||||
|
`gopher`,
|
||||||
|
`graph`,
|
||||||
|
`gtalk`,
|
||||||
|
`h323`,
|
||||||
|
`ham`,
|
||||||
|
`hcap`,
|
||||||
|
`hcp`,
|
||||||
|
`http`,
|
||||||
|
`https`,
|
||||||
|
`hxxp`,
|
||||||
|
`hxxps`,
|
||||||
|
`hydrazone`,
|
||||||
|
`iax`,
|
||||||
|
`icap`,
|
||||||
|
`icon`,
|
||||||
|
`im`,
|
||||||
|
`imap`,
|
||||||
|
`info`,
|
||||||
|
`iotdisco`,
|
||||||
|
`ipn`,
|
||||||
|
`ipp`,
|
||||||
|
`ipps`,
|
||||||
|
`irc`,
|
||||||
|
`irc6`,
|
||||||
|
`ircs`,
|
||||||
|
`iris`,
|
||||||
|
`iris.beep`,
|
||||||
|
`iris.lwz`,
|
||||||
|
`iris.xpc`,
|
||||||
|
`iris.xpcs`,
|
||||||
|
`isostore`,
|
||||||
|
`itms`,
|
||||||
|
`jabber`,
|
||||||
|
`jar`,
|
||||||
|
`jms`,
|
||||||
|
`keyparc`,
|
||||||
|
`lastfm`,
|
||||||
|
`ldap`,
|
||||||
|
`ldaps`,
|
||||||
|
`lvlt`,
|
||||||
|
`magnet`,
|
||||||
|
`mailserver`,
|
||||||
|
`mailto`,
|
||||||
|
`maps`,
|
||||||
|
`market`,
|
||||||
|
`message`,
|
||||||
|
`microsoft.windows.camera`,
|
||||||
|
`microsoft.windows.camera.multipicker`,
|
||||||
|
`microsoft.windows.camera.picker`,
|
||||||
|
`mid`,
|
||||||
|
`mms`,
|
||||||
|
`modem`,
|
||||||
|
`mongodb`,
|
||||||
|
`moz`,
|
||||||
|
`ms-access`,
|
||||||
|
`ms-browser-extension`,
|
||||||
|
`ms-drive-to`,
|
||||||
|
`ms-enrollment`,
|
||||||
|
`ms-excel`,
|
||||||
|
`ms-gamebarservices`,
|
||||||
|
`ms-gamingoverlay`,
|
||||||
|
`ms-getoffice`,
|
||||||
|
`ms-help`,
|
||||||
|
`ms-infopath`,
|
||||||
|
`ms-inputapp`,
|
||||||
|
`ms-lockscreencomponent-config`,
|
||||||
|
`ms-media-stream-id`,
|
||||||
|
`ms-mixedrealitycapture`,
|
||||||
|
`ms-officeapp`,
|
||||||
|
`ms-people`,
|
||||||
|
`ms-project`,
|
||||||
|
`ms-powerpoint`,
|
||||||
|
`ms-publisher`,
|
||||||
|
`ms-restoretabcompanion`,
|
||||||
|
`ms-screenclip`,
|
||||||
|
`ms-screensketch`,
|
||||||
|
`ms-search`,
|
||||||
|
`ms-search-repair`,
|
||||||
|
`ms-secondary-screen-controller`,
|
||||||
|
`ms-secondary-screen-setup`,
|
||||||
|
`ms-settings`,
|
||||||
|
`ms-settings-airplanemode`,
|
||||||
|
`ms-settings-bluetooth`,
|
||||||
|
`ms-settings-camera`,
|
||||||
|
`ms-settings-cellular`,
|
||||||
|
`ms-settings-cloudstorage`,
|
||||||
|
`ms-settings-connectabledevices`,
|
||||||
|
`ms-settings-displays-topology`,
|
||||||
|
`ms-settings-emailandaccounts`,
|
||||||
|
`ms-settings-language`,
|
||||||
|
`ms-settings-location`,
|
||||||
|
`ms-settings-lock`,
|
||||||
|
`ms-settings-nfctransactions`,
|
||||||
|
`ms-settings-notifications`,
|
||||||
|
`ms-settings-power`,
|
||||||
|
`ms-settings-privacy`,
|
||||||
|
`ms-settings-proximity`,
|
||||||
|
`ms-settings-screenrotation`,
|
||||||
|
`ms-settings-wifi`,
|
||||||
|
`ms-settings-workplace`,
|
||||||
|
`ms-spd`,
|
||||||
|
`ms-sttoverlay`,
|
||||||
|
`ms-transit-to`,
|
||||||
|
`ms-useractivityset`,
|
||||||
|
`ms-virtualtouchpad`,
|
||||||
|
`ms-visio`,
|
||||||
|
`ms-walk-to`,
|
||||||
|
`ms-whiteboard`,
|
||||||
|
`ms-whiteboard-cmd`,
|
||||||
|
`ms-word`,
|
||||||
|
`msnim`,
|
||||||
|
`msrp`,
|
||||||
|
`msrps`,
|
||||||
|
`mtqp`,
|
||||||
|
`mumble`,
|
||||||
|
`mupdate`,
|
||||||
|
`mvn`,
|
||||||
|
`news`,
|
||||||
|
`nfs`,
|
||||||
|
`ni`,
|
||||||
|
`nih`,
|
||||||
|
`nntp`,
|
||||||
|
`notes`,
|
||||||
|
`ocf`,
|
||||||
|
`oid`,
|
||||||
|
`onenote`,
|
||||||
|
`onenote-cmd`,
|
||||||
|
`opaquelocktoken`,
|
||||||
|
`openpgp4fpr`,
|
||||||
|
`pack`,
|
||||||
|
`palm`,
|
||||||
|
`paparazzi`,
|
||||||
|
`pkcs11`,
|
||||||
|
`platform`,
|
||||||
|
`pop`,
|
||||||
|
`pres`,
|
||||||
|
`prospero`,
|
||||||
|
`proxy`,
|
||||||
|
`pwid`,
|
||||||
|
`psyc`,
|
||||||
|
`qb`,
|
||||||
|
`query`,
|
||||||
|
`redis`,
|
||||||
|
`rediss`,
|
||||||
|
`reload`,
|
||||||
|
`res`,
|
||||||
|
`resource`,
|
||||||
|
`rmi`,
|
||||||
|
`rsync`,
|
||||||
|
`rtmfp`,
|
||||||
|
`rtmp`,
|
||||||
|
`rtsp`,
|
||||||
|
`rtsps`,
|
||||||
|
`rtspu`,
|
||||||
|
`secondlife`,
|
||||||
|
`service`,
|
||||||
|
`session`,
|
||||||
|
`sftp`,
|
||||||
|
`sgn`,
|
||||||
|
`shttp`,
|
||||||
|
`sieve`,
|
||||||
|
`simpleledger`,
|
||||||
|
`sip`,
|
||||||
|
`sips`,
|
||||||
|
`skype`,
|
||||||
|
`smb`,
|
||||||
|
`sms`,
|
||||||
|
`smtp`,
|
||||||
|
`snews`,
|
||||||
|
`snmp`,
|
||||||
|
`soap.beep`,
|
||||||
|
`soap.beeps`,
|
||||||
|
`soldat`,
|
||||||
|
`spiffe`,
|
||||||
|
`spotify`,
|
||||||
|
`ssh`,
|
||||||
|
`steam`,
|
||||||
|
`stun`,
|
||||||
|
`stuns`,
|
||||||
|
`submit`,
|
||||||
|
`svn`,
|
||||||
|
`tag`,
|
||||||
|
`teamspeak`,
|
||||||
|
`tel`,
|
||||||
|
`teliaeid`,
|
||||||
|
`telnet`,
|
||||||
|
`tftp`,
|
||||||
|
`things`,
|
||||||
|
`thismessage`,
|
||||||
|
`tip`,
|
||||||
|
`tn3270`,
|
||||||
|
`tool`,
|
||||||
|
`turn`,
|
||||||
|
`turns`,
|
||||||
|
`tv`,
|
||||||
|
`udp`,
|
||||||
|
`unreal`,
|
||||||
|
`urn`,
|
||||||
|
`ut2004`,
|
||||||
|
`v-event`,
|
||||||
|
`vemmi`,
|
||||||
|
`ventrilo`,
|
||||||
|
`videotex`,
|
||||||
|
`vnc`,
|
||||||
|
`view-source`,
|
||||||
|
`wais`,
|
||||||
|
`webcal`,
|
||||||
|
`wpid`,
|
||||||
|
`ws`,
|
||||||
|
`wss`,
|
||||||
|
`wtai`,
|
||||||
|
`wyciwyg`,
|
||||||
|
`xcon`,
|
||||||
|
`xcon-userid`,
|
||||||
|
`xfire`,
|
||||||
|
`xmlrpc.beep`,
|
||||||
|
`xmlrpc.beeps`,
|
||||||
|
`xmpp`,
|
||||||
|
`xri`,
|
||||||
|
`ymsgr`,
|
||||||
|
`z39.50`,
|
||||||
|
`z39.50r`,
|
||||||
|
`z39.50s`,
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,24 @@
|
||||||
|
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
|
||||||
|
// See LICENSE for licensing information
|
||||||
|
|
||||||
|
package xurls
|
||||||
|
|
||||||
|
// PseudoTLDs is a sorted list of some widely used unofficial TLDs.
|
||||||
|
//
|
||||||
|
// Sources:
|
||||||
|
// * https://en.wikipedia.org/wiki/Pseudo-top-level_domain
|
||||||
|
// * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains
|
||||||
|
// * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00
|
||||||
|
// * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml
|
||||||
|
//
// relaxedExp appends this list to TLDs when it builds its host-name pattern.
var PseudoTLDs = []string{
|
||||||
|
`bit`, // Namecoin
|
||||||
|
`example`, // Example domain
|
||||||
|
`exit`, // Tor exit node
|
||||||
|
`gnu`, // GNS by public key
|
||||||
|
`i2p`, // I2P network
|
||||||
|
`invalid`, // Invalid domain
|
||||||
|
`local`, // Local network
|
||||||
|
`localhost`, // Local network
|
||||||
|
`test`, // Test domain
|
||||||
|
`zkey`, // GNS domain name
|
||||||
|
}
|
|
@ -0,0 +1,107 @@
|
||||||
|
// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
|
||||||
|
// See LICENSE for licensing information
|
||||||
|
|
||||||
|
// Package xurls extracts urls from plain text using regular expressions.
|
||||||
|
package xurls
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"regexp"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:generate go run generate/tldsgen/main.go
|
||||||
|
//go:generate go run generate/schemesgen/main.go
|
||||||
|
|
||||||
|
// Regular-expression fragments from which the URL patterns are assembled.
// Fragments whose name ends in "Char" are character-class contents: they are
// only ever spliced between '[' and ']' by the fragments further down.
const (
|
||||||
|
// letter, mark and number are the Unicode general categories L, M and N.
letter = `\p{L}`
|
||||||
|
mark = `\p{M}`
|
||||||
|
number = `\p{N}`
|
||||||
|
// iriChar is the class content "letter, mark or number".
iriChar = letter + mark + number
|
||||||
|
// currency and otherSymb are the Unicode categories Sc and So.
currency = `\p{Sc}`
|
||||||
|
otherSymb = `\p{So}`
|
||||||
|
// endChar lists the characters that may terminate a run in pathCont:
// iriChar, a handful of ASCII symbols, currency signs and other symbols.
endChar = iriChar + `/\-+_&~*%=#` + currency + otherSymb
|
||||||
|
// otherPunc is the Unicode category Po.
otherPunc = `\p{Po}`
|
||||||
|
// midChar is endChar plus a literal '|' and otherPunc. Because it is used
// inside [...] only, the '|' here is a plain character, not an alternation.
midChar = endChar + `|` + otherPunc
|
||||||
|
// wellParen, wellBrack and wellBrace each match a balanced (...), [...] or
// {...} group of midChar characters that may itself contain balanced groups
// of the same kind, one level deep.
wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
|
||||||
|
wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
|
||||||
|
wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`
|
||||||
|
// wellAll matches any one of the three balanced groups above.
wellAll = wellParen + `|` + wellBrack + `|` + wellBrace
|
||||||
|
// pathCont matches one or more runs of midChar characters, each run
// finishing with at least one balanced group or endChar character.
pathCont = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+`
|
||||||
|
|
||||||
|
// iri matches a label that starts and ends with an iriChar and may contain
// '-' in between.
iri = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?`
|
||||||
|
// domain matches one or more iri labels, each followed by a dot.
domain = `(` + iri + `\.)+`
|
||||||
|
// octet matches a decimal number from 0 to 255 without leading zeros.
octet = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
|
||||||
|
// ipv4Addr matches four dot-separated octets delimited by word boundaries.
ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b`
|
||||||
|
// ipv6Addr matches colon-separated groups of hexadecimal digits, optionally
// ending in a dotted-decimal part.
// NOTE(review): presumably covers all IPv6 textual forms; the alternation
// has not been verified branch by branch.
ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:`
|
||||||
|
// ipAddr matches either address family.
ipAddr = `(` + ipv4Addr + `|` + ipv6Addr + `)`
|
||||||
|
// port matches an optional ':' followed by zero or more digits.
port = `(:[0-9]*)?`
|
||||||
|
)
|
||||||
|
|
||||||
|
// AnyScheme can be passed to StrictMatchingScheme to match any possibly valid
|
||||||
|
// scheme, and not just the known ones.
|
||||||
|
//
// The pattern has two alternatives: an ASCII letter followed by any number of
// ASCII letters, '.', '-' or '+' and then "://", or one of the
// SchemesNoAuthority entries followed by ":".
var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
|
||||||
|
|
||||||
|
// SchemesNoAuthority is a sorted list of some well-known url schemes that are
|
||||||
|
// followed by ":" instead of "://".
|
||||||
|
//
// AnyScheme and strictExp pass these entries to anyOf, which escapes each one
// with regexp.QuoteMeta before joining them into an alternation.
var SchemesNoAuthority = []string{
|
||||||
|
`bitcoin`, // Bitcoin
|
||||||
|
`file`, // Files
|
||||||
|
`magnet`, // Torrent magnets
|
||||||
|
`mailto`, // Mail
|
||||||
|
`sms`, // SMS
|
||||||
|
`tel`, // Telephone
|
||||||
|
`xmpp`, // XMPP
|
||||||
|
}
|
||||||
|
|
||||||
|
// anyOf builds a regexp alternation group from strs: every entry is escaped
// with regexp.QuoteMeta, the entries are joined with "|", and the result is
// wrapped in parentheses. Called with no arguments it returns "()".
func anyOf(strs ...string) string {
	var group bytes.Buffer
	group.WriteByte('(')
	if len(strs) > 0 {
		// Emit the first alternative bare, then prefix every later one with
		// the separator.
		group.WriteString(regexp.QuoteMeta(strs[0]))
		for _, alt := range strs[1:] {
			group.WriteByte('|')
			group.WriteString(regexp.QuoteMeta(alt))
		}
	}
	group.WriteByte(')')
	return group.String()
}
|
||||||
|
|
||||||
|
func strictExp() string {
|
||||||
|
schemes := `(` + anyOf(Schemes...) + `://|` + anyOf(SchemesNoAuthority...) + `:)`
|
||||||
|
return `(?i)` + schemes + `(?-i)` + pathCont
|
||||||
|
}
|
||||||
|
|
||||||
|
func relaxedExp() string {
|
||||||
|
site := domain + `(?i)` + anyOf(append(TLDs, PseudoTLDs...)...) + `(?-i)`
|
||||||
|
hostName := `(` + site + `|` + ipAddr + `)`
|
||||||
|
webURL := hostName + port + `(/|/` + pathCont + `?|\b|$)`
|
||||||
|
return strictExp() + `|` + webURL
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strict produces a regexp that matches any URL with a scheme in either the
|
||||||
|
// Schemes or SchemesNoAuthority lists.
|
||||||
|
func Strict() *regexp.Regexp {
|
||||||
|
re := regexp.MustCompile(strictExp())
|
||||||
|
re.Longest()
|
||||||
|
return re
|
||||||
|
}
|
||||||
|
|
||||||
|
// Relaxed produces a regexp that matches any URL matched by Strict, plus any
|
||||||
|
// URL with no scheme.
|
||||||
|
func Relaxed() *regexp.Regexp {
|
||||||
|
re := regexp.MustCompile(relaxedExp())
|
||||||
|
re.Longest()
|
||||||
|
return re
|
||||||
|
}
|
||||||
|
|
||||||
|
// StrictMatchingScheme produces a regexp similar to Strict, but requiring that
|
||||||
|
// the scheme match the given regular expression. See AnyScheme too.
|
||||||
|
func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
|
||||||
|
strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont
|
||||||
|
re, err := regexp.Compile(strictMatching)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
re.Longest()
|
||||||
|
return re, nil
|
||||||
|
}
|
Loading…
Reference in New Issue