From 23bed196e61d4dfb8719e6bfff82888173519cde Mon Sep 17 00:00:00 2001
From: Neil Alexander
Date: Wed, 17 Jun 2020 14:26:45 +0100
Subject: [PATCH] Fallback parsing for Content-Disposition minefield (#1144)

---
 mediaapi/routing/download.go | 27 ++++++++++++++++++++++-----
 sytest-whitelist             |  2 ++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/mediaapi/routing/download.go b/mediaapi/routing/download.go
index fa1bb257..7e121de3 100644
--- a/mediaapi/routing/download.go
+++ b/mediaapi/routing/download.go
@@ -47,6 +47,10 @@ const mediaIDCharacters = "A-Za-z0-9_=-"
 // Note: unfortunately regex.MustCompile() cannot be assigned to a const
 var mediaIDRegex = regexp.MustCompile("^[" + mediaIDCharacters + "]+$")
 
+// Regular expressions to help us cope with Content-Disposition parsing
+var rfc2183 = regexp.MustCompile(`filename\=utf-8\"(.*)\"`)
+var rfc6266 = regexp.MustCompile(`filename\*\=utf-8\'\'(.*)`)
+
 // downloadRequest metadata included in or derivable from a download or thumbnail request
 // https://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-download-servername-mediaid
 // http://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-thumbnail-servername-mediaid
@@ -378,8 +382,8 @@ func (r *downloadRequest) addDownloadFilenameToHeaders(
 	} else {
 		// For UTF-8 filenames, we quote always, as that's the standard
 		w.Header().Set("Content-Disposition", fmt.Sprintf(
-			`inline; filename=utf-8"%s"`,
-			unescaped,
+			`inline; filename*=utf-8''%s`,
+			url.QueryEscape(unescaped),
 		))
 	}
 
@@ -700,9 +704,22 @@ func (r *downloadRequest) fetchRemoteFile(
 	}
 	r.MediaMetadata.FileSizeBytes = types.FileSizeBytes(contentLength)
 	r.MediaMetadata.ContentType = types.ContentType(resp.Header.Get("Content-Type"))
-	_, params, err := mime.ParseMediaType(resp.Header.Get("Content-Disposition"))
-	if err == nil && params["filename"] != "" {
-		r.MediaMetadata.UploadName = types.Filename(params["filename"])
+
+	dispositionHeader := resp.Header.Get("Content-Disposition")
+	if _, params, e := mime.ParseMediaType(dispositionHeader); e == nil {
+		if params["filename"] != "" {
+			r.MediaMetadata.UploadName = types.Filename(params["filename"])
+		} else if params["filename*"] != "" {
+			r.MediaMetadata.UploadName = types.Filename(params["filename*"])
+		}
+	} else {
+		if matches := rfc6266.FindStringSubmatch(dispositionHeader); len(matches) > 1 {
+			// Always prefer the RFC6266 UTF-8 name if possible
+			r.MediaMetadata.UploadName = types.Filename(matches[1])
+		} else if matches := rfc2183.FindStringSubmatch(dispositionHeader); len(matches) > 1 {
+			// Otherwise, see if an RFC2183 name was provided (ASCII only)
+			r.MediaMetadata.UploadName = types.Filename(matches[1])
+		}
 	}
 
 	r.Logger.Info("Transferring remote file")
diff --git a/sytest-whitelist b/sytest-whitelist
index 244cfe8c..e6860562 100644
--- a/sytest-whitelist
+++ b/sytest-whitelist
@@ -331,4 +331,6 @@ Can download file 'ascii'
 Can download file 'name with spaces'
 Can download file 'name;with;semicolons'
 Can download specifying a different ASCII file name
+Can download with Unicode file name over federation
+Can download specifying a different Unicode file name
 Inbound /v1/send_join rejects joins from other servers