From 5d5aa0a31d60941c7ece95b4b516044cb8a10cce Mon Sep 17 00:00:00 2001 From: Neil Alexander Date: Wed, 17 Jun 2020 11:53:26 +0100 Subject: [PATCH] Media filename handling improvements (#1140) * Derive content ID from hash+filename but preserve dedupe, improve Content-Disposition handling and ASCII handling * Linter fix * Some more comments * Update sytest-whitelist --- mediaapi/routing/download.go | 77 +++++++++++++++++++++++++++++++----- mediaapi/routing/routing.go | 5 ++- mediaapi/routing/upload.go | 5 ++- sytest-whitelist | 6 ++- 4 files changed, 80 insertions(+), 13 deletions(-) diff --git a/mediaapi/routing/download.go b/mediaapi/routing/download.go index 3ce4ba39..fa1bb257 100644 --- a/mediaapi/routing/download.go +++ b/mediaapi/routing/download.go @@ -28,6 +28,7 @@ import ( "strconv" "strings" "sync" + "unicode" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/internal/config" @@ -54,6 +55,7 @@ type downloadRequest struct { IsThumbnailRequest bool ThumbnailSize types.ThumbnailSize Logger *log.Entry + DownloadFilename string } // Download implements GET /download and GET /thumbnail @@ -73,6 +75,7 @@ func Download( activeRemoteRequests *types.ActiveRemoteRequests, activeThumbnailGeneration *types.ActiveThumbnailGeneration, isThumbnailRequest bool, + customFilename string, ) { dReq := &downloadRequest{ MediaMetadata: &types.MediaMetadata{ @@ -84,6 +87,7 @@ func Download( "Origin": origin, "MediaID": mediaID, }), + DownloadFilename: customFilename, } if dReq.IsThumbnailRequest { @@ -301,16 +305,8 @@ func (r *downloadRequest) respondFromLocalFile( }).Info("Responding with file") responseFile = file responseMetadata = r.MediaMetadata - - if len(responseMetadata.UploadName) > 0 { - uploadName, err := url.PathUnescape(string(responseMetadata.UploadName)) - if err != nil { - return nil, fmt.Errorf("url.PathUnescape: %w", err) - } - w.Header().Set("Content-Disposition", fmt.Sprintf( - `inline; filename=utf-8"%s"`, - strings.ReplaceAll(uploadName, `"`, `\"`), // escape quote marks only, as per RFC6266 - )) + if err := r.addDownloadFilenameToHeaders(w, responseMetadata); err != nil { + return nil, err } } @@ -329,6 +325,67 @@ func (r *downloadRequest) respondFromLocalFile( return responseMetadata, nil } +func (r *downloadRequest) addDownloadFilenameToHeaders( + w http.ResponseWriter, + responseMetadata *types.MediaMetadata, +) error { + // If the requestor supplied a filename to name the download then + // use that, otherwise use the filename from the response metadata. + filename := string(responseMetadata.UploadName) + if r.DownloadFilename != "" { + filename = r.DownloadFilename + } + + if len(filename) == 0 { + return nil + } + + unescaped, err := url.PathUnescape(filename) + if err != nil { + return fmt.Errorf("url.PathUnescape: %w", err) + } + + isASCII := true // Is the string ASCII or UTF-8? + quote := `` // Encloses the string (ASCII only) + for i := 0; i < len(unescaped); i++ { + if unescaped[i] > unicode.MaxASCII { + isASCII = false + } + if unescaped[i] == 0x20 || unescaped[i] == 0x3B { + // If the filename contains a space or a semicolon, which + // are special characters in Content-Disposition + quote = `"` + } + } + + // We don't necessarily want a full escape as the Content-Disposition + // can take many of the characters that PathEscape would otherwise and + // browser support for encoding is a bit wild, so we'll escape only + // the characters that we know will mess up the parsing of the + // Content-Disposition header elements themselves + unescaped = strings.ReplaceAll(unescaped, `\`, `\\"`) + unescaped = strings.ReplaceAll(unescaped, `"`, `\"`) + + if isASCII { + // For ASCII filenames, we should only quote the filename if + // it needs to be done, e.g. it contains a space or a character + // that would otherwise be parsed as a control character in the + // Content-Disposition header + w.Header().Set("Content-Disposition", fmt.Sprintf( + `inline; filename=%s%s%s`, + quote, unescaped, quote, + )) + } else { + // For UTF-8 filenames, we quote always, as that's the standard + w.Header().Set("Content-Disposition", fmt.Sprintf( + `inline; filename=utf-8"%s"`, + unescaped, + )) + } + + return nil +} + // Note: Thumbnail generation may be ongoing asynchronously. // If no thumbnail was found then returns nil, nil, nil func (r *downloadRequest) getThumbnailFile( diff --git a/mediaapi/routing/routing.go b/mediaapi/routing/routing.go index f8577826..bc0de0f4 100644 --- a/mediaapi/routing/routing.go +++ b/mediaapi/routing/routing.go @@ -66,7 +66,9 @@ func Setup( downloadHandler := makeDownloadAPI("download", cfg, db, client, activeRemoteRequests, activeThumbnailGeneration) r0mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) - v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed + r0mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) + v1mux.Handle("/download/{serverName}/{mediaId}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed + v1mux.Handle("/download/{serverName}/{mediaId}/{downloadName}", downloadHandler).Methods(http.MethodGet, http.MethodOptions) // TODO: remove when synapse is fixed r0mux.Handle("/thumbnail/{serverName}/{mediaId}", makeDownloadAPI("thumbnail", cfg, db, client, activeRemoteRequests, activeThumbnailGeneration), @@ -120,6 +122,7 @@ func makeDownloadAPI( activeRemoteRequests, activeThumbnailGeneration, name == "thumbnail", + vars["downloadName"], ) } return promhttp.InstrumentHandlerCounter(counterVec, http.HandlerFunc(httpHandler)) diff --git a/mediaapi/routing/upload.go b/mediaapi/routing/upload.go index 022f978d..9b5dc3df 100644 --- a/mediaapi/routing/upload.go +++ b/mediaapi/routing/upload.go @@ -16,6 +16,7 @@ package routing import ( "context" + "encoding/base64" "fmt" "io" "net/http" @@ -123,7 +124,9 @@ func (r *uploadRequest) doUpload( r.MediaMetadata.FileSizeBytes = bytesWritten r.MediaMetadata.Base64Hash = hash - r.MediaMetadata.MediaID = types.MediaID(hash) + r.MediaMetadata.MediaID = types.MediaID(base64.RawURLEncoding.EncodeToString( + []byte(string(r.MediaMetadata.UploadName) + string(r.MediaMetadata.Base64Hash)), + )) r.Logger = r.Logger.WithField("MediaID", r.MediaMetadata.MediaID) diff --git a/sytest-whitelist b/sytest-whitelist index 04c6f098..8f3d128d 100644 --- a/sytest-whitelist +++ b/sytest-whitelist @@ -128,7 +128,7 @@ Outbound federation can send events # test for now. #Backfill checks the events requested belong to the room Can upload without a file name -#Can download without a file name locally +Can download without a file name locally Can upload with ASCII file name Can send image in room message AS cannot create users outside its own namespace @@ -327,3 +327,7 @@ Can upload with Unicode file name POSTed media can be thumbnailed Remote media can be thumbnailed Can download with Unicode file name locally +Can download file 'ascii' +Can download file 'name with spaces' +Can download file 'name;with;semicolons' +Can download specifying a different ASCII file name