diff --git a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go index bfc1ee0e..29876248 100644 --- a/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go +++ b/src/github.com/matrix-org/dendrite/cmd/dendrite-media-api-server/main.go @@ -23,6 +23,7 @@ import ( "github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/routing" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/gomatrixserverlib" @@ -69,6 +70,11 @@ func main() { DataSource: dataSource, } + db, err := storage.Open(cfg.DataSource) + if err != nil { + log.WithError(err).Panic("Failed to open database") + } + log.WithFields(log.Fields{ "BASE_PATH": absBasePath, "BIND_ADDRESS": bindAddr, @@ -78,6 +84,6 @@ func main() { "SERVER_NAME": serverName, }).Info("Starting mediaapi") - routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) + routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db) log.Fatal(http.ListenAndServe(bindAddr, nil)) } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go index a2d8f43c..5c514194 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/config/config.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/config/config.go @@ -26,8 +26,8 @@ type MediaAPI struct { // The absolute base path to where media files will be stored. AbsBasePath types.Path `yaml:"abs_base_path"` // The maximum file size in bytes that is allowed to be stored on this server. - // Note that remote files larger than this can still be proxied to a client, they will just not be cached. // Note: if MaxFileSizeBytes is set to 0, the size is unlimited. + // Note: if max_file_size_bytes is not set, it will default to 10485760 (10MB) MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"` // The postgres connection config for connecting to the database e.g a postgres:// URI DataSource string `yaml:"database"` diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go new file mode 100644 index 00000000..a166f476 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -0,0 +1,183 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fileutils + +import ( + "bufio" + "crypto/sha256" + "encoding/base64" + "fmt" + "io" + "io/ioutil" + "os" + "path/filepath" + "strings" + + log "github.com/Sirupsen/logrus" + "github.com/matrix-org/dendrite/mediaapi/types" +) + +// GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash +// 3 subdirectories are created for more manageable browsing and use the remainder as the file name. +// For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty/file'. +func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) { + if len(base64Hash) < 3 { + return "", fmt.Errorf("Invalid filePath (Base64Hash too short - min 3 characters): %q", base64Hash) + } + if len(base64Hash) > 255 { + return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash) + } + + filePath, err := filepath.Abs(filepath.Join( + string(absBasePath), + string(base64Hash[0:1]), + string(base64Hash[1:2]), + string(base64Hash[2:]), + "file", + )) + if err != nil { + return "", fmt.Errorf("Unable to construct filePath: %q", err) + } + + // check if the absolute absBasePath is a prefix of the absolute filePath + // if so, no directory escape has occurred and the filePath is valid + // Note: absBasePath is already absolute + if strings.HasPrefix(filePath, string(absBasePath)) == false { + return "", fmt.Errorf("Invalid filePath (not within absBasePath %v): %v", absBasePath, filePath) + } + + return filePath, nil +} + +// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata +// The final path is based on the hash of the file. +// If the final path exists and the file size matches, the file does not need to be moved. +// In error cases where the file is not a duplicate, the caller may decide to remove the final path. +// Returns the final path of the file, whether it is a duplicate and an error. +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) { + // Note: in all error and success cases, we need to remove the temporary directory + defer RemoveDir(tmpDir, logger) + duplicate := false + finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) + if err != nil { + return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + } + + var stat os.FileInfo + // Note: The double-negative is intentional as os.IsExist(err) != !os.IsNotExist(err). + // The functions are error checkers to be used in different cases. + if stat, err = os.Stat(finalPath); !os.IsNotExist(err) { + duplicate = true + if stat.Size() == int64(mediaMetadata.FileSizeBytes) { + return types.Path(finalPath), duplicate, nil + } + return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) + } + err = moveFile( + types.Path(filepath.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) + } + return types.Path(finalPath), duplicate, nil +} + +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") + } +} + +// WriteTempFile writes to a new temporary file +func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { + tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) + if err != nil { + return "", -1, "", err + } + defer tmpFile.Close() + + // The amount of data read is limited to maxFileSizeBytes. At this point, if there is more data it will be truncated. + limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) + // Hash the file data. The hash will be returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hasher := sha256.New() + teeReader := io.TeeReader(limitedReader, hasher) + bytesWritten, err := io.Copy(tmpFileWriter, teeReader) + if err != nil && err != io.EOF { + return "", -1, "", err + } + + tmpFileWriter.Flush() + + hash := hasher.Sum(nil) + return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil +} + +// moveFile attempts to move the file src to dst +func moveFile(src types.Path, dst types.Path) error { + dstDir := filepath.Dir(string(dst)) + + err := os.MkdirAll(dstDir, 0770) + if err != nil { + return fmt.Errorf("Failed to make directory: %q", err) + } + err = os.Rename(string(src), string(dst)) + if err != nil { + return fmt.Errorf("Failed to move directory: %q", err) + } + return nil +} + +func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { + tmpDir, err := createTempDir(absBasePath) + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) + } + writer, tmpFile, err := createFileWriter(tmpDir, "content") + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) + } + return writer, tmpFile, tmpDir, nil +} + +// createTempDir creates a tmp/ directory within baseDirectory and returns its path +func createTempDir(baseDirectory types.Path) (types.Path, error) { + baseTmpDir := filepath.Join(string(baseDirectory), "tmp") + if err := os.MkdirAll(baseTmpDir, 0770); err != nil { + return "", fmt.Errorf("Failed to create base temp dir: %v", err) + } + tmpDir, err := ioutil.TempDir(baseTmpDir, "") + if err != nil { + return "", fmt.Errorf("Failed to create temp dir: %v", err) + } + return types.Path(tmpDir), nil +} + +// createFileWriter creates a buffered file writer with a new file at directory/filename +// The caller should flush the writer before closing the file. +// Returns the file handle as it needs to be closed when writing is complete +func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { + filePath := filepath.Join(string(directory), string(filename)) + file, err := os.Create(filePath) + if err != nil { + return nil, nil, fmt.Errorf("Failed to create file: %v", err) + } + + return bufio.NewWriter(file), file, nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go index bc06d332..666a102a 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/routing/routing.go @@ -21,6 +21,7 @@ import ( "github.com/gorilla/mux" "github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/writers" "github.com/matrix-org/gomatrixserverlib" @@ -32,11 +33,12 @@ const pathPrefixR0 = "/_matrix/media/v1" // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // to clients which need to make outbound HTTP requests. -func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI) { +func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI, db *storage.Database) { apiMux := mux.NewRouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() + // FIXME: /upload should use common.MakeAuthAPI() r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse { - return writers.Upload(req, cfg) + return writers.Upload(req, cfg, db) })) activeRemoteRequests := &types.ActiveRemoteRequests{ diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go new file mode 100644 index 00000000..99df713d --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/media_repository_table.go @@ -0,0 +1,108 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" + "time" + + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" +) + +const mediaSchema = ` +-- The media_repository table holds metadata for each media file stored and accessible to the local server, +-- the actual file is stored separately. +CREATE TABLE IF NOT EXISTS media_repository ( + -- The id used to refer to the media. + -- For uploads to this server this is a base64-encoded sha256 hash of the file data + -- For media from remote servers, this can be any unique identifier string + media_id TEXT NOT NULL, + -- The origin of the media as requested by the client. Should be a homeserver domain. + media_origin TEXT NOT NULL, + -- The MIME-type of the media file as specified when uploading. + content_type TEXT NOT NULL, + -- Size of the media file in bytes. + file_size_bytes BIGINT NOT NULL, + -- When the content was uploaded in UNIX epoch ms. + creation_ts BIGINT NOT NULL, + -- The file name with which the media was uploaded. + upload_name TEXT NOT NULL, + -- Alternate RFC 4648 unpadded base64 encoding string representation of a SHA-256 hash sum of the file data. + base64hash TEXT NOT NULL, + -- The user who uploaded the file. Should be a Matrix user ID. + user_id TEXT NOT NULL +); +CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (media_id, media_origin); +` + +const insertMediaSQL = ` +INSERT INTO media_repository (media_id, media_origin, content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8) +` + +const selectMediaSQL = ` +SELECT content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2 +` + +type mediaStatements struct { + insertMediaStmt *sql.Stmt + selectMediaStmt *sql.Stmt +} + +func (s *mediaStatements) prepare(db *sql.DB) (err error) { + _, err = db.Exec(mediaSchema) + if err != nil { + return + } + + return statementList{ + {&s.insertMediaStmt, insertMediaSQL}, + {&s.selectMediaStmt, selectMediaSQL}, + }.prepare(db) +} + +func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error { + mediaMetadata.CreationTimestamp = types.UnixMs(time.Now().UnixNano() / 1000000) + _, err := s.insertMediaStmt.Exec( + mediaMetadata.MediaID, + mediaMetadata.Origin, + mediaMetadata.ContentType, + mediaMetadata.FileSizeBytes, + mediaMetadata.CreationTimestamp, + mediaMetadata.UploadName, + mediaMetadata.Base64Hash, + mediaMetadata.UserID, + ) + return err +} + +func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { + mediaMetadata := types.MediaMetadata{ + MediaID: mediaID, + Origin: mediaOrigin, + } + err := s.selectMediaStmt.QueryRow( + mediaMetadata.MediaID, mediaMetadata.Origin, + ).Scan( + &mediaMetadata.ContentType, + &mediaMetadata.FileSizeBytes, + &mediaMetadata.CreationTimestamp, + &mediaMetadata.UploadName, + &mediaMetadata.Base64Hash, + &mediaMetadata.UserID, + ) + return &mediaMetadata, err +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go new file mode 100644 index 00000000..a30586de --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/prepare.go @@ -0,0 +1,37 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// FIXME: This should be made common! + +package storage + +import ( + "database/sql" +) + +// a statementList is a list of SQL statements to prepare and a pointer to where to store the resulting prepared statement. +type statementList []struct { + statement **sql.Stmt + sql string +} + +// prepare the SQL for each statement in the list and assign the result to the prepared statement. +func (s statementList) prepare(db *sql.DB) (err error) { + for _, statement := range s { + if *statement.statement, err = db.Prepare(statement.sql); err != nil { + return + } + } + return +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go new file mode 100644 index 00000000..e992e073 --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/sql.go @@ -0,0 +1,33 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" +) + +type statements struct { + mediaStatements +} + +func (s *statements) prepare(db *sql.DB) error { + var err error + + if err = s.mediaStatements.prepare(db); err != nil { + return err + } + + return nil +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go new file mode 100644 index 00000000..4b86967f --- /dev/null +++ b/src/github.com/matrix-org/dendrite/mediaapi/storage/storage.go @@ -0,0 +1,56 @@ +// Copyright 2017 Vector Creations Ltd +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package storage + +import ( + "database/sql" + + // Import the postgres database driver. + _ "github.com/lib/pq" + "github.com/matrix-org/dendrite/mediaapi/types" + "github.com/matrix-org/gomatrixserverlib" +) + +// Database is used to store metadata about a repository of media files. +type Database struct { + statements statements + db *sql.DB +} + +// Open opens a postgres database. +func Open(dataSourceName string) (*Database, error) { + var d Database + var err error + if d.db, err = sql.Open("postgres", dataSourceName); err != nil { + return nil, err + } + if err = d.statements.prepare(d.db); err != nil { + return nil, err + } + return &d, nil +} + +// StoreMediaMetadata inserts the metadata about the uploaded media into the database. +// Returns an error if the combination of MediaID and Origin are not unique in the table. +func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error { + return d.statements.insertMedia(mediaMetadata) +} + +// GetMediaMetadata returns metadata about media stored on this server. +// The media could have been uploaded to this server or fetched from another server and cached here. +// Returns sql.ErrNoRows if there is no metadata associated with this media. +func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) { + return d.statements.selectMedia(mediaID, mediaOrigin) +} diff --git a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go index a3065bf8..ac18f5fe 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/types/types.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/types/types.go @@ -20,9 +20,6 @@ import ( "github.com/matrix-org/gomatrixserverlib" ) -// ContentDisposition is an HTTP Content-Disposition header string -type ContentDisposition string - // FileSizeBytes is a file size in bytes type FileSizeBytes int64 @@ -52,15 +49,14 @@ type UnixMs int64 // MediaMetadata is metadata associated with a media file type MediaMetadata struct { - MediaID MediaID - Origin gomatrixserverlib.ServerName - ContentType ContentType - ContentDisposition ContentDisposition - FileSizeBytes FileSizeBytes - CreationTimestamp UnixMs - UploadName Filename - Base64Hash Base64Hash - UserID MatrixUserID + MediaID MediaID + Origin gomatrixserverlib.ServerName + ContentType ContentType + FileSizeBytes FileSizeBytes + CreationTimestamp UnixMs + UploadName Filename + Base64Hash Base64Hash + UserID MatrixUserID } // ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 3b021357..dc525ee4 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -15,14 +15,19 @@ package writers import ( + "database/sql" "fmt" + "io" "net/http" "net/url" + "path" "strings" log "github.com/Sirupsen/logrus" "github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/mediaapi/config" + "github.com/matrix-org/dendrite/mediaapi/fileutils" + "github.com/matrix-org/dendrite/mediaapi/storage" "github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/util" ) @@ -46,13 +51,15 @@ type uploadResponse struct { // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. // TODO: We should time out requests if they have not received any data within a configured timeout period. -func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse { +func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse { r, resErr := parseAndValidateRequest(req, cfg) if resErr != nil { return *resErr } - // doUpload + if resErr = r.doUpload(req.Body, cfg, db); resErr != nil { + return *resErr + } return util.JSONResponse{ Code: 200, @@ -68,20 +75,17 @@ func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse { func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) { if req.Method != "POST" { return nil, &util.JSONResponse{ - Code: 400, + Code: 405, JSON: jsonerror.Unknown("HTTP request method must be POST."), } } - // authenticate user - r := &uploadRequest{ MediaMetadata: &types.MediaMetadata{ - Origin: cfg.ServerName, - ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")), - FileSizeBytes: types.FileSizeBytes(req.ContentLength), - ContentType: types.ContentType(req.Header.Get("Content-Type")), - UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))), + Origin: cfg.ServerName, + FileSizeBytes: types.FileSizeBytes(req.ContentLength), + ContentType: types.ContentType(req.Header.Get("Content-Type")), + UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))), }, Logger: util.GetLogger(req.Context()), } @@ -90,26 +94,83 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe return nil, resErr } - if len(r.MediaMetadata.UploadName) > 0 { - r.MediaMetadata.ContentDisposition = types.ContentDisposition( - "inline; filename*=utf-8''" + string(r.MediaMetadata.UploadName), - ) + return r, nil +} + +func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db *storage.Database) *util.JSONResponse { + r.Logger.WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, + }).Info("Uploading file") + + // The file data is hashed and the hash is used as the MediaID. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + // Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK. + hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath) + if err != nil { + r.Logger.WithError(err).WithFields(log.Fields{ + "Origin": r.MediaMetadata.Origin, + "MediaID": r.MediaMetadata.MediaID, + "MaxFileSizeBytes": cfg.MaxFileSizeBytes, + }).Warn("Error while transferring file") + fileutils.RemoveDir(tmpDir, r.Logger) + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("Failed to upload"), + } } - return r, nil + r.MediaMetadata.FileSizeBytes = bytesWritten + r.MediaMetadata.Base64Hash = hash + r.MediaMetadata.MediaID = types.MediaID(hash) + + r.Logger.WithFields(log.Fields{ + "MediaID": r.MediaMetadata.MediaID, + "Origin": r.MediaMetadata.Origin, + "Base64Hash": r.MediaMetadata.Base64Hash, + "UploadName": r.MediaMetadata.UploadName, + "FileSizeBytes": r.MediaMetadata.FileSizeBytes, + "Content-Type": r.MediaMetadata.ContentType, + }).Info("File uploaded") + + // check if we already have a record of the media in our database and if so, we can remove the temporary directory + mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin) + if err == nil { + r.MediaMetadata = mediaMetadata + fileutils.RemoveDir(tmpDir, r.Logger) + return &util.JSONResponse{ + Code: 200, + JSON: uploadResponse{ + ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID), + }, + } + } else if err != sql.ErrNoRows { + r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database") + } + + // TODO: generate thumbnails + + if resErr := r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db); resErr != nil { + return resErr + } + + return nil } // Validate validates the uploadRequest fields func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { if r.MediaMetadata.FileSizeBytes < 1 { return &util.JSONResponse{ - Code: 400, + Code: 411, JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), } } if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes { return &util.JSONResponse{ - Code: 400, + Code: 413, JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), } } @@ -149,3 +210,38 @@ func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSO } return nil } + +// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database +// See getPathFromMediaMetadata in fileutils for details of the final path. +// The order of operations is important as it avoids metadata entering the database before the file +// is ready, and if we fail to move the file, it never gets added to the database. +// Returns a util.JSONResponse error and cleans up directories in case of error. +func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, db *storage.Database) *util.JSONResponse { + finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger) + if err != nil { + r.Logger.WithError(err).Error("Failed to move file.") + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("Failed to upload"), + } + } + if duplicate { + r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate") + } + + if err = db.StoreMediaMetadata(r.MediaMetadata); err != nil { + r.Logger.WithError(err).Warn("Failed to store metadata") + // If the file is a duplicate (has the same hash as an existing file) then + // there is valid metadata in the database for that file. As such we only + // remove the file if it is not a duplicate. + if duplicate == false { + fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger) + } + return &util.JSONResponse{ + Code: 400, + JSON: jsonerror.Unknown("Failed to upload"), + } + } + + return nil +}