Merge pull request #124 from matrix-org/rob/media-api-upload

/upload handler and storage database
main
Robert Swain 2017-05-31 12:32:10 +02:00 committed by GitHub
commit 3f7ef7690b
10 changed files with 550 additions and 33 deletions

View File

@ -23,6 +23,7 @@ import (
"github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/common"
"github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/routing" "github.com/matrix-org/dendrite/mediaapi/routing"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
@ -69,6 +70,11 @@ func main() {
DataSource: dataSource, DataSource: dataSource,
} }
db, err := storage.Open(cfg.DataSource)
if err != nil {
log.WithError(err).Panic("Failed to open database")
}
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"BASE_PATH": absBasePath, "BASE_PATH": absBasePath,
"BIND_ADDRESS": bindAddr, "BIND_ADDRESS": bindAddr,
@ -78,6 +84,6 @@ func main() {
"SERVER_NAME": serverName, "SERVER_NAME": serverName,
}).Info("Starting mediaapi") }).Info("Starting mediaapi")
routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db)
log.Fatal(http.ListenAndServe(bindAddr, nil)) log.Fatal(http.ListenAndServe(bindAddr, nil))
} }

View File

@ -26,8 +26,8 @@ type MediaAPI struct {
// The absolute base path to where media files will be stored. // The absolute base path to where media files will be stored.
AbsBasePath types.Path `yaml:"abs_base_path"` AbsBasePath types.Path `yaml:"abs_base_path"`
// The maximum file size in bytes that is allowed to be stored on this server. // The maximum file size in bytes that is allowed to be stored on this server.
// Note that remote files larger than this can still be proxied to a client, they will just not be cached.
// Note: if MaxFileSizeBytes is set to 0, the size is unlimited. // Note: if MaxFileSizeBytes is set to 0, the size is unlimited.
// Note: if max_file_size_bytes is not set, it will default to 10485760 (10MB)
MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"` MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"`
// The postgres connection config for connecting to the database e.g a postgres:// URI // The postgres connection config for connecting to the database e.g a postgres:// URI
DataSource string `yaml:"database"` DataSource string `yaml:"database"`

View File

@ -0,0 +1,183 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fileutils
import (
"bufio"
"crypto/sha256"
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
log "github.com/Sirupsen/logrus"
"github.com/matrix-org/dendrite/mediaapi/types"
)
// GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash.
// The first and second characters of the hash each become one directory level, the
// remainder of the hash becomes a third, and the file inside it is always named "file".
// For example, if Base64Hash is 'qwerty', the path will be '<absBasePath>/q/w/erty/file'.
// An error is returned if the hash is shorter than 3 or longer than 255 characters,
// if the path cannot be made absolute, or if the resulting path is not located
// within absBasePath.
func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) {
	if len(base64Hash) < 3 {
		return "", fmt.Errorf("Invalid filePath (Base64Hash too short - min 3 characters): %q", base64Hash)
	}
	if len(base64Hash) > 255 {
		return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash)
	}

	filePath, err := filepath.Abs(filepath.Join(
		string(absBasePath),
		string(base64Hash[0:1]),
		string(base64Hash[1:2]),
		string(base64Hash[2:]),
		"file",
	))
	if err != nil {
		return "", fmt.Errorf("Unable to construct filePath: %q", err)
	}

	// Check containment on path components rather than with a plain string prefix:
	// a prefix test would also accept a sibling directory whose name merely starts
	// with the base path (base "/var/media" versus "/var/media2/...").
	// filePath is inside the base exactly when the relative path from the base to
	// it does not begin with a ".." component.
	// Note: absBasePath is expected to be absolute already; if it is not, Rel
	// fails and the path is rejected.
	rel, err := filepath.Rel(string(absBasePath), filePath)
	if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return "", fmt.Errorf("Invalid filePath (not within absBasePath %v): %v", absBasePath, filePath)
	}
	return filePath, nil
}
// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata.
// The final path is derived from mediaMetadata.Base64Hash via GetPathFromBase64Hash.
// If something already exists at the final path and its size matches
// mediaMetadata.FileSizeBytes, the file does not need to be moved and is reported as a duplicate.
// In error cases where the file is not a duplicate, the caller may decide to remove the final path.
// tmpDir is removed before returning in every case, success or failure.
// Returns the final path of the file, whether it is a duplicate and an error.
func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) {
	// Note: in all error and success cases, we need to remove the temporary directory
	defer RemoveDir(tmpDir, logger)

	finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath)
	if err != nil {
		return "", false, fmt.Errorf("failed to get file path from metadata: %q", err)
	}

	stat, err := os.Stat(finalPath)
	switch {
	case err == nil:
		// Something is already stored under this hash.
		if stat.Size() == int64(mediaMetadata.FileSizeBytes) {
			return types.Path(finalPath), true, nil
		}
		return "", true, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath)
	case !os.IsNotExist(err):
		// Stat failed for a reason other than the path being absent, so there is
		// no FileInfo to inspect. Report the failure instead of dereferencing a
		// nil FileInfo. The path may well exist, so it is conservatively reported
		// as a duplicate to keep the caller from removing it.
		return "", true, fmt.Errorf("failed to stat final destination (%v): %q", finalPath, err)
	}

	// Nothing exists at the final path yet: move the temporary content there.
	err = moveFile(
		types.Path(filepath.Join(string(tmpDir), "content")),
		types.Path(finalPath),
	)
	if err != nil {
		return "", false, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err)
	}
	return types.Path(finalPath), false, nil
}
// RemoveDir deletes dir together with everything beneath it.
// A failure is not returned to the caller; it is only reported as a warning on logger.
func RemoveDir(dir types.Path, logger *log.Entry) {
	if err := os.RemoveAll(string(dir)); err != nil {
		logger.WithError(err).WithField("dir", dir).Warn("Failed to remove directory")
	}
}
// WriteTempFile streams reqReader into a new temporary file below absBasePath and
// hashes the data while it is being written.
// If maxFileSizeBytes is greater than zero, at most that many bytes are read and any
// further data is silently truncated. A value of zero (or less) means no limit is applied.
// On success it returns the unpadded URL-safe base64 encoding of the SHA-256 sum of the
// written data, the number of bytes written and the temporary directory that holds the
// file; the caller is responsible for that directory from then on.
// On failure it returns "", -1, "" and the error, and the temporary directory has
// already been removed (best effort).
func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) {
	tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath)
	if err != nil {
		return "", -1, "", err
	}

	// discard throws away the partially written upload. The caller only receives an
	// empty path on failure and therefore could not clean up the directory itself.
	discard := func() {
		// Best effort: the error that triggered the cleanup is the one reported.
		_ = os.RemoveAll(string(tmpDir))
	}

	// The amount of data read is limited to maxFileSizeBytes when a limit is configured.
	// A limit of 0 means unlimited, so the reader must not be wrapped in that case:
	// a LimitReader with a limit of 0 would yield no data at all.
	src := reqReader
	if maxFileSizeBytes > 0 {
		src = io.LimitReader(reqReader, int64(maxFileSizeBytes))
	}

	// Hash the file data. The hash will be returned. The hash is useful as a
	// method of deduplicating files to save storage, as well as a way to conduct
	// integrity checks on the file data in the repository.
	hasher := sha256.New()
	bytesWritten, err := io.Copy(tmpFileWriter, io.TeeReader(src, hasher))
	// io.Copy reports a clean end of input as a nil error, never as io.EOF.
	if err != nil {
		_ = tmpFile.Close()
		discard()
		return "", -1, "", err
	}
	// Data still sitting in the buffer is part of the file; failing to flush it
	// would leave a file on disk that does not match the hash.
	if err = tmpFileWriter.Flush(); err != nil {
		_ = tmpFile.Close()
		discard()
		return "", -1, "", err
	}
	if err = tmpFile.Close(); err != nil {
		discard()
		return "", -1, "", err
	}

	hash := hasher.Sum(nil)
	return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash)), types.FileSizeBytes(bytesWritten), tmpDir, nil
}
// moveFile renames src to dst, first creating the parent directory of dst
// (and any missing ancestors) with mode 0770.
func moveFile(src types.Path, dst types.Path) error {
	source, target := string(src), string(dst)

	if err := os.MkdirAll(filepath.Dir(target), 0770); err != nil {
		return fmt.Errorf("Failed to make directory: %q", err)
	}
	if err := os.Rename(source, target); err != nil {
		return fmt.Errorf("Failed to move directory: %q", err)
	}
	return nil
}
// createTempFileWriter creates a fresh temporary directory below absBasePath and a
// new file named "content" inside it.
// It returns a buffered writer for that file, the file handle (which the caller must
// close after flushing the writer) and the path of the temporary directory.
// If the file cannot be created, the temporary directory is removed again (best effort)
// because its path is not handed back to the caller on failure.
func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) {
	tmpDir, err := createTempDir(absBasePath)
	if err != nil {
		return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err)
	}

	writer, tmpFile, err := createFileWriter(tmpDir, "content")
	if err != nil {
		// Best effort: the creation error is the one worth reporting.
		_ = os.RemoveAll(string(tmpDir))
		return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err)
	}
	return writer, tmpFile, tmpDir, nil
}
// createTempDir makes sure <baseDirectory>/tmp exists (mode 0770) and then creates a
// new, uniquely named directory inside it. The path of that new directory is returned.
func createTempDir(baseDirectory types.Path) (types.Path, error) {
	tmpRoot := filepath.Join(string(baseDirectory), "tmp")

	err := os.MkdirAll(tmpRoot, 0770)
	if err != nil {
		return "", fmt.Errorf("Failed to create base temp dir: %v", err)
	}

	created, err := ioutil.TempDir(tmpRoot, "")
	if err != nil {
		return "", fmt.Errorf("Failed to create temp dir: %v", err)
	}
	return types.Path(created), nil
}
// createFileWriter creates (or truncates) the file directory/filename and wraps it in
// a buffered writer.
// Both the writer and the underlying file handle are returned: the caller should
// flush the writer first and then close the file once writing is complete.
func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) {
	file, err := os.Create(filepath.Join(string(directory), string(filename)))
	if err != nil {
		return nil, nil, fmt.Errorf("Failed to create file: %v", err)
	}
	writer := bufio.NewWriter(file)
	return writer, file, nil
}

View File

@ -21,6 +21,7 @@ import (
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/common"
"github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/dendrite/mediaapi/writers" "github.com/matrix-org/dendrite/mediaapi/writers"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
@ -32,11 +33,12 @@ const pathPrefixR0 = "/_matrix/media/v1"
// Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client
// to clients which need to make outbound HTTP requests. // to clients which need to make outbound HTTP requests.
func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI) { func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI, db *storage.Database) {
apiMux := mux.NewRouter() apiMux := mux.NewRouter()
r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter()
// FIXME: /upload should use common.MakeAuthAPI()
r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse { r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse {
return writers.Upload(req, cfg) return writers.Upload(req, cfg, db)
})) }))
activeRemoteRequests := &types.ActiveRemoteRequests{ activeRemoteRequests := &types.ActiveRemoteRequests{

View File

@ -0,0 +1,108 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"database/sql"
"time"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
)
// mediaSchema is the DDL that mediaStatements.prepare executes before preparing any statement.
// It creates the media_repository table and a unique index over (media_id, media_origin);
// both are guarded by IF NOT EXISTS.
const mediaSchema = `
-- The media_repository table holds metadata for each media file stored and accessible to the local server,
-- the actual file is stored separately.
CREATE TABLE IF NOT EXISTS media_repository (
-- The id used to refer to the media.
-- For uploads to this server this is a base64-encoded sha256 hash of the file data
-- For media from remote servers, this can be any unique identifier string
media_id TEXT NOT NULL,
-- The origin of the media as requested by the client. Should be a homeserver domain.
media_origin TEXT NOT NULL,
-- The MIME-type of the media file as specified when uploading.
content_type TEXT NOT NULL,
-- Size of the media file in bytes.
file_size_bytes BIGINT NOT NULL,
-- When the content was uploaded in UNIX epoch ms.
creation_ts BIGINT NOT NULL,
-- The file name with which the media was uploaded.
upload_name TEXT NOT NULL,
-- Alternate RFC 4648 unpadded base64 encoding string representation of a SHA-256 hash sum of the file data.
base64hash TEXT NOT NULL,
-- The user who uploaded the file. Should be a Matrix user ID.
user_id TEXT NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (media_id, media_origin);
`
// insertMediaSQL inserts one row into media_repository. Its eight positional
// parameters follow the column order listed in the statement.
const insertMediaSQL = `
INSERT INTO media_repository (media_id, media_origin, content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
`
// selectMediaSQL looks up the remaining columns of a media_repository row by
// media_id ($1) and media_origin ($2).
const selectMediaSQL = `
SELECT content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2
`
// mediaStatements holds the prepared statements that operate on the media_repository table.
// Both fields are filled in by prepare.
type mediaStatements struct {
// insertMediaStmt is prepared from insertMediaSQL.
insertMediaStmt *sql.Stmt
// selectMediaStmt is prepared from selectMediaSQL.
selectMediaStmt *sql.Stmt
}
// prepare executes mediaSchema against db and then prepares the insert and select
// statements, storing them on s. The first error encountered is returned.
func (s *mediaStatements) prepare(db *sql.DB) (err error) {
	if _, err = db.Exec(mediaSchema); err != nil {
		return err
	}

	toPrepare := statementList{
		{&s.insertMediaStmt, insertMediaSQL},
		{&s.selectMediaStmt, selectMediaSQL},
	}
	return toPrepare.prepare(db)
}
// insertMedia stamps mediaMetadata.CreationTimestamp with the current time in
// milliseconds since the UNIX epoch and then inserts the metadata as a new row.
// Note that the passed-in metadata is modified. The error from executing the
// statement is returned as is.
func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error {
	nowMs := time.Now().UnixNano() / int64(time.Millisecond)
	mediaMetadata.CreationTimestamp = types.UnixMs(nowMs)

	m := mediaMetadata
	_, err := s.insertMediaStmt.Exec(
		m.MediaID,
		m.Origin,
		m.ContentType,
		m.FileSizeBytes,
		m.CreationTimestamp,
		m.UploadName,
		m.Base64Hash,
		m.UserID,
	)
	return err
}
// selectMedia queries the row identified by mediaID and mediaOrigin and scans it into
// a new MediaMetadata whose MediaID and Origin are set from the arguments.
// The metadata pointer is returned together with the error from Scan, so it is
// non-nil even when the lookup failed; callers must check the error first.
func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) {
	result := &types.MediaMetadata{
		MediaID: mediaID,
		Origin:  mediaOrigin,
	}

	row := s.selectMediaStmt.QueryRow(result.MediaID, result.Origin)
	err := row.Scan(
		&result.ContentType,
		&result.FileSizeBytes,
		&result.CreationTimestamp,
		&result.UploadName,
		&result.Base64Hash,
		&result.UserID,
	)
	return result, err
}

View File

@ -0,0 +1,37 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// FIXME: This should be made common!
package storage
import (
"database/sql"
)
// A statementList pairs each SQL string to prepare with the location in which the
// resulting prepared statement is to be stored.
type statementList []struct {
	statement **sql.Stmt
	sql       string
}

// prepare walks the list in order, prepares each SQL string on db and writes the
// result through the entry's statement pointer. It stops at the first failure and
// returns that error; entries after the failing one are left untouched.
func (s statementList) prepare(db *sql.DB) (err error) {
	for i := range s {
		prepared, prepErr := db.Prepare(s[i].sql)
		*s[i].statement = prepared
		if prepErr != nil {
			return prepErr
		}
	}
	return nil
}

View File

@ -0,0 +1,33 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"database/sql"
)
// statements is the set of prepared statements held by Database.
// It currently embeds only mediaStatements, whose methods are promoted onto it.
type statements struct {
mediaStatements
}
// prepare prepares the embedded mediaStatements on db and returns its error, if any.
func (s *statements) prepare(db *sql.DB) error {
	return s.mediaStatements.prepare(db)
}

View File

@ -0,0 +1,56 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"database/sql"
// Import the postgres database driver.
_ "github.com/lib/pq"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
)
// Database is used to store metadata about a repository of media files.
// A usable value is obtained from Open, which fills in both fields.
type Database struct {
// statements holds the prepared statements used by the exported methods.
statements statements
// db is the handle returned by sql.Open for the "postgres" driver.
db *sql.DB
}
// Open opens a postgres database identified by dataSourceName and prepares all
// statements the Database needs (which also applies the schema).
// If preparing the statements fails, the database handle is closed again before the
// error is returned, so a failed Open does not leak it.
func Open(dataSourceName string) (*Database, error) {
	db, err := sql.Open("postgres", dataSourceName)
	if err != nil {
		return nil, err
	}

	d := &Database{db: db}
	if err = d.statements.prepare(db); err != nil {
		// Best effort: the prepare error is the one worth reporting.
		_ = db.Close()
		return nil, err
	}
	return d, nil
}
// StoreMediaMetadata writes the metadata of an uploaded media file to the database.
// The insert sets mediaMetadata.CreationTimestamp as a side effect.
// An error is returned if the insert fails, e.g. because the combination of MediaID
// and Origin already exists in the table.
func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error {
	err := d.statements.insertMedia(mediaMetadata)
	return err
}
// GetMediaMetadata looks up the metadata of media stored on this server, identified
// by mediaID and mediaOrigin.
// The media could have been uploaded to this server or fetched from another server and cached here.
// Returns sql.ErrNoRows if there is no metadata associated with this media.
func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) {
	metadata, err := d.statements.selectMedia(mediaID, mediaOrigin)
	return metadata, err
}

View File

@ -20,9 +20,6 @@ import (
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
) )
// ContentDisposition is an HTTP Content-Disposition header string
type ContentDisposition string
// FileSizeBytes is a file size in bytes // FileSizeBytes is a file size in bytes
type FileSizeBytes int64 type FileSizeBytes int64
@ -55,7 +52,6 @@ type MediaMetadata struct {
MediaID MediaID MediaID MediaID
Origin gomatrixserverlib.ServerName Origin gomatrixserverlib.ServerName
ContentType ContentType ContentType ContentType
ContentDisposition ContentDisposition
FileSizeBytes FileSizeBytes FileSizeBytes FileSizeBytes
CreationTimestamp UnixMs CreationTimestamp UnixMs
UploadName Filename UploadName Filename

View File

@ -15,14 +15,19 @@
package writers package writers
import ( import (
"database/sql"
"fmt" "fmt"
"io"
"net/http" "net/http"
"net/url" "net/url"
"path"
"strings" "strings"
log "github.com/Sirupsen/logrus" log "github.com/Sirupsen/logrus"
"github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/clientapi/jsonerror"
"github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/fileutils"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/util" "github.com/matrix-org/util"
) )
@ -46,13 +51,15 @@ type uploadResponse struct {
// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
// TODO: We should time out requests if they have not received any data within a configured timeout period. // TODO: We should time out requests if they have not received any data within a configured timeout period.
func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse { func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse {
r, resErr := parseAndValidateRequest(req, cfg) r, resErr := parseAndValidateRequest(req, cfg)
if resErr != nil { if resErr != nil {
return *resErr return *resErr
} }
// doUpload if resErr = r.doUpload(req.Body, cfg, db); resErr != nil {
return *resErr
}
return util.JSONResponse{ return util.JSONResponse{
Code: 200, Code: 200,
@ -68,17 +75,14 @@ func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse {
func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) { func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) {
if req.Method != "POST" { if req.Method != "POST" {
return nil, &util.JSONResponse{ return nil, &util.JSONResponse{
Code: 400, Code: 405,
JSON: jsonerror.Unknown("HTTP request method must be POST."), JSON: jsonerror.Unknown("HTTP request method must be POST."),
} }
} }
// authenticate user
r := &uploadRequest{ r := &uploadRequest{
MediaMetadata: &types.MediaMetadata{ MediaMetadata: &types.MediaMetadata{
Origin: cfg.ServerName, Origin: cfg.ServerName,
ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")),
FileSizeBytes: types.FileSizeBytes(req.ContentLength), FileSizeBytes: types.FileSizeBytes(req.ContentLength),
ContentType: types.ContentType(req.Header.Get("Content-Type")), ContentType: types.ContentType(req.Header.Get("Content-Type")),
UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))), UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))),
@ -90,26 +94,83 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe
return nil, resErr return nil, resErr
} }
if len(r.MediaMetadata.UploadName) > 0 { return r, nil
r.MediaMetadata.ContentDisposition = types.ContentDisposition(
"inline; filename*=utf-8''" + string(r.MediaMetadata.UploadName),
)
} }
return r, nil func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db *storage.Database) *util.JSONResponse {
r.Logger.WithFields(log.Fields{
"Origin": r.MediaMetadata.Origin,
"UploadName": r.MediaMetadata.UploadName,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"Content-Type": r.MediaMetadata.ContentType,
}).Info("Uploading file")
// The file data is hashed and the hash is used as the MediaID. The hash is useful as a
// method of deduplicating files to save storage, as well as a way to conduct
// integrity checks on the file data in the repository.
// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath)
if err != nil {
r.Logger.WithError(err).WithFields(log.Fields{
"Origin": r.MediaMetadata.Origin,
"MediaID": r.MediaMetadata.MediaID,
"MaxFileSizeBytes": cfg.MaxFileSizeBytes,
}).Warn("Error while transferring file")
fileutils.RemoveDir(tmpDir, r.Logger)
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("Failed to upload"),
}
}
r.MediaMetadata.FileSizeBytes = bytesWritten
r.MediaMetadata.Base64Hash = hash
r.MediaMetadata.MediaID = types.MediaID(hash)
r.Logger.WithFields(log.Fields{
"MediaID": r.MediaMetadata.MediaID,
"Origin": r.MediaMetadata.Origin,
"Base64Hash": r.MediaMetadata.Base64Hash,
"UploadName": r.MediaMetadata.UploadName,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"Content-Type": r.MediaMetadata.ContentType,
}).Info("File uploaded")
// check if we already have a record of the media in our database and if so, we can remove the temporary directory
mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin)
if err == nil {
r.MediaMetadata = mediaMetadata
fileutils.RemoveDir(tmpDir, r.Logger)
return &util.JSONResponse{
Code: 200,
JSON: uploadResponse{
ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID),
},
}
} else if err != sql.ErrNoRows {
r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database")
}
// TODO: generate thumbnails
if resErr := r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db); resErr != nil {
return resErr
}
return nil
} }
// Validate validates the uploadRequest fields // Validate validates the uploadRequest fields
func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse {
if r.MediaMetadata.FileSizeBytes < 1 { if r.MediaMetadata.FileSizeBytes < 1 {
return &util.JSONResponse{ return &util.JSONResponse{
Code: 400, Code: 411,
JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."),
} }
} }
if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes { if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes {
return &util.JSONResponse{ return &util.JSONResponse{
Code: 400, Code: 413,
JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)),
} }
} }
@ -149,3 +210,38 @@ func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSO
} }
return nil return nil
} }
// storeFileAndMetadata moves the temporary file to its final path and then stores the
// metadata in the database.
// See GetPathFromBase64Hash in fileutils for details of the final path.
// The file is moved first on purpose: metadata never enters the database before the
// file is in place, and if moving the file fails nothing is added to the database.
// If storing the metadata fails and the file was not a duplicate of an already stored
// file, the directory containing the newly moved file is removed again.
// Returns nil on success and a util.JSONResponse describing the failure otherwise.
func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, db *storage.Database) *util.JSONResponse {
	uploadFailed := func() *util.JSONResponse {
		return &util.JSONResponse{
			Code: 400,
			JSON: jsonerror.Unknown("Failed to upload"),
		}
	}

	finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
	if err != nil {
		r.Logger.WithError(err).Error("Failed to move file.")
		return uploadFailed()
	}
	if duplicate {
		r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
	}

	err = db.StoreMediaMetadata(r.MediaMetadata)
	if err == nil {
		return nil
	}

	r.Logger.WithError(err).Warn("Failed to store metadata")
	// A duplicate shares its file with an earlier upload that has valid metadata in
	// the database, so the file must only be removed when it is not a duplicate.
	if !duplicate {
		fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
	}
	return uploadFailed()
}