Merge pull request #124 from matrix-org/rob/media-api-upload

/upload handler and storage database
main
Robert Swain 2017-05-31 12:32:10 +02:00 committed by GitHub
commit 3f7ef7690b
10 changed files with 550 additions and 33 deletions

View File

@ -23,6 +23,7 @@ import (
"github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/common"
"github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/routing" "github.com/matrix-org/dendrite/mediaapi/routing"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
@ -69,6 +70,11 @@ func main() {
DataSource: dataSource, DataSource: dataSource,
} }
db, err := storage.Open(cfg.DataSource)
if err != nil {
log.WithError(err).Panic("Failed to open database")
}
log.WithFields(log.Fields{ log.WithFields(log.Fields{
"BASE_PATH": absBasePath, "BASE_PATH": absBasePath,
"BIND_ADDRESS": bindAddr, "BIND_ADDRESS": bindAddr,
@ -78,6 +84,6 @@ func main() {
"SERVER_NAME": serverName, "SERVER_NAME": serverName,
}).Info("Starting mediaapi") }).Info("Starting mediaapi")
routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg) routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg, db)
log.Fatal(http.ListenAndServe(bindAddr, nil)) log.Fatal(http.ListenAndServe(bindAddr, nil))
} }

View File

@ -26,8 +26,8 @@ type MediaAPI struct {
// The absolute base path to where media files will be stored. // The absolute base path to where media files will be stored.
AbsBasePath types.Path `yaml:"abs_base_path"` AbsBasePath types.Path `yaml:"abs_base_path"`
// The maximum file size in bytes that is allowed to be stored on this server. // The maximum file size in bytes that is allowed to be stored on this server.
// Note that remote files larger than this can still be proxied to a client, they will just not be cached.
// Note: if MaxFileSizeBytes is set to 0, the size is unlimited. // Note: if MaxFileSizeBytes is set to 0, the size is unlimited.
// Note: if max_file_size_bytes is not set, it will default to 10485760 (10MB)
MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"` MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"`
// The postgres connection config for connecting to the database e.g a postgres:// URI // The postgres connection config for connecting to the database e.g a postgres:// URI
DataSource string `yaml:"database"` DataSource string `yaml:"database"`

View File

@ -0,0 +1,183 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fileutils
import (
"bufio"
"crypto/sha256"
"encoding/base64"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strings"
log "github.com/Sirupsen/logrus"
"github.com/matrix-org/dendrite/mediaapi/types"
)
// GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash
// 3 subdirectories are created for more manageable browsing and use the remainder as the file name.
// For example, if Base64Hash is 'qwerty', the path will be 'q/w/erty/file'.
func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) (string, error) {
if len(base64Hash) < 3 {
return "", fmt.Errorf("Invalid filePath (Base64Hash too short - min 3 characters): %q", base64Hash)
}
if len(base64Hash) > 255 {
return "", fmt.Errorf("Invalid filePath (Base64Hash too long - max 255 characters): %q", base64Hash)
}
filePath, err := filepath.Abs(filepath.Join(
string(absBasePath),
string(base64Hash[0:1]),
string(base64Hash[1:2]),
string(base64Hash[2:]),
"file",
))
if err != nil {
return "", fmt.Errorf("Unable to construct filePath: %q", err)
}
// check if the absolute absBasePath is a prefix of the absolute filePath
// if so, no directory escape has occurred and the filePath is valid
// Note: absBasePath is already absolute
if strings.HasPrefix(filePath, string(absBasePath)) == false {
return "", fmt.Errorf("Invalid filePath (not within absBasePath %v): %v", absBasePath, filePath)
}
return filePath, nil
}
// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata
// The final path is based on the hash of the file.
// If the final path exists and the file size matches, the file does not need to be moved.
// In error cases where the file is not a duplicate, the caller may decide to remove the final path.
// Returns the final path of the file, whether it is a duplicate and an error.
func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) {
// Note: in all error and success cases, we need to remove the temporary directory
defer RemoveDir(tmpDir, logger)
duplicate := false
finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath)
if err != nil {
return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err)
}
var stat os.FileInfo
// Note: The double-negative is intentional as os.IsExist(err) != !os.IsNotExist(err).
// The functions are error checkers to be used in different cases.
if stat, err = os.Stat(finalPath); !os.IsNotExist(err) {
duplicate = true
if stat.Size() == int64(mediaMetadata.FileSizeBytes) {
return types.Path(finalPath), duplicate, nil
}
return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath)
}
err = moveFile(
types.Path(filepath.Join(string(tmpDir), "content")),
types.Path(finalPath),
)
if err != nil {
return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err)
}
return types.Path(finalPath), duplicate, nil
}
// RemoveDir removes a directory and logs a warning in case of errors
func RemoveDir(dir types.Path, logger *log.Entry) {
dirErr := os.RemoveAll(string(dir))
if dirErr != nil {
logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory")
}
}
// WriteTempFile writes to a new temporary file
func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) {
tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath)
if err != nil {
return "", -1, "", err
}
defer tmpFile.Close()
// The amount of data read is limited to maxFileSizeBytes. At this point, if there is more data it will be truncated.
limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes))
// Hash the file data. The hash will be returned. The hash is useful as a
// method of deduplicating files to save storage, as well as a way to conduct
// integrity checks on the file data in the repository.
hasher := sha256.New()
teeReader := io.TeeReader(limitedReader, hasher)
bytesWritten, err := io.Copy(tmpFileWriter, teeReader)
if err != nil && err != io.EOF {
return "", -1, "", err
}
tmpFileWriter.Flush()
hash := hasher.Sum(nil)
return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil
}
// moveFile attempts to move the file src to dst
func moveFile(src types.Path, dst types.Path) error {
dstDir := filepath.Dir(string(dst))
err := os.MkdirAll(dstDir, 0770)
if err != nil {
return fmt.Errorf("Failed to make directory: %q", err)
}
err = os.Rename(string(src), string(dst))
if err != nil {
return fmt.Errorf("Failed to move directory: %q", err)
}
return nil
}
func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) {
tmpDir, err := createTempDir(absBasePath)
if err != nil {
return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err)
}
writer, tmpFile, err := createFileWriter(tmpDir, "content")
if err != nil {
return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err)
}
return writer, tmpFile, tmpDir, nil
}
// createTempDir creates a tmp/<random string> directory within baseDirectory and returns its path
func createTempDir(baseDirectory types.Path) (types.Path, error) {
baseTmpDir := filepath.Join(string(baseDirectory), "tmp")
if err := os.MkdirAll(baseTmpDir, 0770); err != nil {
return "", fmt.Errorf("Failed to create base temp dir: %v", err)
}
tmpDir, err := ioutil.TempDir(baseTmpDir, "")
if err != nil {
return "", fmt.Errorf("Failed to create temp dir: %v", err)
}
return types.Path(tmpDir), nil
}
// createFileWriter creates a buffered file writer with a new file at directory/filename
// The caller should flush the writer before closing the file.
// Returns the file handle as it needs to be closed when writing is complete
func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) {
filePath := filepath.Join(string(directory), string(filename))
file, err := os.Create(filePath)
if err != nil {
return nil, nil, fmt.Errorf("Failed to create file: %v", err)
}
return bufio.NewWriter(file), file, nil
}

View File

@ -21,6 +21,7 @@ import (
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/matrix-org/dendrite/common" "github.com/matrix-org/dendrite/common"
"github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/dendrite/mediaapi/writers" "github.com/matrix-org/dendrite/mediaapi/writers"
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
@ -32,11 +33,12 @@ const pathPrefixR0 = "/_matrix/media/v1"
// Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client // Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client
// to clients which need to make outbound HTTP requests. // to clients which need to make outbound HTTP requests.
func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI) { func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI, db *storage.Database) {
apiMux := mux.NewRouter() apiMux := mux.NewRouter()
r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter() r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter()
// FIXME: /upload should use common.MakeAuthAPI()
r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse { r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse {
return writers.Upload(req, cfg) return writers.Upload(req, cfg, db)
})) }))
activeRemoteRequests := &types.ActiveRemoteRequests{ activeRemoteRequests := &types.ActiveRemoteRequests{

View File

@ -0,0 +1,108 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"database/sql"
"time"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
)
const mediaSchema = `
-- The media_repository table holds metadata for each media file stored and accessible to the local server,
-- the actual file is stored separately.
CREATE TABLE IF NOT EXISTS media_repository (
-- The id used to refer to the media.
-- For uploads to this server this is a base64-encoded sha256 hash of the file data
-- For media from remote servers, this can be any unique identifier string
media_id TEXT NOT NULL,
-- The origin of the media as requested by the client. Should be a homeserver domain.
media_origin TEXT NOT NULL,
-- The MIME-type of the media file as specified when uploading.
content_type TEXT NOT NULL,
-- Size of the media file in bytes.
file_size_bytes BIGINT NOT NULL,
-- When the content was uploaded in UNIX epoch ms.
creation_ts BIGINT NOT NULL,
-- The file name with which the media was uploaded.
upload_name TEXT NOT NULL,
-- Alternate RFC 4648 unpadded base64 encoding string representation of a SHA-256 hash sum of the file data.
base64hash TEXT NOT NULL,
-- The user who uploaded the file. Should be a Matrix user ID.
user_id TEXT NOT NULL
);
CREATE UNIQUE INDEX IF NOT EXISTS media_repository_index ON media_repository (media_id, media_origin);
`
const insertMediaSQL = `
INSERT INTO media_repository (media_id, media_origin, content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
`
const selectMediaSQL = `
SELECT content_type, file_size_bytes, creation_ts, upload_name, base64hash, user_id FROM media_repository WHERE media_id = $1 AND media_origin = $2
`
type mediaStatements struct {
insertMediaStmt *sql.Stmt
selectMediaStmt *sql.Stmt
}
func (s *mediaStatements) prepare(db *sql.DB) (err error) {
_, err = db.Exec(mediaSchema)
if err != nil {
return
}
return statementList{
{&s.insertMediaStmt, insertMediaSQL},
{&s.selectMediaStmt, selectMediaSQL},
}.prepare(db)
}
func (s *mediaStatements) insertMedia(mediaMetadata *types.MediaMetadata) error {
mediaMetadata.CreationTimestamp = types.UnixMs(time.Now().UnixNano() / 1000000)
_, err := s.insertMediaStmt.Exec(
mediaMetadata.MediaID,
mediaMetadata.Origin,
mediaMetadata.ContentType,
mediaMetadata.FileSizeBytes,
mediaMetadata.CreationTimestamp,
mediaMetadata.UploadName,
mediaMetadata.Base64Hash,
mediaMetadata.UserID,
)
return err
}
func (s *mediaStatements) selectMedia(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) {
mediaMetadata := types.MediaMetadata{
MediaID: mediaID,
Origin: mediaOrigin,
}
err := s.selectMediaStmt.QueryRow(
mediaMetadata.MediaID, mediaMetadata.Origin,
).Scan(
&mediaMetadata.ContentType,
&mediaMetadata.FileSizeBytes,
&mediaMetadata.CreationTimestamp,
&mediaMetadata.UploadName,
&mediaMetadata.Base64Hash,
&mediaMetadata.UserID,
)
return &mediaMetadata, err
}

View File

@ -0,0 +1,37 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// FIXME: This should be made common!
package storage
import (
"database/sql"
)
// a statementList is a list of SQL statements to prepare and a pointer to where to store the resulting prepared statement.
type statementList []struct {
statement **sql.Stmt
sql string
}
// prepare the SQL for each statement in the list and assign the result to the prepared statement.
func (s statementList) prepare(db *sql.DB) (err error) {
for _, statement := range s {
if *statement.statement, err = db.Prepare(statement.sql); err != nil {
return
}
}
return
}

View File

@ -0,0 +1,33 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"database/sql"
)
type statements struct {
mediaStatements
}
func (s *statements) prepare(db *sql.DB) error {
var err error
if err = s.mediaStatements.prepare(db); err != nil {
return err
}
return nil
}

View File

@ -0,0 +1,56 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"database/sql"
// Import the postgres database driver.
_ "github.com/lib/pq"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
)
// Database is used to store metadata about a repository of media files.
type Database struct {
statements statements
db *sql.DB
}
// Open opens a postgres database.
func Open(dataSourceName string) (*Database, error) {
var d Database
var err error
if d.db, err = sql.Open("postgres", dataSourceName); err != nil {
return nil, err
}
if err = d.statements.prepare(d.db); err != nil {
return nil, err
}
return &d, nil
}
// StoreMediaMetadata inserts the metadata about the uploaded media into the database.
// Returns an error if the combination of MediaID and Origin are not unique in the table.
func (d *Database) StoreMediaMetadata(mediaMetadata *types.MediaMetadata) error {
return d.statements.insertMedia(mediaMetadata)
}
// GetMediaMetadata returns metadata about media stored on this server.
// The media could have been uploaded to this server or fetched from another server and cached here.
// Returns sql.ErrNoRows if there is no metadata associated with this media.
func (d *Database) GetMediaMetadata(mediaID types.MediaID, mediaOrigin gomatrixserverlib.ServerName) (*types.MediaMetadata, error) {
return d.statements.selectMedia(mediaID, mediaOrigin)
}

View File

@ -20,9 +20,6 @@ import (
"github.com/matrix-org/gomatrixserverlib" "github.com/matrix-org/gomatrixserverlib"
) )
// ContentDisposition is an HTTP Content-Disposition header string
type ContentDisposition string
// FileSizeBytes is a file size in bytes // FileSizeBytes is a file size in bytes
type FileSizeBytes int64 type FileSizeBytes int64
@ -52,15 +49,14 @@ type UnixMs int64
// MediaMetadata is metadata associated with a media file // MediaMetadata is metadata associated with a media file
type MediaMetadata struct { type MediaMetadata struct {
MediaID MediaID MediaID MediaID
Origin gomatrixserverlib.ServerName Origin gomatrixserverlib.ServerName
ContentType ContentType ContentType ContentType
ContentDisposition ContentDisposition FileSizeBytes FileSizeBytes
FileSizeBytes FileSizeBytes CreationTimestamp UnixMs
CreationTimestamp UnixMs UploadName Filename
UploadName Filename Base64Hash Base64Hash
Base64Hash Base64Hash UserID MatrixUserID
UserID MatrixUserID
} }
// ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers // ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers

View File

@ -15,14 +15,19 @@
package writers package writers
import ( import (
"database/sql"
"fmt" "fmt"
"io"
"net/http" "net/http"
"net/url" "net/url"
"path"
"strings" "strings"
log "github.com/Sirupsen/logrus" log "github.com/Sirupsen/logrus"
"github.com/matrix-org/dendrite/clientapi/jsonerror" "github.com/matrix-org/dendrite/clientapi/jsonerror"
"github.com/matrix-org/dendrite/mediaapi/config" "github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/fileutils"
"github.com/matrix-org/dendrite/mediaapi/storage"
"github.com/matrix-org/dendrite/mediaapi/types" "github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/util" "github.com/matrix-org/util"
) )
@ -46,13 +51,15 @@ type uploadResponse struct {
// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large. // This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory. // Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
// TODO: We should time out requests if they have not received any data within a configured timeout period. // TODO: We should time out requests if they have not received any data within a configured timeout period.
func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse { func Upload(req *http.Request, cfg *config.MediaAPI, db *storage.Database) util.JSONResponse {
r, resErr := parseAndValidateRequest(req, cfg) r, resErr := parseAndValidateRequest(req, cfg)
if resErr != nil { if resErr != nil {
return *resErr return *resErr
} }
// doUpload if resErr = r.doUpload(req.Body, cfg, db); resErr != nil {
return *resErr
}
return util.JSONResponse{ return util.JSONResponse{
Code: 200, Code: 200,
@ -68,20 +75,17 @@ func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse {
func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) { func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) {
if req.Method != "POST" { if req.Method != "POST" {
return nil, &util.JSONResponse{ return nil, &util.JSONResponse{
Code: 400, Code: 405,
JSON: jsonerror.Unknown("HTTP request method must be POST."), JSON: jsonerror.Unknown("HTTP request method must be POST."),
} }
} }
// authenticate user
r := &uploadRequest{ r := &uploadRequest{
MediaMetadata: &types.MediaMetadata{ MediaMetadata: &types.MediaMetadata{
Origin: cfg.ServerName, Origin: cfg.ServerName,
ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")), FileSizeBytes: types.FileSizeBytes(req.ContentLength),
FileSizeBytes: types.FileSizeBytes(req.ContentLength), ContentType: types.ContentType(req.Header.Get("Content-Type")),
ContentType: types.ContentType(req.Header.Get("Content-Type")), UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))),
UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))),
}, },
Logger: util.GetLogger(req.Context()), Logger: util.GetLogger(req.Context()),
} }
@ -90,26 +94,83 @@ func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRe
return nil, resErr return nil, resErr
} }
if len(r.MediaMetadata.UploadName) > 0 { return r, nil
r.MediaMetadata.ContentDisposition = types.ContentDisposition( }
"inline; filename*=utf-8''" + string(r.MediaMetadata.UploadName),
) func (r *uploadRequest) doUpload(reqReader io.Reader, cfg *config.MediaAPI, db *storage.Database) *util.JSONResponse {
r.Logger.WithFields(log.Fields{
"Origin": r.MediaMetadata.Origin,
"UploadName": r.MediaMetadata.UploadName,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"Content-Type": r.MediaMetadata.ContentType,
}).Info("Uploading file")
// The file data is hashed and the hash is used as the MediaID. The hash is useful as a
// method of deduplicating files to save storage, as well as a way to conduct
// integrity checks on the file data in the repository.
// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
hash, bytesWritten, tmpDir, err := fileutils.WriteTempFile(reqReader, cfg.MaxFileSizeBytes, cfg.AbsBasePath)
if err != nil {
r.Logger.WithError(err).WithFields(log.Fields{
"Origin": r.MediaMetadata.Origin,
"MediaID": r.MediaMetadata.MediaID,
"MaxFileSizeBytes": cfg.MaxFileSizeBytes,
}).Warn("Error while transferring file")
fileutils.RemoveDir(tmpDir, r.Logger)
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("Failed to upload"),
}
} }
return r, nil r.MediaMetadata.FileSizeBytes = bytesWritten
r.MediaMetadata.Base64Hash = hash
r.MediaMetadata.MediaID = types.MediaID(hash)
r.Logger.WithFields(log.Fields{
"MediaID": r.MediaMetadata.MediaID,
"Origin": r.MediaMetadata.Origin,
"Base64Hash": r.MediaMetadata.Base64Hash,
"UploadName": r.MediaMetadata.UploadName,
"FileSizeBytes": r.MediaMetadata.FileSizeBytes,
"Content-Type": r.MediaMetadata.ContentType,
}).Info("File uploaded")
// check if we already have a record of the media in our database and if so, we can remove the temporary directory
mediaMetadata, err := db.GetMediaMetadata(r.MediaMetadata.MediaID, r.MediaMetadata.Origin)
if err == nil {
r.MediaMetadata = mediaMetadata
fileutils.RemoveDir(tmpDir, r.Logger)
return &util.JSONResponse{
Code: 200,
JSON: uploadResponse{
ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID),
},
}
} else if err != sql.ErrNoRows {
r.Logger.WithError(err).WithField("MediaID", r.MediaMetadata.MediaID).Warn("Failed to query database")
}
// TODO: generate thumbnails
if resErr := r.storeFileAndMetadata(tmpDir, cfg.AbsBasePath, db); resErr != nil {
return resErr
}
return nil
} }
// Validate validates the uploadRequest fields // Validate validates the uploadRequest fields
func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse { func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse {
if r.MediaMetadata.FileSizeBytes < 1 { if r.MediaMetadata.FileSizeBytes < 1 {
return &util.JSONResponse{ return &util.JSONResponse{
Code: 400, Code: 411,
JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."), JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."),
} }
} }
if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes { if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes {
return &util.JSONResponse{ return &util.JSONResponse{
Code: 400, Code: 413,
JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)), JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)),
} }
} }
@ -149,3 +210,38 @@ func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSO
} }
return nil return nil
} }
// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database
// See getPathFromMediaMetadata in fileutils for details of the final path.
// The order of operations is important as it avoids metadata entering the database before the file
// is ready, and if we fail to move the file, it never gets added to the database.
// Returns a util.JSONResponse error and cleans up directories in case of error.
func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath types.Path, db *storage.Database) *util.JSONResponse {
finalPath, duplicate, err := fileutils.MoveFileWithHashCheck(tmpDir, r.MediaMetadata, absBasePath, r.Logger)
if err != nil {
r.Logger.WithError(err).Error("Failed to move file.")
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("Failed to upload"),
}
}
if duplicate {
r.Logger.WithField("dst", finalPath).Info("File was stored previously - discarding duplicate")
}
if err = db.StoreMediaMetadata(r.MediaMetadata); err != nil {
r.Logger.WithError(err).Warn("Failed to store metadata")
// If the file is a duplicate (has the same hash as an existing file) then
// there is valid metadata in the database for that file. As such we only
// remove the file if it is not a duplicate.
if duplicate == false {
fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
}
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("Failed to upload"),
}
}
return nil
}