From 731c10a41885300305bdd33b7ef0c833dc6731ec Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Fri, 26 May 2017 17:15:54 +0200 Subject: [PATCH] mediaapi/fileutils: Clean up Reorder functions to have public API functions in alphabetical order at the top, internal package functions at the bottom in call order. Use RawURLEncoding to avoid padding the hash with '='. Use stronger types for paths in public API. Simplify comments. --- .../dendrite/mediaapi/fileutils/fileutils.go | 199 +++++++++--------- .../dendrite/mediaapi/writers/upload.go | 2 +- 2 files changed, 100 insertions(+), 101 deletions(-) diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 1305f433..b4ad7c9d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -30,51 +30,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/types" ) -// RemoveDir removes a directory and logs a warning in case of errors -func RemoveDir(dir types.Path, logger *log.Entry) { - dirErr := os.RemoveAll(string(dir)) - if dirErr != nil { - logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") - } -} - -// createTempDir creates a tmp/ directory within baseDirectory and returns its path -func createTempDir(baseDirectory types.Path) (types.Path, error) { - baseTmpDir := path.Join(string(baseDirectory), "tmp") - if err := os.MkdirAll(baseTmpDir, 0770); err != nil { - return "", fmt.Errorf("Failed to create base temp dir: %v", err) - } - tmpDir, err := ioutil.TempDir(baseTmpDir, "") - if err != nil { - return "", fmt.Errorf("Failed to create temp dir: %v", err) - } - return types.Path(tmpDir), nil -} - -// createFileWriter creates a buffered file writer with a new file at directory/filename -// Returns the file handle as it needs to be closed when writing is complete -func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { - filePath := path.Join(string(directory), string(filename)) - file, err := os.Create(filePath) - if err != nil { - return nil, nil, fmt.Errorf("Failed to create file: %v", err) - } - - return bufio.NewWriter(file), file, nil -} - -func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { - tmpDir, err := createTempDir(absBasePath) - if err != nil { - return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) - } - writer, tmpFile, err := createFileWriter(tmpDir, "content") - if err != nil { - return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) - } - return writer, tmpFile, tmpDir, nil -} - +// FIXME: make into error types var ( // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size ErrFileIsTooLarge = fmt.Errorf("file is too large") @@ -84,31 +40,6 @@ var ( errWrite = fmt.Errorf("failed to write file to disk") ) -// WriteTempFile writes to a new temporary file -func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { - tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) - if err != nil { - return "", -1, "", err - } - defer tmpFile.Close() - - limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) - // Hash the file data. The hash will be returned. The hash is useful as a - // method of deduplicating files to save storage, as well as a way to conduct - // integrity checks on the file data in the repository. - hasher := sha256.New() - teeReader := io.TeeReader(limitedReader, hasher) - bytesWritten, err := io.Copy(tmpFileWriter, teeReader) - if err != nil && err != io.EOF { - return "", -1, "", err - } - - tmpFileWriter.Flush() - - hash := hasher.Sum(nil) - return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil -} - // GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash // If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories // for more manageable browsing and use the remainder as the file name. @@ -156,6 +87,71 @@ func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) return filePath, nil } +// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata +// The final path is based on the hash of the file. +// If the final path exists and the file size matches, the file does not need to be moved. +// In error cases where the file is not a duplicate, the caller may decide to remove the final path. +// Returns the final path of the file, whether it is a duplicate and an error. +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) { + // Note: in all error and success cases, we need to remove the temporary directory + defer RemoveDir(tmpDir, logger) + duplicate := false + finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) + if err != nil { + return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + } + + var stat os.FileInfo + if stat, err = os.Stat(finalPath); os.IsExist(err) { + duplicate = true + if stat.Size() == int64(mediaMetadata.FileSizeBytes) { + return types.Path(finalPath), duplicate, nil + } + return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) + } + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) + } + return types.Path(finalPath), duplicate, nil +} + +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") + } +} + +// WriteTempFile writes to a new temporary file +func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { + tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) + if err != nil { + return "", -1, "", err + } + defer tmpFile.Close() + + limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) + // Hash the file data. The hash will be returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hasher := sha256.New() + teeReader := io.TeeReader(limitedReader, hasher) + bytesWritten, err := io.Copy(tmpFileWriter, teeReader) + if err != nil && err != io.EOF { + return "", -1, "", err + } + + tmpFileWriter.Flush() + + hash := hasher.Sum(nil) + return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil +} + // moveFile attempts to move the file src to dst func moveFile(src types.Path, dst types.Path) error { dstDir := path.Dir(string(dst)) @@ -171,37 +167,40 @@ func moveFile(src types.Path, dst types.Path) error { return nil } -// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its destination based on metadata -// Check if destination file exists. As the destination is based on a hash of the file data, -// if it exists and the file size does not match then there is a hash collision for two different files. If -// it exists and the file size matches, it is believable that it is the same file and we can just -// discard the temporary file. -func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { - duplicate := false - finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) +func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { + tmpDir, err := createTempDir(absBasePath) if err != nil { - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) + } + writer, tmpFile, err := createFileWriter(tmpDir, "content") + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) + } + return writer, tmpFile, tmpDir, nil +} + +// createTempDir creates a tmp/ directory within baseDirectory and returns its path +func createTempDir(baseDirectory types.Path) (types.Path, error) { + baseTmpDir := path.Join(string(baseDirectory), "tmp") + if err := os.MkdirAll(baseTmpDir, 0770); err != nil { + return "", fmt.Errorf("Failed to create base temp dir: %v", err) + } + tmpDir, err := ioutil.TempDir(baseTmpDir, "") + if err != nil { + return "", fmt.Errorf("Failed to create temp dir: %v", err) + } + return types.Path(tmpDir), nil +} + +// createFileWriter creates a buffered file writer with a new file at directory/filename +// The caller should flush the writer before closing the file. +// Returns the file handle as it needs to be closed when writing is complete +func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { + filePath := path.Join(string(directory), string(filename)) + file, err := os.Create(filePath) + if err != nil { + return nil, nil, fmt.Errorf("Failed to create file: %v", err) } - var stat os.FileInfo - if stat, err = os.Stat(finalPath); os.IsExist(err) { - duplicate = true - if stat.Size() == int64(mediaMetadata.FileSizeBytes) { - RemoveDir(tmpDir, logger) - return finalPath, duplicate, nil - } - // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) - } - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(finalPath), - ) - if err != nil { - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) - } - return finalPath, duplicate, nil + return bufio.NewWriter(file), file, nil } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 7117f246..bcaaa255 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -240,7 +240,7 @@ func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath type // there is valid metadata in the database for that file. As such we only // remove the file if it is not a duplicate. if duplicate == false { - fileutils.RemoveDir(types.Path(path.Dir(finalPath)), r.Logger) + fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger) } return &util.JSONResponse{ Code: 400,