diff --git a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go index 1305f433..b4ad7c9d 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/fileutils/fileutils.go @@ -30,51 +30,7 @@ import ( "github.com/matrix-org/dendrite/mediaapi/types" ) -// RemoveDir removes a directory and logs a warning in case of errors -func RemoveDir(dir types.Path, logger *log.Entry) { - dirErr := os.RemoveAll(string(dir)) - if dirErr != nil { - logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") - } -} - -// createTempDir creates a tmp/ directory within baseDirectory and returns its path -func createTempDir(baseDirectory types.Path) (types.Path, error) { - baseTmpDir := path.Join(string(baseDirectory), "tmp") - if err := os.MkdirAll(baseTmpDir, 0770); err != nil { - return "", fmt.Errorf("Failed to create base temp dir: %v", err) - } - tmpDir, err := ioutil.TempDir(baseTmpDir, "") - if err != nil { - return "", fmt.Errorf("Failed to create temp dir: %v", err) - } - return types.Path(tmpDir), nil -} - -// createFileWriter creates a buffered file writer with a new file at directory/filename -// Returns the file handle as it needs to be closed when writing is complete -func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { - filePath := path.Join(string(directory), string(filename)) - file, err := os.Create(filePath) - if err != nil { - return nil, nil, fmt.Errorf("Failed to create file: %v", err) - } - - return bufio.NewWriter(file), file, nil -} - -func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { - tmpDir, err := createTempDir(absBasePath) - if err != nil { - return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) - } - writer, tmpFile, err := createFileWriter(tmpDir, "content") - if err != nil { - return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) - } - return writer, tmpFile, tmpDir, nil -} - +// FIXME: make into error types var ( // ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size ErrFileIsTooLarge = fmt.Errorf("file is too large") @@ -84,31 +40,6 @@ var ( errWrite = fmt.Errorf("failed to write file to disk") ) -// WriteTempFile writes to a new temporary file -func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { - tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) - if err != nil { - return "", -1, "", err - } - defer tmpFile.Close() - - limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) - // Hash the file data. The hash will be returned. The hash is useful as a - // method of deduplicating files to save storage, as well as a way to conduct - // integrity checks on the file data in the repository. - hasher := sha256.New() - teeReader := io.TeeReader(limitedReader, hasher) - bytesWritten, err := io.Copy(tmpFileWriter, teeReader) - if err != nil && err != io.EOF { - return "", -1, "", err - } - - tmpFileWriter.Flush() - - hash := hasher.Sum(nil) - return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil -} - // GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash // If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories // for more manageable browsing and use the remainder as the file name. @@ -156,6 +87,71 @@ func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path) return filePath, nil } +// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata +// The final path is based on the hash of the file. +// If the final path exists and the file size matches, the file does not need to be moved. +// In error cases where the file is not a duplicate, the caller may decide to remove the final path. +// Returns the final path of the file, whether it is a duplicate and an error. +func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) { + // Note: in all error and success cases, we need to remove the temporary directory + defer RemoveDir(tmpDir, logger) + duplicate := false + finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) + if err != nil { + return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + } + + var stat os.FileInfo + if stat, err = os.Stat(finalPath); os.IsExist(err) { + duplicate = true + if stat.Size() == int64(mediaMetadata.FileSizeBytes) { + return types.Path(finalPath), duplicate, nil + } + return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) + } + err = moveFile( + types.Path(path.Join(string(tmpDir), "content")), + types.Path(finalPath), + ) + if err != nil { + return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) + } + return types.Path(finalPath), duplicate, nil +} + +// RemoveDir removes a directory and logs a warning in case of errors +func RemoveDir(dir types.Path, logger *log.Entry) { + dirErr := os.RemoveAll(string(dir)) + if dirErr != nil { + logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory") + } +} + +// WriteTempFile writes to a new temporary file +func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) { + tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath) + if err != nil { + return "", -1, "", err + } + defer tmpFile.Close() + + limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes)) + // Hash the file data. The hash will be returned. The hash is useful as a + // method of deduplicating files to save storage, as well as a way to conduct + // integrity checks on the file data in the repository. + hasher := sha256.New() + teeReader := io.TeeReader(limitedReader, hasher) + bytesWritten, err := io.Copy(tmpFileWriter, teeReader) + if err != nil && err != io.EOF { + return "", -1, "", err + } + + tmpFileWriter.Flush() + + hash := hasher.Sum(nil) + return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil +} + // moveFile attempts to move the file src to dst func moveFile(src types.Path, dst types.Path) error { dstDir := path.Dir(string(dst)) @@ -171,37 +167,40 @@ func moveFile(src types.Path, dst types.Path) error { return nil } -// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its destination based on metadata -// Check if destination file exists. As the destination is based on a hash of the file data, -// if it exists and the file size does not match then there is a hash collision for two different files. If -// it exists and the file size matches, it is believable that it is the same file and we can just -// discard the temporary file. -func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) { - duplicate := false - finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath) +func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) { + tmpDir, err := createTempDir(absBasePath) if err != nil { - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err) + return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err) + } + writer, tmpFile, err := createFileWriter(tmpDir, "content") + if err != nil { + return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err) + } + return writer, tmpFile, tmpDir, nil +} + +// createTempDir creates a tmp/ directory within baseDirectory and returns its path +func createTempDir(baseDirectory types.Path) (types.Path, error) { + baseTmpDir := path.Join(string(baseDirectory), "tmp") + if err := os.MkdirAll(baseTmpDir, 0770); err != nil { + return "", fmt.Errorf("Failed to create base temp dir: %v", err) + } + tmpDir, err := ioutil.TempDir(baseTmpDir, "") + if err != nil { + return "", fmt.Errorf("Failed to create temp dir: %v", err) + } + return types.Path(tmpDir), nil +} + +// createFileWriter creates a buffered file writer with a new file at directory/filename +// The caller should flush the writer before closing the file. +// Returns the file handle as it needs to be closed when writing is complete +func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) { + filePath := path.Join(string(directory), string(filename)) + file, err := os.Create(filePath) + if err != nil { + return nil, nil, fmt.Errorf("Failed to create file: %v", err) } - var stat os.FileInfo - if stat, err = os.Stat(finalPath); os.IsExist(err) { - duplicate = true - if stat.Size() == int64(mediaMetadata.FileSizeBytes) { - RemoveDir(tmpDir, logger) - return finalPath, duplicate, nil - } - // Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath) - } - err = moveFile( - types.Path(path.Join(string(tmpDir), "content")), - types.Path(finalPath), - ) - if err != nil { - RemoveDir(tmpDir, logger) - return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err) - } - return finalPath, duplicate, nil + return bufio.NewWriter(file), file, nil } diff --git a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go index 7117f246..bcaaa255 100644 --- a/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go +++ b/src/github.com/matrix-org/dendrite/mediaapi/writers/upload.go @@ -240,7 +240,7 @@ func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath type // there is valid metadata in the database for that file. As such we only // remove the file if it is not a duplicate. if duplicate == false { - fileutils.RemoveDir(types.Path(path.Dir(finalPath)), r.Logger) + fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger) } return &util.JSONResponse{ Code: 400,