mediaapi/fileutils: Clean up
Reorder functions to have public API functions in alphabetical order at the top, internal package functions at the bottom in call order. Use RawURLEncoding to avoid padding the hash with '='. Use stronger types for paths in public API. Simplify comments.main
parent
05e88d81cb
commit
731c10a418
|
@ -30,51 +30,7 @@ import (
|
|||
"github.com/matrix-org/dendrite/mediaapi/types"
|
||||
)
|
||||
|
||||
// RemoveDir removes a directory and logs a warning in case of errors
|
||||
func RemoveDir(dir types.Path, logger *log.Entry) {
|
||||
dirErr := os.RemoveAll(string(dir))
|
||||
if dirErr != nil {
|
||||
logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory")
|
||||
}
|
||||
}
|
||||
|
||||
// createTempDir creates a tmp/<random string> directory within baseDirectory and returns its path
|
||||
func createTempDir(baseDirectory types.Path) (types.Path, error) {
|
||||
baseTmpDir := path.Join(string(baseDirectory), "tmp")
|
||||
if err := os.MkdirAll(baseTmpDir, 0770); err != nil {
|
||||
return "", fmt.Errorf("Failed to create base temp dir: %v", err)
|
||||
}
|
||||
tmpDir, err := ioutil.TempDir(baseTmpDir, "")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Failed to create temp dir: %v", err)
|
||||
}
|
||||
return types.Path(tmpDir), nil
|
||||
}
|
||||
|
||||
// createFileWriter creates a buffered file writer with a new file at directory/filename
|
||||
// Returns the file handle as it needs to be closed when writing is complete
|
||||
func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) {
|
||||
filePath := path.Join(string(directory), string(filename))
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Failed to create file: %v", err)
|
||||
}
|
||||
|
||||
return bufio.NewWriter(file), file, nil
|
||||
}
|
||||
|
||||
func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) {
|
||||
tmpDir, err := createTempDir(absBasePath)
|
||||
if err != nil {
|
||||
return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err)
|
||||
}
|
||||
writer, tmpFile, err := createFileWriter(tmpDir, "content")
|
||||
if err != nil {
|
||||
return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err)
|
||||
}
|
||||
return writer, tmpFile, tmpDir, nil
|
||||
}
|
||||
|
||||
// FIXME: make into error types
|
||||
var (
|
||||
// ErrFileIsTooLarge indicates that the uploaded file is larger than the configured maximum file size
|
||||
ErrFileIsTooLarge = fmt.Errorf("file is too large")
|
||||
|
@ -84,31 +40,6 @@ var (
|
|||
errWrite = fmt.Errorf("failed to write file to disk")
|
||||
)
|
||||
|
||||
// WriteTempFile writes to a new temporary file
|
||||
func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) {
|
||||
tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath)
|
||||
if err != nil {
|
||||
return "", -1, "", err
|
||||
}
|
||||
defer tmpFile.Close()
|
||||
|
||||
limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes))
|
||||
// Hash the file data. The hash will be returned. The hash is useful as a
|
||||
// method of deduplicating files to save storage, as well as a way to conduct
|
||||
// integrity checks on the file data in the repository.
|
||||
hasher := sha256.New()
|
||||
teeReader := io.TeeReader(limitedReader, hasher)
|
||||
bytesWritten, err := io.Copy(tmpFileWriter, teeReader)
|
||||
if err != nil && err != io.EOF {
|
||||
return "", -1, "", err
|
||||
}
|
||||
|
||||
tmpFileWriter.Flush()
|
||||
|
||||
hash := hasher.Sum(nil)
|
||||
return types.Base64Hash(base64.URLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil
|
||||
}
|
||||
|
||||
// GetPathFromBase64Hash evaluates the path to a media file from its Base64Hash
|
||||
// If the Base64Hash is long enough, we split it into pieces, creating up to 2 subdirectories
|
||||
// for more manageable browsing and use the remainder as the file name.
|
||||
|
@ -156,6 +87,71 @@ func GetPathFromBase64Hash(base64Hash types.Base64Hash, absBasePath types.Path)
|
|||
return filePath, nil
|
||||
}
|
||||
|
||||
// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its final path based on metadata
|
||||
// The final path is based on the hash of the file.
|
||||
// If the final path exists and the file size matches, the file does not need to be moved.
|
||||
// In error cases where the file is not a duplicate, the caller may decide to remove the final path.
|
||||
// Returns the final path of the file, whether it is a duplicate and an error.
|
||||
func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (types.Path, bool, error) {
|
||||
// Note: in all error and success cases, we need to remove the temporary directory
|
||||
defer RemoveDir(tmpDir, logger)
|
||||
duplicate := false
|
||||
finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath)
|
||||
if err != nil {
|
||||
return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err)
|
||||
}
|
||||
|
||||
var stat os.FileInfo
|
||||
if stat, err = os.Stat(finalPath); os.IsExist(err) {
|
||||
duplicate = true
|
||||
if stat.Size() == int64(mediaMetadata.FileSizeBytes) {
|
||||
return types.Path(finalPath), duplicate, nil
|
||||
}
|
||||
return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath)
|
||||
}
|
||||
err = moveFile(
|
||||
types.Path(path.Join(string(tmpDir), "content")),
|
||||
types.Path(finalPath),
|
||||
)
|
||||
if err != nil {
|
||||
return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err)
|
||||
}
|
||||
return types.Path(finalPath), duplicate, nil
|
||||
}
|
||||
|
||||
// RemoveDir removes a directory and logs a warning in case of errors
|
||||
func RemoveDir(dir types.Path, logger *log.Entry) {
|
||||
dirErr := os.RemoveAll(string(dir))
|
||||
if dirErr != nil {
|
||||
logger.WithError(dirErr).WithField("dir", dir).Warn("Failed to remove directory")
|
||||
}
|
||||
}
|
||||
|
||||
// WriteTempFile writes to a new temporary file
|
||||
func WriteTempFile(reqReader io.Reader, maxFileSizeBytes types.FileSizeBytes, absBasePath types.Path) (types.Base64Hash, types.FileSizeBytes, types.Path, error) {
|
||||
tmpFileWriter, tmpFile, tmpDir, err := createTempFileWriter(absBasePath)
|
||||
if err != nil {
|
||||
return "", -1, "", err
|
||||
}
|
||||
defer tmpFile.Close()
|
||||
|
||||
limitedReader := io.LimitReader(reqReader, int64(maxFileSizeBytes))
|
||||
// Hash the file data. The hash will be returned. The hash is useful as a
|
||||
// method of deduplicating files to save storage, as well as a way to conduct
|
||||
// integrity checks on the file data in the repository.
|
||||
hasher := sha256.New()
|
||||
teeReader := io.TeeReader(limitedReader, hasher)
|
||||
bytesWritten, err := io.Copy(tmpFileWriter, teeReader)
|
||||
if err != nil && err != io.EOF {
|
||||
return "", -1, "", err
|
||||
}
|
||||
|
||||
tmpFileWriter.Flush()
|
||||
|
||||
hash := hasher.Sum(nil)
|
||||
return types.Base64Hash(base64.RawURLEncoding.EncodeToString(hash[:])), types.FileSizeBytes(bytesWritten), tmpDir, nil
|
||||
}
|
||||
|
||||
// moveFile attempts to move the file src to dst
|
||||
func moveFile(src types.Path, dst types.Path) error {
|
||||
dstDir := path.Dir(string(dst))
|
||||
|
@ -171,37 +167,40 @@ func moveFile(src types.Path, dst types.Path) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// MoveFileWithHashCheck checks for hash collisions when moving a temporary file to its destination based on metadata
|
||||
// Check if destination file exists. As the destination is based on a hash of the file data,
|
||||
// if it exists and the file size does not match then there is a hash collision for two different files. If
|
||||
// it exists and the file size matches, it is believable that it is the same file and we can just
|
||||
// discard the temporary file.
|
||||
func MoveFileWithHashCheck(tmpDir types.Path, mediaMetadata *types.MediaMetadata, absBasePath types.Path, logger *log.Entry) (string, bool, error) {
|
||||
duplicate := false
|
||||
finalPath, err := GetPathFromBase64Hash(mediaMetadata.Base64Hash, absBasePath)
|
||||
func createTempFileWriter(absBasePath types.Path) (*bufio.Writer, *os.File, types.Path, error) {
|
||||
tmpDir, err := createTempDir(absBasePath)
|
||||
if err != nil {
|
||||
RemoveDir(tmpDir, logger)
|
||||
return "", duplicate, fmt.Errorf("failed to get file path from metadata: %q", err)
|
||||
return nil, nil, "", fmt.Errorf("Failed to create temp dir: %q", err)
|
||||
}
|
||||
writer, tmpFile, err := createFileWriter(tmpDir, "content")
|
||||
if err != nil {
|
||||
return nil, nil, "", fmt.Errorf("Failed to create file writer: %q", err)
|
||||
}
|
||||
return writer, tmpFile, tmpDir, nil
|
||||
}
|
||||
|
||||
// createTempDir creates a tmp/<random string> directory within baseDirectory and returns its path
|
||||
func createTempDir(baseDirectory types.Path) (types.Path, error) {
|
||||
baseTmpDir := path.Join(string(baseDirectory), "tmp")
|
||||
if err := os.MkdirAll(baseTmpDir, 0770); err != nil {
|
||||
return "", fmt.Errorf("Failed to create base temp dir: %v", err)
|
||||
}
|
||||
tmpDir, err := ioutil.TempDir(baseTmpDir, "")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Failed to create temp dir: %v", err)
|
||||
}
|
||||
return types.Path(tmpDir), nil
|
||||
}
|
||||
|
||||
// createFileWriter creates a buffered file writer with a new file at directory/filename
|
||||
// The caller should flush the writer before closing the file.
|
||||
// Returns the file handle as it needs to be closed when writing is complete
|
||||
func createFileWriter(directory types.Path, filename types.Filename) (*bufio.Writer, *os.File, error) {
|
||||
filePath := path.Join(string(directory), string(filename))
|
||||
file, err := os.Create(filePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("Failed to create file: %v", err)
|
||||
}
|
||||
|
||||
var stat os.FileInfo
|
||||
if stat, err = os.Stat(finalPath); os.IsExist(err) {
|
||||
duplicate = true
|
||||
if stat.Size() == int64(mediaMetadata.FileSizeBytes) {
|
||||
RemoveDir(tmpDir, logger)
|
||||
return finalPath, duplicate, nil
|
||||
}
|
||||
// Remove the tmpDir as we anyway cannot cache the file on disk due to the hash collision
|
||||
RemoveDir(tmpDir, logger)
|
||||
return "", duplicate, fmt.Errorf("downloaded file with hash collision but different file size (%v)", finalPath)
|
||||
}
|
||||
err = moveFile(
|
||||
types.Path(path.Join(string(tmpDir), "content")),
|
||||
types.Path(finalPath),
|
||||
)
|
||||
if err != nil {
|
||||
RemoveDir(tmpDir, logger)
|
||||
return "", duplicate, fmt.Errorf("failed to move file to final destination (%v): %q", finalPath, err)
|
||||
}
|
||||
return finalPath, duplicate, nil
|
||||
return bufio.NewWriter(file), file, nil
|
||||
}
|
||||
|
|
|
@ -240,7 +240,7 @@ func (r *uploadRequest) storeFileAndMetadata(tmpDir types.Path, absBasePath type
|
|||
// there is valid metadata in the database for that file. As such we only
|
||||
// remove the file if it is not a duplicate.
|
||||
if duplicate == false {
|
||||
fileutils.RemoveDir(types.Path(path.Dir(finalPath)), r.Logger)
|
||||
fileutils.RemoveDir(types.Path(path.Dir(string(finalPath))), r.Logger)
|
||||
}
|
||||
return &util.JSONResponse{
|
||||
Code: 400,
|
||||
|
|
Loading…
Reference in New Issue