Merge pull request #120 from matrix-org/rob/media-skeleton

Skeleton for the Media API
main
Robert Swain 2017-05-26 15:57:13 +02:00 committed by GitHub
commit e21cd5ae66
6 changed files with 504 additions and 0 deletions

View File

@ -0,0 +1,83 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"net/http"
"os"
"path/filepath"
"strconv"
"github.com/matrix-org/dendrite/common"
"github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/routing"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
log "github.com/Sirupsen/logrus"
)
var (
bindAddr = os.Getenv("BIND_ADDRESS")
dataSource = os.Getenv("DATABASE")
logDir = os.Getenv("LOG_DIR")
serverName = os.Getenv("SERVER_NAME")
basePath = os.Getenv("BASE_PATH")
// Note: if the MAX_FILE_SIZE_BYTES is set to 0, it will be unlimited
maxFileSizeBytesString = os.Getenv("MAX_FILE_SIZE_BYTES")
)
func main() {
common.SetupLogging(logDir)
if bindAddr == "" {
log.Panic("No BIND_ADDRESS environment variable found.")
}
if basePath == "" {
log.Panic("No BASE_PATH environment variable found.")
}
absBasePath, err := filepath.Abs(basePath)
if err != nil {
log.WithError(err).WithField("BASE_PATH", basePath).Panic("BASE_PATH is invalid (must be able to make absolute)")
}
if serverName == "" {
serverName = "localhost"
}
maxFileSizeBytes, err := strconv.ParseInt(maxFileSizeBytesString, 10, 64)
if err != nil {
maxFileSizeBytes = 10 * 1024 * 1024
log.WithError(err).WithField("MAX_FILE_SIZE_BYTES", maxFileSizeBytesString).Warnf("Failed to parse MAX_FILE_SIZE_BYTES. Defaulting to %v bytes.", maxFileSizeBytes)
}
cfg := &config.MediaAPI{
ServerName: gomatrixserverlib.ServerName(serverName),
AbsBasePath: types.Path(absBasePath),
MaxFileSizeBytes: types.FileSizeBytes(maxFileSizeBytes),
DataSource: dataSource,
}
log.WithFields(log.Fields{
"BASE_PATH": absBasePath,
"BIND_ADDRESS": bindAddr,
"DATABASE": dataSource,
"LOG_DIR": logDir,
"MAX_FILE_SIZE_BYTES": maxFileSizeBytes,
"SERVER_NAME": serverName,
}).Info("Starting mediaapi")
routing.Setup(http.DefaultServeMux, http.DefaultClient, cfg)
log.Fatal(http.ListenAndServe(bindAddr, nil))
}

View File

@ -0,0 +1,34 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
)
// MediaAPI contains the config information necessary to spin up a mediaapi process.
type MediaAPI struct {
// The name of the server. This is usually the domain name, e.g 'matrix.org', 'localhost'.
ServerName gomatrixserverlib.ServerName `yaml:"server_name"`
// The absolute base path to where media files will be stored.
AbsBasePath types.Path `yaml:"abs_base_path"`
// The maximum file size in bytes that is allowed to be stored on this server.
// Note that remote files larger than this can still be proxied to a client, they will just not be cached.
// Note: if MaxFileSizeBytes is set to 0, the size is unlimited.
MaxFileSizeBytes types.FileSizeBytes `yaml:"max_file_size_bytes"`
// The postgres connection config for connecting to the database e.g a postgres:// URI
DataSource string `yaml:"database"`
}

View File

@ -0,0 +1,61 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package routing
import (
"net/http"
"sync"
"github.com/gorilla/mux"
"github.com/matrix-org/dendrite/common"
"github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/dendrite/mediaapi/writers"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util"
"github.com/prometheus/client_golang/prometheus"
)
const pathPrefixR0 = "/_matrix/media/v1"
// Setup registers HTTP handlers with the given ServeMux. It also supplies the given http.Client
// to clients which need to make outbound HTTP requests.
func Setup(servMux *http.ServeMux, httpClient *http.Client, cfg *config.MediaAPI) {
apiMux := mux.NewRouter()
r0mux := apiMux.PathPrefix(pathPrefixR0).Subrouter()
r0mux.Handle("/upload", common.MakeAPI("upload", func(req *http.Request) util.JSONResponse {
return writers.Upload(req, cfg)
}))
activeRemoteRequests := &types.ActiveRemoteRequests{
MXCToCond: map[string]*sync.Cond{},
}
r0mux.Handle("/download/{serverName}/{mediaId}",
prometheus.InstrumentHandler("download", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
req = util.RequestWithLogging(req)
// Set common headers returned regardless of the outcome of the request
util.SetCORSHeaders(w)
// Content-Type will be overridden in case of returning file data, else we respond with JSON-formatted errors
w.Header().Set("Content-Type", "application/json")
vars := mux.Vars(req)
writers.Download(w, req, gomatrixserverlib.ServerName(vars["serverName"]), types.MediaID(vars["mediaId"]), cfg, activeRemoteRequests)
})),
)
servMux.Handle("/metrics", prometheus.Handler())
servMux.Handle("/api/", http.StripPrefix("/api", apiMux))
}

View File

@ -0,0 +1,72 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package types
import (
"sync"
"github.com/matrix-org/gomatrixserverlib"
)
// ContentDisposition is an HTTP Content-Disposition header string
type ContentDisposition string
// FileSizeBytes is a file size in bytes
type FileSizeBytes int64
// ContentType is an HTTP Content-Type header string representing the MIME type of a request body
type ContentType string
// Filename is a string representing the name of a file
type Filename string
// Base64Hash is a base64 URLEncoding string representation of a SHA-256 hash sum
type Base64Hash string
// Path is an absolute or relative UNIX filesystem path
type Path string
// MediaID is a string representing the unique identifier for a file (could be a hash but does not have to be)
type MediaID string
// RequestMethod is an HTTP request method i.e. GET, POST, etc
type RequestMethod string
// MatrixUserID is a Matrix user ID string in the form @user:domain e.g. @alice:matrix.org
type MatrixUserID string
// UnixMs is the milliseconds since the Unix epoch
type UnixMs int64
// MediaMetadata is metadata associated with a media file
type MediaMetadata struct {
MediaID MediaID
Origin gomatrixserverlib.ServerName
ContentType ContentType
ContentDisposition ContentDisposition
FileSizeBytes FileSizeBytes
CreationTimestamp UnixMs
UploadName Filename
Base64Hash Base64Hash
UserID MatrixUserID
}
// ActiveRemoteRequests is a lockable map of media URIs requested from remote homeservers
// It is used for ensuring multiple requests for the same file do not clobber each other.
type ActiveRemoteRequests struct {
sync.Mutex
// The string key is an mxc:// URL
MXCToCond map[string]*sync.Cond
}

View File

@ -0,0 +1,103 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package writers
import (
"encoding/json"
"fmt"
"net/http"
"regexp"
log "github.com/Sirupsen/logrus"
"github.com/matrix-org/dendrite/clientapi/jsonerror"
"github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/gomatrixserverlib"
"github.com/matrix-org/util"
)
const mediaIDCharacters = "A-Za-z0-9_=-"
// Note: unfortunately regex.MustCompile() cannot be assigned to a const
var mediaIDRegex = regexp.MustCompile("[" + mediaIDCharacters + "]+")
// downloadRequest metadata included in or derivable from an download request
// https://matrix.org/docs/spec/client_server/r0.2.0.html#get-matrix-media-r0-download-servername-mediaid
type downloadRequest struct {
MediaMetadata *types.MediaMetadata
Logger *log.Entry
}
// Download implements /download
func Download(w http.ResponseWriter, req *http.Request, origin gomatrixserverlib.ServerName, mediaID types.MediaID, cfg *config.MediaAPI, activeRemoteRequests *types.ActiveRemoteRequests) {
r := &downloadRequest{
MediaMetadata: &types.MediaMetadata{
MediaID: mediaID,
Origin: origin,
},
Logger: util.GetLogger(req.Context()),
}
// request validation
if req.Method != "GET" {
r.jsonErrorResponse(w, util.JSONResponse{
Code: 405,
JSON: jsonerror.Unknown("request method must be GET"),
})
return
}
if resErr := r.Validate(); resErr != nil {
r.jsonErrorResponse(w, *resErr)
return
}
// doDownload
}
func (r *downloadRequest) jsonErrorResponse(w http.ResponseWriter, res util.JSONResponse) {
// Marshal JSON response into raw bytes to send as the HTTP body
resBytes, err := json.Marshal(res.JSON)
if err != nil {
r.Logger.WithError(err).Error("Failed to marshal JSONResponse")
// this should never fail to be marshalled so drop err to the floor
res = util.MessageResponse(500, "Internal Server Error")
resBytes, _ = json.Marshal(res.JSON)
}
// Set status code and write the body
w.WriteHeader(res.Code)
r.Logger.WithField("code", res.Code).Infof("Responding (%d bytes)", len(resBytes))
w.Write(resBytes)
}
// Validate validates the downloadRequest fields
func (r *downloadRequest) Validate() *util.JSONResponse {
if mediaIDRegex.MatchString(string(r.MediaMetadata.MediaID)) == false {
return &util.JSONResponse{
Code: 404,
JSON: jsonerror.NotFound(fmt.Sprintf("mediaId must be a non-empty string using only characters in %v", mediaIDCharacters)),
}
}
// Note: the origin will be validated either by comparison to the configured server name of this homeserver
// or by a DNS SRV record lookup when creating a request for remote files
if r.MediaMetadata.Origin == "" {
return &util.JSONResponse{
Code: 404,
JSON: jsonerror.NotFound("serverName must be a non-empty string"),
}
}
return nil
}

View File

@ -0,0 +1,151 @@
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package writers
import (
"fmt"
"net/http"
"net/url"
"strings"
log "github.com/Sirupsen/logrus"
"github.com/matrix-org/dendrite/clientapi/jsonerror"
"github.com/matrix-org/dendrite/mediaapi/config"
"github.com/matrix-org/dendrite/mediaapi/types"
"github.com/matrix-org/util"
)
// uploadRequest metadata included in or derivable from an upload request
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
// NOTE: The members come from HTTP request metadata such as headers, query parameters or can be derived from such
type uploadRequest struct {
MediaMetadata *types.MediaMetadata
Logger *log.Entry
}
// uploadResponse defines the format of the JSON response
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
type uploadResponse struct {
ContentURI string `json:"content_uri"`
}
// Upload implements /upload
// This endpoint involves uploading potentially significant amounts of data to the homeserver.
// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
// TODO: We should time out requests if they have not received any data within a configured timeout period.
func Upload(req *http.Request, cfg *config.MediaAPI) util.JSONResponse {
r, resErr := parseAndValidateRequest(req, cfg)
if resErr != nil {
return *resErr
}
// doUpload
return util.JSONResponse{
Code: 200,
JSON: uploadResponse{
ContentURI: fmt.Sprintf("mxc://%s/%s", cfg.ServerName, r.MediaMetadata.MediaID),
},
}
}
// parseAndValidateRequest parses the incoming upload request to validate and extract
// all the metadata about the media being uploaded.
// Returns either an uploadRequest or an error formatted as a util.JSONResponse
func parseAndValidateRequest(req *http.Request, cfg *config.MediaAPI) (*uploadRequest, *util.JSONResponse) {
if req.Method != "POST" {
return nil, &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("HTTP request method must be POST."),
}
}
// authenticate user
r := &uploadRequest{
MediaMetadata: &types.MediaMetadata{
Origin: cfg.ServerName,
ContentDisposition: types.ContentDisposition(req.Header.Get("Content-Disposition")),
FileSizeBytes: types.FileSizeBytes(req.ContentLength),
ContentType: types.ContentType(req.Header.Get("Content-Type")),
UploadName: types.Filename(url.PathEscape(req.FormValue("filename"))),
},
Logger: util.GetLogger(req.Context()),
}
if resErr := r.Validate(cfg.MaxFileSizeBytes); resErr != nil {
return nil, resErr
}
if len(r.MediaMetadata.UploadName) > 0 {
r.MediaMetadata.ContentDisposition = types.ContentDisposition(
"inline; filename*=utf-8''" + string(r.MediaMetadata.UploadName),
)
}
return r, nil
}
// Validate validates the uploadRequest fields
func (r *uploadRequest) Validate(maxFileSizeBytes types.FileSizeBytes) *util.JSONResponse {
if r.MediaMetadata.FileSizeBytes < 1 {
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("HTTP Content-Length request header must be greater than zero."),
}
}
if maxFileSizeBytes > 0 && r.MediaMetadata.FileSizeBytes > maxFileSizeBytes {
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown(fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)),
}
}
// TODO: Check if the Content-Type is a valid type?
if r.MediaMetadata.ContentType == "" {
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("HTTP Content-Type request header must be set."),
}
}
if r.MediaMetadata.UploadName[0] == '~' {
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("File name must not begin with '~'."),
}
}
// TODO: Validate filename - what are the valid characters?
if r.MediaMetadata.UserID != "" {
// TODO: We should put user ID parsing code into gomatrixserverlib and use that instead
// (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 )
// It should be a struct (with pointers into a single string to avoid copying) and
// we should update all refs to use UserID types rather than strings.
// https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92
if len(r.MediaMetadata.UserID) == 0 || r.MediaMetadata.UserID[0] != '@' {
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.Unknown("user id must start with '@'"),
}
}
parts := strings.SplitN(string(r.MediaMetadata.UserID[1:]), ":", 2)
if len(parts) != 2 {
return &util.JSONResponse{
Code: 400,
JSON: jsonerror.BadJSON("user id must be in the form @localpart:domain"),
}
}
}
return nil
}