Reduce calls to git cat-file -s (#14682)
* Reduce calls to `git cat-file -s`: There are multiple places where `git cat-file -s` is called repeatedly because blobs are not created with their size. Through judicious use of `git ls-tree -l` and slight adjustments to the indexer code, we can avoid a lot of these calls.
* Simplify by always expecting the long format.
* Also always set the sized field and tell the indexer the update is sized.

release/v1.15
parent
7ba158183a
commit
ae7e6cd474
|
@ -10,12 +10,13 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/go-git/go-git/v5/plumbing/filemode"
|
"github.com/go-git/go-git/v5/plumbing/filemode"
|
||||||
"github.com/go-git/go-git/v5/plumbing/object"
|
"github.com/go-git/go-git/v5/plumbing/object"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseTreeEntries parses the output of a `git ls-tree` command.
|
// ParseTreeEntries parses the output of a `git ls-tree -l` command.
|
||||||
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
||||||
return parseTreeEntries(data, nil)
|
return parseTreeEntries(data, nil)
|
||||||
}
|
}
|
||||||
|
@ -23,7 +24,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
||||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||||
entries := make([]*TreeEntry, 0, 10)
|
entries := make([]*TreeEntry, 0, 10)
|
||||||
for pos := 0; pos < len(data); {
|
for pos := 0; pos < len(data); {
|
||||||
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
|
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
|
||||||
entry := new(TreeEntry)
|
entry := new(TreeEntry)
|
||||||
entry.gogitTreeEntry = &object.TreeEntry{}
|
entry.gogitTreeEntry = &object.TreeEntry{}
|
||||||
entry.ptree = ptree
|
entry.ptree = ptree
|
||||||
|
@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||||
entry.gogitTreeEntry.Hash = id
|
entry.gogitTreeEntry.Hash = id
|
||||||
pos += 41 // skip over sha and trailing space
|
pos += 41 // skip over sha and trailing space
|
||||||
|
|
||||||
end := pos + bytes.IndexByte(data[pos:], '\n')
|
end := pos + bytes.IndexByte(data[pos:], '\t')
|
||||||
|
if end < pos {
|
||||||
|
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
|
||||||
|
}
|
||||||
|
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
|
||||||
|
entry.sized = true
|
||||||
|
|
||||||
|
pos = end + 1
|
||||||
|
|
||||||
|
end = pos + bytes.IndexByte(data[pos:], '\n')
|
||||||
if end < pos {
|
if end < pos {
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) {
|
||||||
Expected: []*TreeEntry{},
|
Expected: []*TreeEntry{},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n",
|
Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 1022\texample/file2.txt\n",
|
||||||
Expected: []*TreeEntry{
|
Expected: []*TreeEntry{
|
||||||
{
|
{
|
||||||
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
|
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
|
||||||
|
@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) {
|
||||||
Name: "example/file2.txt",
|
Name: "example/file2.txt",
|
||||||
Mode: filemode.Regular,
|
Mode: filemode.Regular,
|
||||||
},
|
},
|
||||||
|
size: 1022,
|
||||||
|
sized: true,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" +
|
Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c 234131\t\"example/\\n.txt\"\n" +
|
||||||
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n",
|
"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8 -\texample\n",
|
||||||
Expected: []*TreeEntry{
|
Expected: []*TreeEntry{
|
||||||
{
|
{
|
||||||
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
|
ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"),
|
||||||
|
@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) {
|
||||||
Name: "example/\n.txt",
|
Name: "example/\n.txt",
|
||||||
Mode: filemode.Symlink,
|
Mode: filemode.Symlink,
|
||||||
},
|
},
|
||||||
|
size: 234131,
|
||||||
|
sized: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
|
ID: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
|
||||||
|
sized: true,
|
||||||
gogitTreeEntry: &object.TreeEntry{
|
gogitTreeEntry: &object.TreeEntry{
|
||||||
Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
|
Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"),
|
||||||
Name: "example",
|
Name: "example",
|
||||||
|
|
|
@ -10,9 +10,10 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strconv"
|
"strconv"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ParseTreeEntries parses the output of a `git ls-tree` command.
|
// ParseTreeEntries parses the output of a `git ls-tree -l` command.
|
||||||
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
||||||
return parseTreeEntries(data, nil)
|
return parseTreeEntries(data, nil)
|
||||||
}
|
}
|
||||||
|
@ -20,7 +21,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
||||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||||
entries := make([]*TreeEntry, 0, 10)
|
entries := make([]*TreeEntry, 0, 10)
|
||||||
for pos := 0; pos < len(data); {
|
for pos := 0; pos < len(data); {
|
||||||
// expect line to be of the form "<mode> <type> <sha>\t<filename>"
|
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
|
||||||
entry := new(TreeEntry)
|
entry := new(TreeEntry)
|
||||||
entry.ptree = ptree
|
entry.ptree = ptree
|
||||||
if pos+6 > len(data) {
|
if pos+6 > len(data) {
|
||||||
|
@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||||
entry.ID = id
|
entry.ID = id
|
||||||
pos += 41 // skip over sha and trailing space
|
pos += 41 // skip over sha and trailing space
|
||||||
|
|
||||||
end := pos + bytes.IndexByte(data[pos:], '\n')
|
end := pos + bytes.IndexByte(data[pos:], '\t')
|
||||||
|
if end < pos {
|
||||||
|
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
|
||||||
|
}
|
||||||
|
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
|
||||||
|
entry.sized = true
|
||||||
|
|
||||||
|
pos = end + 1
|
||||||
|
|
||||||
|
end = pos + bytes.IndexByte(data[pos:], '\n')
|
||||||
if end < pos {
|
if end < pos {
|
||||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
// Copyright 2021 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
// +build !gogit
|
||||||
|
|
||||||
|
package git
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseTreeEntries(t *testing.T) {
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
Input string
|
||||||
|
Expected []*TreeEntry
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af 8218 README.md
|
||||||
|
100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35 4681 README_ZH.md
|
||||||
|
100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8 429 SECURITY.md
|
||||||
|
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d - assets
|
||||||
|
`,
|
||||||
|
Expected: []*TreeEntry{
|
||||||
|
{
|
||||||
|
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
|
||||||
|
name: "README.md",
|
||||||
|
entryMode: EntryModeBlob,
|
||||||
|
size: 8218,
|
||||||
|
sized: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"),
|
||||||
|
name: "README_ZH.md",
|
||||||
|
entryMode: EntryModeBlob,
|
||||||
|
size: 4681,
|
||||||
|
sized: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"),
|
||||||
|
name: "SECURITY.md",
|
||||||
|
entryMode: EntryModeBlob,
|
||||||
|
size: 429,
|
||||||
|
sized: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
|
||||||
|
name: "assets",
|
||||||
|
entryMode: EntryModeTree,
|
||||||
|
sized: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, testCase := range testCases {
|
||||||
|
entries, err := ParseTreeEntries([]byte(testCase.Input))
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, len(testCase.Expected), len(entries))
|
||||||
|
for i, entry := range entries {
|
||||||
|
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
|
||||||
|
assert.EqualValues(t, testCase.Expected[i].name, entry.name)
|
||||||
|
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
|
||||||
|
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
|
||||||
|
assert.EqualValues(t, testCase.Expected[i].size, entry.size)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob {
|
||||||
ID: te.ID,
|
ID: te.ID,
|
||||||
repoPath: te.ptree.repo.Path,
|
repoPath: te.ptree.repo.Path,
|
||||||
name: te.Name(),
|
name: te.Name(),
|
||||||
|
size: te.size,
|
||||||
|
gotSize: te.sized,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) {
|
||||||
return t.entries, nil
|
return t.entries, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path)
|
stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {
|
if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") {
|
||||||
return nil, ErrNotExist{
|
return nil, ErrNotExist{
|
||||||
|
@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {
|
||||||
if t.entriesRecursiveParsed {
|
if t.entriesRecursiveParsed {
|
||||||
return t.entriesRecursive, nil
|
return t.entriesRecursive, nil
|
||||||
}
|
}
|
||||||
stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
|
stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size := update.Size
|
||||||
|
|
||||||
|
if !update.Sized {
|
||||||
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
|
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
|
||||||
RunInDir(repo.RepoPath())
|
RunInDir(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
|
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
|
||||||
return fmt.Errorf("Misformatted git cat-file output: %v", err)
|
return fmt.Errorf("Misformatted git cat-file output: %v", err)
|
||||||
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if size > setting.Indexer.MaxIndexerFileSize {
|
||||||
return b.addDelete(update.Filename, repo, batch)
|
return b.addDelete(update.Filename, repo, batch)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size := update.Size
|
||||||
|
|
||||||
|
if !update.Sized {
|
||||||
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
|
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
|
||||||
RunInDir(repo.RepoPath())
|
RunInDir(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil {
|
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
|
||||||
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
|
return nil, fmt.Errorf("Misformatted git cat-file output: %v", err)
|
||||||
} else if int64(size) > setting.Indexer.MaxIndexerFileSize {
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if size > setting.Indexer.MaxIndexerFileSize {
|
||||||
return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
|
return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,8 @@ import (
|
||||||
type fileUpdate struct {
|
type fileUpdate struct {
|
||||||
Filename string
|
Filename string
|
||||||
BlobSha string
|
BlobSha string
|
||||||
|
Size int64
|
||||||
|
Sized bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// repoChanges changes (file additions/updates/removals) to a repo
|
// repoChanges changes (file additions/updates/removals) to a repo
|
||||||
|
@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
|
||||||
updates[idxCount] = fileUpdate{
|
updates[idxCount] = fileUpdate{
|
||||||
Filename: entry.Name(),
|
Filename: entry.Name(),
|
||||||
BlobSha: entry.ID.String(),
|
BlobSha: entry.ID.String(),
|
||||||
|
Size: entry.Size(),
|
||||||
|
Sized: true,
|
||||||
}
|
}
|
||||||
idxCount++
|
idxCount++
|
||||||
}
|
}
|
||||||
|
@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) {
|
||||||
// genesisChanges get changes to add repo to the indexer for the first time
|
// genesisChanges get changes to add repo to the indexer for the first time
|
||||||
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
|
func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) {
|
||||||
var changes repoChanges
|
var changes repoChanges
|
||||||
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision).
|
stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision).
|
||||||
RunInDirBytes(repo.RepoPath())
|
RunInDirBytes(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--")
|
cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--")
|
||||||
cmd.AddArguments(updatedFilenames...)
|
cmd.AddArguments(updatedFilenames...)
|
||||||
lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
|
lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
Loading…
Reference in New Issue