Fix language stat calculation (#11692)
* Fix language stat calculation * Group languages and ignore zero-size files * Remove unneeded code
This commit is contained in:
		
							parent
							
								
									ea4c139cd2
								
							
						
					
					
						commit
						9d652002c6
					
				
					 3 changed files with 29 additions and 41 deletions
				
			
		|  | @ -26,22 +26,6 @@ type LanguageStat struct { | |||
| 	CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"` | ||||
| } | ||||
| 
 | ||||
| // specialLanguages defines the list of languages that are excluded from the calculation
 | ||||
| // unless they are the only language present in the repository. Only languages which, under
 | ||||
| // normal circumstances, are not considered to be code should be listed here.
 | ||||
| var specialLanguages = map[string]struct{}{ | ||||
| 	"XML":      {}, | ||||
| 	"JSON":     {}, | ||||
| 	"TOML":     {}, | ||||
| 	"YAML":     {}, | ||||
| 	"INI":      {}, | ||||
| 	"SQL":      {}, | ||||
| 	"SVG":      {}, | ||||
| 	"Text":     {}, | ||||
| 	"Markdown": {}, | ||||
| 	"other":    {}, | ||||
| } | ||||
| 
 | ||||
| // LanguageStatList defines a list of language statistics
 | ||||
| type LanguageStatList []*LanguageStat | ||||
| 
 | ||||
|  | @ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 { | |||
| 	langPerc := make(map[string]float32) | ||||
| 	var otherPerc float32 = 100 | ||||
| 	var total int64 | ||||
| 	// Check that repository has at least one non-special language
 | ||||
| 	var skipSpecial bool | ||||
| 
 | ||||
| 	for _, stat := range stats { | ||||
| 		if _, ok := specialLanguages[stat.Language]; !ok { | ||||
| 			skipSpecial = true | ||||
| 			break | ||||
| 		} | ||||
| 	} | ||||
| 	for _, stat := range stats { | ||||
| 		// Exclude specific languages from percentage calculation
 | ||||
| 		if _, ok := specialLanguages[stat.Language]; ok && skipSpecial { | ||||
| 			continue | ||||
| 		} | ||||
| 		total += stat.Size | ||||
| 	} | ||||
| 	if total > 0 { | ||||
| 		for _, stat := range stats { | ||||
| 			// Exclude specific languages from percentage calculation
 | ||||
| 			if _, ok := specialLanguages[stat.Language]; ok && skipSpecial { | ||||
| 				continue | ||||
| 			} | ||||
| 			perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10) | ||||
| 			if perc <= 0.1 { | ||||
| 				continue | ||||
|  | @ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 { | |||
| 			langPerc[stat.Language] = perc | ||||
| 		} | ||||
| 		otherPerc = float32(math.Round(float64(otherPerc)*10) / 10) | ||||
| 	} else { | ||||
| 		otherPerc = 100 | ||||
| 	} | ||||
| 	if otherPerc > 0 { | ||||
| 		langPerc["other"] = otherPerc | ||||
|  |  | |||
|  | @ -19,6 +19,20 @@ import ( | |||
| 
 | ||||
| const fileSizeLimit int64 = 16 * 1024 * 1024 | ||||
| 
 | ||||
| // specialLanguages defines the list of languages that are excluded from the calculation
 | ||||
| // unless they are the only language present in the repository. Only languages which, under
 | ||||
| // normal circumstances, are not considered to be code should be listed here.
 | ||||
| var specialLanguages = []string{ | ||||
| 	"XML", | ||||
| 	"JSON", | ||||
| 	"TOML", | ||||
| 	"YAML", | ||||
| 	"INI", | ||||
| 	"SVG", | ||||
| 	"Text", | ||||
| 	"Markdown", | ||||
| } | ||||
| 
 | ||||
| // GetLanguageStats calculates language stats for git repository at specified commit
 | ||||
| func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) { | ||||
| 	r, err := git.PlainOpen(repo.Path) | ||||
|  | @ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
| 
 | ||||
| 	sizes := make(map[string]int64) | ||||
| 	err = tree.Files().ForEach(func(f *object.File) error { | ||||
| 		if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | ||||
| 		if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) || | ||||
| 			enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) { | ||||
| 			return nil | ||||
| 		} | ||||
|  | @ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
| 
 | ||||
| 		language := analyze.GetCodeLanguage(f.Name, content) | ||||
| 		if language == enry.OtherLanguage || language == "" { | ||||
| 			language = "other" | ||||
| 			return nil | ||||
| 		} | ||||
| 
 | ||||
| 		// group languages, such as Pug -> HTML; SCSS -> CSS
 | ||||
| 		group := enry.GetLanguageGroup(language) | ||||
| 		if group != "" { | ||||
| 			language = group | ||||
| 		} | ||||
| 
 | ||||
| 		sizes[language] += f.Size | ||||
|  | @ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err | |||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	if len(sizes) == 0 { | ||||
| 		sizes["other"] = 0 | ||||
| 	// filter out special languages unless only a single language was detected
 | ||||
| 	if len(sizes) > 1 { | ||||
| 		for _, language := range specialLanguages { | ||||
| 			delete(sizes, language) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return sizes, nil | ||||
|  |  | |||
|  | @ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) { | |||
| 	assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha) | ||||
| 	langs, err := repo.GetTopLanguageStats(5) | ||||
| 	assert.NoError(t, err) | ||||
| 	assert.Len(t, langs, 1) | ||||
| 	assert.Equal(t, "other", langs[0].Language) | ||||
| 	assert.Equal(t, float32(100), langs[0].Percentage) | ||||
| 	assert.Empty(t, langs) | ||||
| } | ||||
|  |  | |||
		Loading…
	
		Reference in a new issue