Update emoji dataset with skin tone variants (#11678)
* Update emoji dataset with skin tone variants. Since the format of emoji that support skin tone modifiers is predictable, we can add the different variants to our dataset when generating it, so that we can match and properly style most skin tone variants of emoji. No real code change here other than what generates the dataset and the data itself. * Use Unicode escape sequences in map. Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
		
							parent
							
								
									14ca111f33
								
							
						
					
					
						commit
						3af51f1ab7
					
				
					 4 changed files with 3155 additions and 1742 deletions
				
			
		
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										63
									
								
								build/generate-emoji.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										63
									
								
								build/generate-emoji.go
									
									
									
									
										vendored
									
									
								
							|  | @ -19,6 +19,7 @@ import ( | |||
| 	"sort" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
|  | @ -39,6 +40,7 @@ type Emoji struct { | |||
| 	Description    string   `json:"description,omitempty"` | ||||
| 	Aliases        []string `json:"aliases"` | ||||
| 	UnicodeVersion string   `json:"unicode_version,omitempty"` | ||||
| 	SkinTones      bool     `json:"skin_tones,omitempty"` | ||||
| } | ||||
| 
 | ||||
| // Don't include some fields in JSON
 | ||||
|  | @ -47,6 +49,7 @@ func (e Emoji) MarshalJSON() ([]byte, error) { | |||
| 	x := emoji(e) | ||||
| 	x.UnicodeVersion = "" | ||||
| 	x.Description = "" | ||||
| 	x.SkinTones = false | ||||
| 	return json.Marshal(x) | ||||
| } | ||||
| 
 | ||||
|  | @ -75,6 +78,7 @@ var replacer = strings.NewReplacer( | |||
| 	", Description:", ", ", | ||||
| 	", Aliases:", ", ", | ||||
| 	", UnicodeVersion:", ", ", | ||||
| 	", SkinTones:", ", ", | ||||
| ) | ||||
| 
 | ||||
| var emojiRE = regexp.MustCompile(`\{Emoji:"([^"]*)"`) | ||||
|  | @ -102,18 +106,20 @@ func generate() ([]byte, error) { | |||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	var re = regexp.MustCompile(`keycap|registered|copyright`) | ||||
| 	tmp := data[:0] | ||||
| 	var skinTones = make(map[string]string) | ||||
| 
 | ||||
| 	skinTones["\U0001f3fb"] = "Light Skin Tone" | ||||
| 	skinTones["\U0001f3fc"] = "Medium-Light Skin Tone" | ||||
| 	skinTones["\U0001f3fd"] = "Medium Skin Tone" | ||||
| 	skinTones["\U0001f3fe"] = "Medium-Dark Skin Tone" | ||||
| 	skinTones["\U0001f3ff"] = "Dark Skin Tone" | ||||
| 
 | ||||
| 	var tmp Gemoji | ||||
| 
 | ||||
| 	//filter out emoji that require greater than max unicode version
 | ||||
| 	for i := range data { | ||||
| 		val, _ := strconv.ParseFloat(data[i].UnicodeVersion, 64) | ||||
| 		if int(val) <= maxUnicodeVersion { | ||||
| 			// remove these keycaps for now; they really complicate matching since
 | ||||
| 			// they include normal letters in them
 | ||||
| 			if re.MatchString(data[i].Description) { | ||||
| 				continue | ||||
| 			} | ||||
| 			tmp = append(tmp, data[i]) | ||||
| 		} | ||||
| 	} | ||||
|  | @ -123,7 +129,6 @@ func generate() ([]byte, error) { | |||
| 		return data[i].Aliases[0] < data[j].Aliases[0] | ||||
| 	}) | ||||
| 
 | ||||
| 	aliasPairs := make([]string, 0) | ||||
| 	aliasMap := make(map[string]int, len(data)) | ||||
| 
 | ||||
| 	for i, e := range data { | ||||
|  | @ -135,7 +140,6 @@ func generate() ([]byte, error) { | |||
| 				continue | ||||
| 			} | ||||
| 			aliasMap[a] = i | ||||
| 			aliasPairs = append(aliasPairs, ":"+a+":", e.Emoji) | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|  | @ -149,6 +153,43 @@ func generate() ([]byte, error) { | |||
| 		data[i].Aliases = append(data[i].Aliases, "laugh") | ||||
| 	} | ||||
| 
 | ||||
| 	// write a JSON file to use with tribute (write before adding skin tones since we can't support them there yet)
 | ||||
| 	file, _ := json.Marshal(data) | ||||
| 	_ = ioutil.WriteFile("assets/emoji.json", file, 0644) | ||||
| 
 | ||||
| 	// Add skin tones to emoji that support it
 | ||||
| 	var ( | ||||
| 		s              []string | ||||
| 		newEmoji       string | ||||
| 		newDescription string | ||||
| 		newData        Emoji | ||||
| 	) | ||||
| 
 | ||||
| 	for i := range data { | ||||
| 		if data[i].SkinTones { | ||||
| 			for k, v := range skinTones { | ||||
| 				s = strings.Split(data[i].Emoji, "") | ||||
| 
 | ||||
| 				if utf8.RuneCountInString(data[i].Emoji) == 1 { | ||||
| 					s = append(s, k) | ||||
| 				} else { | ||||
| 					// insert into slice after first element because all emoji that support skin tones
 | ||||
| 					// have that modifier placed at this spot
 | ||||
| 					s = append(s, "") | ||||
| 					copy(s[2:], s[1:]) | ||||
| 					s[1] = k | ||||
| 				} | ||||
| 
 | ||||
| 				newEmoji = strings.Join(s, "") | ||||
| 				newDescription = data[i].Description + ": " + v | ||||
| 				newAlias := data[i].Aliases[0] + "_" + strings.ReplaceAll(v, " ", "_") | ||||
| 
 | ||||
| 				newData = Emoji{newEmoji, newDescription, []string{newAlias}, "12.0", false} | ||||
| 				data = append(data, newData) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	// add header
 | ||||
| 	str := replacer.Replace(fmt.Sprintf(hdr, gemojiURL, data)) | ||||
| 
 | ||||
|  | @ -162,10 +203,6 @@ func generate() ([]byte, error) { | |||
| 		return "{" + strconv.QuoteToASCII(s) | ||||
| 	}) | ||||
| 
 | ||||
| 	// write a JSON file to use with tribute
 | ||||
| 	file, _ := json.Marshal(data) | ||||
| 	_ = ioutil.WriteFile("assets/emoji.json", file, 0644) | ||||
| 
 | ||||
| 	// format
 | ||||
| 	return format.Source([]byte(str)) | ||||
| } | ||||
|  |  | |||
|  | @ -9,7 +9,6 @@ import ( | |||
| 	"sort" | ||||
| 	"strings" | ||||
| 	"sync" | ||||
| 	"unicode/utf8" | ||||
| ) | ||||
| 
 | ||||
| // Gemoji is a set of emoji data.
 | ||||
|  | @ -21,6 +20,7 @@ type Emoji struct { | |||
| 	Description    string | ||||
| 	Aliases        []string | ||||
| 	UnicodeVersion string | ||||
| 	SkinTones      bool | ||||
| } | ||||
| 
 | ||||
| var ( | ||||
|  | @ -131,11 +131,12 @@ func ReplaceAliases(s string) string { | |||
| func FindEmojiSubmatchIndex(s string) []int { | ||||
| 	loadMap() | ||||
| 
 | ||||
| 	// if rune and string length are the same then no emoji will be present
 | ||||
| 	// similar performance when there is unicode present but almost 200% faster when not
 | ||||
| 	if utf8.RuneCountInString(s) == len(s) { | ||||
| 	//see if there are any emoji in string before looking for position of specific ones
 | ||||
| 	//no performance difference when there is a match but 10x faster when there is not
 | ||||
| 	if s == ReplaceCodes(s) { | ||||
| 		return nil | ||||
| 	} | ||||
| 
 | ||||
| 	for j := range GemojiData { | ||||
| 		i := strings.Index(s, GemojiData[j].Emoji) | ||||
| 		if i != -1 { | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
		Reference in a new issue