Detect encoding changes while parsing diff (#16330)
* Detect encoding changes while parsing diff
This commit is contained in:
		
							parent
							
								
									2614309a58
								
							
						
					
					
						commit
						4ce32c9e93
					
				
					 1 changed files with 31 additions and 19 deletions
				
			
		|  | @ -32,6 +32,7 @@ import ( | ||||||
| 
 | 
 | ||||||
| 	"github.com/sergi/go-diff/diffmatchpatch" | 	"github.com/sergi/go-diff/diffmatchpatch" | ||||||
| 	stdcharset "golang.org/x/net/html/charset" | 	stdcharset "golang.org/x/net/html/charset" | ||||||
|  | 	"golang.org/x/text/encoding" | ||||||
| 	"golang.org/x/text/transform" | 	"golang.org/x/text/transform" | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | @ -883,40 +884,51 @@ parsingLoop: | ||||||
| 
 | 
 | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	// FIXME: There are numerous issues with this:
 | 	// TODO: There are numerous issues with this:
 | ||||||
| 	// - we might want to consider detecting encoding while parsing but...
 | 	// - we might want to consider detecting encoding while parsing but...
 | ||||||
| 	// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
 | 	// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
 | ||||||
| 	// - and this doesn't really account for changes in encoding
 | 	var diffLineTypeBuffers = make(map[DiffLineType]*bytes.Buffer, 3) | ||||||
| 	var buf bytes.Buffer | 	var diffLineTypeDecoders = make(map[DiffLineType]*encoding.Decoder, 3) | ||||||
|  | 	diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer) | ||||||
|  | 	diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer) | ||||||
|  | 	diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer) | ||||||
| 	for _, f := range diff.Files { | 	for _, f := range diff.Files { | ||||||
| 		buf.Reset() | 		for _, buffer := range diffLineTypeBuffers { | ||||||
|  | 			buffer.Reset() | ||||||
|  | 		} | ||||||
| 		for _, sec := range f.Sections { | 		for _, sec := range f.Sections { | ||||||
| 			for _, l := range sec.Lines { | 			for _, l := range sec.Lines { | ||||||
| 				if l.Type == DiffLineSection { | 				if l.Type == DiffLineSection { | ||||||
| 					continue | 					continue | ||||||
| 				} | 				} | ||||||
| 				buf.WriteString(l.Content[1:]) | 				diffLineTypeBuffers[l.Type].WriteString(l.Content[1:]) | ||||||
| 				buf.WriteString("\n") | 				diffLineTypeBuffers[l.Type].WriteString("\n") | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		charsetLabel, err := charset.DetectEncoding(buf.Bytes()) | 		for lineType, buffer := range diffLineTypeBuffers { | ||||||
|  | 			diffLineTypeDecoders[lineType] = nil | ||||||
|  | 			if buffer.Len() == 0 { | ||||||
|  | 				continue | ||||||
|  | 			} | ||||||
|  | 			charsetLabel, err := charset.DetectEncoding(buffer.Bytes()) | ||||||
| 			if charsetLabel != "UTF-8" && err == nil { | 			if charsetLabel != "UTF-8" && err == nil { | ||||||
| 				encoding, _ := stdcharset.Lookup(charsetLabel) | 				encoding, _ := stdcharset.Lookup(charsetLabel) | ||||||
| 				if encoding != nil { | 				if encoding != nil { | ||||||
| 				d := encoding.NewDecoder() | 					diffLineTypeDecoders[lineType] = encoding.NewDecoder() | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
| 		for _, sec := range f.Sections { | 		for _, sec := range f.Sections { | ||||||
| 			for _, l := range sec.Lines { | 			for _, l := range sec.Lines { | ||||||
| 						if l.Type == DiffLineSection { | 				decoder := diffLineTypeDecoders[l.Type] | ||||||
| 							continue | 				if decoder != nil { | ||||||
| 						} | 					if c, _, err := transform.String(decoder, l.Content[1:]); err == nil { | ||||||
| 						if c, _, err := transform.String(d, l.Content[1:]); err == nil { |  | ||||||
| 						l.Content = l.Content[0:1] + c | 						l.Content = l.Content[0:1] + c | ||||||
| 					} | 					} | ||||||
| 				} | 				} | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	diff.NumFiles = len(diff.Files) | 	diff.NumFiles = len(diff.Files) | ||||||
| 	return diff, nil | 	return diff, nil | ||||||
|  |  | ||||||
		Loading…
	
		Reference in a new issue