@@ -7,12 +7,10 @@ use alloc::{
77} ;
88use core:: {
99 any:: Any ,
10- ffi:: CStr ,
1110 fmt:: { self , Debug } ,
1211} ;
1312
1413use anyhow:: { Result , bail} ;
15- use encoding_rs:: SHIFT_JIS ;
1614use object:: Endian as _;
1715
1816use crate :: {
@@ -44,6 +42,16 @@ pub mod x86;
// Two opcode constants taken from the very top of the `u16` range:
// `OPCODE_INVALID` is `u16::MAX` and `OPCODE_DATA` is the value just below it.
// NOTE(review): presumably these are sentinels that real instruction opcodes
// never reach — confirm against the per-architecture opcode tables.
4442pub const OPCODE_INVALID : u16 = u16:: MAX ;
4543pub const OPCODE_DATA : u16 = u16:: MAX - 1 ;
4644
// Table of text encodings tried when rendering `DataType::String` bytes.
// Each entry pairs an `encoding_rs` encoding with the human-readable label
// that is attached to the decoded string when it is displayed.
//
// The string-literal code iterates this table in order and emits one entry per
// encoding that decodes the bytes without errors, so the order here is the
// order in which the alternatives are listed.
//
// The array length (7) is part of the type and must be updated whenever an
// entry is added or removed.
45+ const SUPPORTED_ENCODINGS : [ ( & encoding_rs:: Encoding , & str ) ; 7 ] = [
46+ ( encoding_rs:: UTF_8 , "UTF-8" ) ,
47+ ( encoding_rs:: SHIFT_JIS , "Shift JIS" ) ,
48+ ( encoding_rs:: UTF_16BE , "UTF-16BE" ) ,
49+ ( encoding_rs:: UTF_16LE , "UTF-16LE" ) ,
50+ ( encoding_rs:: WINDOWS_1252 , "Windows-1252" ) ,
51+ ( encoding_rs:: EUC_JP , "EUC-JP" ) ,
52+ ( encoding_rs:: BIG5 , "Big5" ) ,
53+ ] ;
54+
4755/// Represents the type of data associated with an instruction
4856#[ derive( PartialEq ) ]
4957pub enum DataType {
@@ -77,7 +85,7 @@ impl DataType {
7785 let mut strs = Vec :: new ( ) ;
7886 for ( literal, label_override) in self . display_literals ( endian, bytes) {
7987 let label = label_override. unwrap_or_else ( || self . to_string ( ) ) ;
80- strs. push ( format ! ( "{label}: {literal}" ) )
88+ strs. push ( format ! ( "{label}: {literal:? }" ) )
8189 }
8290 strs
8391 }
@@ -164,16 +172,18 @@ impl DataType {
164172 strs. push ( ( format ! ( "{bytes:#?}" ) , None ) ) ;
165173 }
166174 DataType :: String => {
167- if let Ok ( cstr) = CStr :: from_bytes_until_nul ( bytes) {
168- strs. push ( ( format ! ( "{cstr:?}" ) , None ) ) ;
169- }
170175 if let Some ( nul_idx) = bytes. iter ( ) . position ( |& c| c == b'\0' ) {
171- let ( cow, _, had_errors) = SHIFT_JIS . decode ( & bytes[ ..nul_idx] ) ;
172- if !had_errors {
173- let str = format ! ( "{cow:?}" ) ;
174- // Only add the Shift JIS string if it's different from the ASCII string.
175- if !strs. iter ( ) . any ( |x| x. 0 == str) {
176- strs. push ( ( str, Some ( "Shift JIS" . into ( ) ) ) ) ;
176+ let str_bytes = & bytes[ ..nul_idx] ;
177+ // Special case to display (ASCII) as the label for ASCII-only strings.
178+ let ( cow, _, had_errors) = encoding_rs:: UTF_8 . decode ( str_bytes) ;
179+ if !had_errors && cow. is_ascii ( ) {
180+ strs. push ( ( format ! ( "{cow}" ) , Some ( "ASCII" . into ( ) ) ) ) ;
181+ }
182+ for ( encoding, encoding_name) in SUPPORTED_ENCODINGS {
183+ let ( cow, _, had_errors) = encoding. decode ( str_bytes) ;
184+ // Avoid showing ASCII-only strings more than once if the encoding is ASCII-compatible.
185+ if !had_errors && ( !encoding. is_ascii_compatible ( ) || !cow. is_ascii ( ) ) {
186+ strs. push ( ( format ! ( "{cow}" ) , Some ( encoding_name. into ( ) ) ) ) ;
177187 }
178188 }
179189 }
0 commit comments