From 288eb06708b4d92d55bdea107eb06e1255d70c9d Mon Sep 17 00:00:00 2001 From: Jason Smith Date: Thu, 12 Nov 2020 20:26:49 -0800 Subject: [PATCH] UTF longname followup (#20115) --- Marlin/src/sd/SdBaseFile.cpp | 63 ++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/Marlin/src/sd/SdBaseFile.cpp b/Marlin/src/sd/SdBaseFile.cpp index acc5ba17f2..db2a9e2de9 100644 --- a/Marlin/src/sd/SdBaseFile.cpp +++ b/Marlin/src/sd/SdBaseFile.cpp @@ -1109,8 +1109,8 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { // We can't reconvert to UTF-8 here as UTF-8 is variable-size encoding, but joining LFN blocks // needs static bytes addressing. So here just store full UTF-16LE words to re-convert later. uint16_t idx = (n + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding - longFilename[idx] = utf16_ch & 0xFF; - longFilename[idx+1] = (utf16_ch >> 8) & 0xFF; + longFilename[idx] = utf16_ch & 0xFF; + longFilename[idx + 1] = (utf16_ch >> 8) & 0xFF; #else // Replace all multibyte characters to '_' longFilename[n + i] = (utf16_ch > 0xFF) ? '_' : (utf16_ch & 0xFF); @@ -1122,45 +1122,44 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) { } } - // Return if normal file or subdirectory + // Post-process normal file or subdirectory longname, if any if (DIR_IS_FILE_OR_SUBDIR(dir)) { #if ENABLED(UTF_FILENAME_SUPPORT) - // Convert filename from utf-16 to utf-8 as Marlin expects #if LONG_FILENAME_CHARSIZE > 2 // Add warning for developers for currently not supported 3-byte cases (Conversion series of 2-byte // codepoints to 3-byte in-place will break the rest of filename) #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints." #endif - uint16_t currentPos = 0; - LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) { - uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding - uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); - if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' - longFilename[currentPos++] = '_'; - else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte utf-8 char - longFilename[currentPos++] = utf16_ch & 0x007F; - else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte utf-8 char - longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); - longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F); - } - else { - #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte utf-8 char - longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); - longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); - longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F); - #else // Encode as '_' - longFilename[currentPos++] = '_'; - #endif - } - - if (0 == utf16_ch) break; // End of filename - } - return currentPos; - #else - return n; + // Is there a long filename to decode? + if (longFilename) { + // Reset n to the start of the long name + n = 0; + for (uint16_t idx = 0; idx < (LONG_FILENAME_LENGTH) / 2; idx += 2) { // idx is fixed since FAT LFN always contains UTF-16LE encoding + uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8); + if (0xD800 == (utf16_ch & 0xF800)) // Surrogate pair - encode as '_' + longFilename[n++] = '_'; + else if (0 == (utf16_ch & 0xFF80)) // Encode as 1-byte UTF-8 char + longFilename[n++] = utf16_ch & 0x007F; + else if (0 == (utf16_ch & 0xF800)) { // Encode as 2-byte UTF-8 char + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x1F); + longFilename[n++] = 0x80 | ( utf16_ch & 0x3F); + } + else { + #if LONG_FILENAME_CHARSIZE > 2 // Encode as 3-byte UTF-8 char + longFilename[n++] = 0xE0 | ((utf16_ch >> 12) & 0x0F); + longFilename[n++] = 0xC0 | ((utf16_ch >> 6) & 0x3F); + longFilename[n++] = 0xC0 | ( utf16_ch & 0x3F); + #else // Encode as '_' + longFilename[n++] = '_'; + #endif + } + if (0 == utf16_ch) break; // End of filename + } // idx + } // longFilename #endif - } + return n; + } // DIR_IS_FILE_OR_SUBDIR } }