@@ -537,9 +537,14 @@ get_extended_header(fp, hdr, header_size, hcrc)
537537 unsigned int * hcrc ;
538538{
539539 char data [LZHEADER_STORAGE ];
540- int name_length ;
540+ int name_length = 0 ;
541541 char dirname [FILENAME_LENGTH ];
542542 int dir_length = 0 ;
543+ #ifdef UNICODE_FILENAME
544+ int name_u_length = 0 ;
545+ char dirname_u [FILENAME_LENGTH * 2 ];
546+ int dir_u_length = 0 ;
547+ #endif
543548 int i ;
544549 ssize_t whole_size = header_size ;
545550 int ext_type ;
@@ -634,6 +639,27 @@ get_extended_header(fp, hdr, header_size, hcrc)
634639#endif
635640
636641 break ;
642+ #ifdef UNICODE_FILENAME
643+ case 0x44 :
644+ #if DUMP_HEADER
645+ if (verbose_listing && verbose > 1 ) printf (" < unicode filename >\n" );
646+ #endif
647+ /* unicode filename */
648+ name_u_length =
649+ get_bytes (hdr -> name_u , header_size - n , sizeof (hdr -> name_u )- 2 );
650+ hdr -> name_u [name_u_length ] = 0 ;
651+ hdr -> name_u [name_u_length + 1 ] = 0 ;
652+ break ;
653+ case 0x45 :
654+ #if DUMP_HEADER
655+ if (verbose_listing && verbose > 1 ) printf (" < unicode directory >\n" );
656+ #endif
657+ /* unicode directory */
658+ dir_u_length = get_bytes (dirname_u , header_size - n , sizeof (dirname_u )- 2 );
659+ dirname_u [dir_u_length ] = 0 ;
660+ dirname_u [dir_u_length + 1 ] = 0 ;
661+ break ;
662+ #endif
637663 case 0x50 :
638664#if DUMP_HEADER
639665 if (verbose_listing && verbose > 1 ) printf (" < UNIX permission >\n" );
@@ -720,6 +746,23 @@ get_extended_header(fp, hdr, header_size, hcrc)
720746 name_length += dir_length ;
721747 }
722748
749+ #ifdef UNICODE_FILENAME
750+ /* concatenate unicode dirname and filename */
751+ if (dir_u_length ) {
752+ if (name_u_length + dir_u_length >= sizeof (hdr -> name_u )) {
753+ warning ("the length of unicode pathname is too long." );
754+ name_u_length = sizeof (hdr -> name_u ) - dir_u_length - 2 ;
755+ hdr -> name_u [name_u_length ] = 0 ;
756+ hdr -> name_u [name_u_length + 1 ] = 0 ;
757+ }
758+ memcpy (& dirname_u [dir_u_length ], hdr -> name_u , name_u_length );
759+ memcpy (hdr -> name_u , dirname_u , name_u_length + dir_u_length );
760+ name_u_length += dir_u_length ;
761+ hdr -> name_u [name_u_length ] = 0 ;
762+ hdr -> name_u [name_u_length + 1 ] = 0 ;
763+ }
764+ #endif
765+
723766 return whole_size ;
724767}
725768
@@ -1214,10 +1257,14 @@ get_header(fp, hdr)
12141257 filename_case = optional_filename_case ;
12151258
12161259 /* kanji code and delimiter conversion */
1217- convert_filename (hdr -> name , strlen (hdr -> name ), sizeof (hdr -> name ),
1218- archive_kanji_code ,
1219- system_kanji_code ,
1220- archive_delim , system_delim , filename_case );
1260+ #ifdef UNICODE_FILENAME
1261+ if (convert_filename_from_unicode (hdr -> name_u , hdr -> name ,
1262+ sizeof (hdr -> name ), system_kanji_code ) == -1 )
1263+ #endif
1264+ convert_filename (hdr -> name , strlen (hdr -> name ), sizeof (hdr -> name ),
1265+ archive_kanji_code ,
1266+ system_kanji_code ,
1267+ archive_delim , system_delim , filename_case );
12211268
12221269 if ((hdr -> unix_mode & UNIX_FILE_SYMLINK ) == UNIX_FILE_SYMLINK ) {
12231270 char * p ;
@@ -1936,7 +1983,7 @@ ConvertUTF8ToEncoding(const char* inUTF8Buf,
19361983#include <iconv.h>
19371984
19381985static int
1939- ConvertEncodingByIconv (const char * src , char * dst , int dstsize ,
1986+ ConvertEncodingByIconv (const char * src , int srclen , char * dst , int dstsize ,
19401987 const char * srcEnc , const char * dstEnc )
19411988{
19421989 iconv_t ic ;
@@ -1949,7 +1996,7 @@ ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
19491996 dst_p = & szTmpBuf [0 ];
19501997 iLen = (size_t )sizeof (szTmpBuf )- 1 ;
19511998 src_p = (char * )src ;
1952- sLen = (size_t )strlen ( src ) ;
1999+ sLen = (size_t )srclen ;
19532000 memset (szTmpBuf , 0 , sizeof (szTmpBuf ));
19542001 memset (dst , 0 , dstsize );
19552002
@@ -1973,6 +2020,67 @@ ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
19732020}
19742021#endif /* defined(__APPLE__) */
19752022
2023+ #ifdef UNICODE_FILENAME
/*
 * Convert a UTF-16LE pathname into the filename encoding selected by
 * to_code and store it in name as a NUL-terminated string.
 *
 * name_u   in/out: UTF-16LE pathname terminated by a zero 16-bit unit.
 *          Every unit whose two bytes both equal LHA_PATHSEP is
 *          rewritten in place to '/' (0x2F 0x00) before conversion.
 * name     out: receives the converted name, truncated to fit.
 * size     capacity of name in bytes, including the terminating NUL.
 * to_code  CODE_SJIS, CODE_EUC, CODE_UTF8 or CODE_CAP.  CODE_CAP is
 *          produced by converting to SJIS and then calling
 *          sjis_to_cap().
 *
 * Returns 0 on success.  Returns -1 when size is not positive, to_code
 * is not one of the codes above, name_u is empty, the iconv conversion
 * reports failure, or the program was built without HAVE_ICONV.
 */
int
convert_filename_from_unicode(char *name_u, char *name, int size, int to_code)
{
#if HAVE_ICONV
    char tmp[FILENAME_LENGTH];
    int to_code_save = NONE;
    const char *toEnc;
    int i = 0;

    /* name[size - 1] is written below; refuse a buffer with no room. */
    if (size <= 0)
        return -1;

    if (to_code == CODE_CAP) {
        to_code_save = CODE_CAP;
        to_code = CODE_SJIS;
    }

    switch (to_code) {
    case CODE_SJIS:
        toEnc = "SJIS";
        break;
    case CODE_EUC:
        toEnc = "EUC-JP";
        break;
    case CODE_UTF8:
        toEnc = "UTF-8";
        break;
    default:
        return -1;
    }

    /*
     * Walk the string one 16-bit unit at a time, replacing the archive
     * path separator with '/', and leave i at the byte length.
     * NOTE(review): the scan is unbounded; it relies on the caller
     * having placed the two zero bytes at an even offset -- confirm
     * that the stored length can never be odd.
     */
    while (name_u[i] != 0x00 || name_u[i + 1] != 0x00) {
        if ((unsigned char)name_u[i] == LHA_PATHSEP &&
            (unsigned char)name_u[i + 1] == LHA_PATHSEP) {
            name_u[i] = 0x2F;
            name_u[i + 1] = 0x00;
        }
        i += 2;
    }

    if (i == 0)
        return -1;

    if (ConvertEncodingByIconv(name_u, i, tmp, sizeof(tmp),
                               "UTF-16LE", toEnc) == -1)
        return -1;

    /*
     * strncpy() does not terminate the destination when the source is
     * size bytes or longer, so terminate explicitly on every path (the
     * CAP conversion below also reads name as a C string).
     */
    strncpy(name, tmp, size);
    name[size - 1] = 0;

    if (to_code_save == CODE_CAP) {
        sjis_to_cap(tmp, name, sizeof(tmp));
        strncpy(name, tmp, size);
        name[size - 1] = 0;
    }

    return 0;
#else
    (void)name_u;
    (void)name;
    (void)size;
    (void)to_code;
    return -1;
#endif
}
2082+ #endif
2083+
19762084char *
19772085sjis_to_utf8 (char * dst , const char * src , size_t dstsize )
19782086{
@@ -1983,7 +2091,7 @@ sjis_to_utf8(char *dst, const char *src, size_t dstsize)
19832091 kCFStringEncodingUseHFSPlusCanonical ) == 0 )
19842092 return dst ;
19852093#elif HAVE_ICONV
1986- if (ConvertEncodingByIconv (src , dst , dstsize , "SJIS" , "UTF-8" ) != -1 )
2094+ if (ConvertEncodingByIconv (src , strlen ( src ), dst , dstsize , "SJIS" , "UTF-8" ) != -1 )
19872095 return dst ;
19882096#else
19892097 error ("not support utf-8 conversion" );
@@ -2007,7 +2115,7 @@ utf8_to_sjis(char *dst, const char *src, size_t dstsize)
20072115 kCFStringEncodingUseHFSPlusCanonical ) == 0 )
20082116 return dst ;
20092117#elif HAVE_ICONV
2010- if (ConvertEncodingByIconv (src , dst , dstsize , "UTF-8" , "SJIS" ) != -1 )
2118+ if (ConvertEncodingByIconv (src , strlen ( src ), dst , dstsize , "UTF-8" , "SJIS" ) != -1 )
20112119 return dst ;
20122120#else
20132121 error ("not support utf-8 conversion" );
0 commit comments