Skip to content

Commit f88fab3

Browse files
committed
Support unicode dirname/filename extended header
1 parent 792a675 commit f88fab3

4 files changed

Lines changed: 139 additions & 10 deletions

File tree

configure.ac

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,21 @@ if test "x$enable_indicator" = xyes; then
245245
[Define to 1 if you want to use the incremental indicator])
246246
fi
247247

248+
# support unicode filename
249+
AC_MSG_CHECKING(whether unicode filename is supported)
250+
AC_ARG_ENABLE(unicode-filename,
251+
AC_HELP_STRING([--enable-unicode-filename],
252+
[support unicode filename [[default=yes]]]),
253+
,
254+
# default
255+
enable_unicode_filename=yes)
256+
257+
AC_MSG_RESULT($enable_unicode_filename)
258+
if test "x$enable_unicode_filename" = xyes; then
259+
AC_DEFINE(UNICODE_FILENAME, 1,
260+
[Define to 1 if you want to support unicode filename])
261+
fi
262+
248263
# support multibyte filename
249264
AC_MSG_CHECKING(kanji code of filename)
250265
AC_ARG_ENABLE(multibyte-filename,

src/header.c

Lines changed: 118 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -569,9 +569,14 @@ get_extended_header(fp, hdr, header_size, hcrc)
569569
unsigned int *hcrc;
570570
{
571571
char data[LZHEADER_STORAGE];
572-
int name_length;
572+
int name_length = 0;
573573
char dirname[FILENAME_LENGTH];
574574
int dir_length = 0;
575+
#ifdef UNICODE_FILENAME
576+
int name_u_length = 0;
577+
char dirname_u[FILENAME_LENGTH*2];
578+
int dir_u_length = 0;
579+
#endif
575580
int i;
576581
ssize_t whole_size = header_size;
577582
int ext_type;
@@ -666,6 +671,27 @@ get_extended_header(fp, hdr, header_size, hcrc)
666671
#endif
667672

668673
break;
674+
#ifdef UNICODE_FILENAME
675+
case 0x44:
676+
#if DUMP_HEADER
677+
if (verbose_listing && verbose > 1) printf(" < unicode filename >\n");
678+
#endif
679+
/* unicode filename */
680+
name_u_length =
681+
get_bytes(hdr->name_u, header_size-n, sizeof(hdr->name_u)-2);
682+
hdr->name_u[name_u_length] = 0;
683+
hdr->name_u[name_u_length+1] = 0;
684+
break;
685+
case 0x45:
686+
#if DUMP_HEADER
687+
if (verbose_listing && verbose > 1) printf(" < unicode directory >\n");
688+
#endif
689+
/* unicode directory */
690+
dir_u_length = get_bytes(dirname_u, header_size-n, sizeof(dirname_u)-2);
691+
dirname_u[dir_u_length] = 0;
692+
dirname_u[dir_u_length+1] = 0;
693+
break;
694+
#endif
669695
case 0x50:
670696
#if DUMP_HEADER
671697
if (verbose_listing && verbose > 1) printf(" < UNIX permission >\n");
@@ -752,6 +778,23 @@ get_extended_header(fp, hdr, header_size, hcrc)
752778
name_length += dir_length;
753779
}
754780

781+
#ifdef UNICODE_FILENAME
782+
/* concatenate unicode dirname and filename */
783+
if (dir_u_length) {
784+
if (name_u_length + dir_u_length >= sizeof(hdr->name_u)) {
785+
warning("the length of unicode pathname is too long.");
786+
name_u_length = sizeof(hdr->name_u) - dir_u_length - 2;
787+
hdr->name_u[name_u_length] = 0;
788+
hdr->name_u[name_u_length + 1] = 0;
789+
}
790+
memcpy(&dirname_u[dir_u_length], hdr->name_u, name_u_length);
791+
memcpy(hdr->name_u, dirname_u, name_u_length + dir_u_length);
792+
name_u_length += dir_u_length;
793+
hdr->name_u[name_u_length] = 0;
794+
hdr->name_u[name_u_length + 1] = 0;
795+
}
796+
#endif
797+
755798
return whole_size;
756799
}
757800

@@ -1282,10 +1325,14 @@ get_header(fp, hdr)
12821325
filename_case = optional_filename_case;
12831326

12841327
/* kanji code and delimiter conversion */
1285-
convert_filename(hdr->name, strlen(hdr->name), sizeof(hdr->name),
1286-
archive_kanji_code,
1287-
system_kanji_code,
1288-
archive_delim, system_delim, filename_case);
1328+
#ifdef UNICODE_FILENAME
1329+
if (convert_filename_from_unicode(hdr->name_u, hdr->name,
1330+
sizeof(hdr->name), system_kanji_code) == -1)
1331+
#endif
1332+
convert_filename(hdr->name, strlen(hdr->name), sizeof(hdr->name),
1333+
archive_kanji_code,
1334+
system_kanji_code,
1335+
archive_delim, system_delim, filename_case);
12891336

12901337
if ((hdr->unix_mode & UNIX_FILE_SYMLINK) == UNIX_FILE_SYMLINK) {
12911338
char *p;
@@ -2004,7 +2051,7 @@ ConvertUTF8ToEncoding(const char* inUTF8Buf,
20042051
#include <iconv.h>
20052052

20062053
static int
2007-
ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
2054+
ConvertEncodingByIconv(const char *src, int srclen, char *dst, int dstsize,
20082055
const char *srcEnc, const char *dstEnc)
20092056
{
20102057
iconv_t ic;
@@ -2017,7 +2064,7 @@ ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
20172064
dst_p = &szTmpBuf[0];
20182065
iLen = (size_t)sizeof(szTmpBuf)-1;
20192066
src_p = (char *)src;
2020-
sLen = (size_t)strlen(src);
2067+
sLen = (size_t)srclen;
20212068
memset(szTmpBuf, 0, sizeof(szTmpBuf));
20222069
memset(dst, 0, dstsize);
20232070

@@ -2041,6 +2088,67 @@ ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
20412088
}
20422089
#endif /* defined(__APPLE__) */
20432090

2091+
#ifdef UNICODE_FILENAME
2092+
/*
 * Convert the UTF-16LE pathname gathered from the unicode extended
 * headers into the multibyte encoding selected by to_code.
 *
 *   name_u  - UTF-16LE string terminated by a 16-bit zero.  It is modified
 *             in place: every code unit whose two bytes are both
 *             LHA_PATHSEP is rewritten to '/'.  The terminator scan is
 *             bounded by FILENAME_LENGTH*2 bytes, the size of
 *             LzHeader.name_u, which is what get_header() passes in.
 *   name    - destination buffer; NUL-terminated whenever 0 is returned.
 *   size    - size of name in bytes.
 *   to_code - CODE_SJIS, CODE_EUC, CODE_UTF8 or CODE_CAP (converted as
 *             SJIS first, then passed through sjis_to_cap()).
 *
 * Returns 0 on success.  Returns -1, leaving name untouched, when nothing
 * could be converted: unsupported to_code, non-positive size, empty or
 * unterminated name_u, conversion failure, or a build without iconv.
 */
int
convert_filename_from_unicode(char *name_u, char *name, int size, int to_code)
{
#if HAVE_ICONV
    char tmp[FILENAME_LENGTH];
    int to_code_save = NONE;
    const char *toEnc;
    int i = 0;

    /* name[size-1] is written below, so a usable buffer is required. */
    if (size <= 0)
        return -1;

    if (to_code == CODE_CAP) {
        to_code_save = CODE_CAP;
        to_code = CODE_SJIS;
    }

    switch (to_code) {
    case CODE_SJIS:
        toEnc = "SJIS";
        break;
    case CODE_EUC:
        toEnc = "EUC-JP";
        break;
    case CODE_UTF8:
        toEnc = "UTF-8";
        break;
    default:
        return -1;
    }

    /*
     * Walk the string one 16-bit unit at a time, replacing the archive
     * path separator and measuring the byte length for iconv.
     */
    while (name_u[i] != 0x00 || name_u[i+1] != 0x00) {
        if ((unsigned char)name_u[i] == LHA_PATHSEP &&
            (unsigned char)name_u[i+1] == LHA_PATHSEP) {
            name_u[i] = 0x2F; name_u[i+1] = 0x00;
        }
        i += 2;

        /*
         * An odd-length or otherwise malformed header leaves no aligned
         * 16-bit terminator; stop before reading past the buffer.
         */
        if (i + 1 >= FILENAME_LENGTH * 2)
            return -1;
    }

    if (i == 0)
        return -1;

    if (ConvertEncodingByIconv(name_u, i, tmp, sizeof(tmp), "UTF-16LE", toEnc) == -1)
        return -1;
    /*
     * Defensive: the helper's termination guarantee is not visible from
     * here, so make sure tmp can be read as a C string.
     */
    tmp[sizeof(tmp) - 1] = 0;

    /* strncpy() does not terminate when the source fills the buffer. */
    strncpy(name, tmp, size);
    name[size - 1] = 0;

    if (to_code_save == CODE_CAP) {
        /* name now holds terminated SJIS; re-encode it as CAP via tmp. */
        sjis_to_cap(tmp, name, sizeof(tmp));
        strncpy(name, tmp, size);
        name[size - 1] = 0;
    }

    return 0;
#else
    return -1;
#endif
}
2150+
#endif
2151+
20442152
char *
20452153
sjis_to_utf8(char *dst, const char *src, size_t dstsize)
20462154
{
@@ -2052,11 +2160,11 @@ sjis_to_utf8(char *dst, const char *src, size_t dstsize)
20522160
kCFStringEncodingUseHFSPlusCanonical) == 0)
20532161
return dst;
20542162
#else
2055-
if (ConvertEncodingByIconv(src, dst, dstsize, "SJIS", "UTF-8-MAC") != -1)
2163+
if (ConvertEncodingByIconv(src, strlen(src), dst, dstsize, "SJIS", "UTF-8-MAC") != -1)
20562164
return dst;
20572165
#endif
20582166
#elif HAVE_ICONV
2059-
if (ConvertEncodingByIconv(src, dst, dstsize, "SJIS", "UTF-8") != -1)
2167+
if (ConvertEncodingByIconv(src, strlen(src), dst, dstsize, "SJIS", "UTF-8") != -1)
20602168
return dst;
20612169
#else
20622170
error("not support utf-8 conversion");
@@ -2085,7 +2193,7 @@ utf8_to_sjis(char *dst, const char *src, size_t dstsize)
20852193
return dst;
20862194
#endif
20872195
#elif HAVE_ICONV
2088-
if (ConvertEncodingByIconv(src, dst, dstsize, "UTF-8", "SJIS") != -1)
2196+
if (ConvertEncodingByIconv(src, strlen(src), dst, dstsize, "UTF-8", "SJIS") != -1)
20892197
return dst;
20902198
#else
20912199
error("not support utf-8 conversion");

src/lha.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ typedef struct LzHeader {
256256
unsigned char attribute;
257257
unsigned char header_level;
258258
char name[FILENAME_LENGTH];
259+
#ifdef UNICODE_FILENAME
260+
char name_u[FILENAME_LENGTH*2];
261+
#endif
259262
char realname[FILENAME_LENGTH];/* real name for symbolic link */
260263
unsigned int crc; /* file CRC */
261264
boolean has_crc; /* file CRC */

src/prototypes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ boolean get_header(FILE *fp, LzHeader *hdr);
3333
int seek_lha_header(FILE *fp);
3434
void init_header(char *name, struct stat *v_stat, LzHeader *hdr);
3535
void write_header(FILE *fp, LzHeader *hdr);
36+
#ifdef UNICODE_FILENAME
37+
int convert_filename_from_unicode(char *name_u, char *name, int size, int to_code);
38+
#endif
3639
char *sjis_to_utf8(char *dst, const char *src, size_t dstsize);
3740
char *utf8_to_sjis(char *dst, const char *src, size_t dstsize);
3841
void euc2sjis(int *p1, int *p2);

0 commit comments

Comments
 (0)