3333
3434import org .jcodings .Encoding ;
3535import org .jcodings .specific .ASCIIEncoding ;
36+ import org .jcodings .specific .UTF16BEEncoding ;
37+ import org .jcodings .specific .UTF16LEEncoding ;
38+ import org .jcodings .specific .UTF32BEEncoding ;
39+ import org .jcodings .specific .UTF32LEEncoding ;
40+ import org .jcodings .specific .UTF8Encoding ;
3641import org .jruby .*;
3742import org .jruby .anno .FrameField ;
3843import org .jruby .anno .JRubyClass ;
5156import org .jruby .util .ByteList ;
5257import org .jruby .util .StringSupport ;
5358import org .jruby .util .TypeConverter ;
59+ import org .jruby .util .func .ObjectObjectIntFunction ;
5460import org .jruby .util .io .EncodingUtils ;
5561import org .jruby .util .io .Getline ;
62+ import org .jruby .util .io .IOEncodable ;
5663import org .jruby .util .io .ModeFlags ;
5764import org .jruby .util .io .OpenFile ;
5865
6269import java .util .Arrays ;
6370import java .util .concurrent .atomic .AtomicReferenceFieldUpdater ;
6471
72+ import static java .lang .Byte .toUnsignedInt ;
6573import static org .jruby .RubyEnumerator .enumeratorize ;
6674import static org .jruby .runtime .Visibility .PRIVATE ;
6775import static org .jruby .util .RubyStringBuilder .str ;
@@ -93,6 +101,10 @@ static class StringIOData {
93101
94102 private static final AtomicReferenceFieldUpdater <StringIOData , Object > LOCKED_UPDATER = AtomicReferenceFieldUpdater .newUpdater (StringIOData .class , Object .class , "owner" );
95103
// Per-thread holder created by EncodingUtils.vmodeVperm(null, null). strioInit passes it to
// EncodingUtils.extractModeEncoding and afterwards resets both slots to null via
// EncodingUtils.vmode/vperm so that nothing is retained between uses.
104+ private static final ThreadLocal <Object > VMODE_VPERM_TL = ThreadLocal .withInitial (() -> EncodingUtils .vmodeVperm (null , null ));
// Per-thread one-element int array (initially {0}) that strioInit hands to
// EncodingUtils.extractModeEncoding as its last (fmode) argument.
// NOTE(review): strioInit then assigns ptr.flags from a separate local `fmode` array that is
// never passed to extractModeEncoding, and this array is not visibly reset between calls —
// confirm which array is intended to carry the parsed flags.
105+ private static final ThreadLocal <int []> FMODE_TL = ThreadLocal .withInitial (() -> new int []{0 });
// Single shared one-element array passed as the oflags argument of extractModeEncoding; the
// call site notes it should become per-use if the oflags result is ever consumed.
106+ private static final int [] OFLAGS_UNUSED = new int []{0 };
107+
96108 public static RubyClass createStringIOClass (final Ruby runtime ) {
97109 RubyClass stringIOClass = runtime .defineClass (
98110 "StringIO" , runtime .getObject (), StringIO ::new );
@@ -298,12 +310,22 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
298310 Encoding encoding = null ;
299311
300312 IRubyObject options = ArgsUtil .getOptionsArg (runtime , maybeOptions );
313+ IOEncodable .ConvConfig ioEncodable = new IOEncodable .ConvConfig ();
301314 if (!options .isNil ()) {
302315 argc --;
303- IRubyObject encodingOpt = ArgsUtil .extractKeywordArg (context , "encoding" , (RubyHash ) options );
304- if (!encodingOpt .isNil ()) {
305- encoding = EncodingUtils .toEncoding (context , encodingOpt );
306- }
316+
317+ int [] fmode = {0 };
318+ Object vmodeAndVpermP = VMODE_VPERM_TL .get ();
319+
320+ // switch to per-use oflags if it is ever used in the future
321+ EncodingUtils .extractModeEncoding (context , ioEncodable , vmodeAndVpermP , options , OFLAGS_UNUSED , FMODE_TL .get ());
322+
323+ // clear shared vmodeVperm
324+ EncodingUtils .vmode (vmodeAndVpermP , null );
325+ EncodingUtils .vperm (vmodeAndVpermP , null );
326+
327+ ptr .flags = fmode [0 ];
328+ encoding = ioEncodable .enc ;
307329 }
308330
309331 switch (argc ) {
@@ -312,11 +334,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
312334 final boolean trunc ;
313335 if (mode instanceof RubyFixnum ) {
314336 int flags = RubyFixnum .fix2int (mode );
315- ptr .flags = ModeFlags .getOpenFileFlagsFor (flags );
337+ ptr .flags | = ModeFlags .getOpenFileFlagsFor (flags );
316338 trunc = (flags & ModeFlags .TRUNC ) != 0 ;
317339 } else {
318340 String m = arg1 .convertToString ().toString ();
319- ptr .flags = OpenFile .ioModestrFmode (runtime , m );
341+ ptr .flags | = OpenFile .ioModestrFmode (runtime , m );
320342 trunc = m .length () > 0 && m .charAt (0 ) == 'w' ;
321343 }
322344 string = arg0 .convertToString ();
@@ -329,11 +351,11 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
329351 break ;
330352 case 1 :
331353 string = arg0 .convertToString ();
332- ptr .flags = string .isFrozen () ? OpenFile .READABLE : OpenFile .READWRITE ;
354+ ptr .flags | = string .isFrozen () ? OpenFile .READABLE : OpenFile .READWRITE ;
333355 break ;
334356 case 0 :
335357 string = RubyString .newEmptyString (runtime , runtime .getDefaultExternalEncoding ());
336- ptr .flags = OpenFile .READWRITE ;
358+ ptr .flags | = OpenFile .READWRITE ;
337359 break ;
338360 default :
339361 // should not be possible
@@ -344,6 +366,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
344366 ptr .enc = encoding ;
345367 ptr .pos = 0 ;
346368 ptr .lineno = 0 ;
369+ if ((ptr .flags & OpenFile .SETENC_BY_BOM ) != 0 ) setEncodingByBOM (context );
347370 // funky way of shifting readwrite flags into object flags
348371 flags |= (ptr .flags & OpenFile .READWRITE ) * (STRIO_READABLE / OpenFile .READABLE );
349372 } finally {
@@ -1636,6 +1659,71 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj
16361659 return set_encoding (context , enc );
16371660 }
16381661
1662+ @ JRubyMethod
1663+ public IRubyObject set_encoding_by_bom (ThreadContext context ) {
1664+ if (setEncodingByBOM (context ) == null ) return context .nil ;
1665+
1666+ return context .runtime .getEncodingService ().convertEncodingToRubyEncoding (ptr .enc );
1667+ }
1668+
1669+ private Encoding setEncodingByBOM (ThreadContext context ) {
1670+ Encoding enc = detectBOM (context , ptr .string , (ctx , enc2 , bomlen ) -> {
1671+ ptr .pos = bomlen ;
1672+ if (writable ()) {
1673+ ptr .string .setEncoding (enc2 );
1674+ }
1675+ return enc2 ;
1676+ });
1677+ ptr .enc = enc ;
1678+ return enc ;
1679+ }
1680+
1681+ private static Encoding detectBOM (ThreadContext context , RubyString str , ObjectObjectIntFunction <ThreadContext , Encoding , Encoding > callback ) {
1682+ int p ;
1683+ int len ;
1684+
1685+ ByteList byteList = str .getByteList ();
1686+ byte [] bytes = byteList .unsafeBytes ();
1687+ p = byteList .begin ();
1688+ len = byteList .realSize ();
1689+
1690+ if (len < 1 ) return null ;
1691+ switch (toUnsignedInt (bytes [p ])) {
1692+ case 0xEF :
1693+ if (len < 3 ) break ;
1694+ if (toUnsignedInt (bytes [p + 1 ]) == 0xBB && toUnsignedInt (bytes [p + 2 ]) == 0xBF ) {
1695+ return callback .apply (context , UTF8Encoding .INSTANCE , 3 );
1696+ }
1697+ break ;
1698+
1699+ case 0xFE :
1700+ if (len < 2 ) break ;
1701+ if (toUnsignedInt (bytes [p + 1 ]) == 0xFF ) {
1702+ return callback .apply (context , UTF16BEEncoding .INSTANCE , 2 );
1703+ }
1704+ break ;
1705+
1706+ case 0xFF :
1707+ if (len < 2 ) break ;
1708+ if (toUnsignedInt (bytes [p + 1 ]) == 0xFE ) {
1709+ if (len >= 4 && toUnsignedInt (bytes [p + 2 ]) == 0 && toUnsignedInt (bytes [p + 3 ]) == 0 ) {
1710+ return callback .apply (context , UTF32LEEncoding .INSTANCE , 4 );
1711+ }
1712+ return callback .apply (context , UTF16LEEncoding .INSTANCE , 2 );
1713+ }
1714+ break ;
1715+
1716+ case 0 :
1717+ if (len < 4 ) break ;
1718+ if (toUnsignedInt (bytes [p + 1 ]) == 0 && toUnsignedInt (bytes [p + 2 ]) == 0xFE && toUnsignedInt (bytes [p + 3 ]) == 0xFF ) {
1719+ return callback .apply (context , UTF32BEEncoding .INSTANCE , 4 );
1720+ }
1721+ break ;
1722+ }
1723+ return callback .apply (context , null , 0 );
1724+ }
1725+
1726+
16391727 @ JRubyMethod
16401728 public IRubyObject external_encoding (ThreadContext context ) {
16411729 return context .runtime .getEncodingService ().convertEncodingToRubyEncoding (getEncoding ());
0 commit comments