From c26bed6c75d9c2d2ef4e23ad1a3c51fedbb575ca Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 11:34:57 -0600 Subject: [PATCH 01/23] Use modify that clears codeRange --- ext/java/org/jruby/ext/stringio/StringIO.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 969abbb..e461576 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1610,6 +1610,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) } } else { strioExtend(context, pos, len); + modifyString(ptr.string); ByteList ptrByteList = myString.getByteList(); System.arraycopy(strByteList.getUnsafeBytes(), strByteList.getBegin(), ptrByteList.getUnsafeBytes(), ptrByteList.begin() + pos, len); } From 667acea6dc400ee87420edbc0d1998ac5b9000d4 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 11:36:23 -0600 Subject: [PATCH 02/23] Check incompatible encodings as in C --- ext/java/org/jruby/ext/stringio/StringIO.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index e461576..a147981 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -33,6 +33,7 @@ import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; +import org.jcodings.specific.USASCIIEncoding; import org.jcodings.specific.UTF16BEEncoding; import org.jcodings.specific.UTF16LEEncoding; import org.jcodings.specific.UTF32BEEncoding; @@ -1590,7 +1591,11 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) if (enc != encStr && enc != EncodingUtils.ascii8bitEncoding(runtime) // this is a hack because we don't seem to handle incoming ASCII-8BIT properly in transcoder && encStr != 
ASCIIEncoding.INSTANCE) { - str = EncodingUtils.strConvEnc(context, str, encStr, enc); + RubyString converted = EncodingUtils.strConvEnc(context, str, encStr, enc); + if (converted == str && encStr != ASCIIEncoding.INSTANCE && encStr != USASCIIEncoding.INSTANCE) { /* conversion failed */ + ptr.string.checkEncoding(str); + } + str = converted; } final ByteList strByteList = str.getByteList(); len = str.size(); From 8f955ec122f79360c8900615becf53f64de8a8b5 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 11:48:22 -0600 Subject: [PATCH 03/23] Check readability during iteration Iterators might close this StringIO so check each loop. --- ext/java/org/jruby/ext/stringio/StringIO.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index a147981..24f939d 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -596,6 +596,8 @@ public IRubyObject each_byte(ThreadContext context, Block block) { // Check the length every iteration, since // the block can modify this string. 
while (ptr.pos < bytes.length()) { + // check readability for each loop, since it could get closed + checkReadable(); block.yield(context, runtime.newFixnum(bytes.get(ptr.pos++) & 0xFF)); } } finally { @@ -1758,6 +1760,9 @@ public IRubyObject each_codepoint(ThreadContext context, Block block) { final byte[] stringBytes = string.getUnsafeBytes(); int begin = string.getBegin(); for (; ; ) { + // check readability for each loop, since it could get closed + checkReadable(); + int pos = ptr.pos; if (pos >= string.realSize()) return this; From 7403244321670509187bf6de843acef3f096e591 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 12:14:21 -0600 Subject: [PATCH 04/23] Advance pos before yielding, in case of early return See https://github.com/ruby/ruby/pull/3460 --- ext/java/org/jruby/ext/stringio/StringIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 24f939d..c3569cb 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1768,8 +1768,8 @@ public IRubyObject each_codepoint(ThreadContext context, Block block) { int c = StringSupport.codePoint(runtime, enc, stringBytes, begin + pos, stringBytes.length); int n = StringSupport.codeLength(enc, c); - block.yield(context, runtime.newFixnum(c)); ptr.pos = pos + n; + block.yield(context, runtime.newFixnum(c)); } } finally { if (locked) unlock(ptr); From ceda9e5770168e4f2ac6f687ccedc13aea9e3b22 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 12:58:53 -0600 Subject: [PATCH 05/23] Align init and encoding setup with C logic --- ext/java/org/jruby/ext/stringio/StringIO.java | 123 +++++++++--------- 1 file changed, 63 insertions(+), 60 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index c3569cb..b25a93f 100644 --- 
a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -61,7 +61,6 @@ import org.jruby.util.io.EncodingUtils; import org.jruby.util.io.Getline; import org.jruby.util.io.IOEncodable; -import org.jruby.util.io.ModeFlags; import org.jruby.util.io.OpenFile; import java.lang.invoke.MethodHandle; @@ -137,7 +136,16 @@ public static RubyClass createStringIOClass(final Ruby runtime) { public Encoding getEncoding() { StringIOData ptr = this.ptr; Encoding enc = ptr.enc; - return enc != null ? enc : ptr.string.getEncoding(); + if (enc != null) { + return enc; + } + + RubyString string = ptr.string; + if (!string.isNil()) { + return string.getEncoding(); + } + + return null; } public void setEncoding(Encoding enc) { @@ -292,8 +300,8 @@ public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObje // MRI: strio_init private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyObject arg1) { Ruby runtime = context.runtime; - RubyString string; - IRubyObject mode; + IRubyObject string = context.nil; + IRubyObject mode = context.nil; StringIOData ptr = this.ptr; @@ -302,72 +310,62 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO IRubyObject maybeOptions = context.nil; switch (argc) { case 1: - maybeOptions = arg0; + maybeOptions = ArgsUtil.getOptionsArg(runtime, arg0); + if (maybeOptions.isNil()) { + string = arg0; + } break; case 2: - maybeOptions = arg1; + string = arg0; + maybeOptions = ArgsUtil.getOptionsArg(runtime, arg1); + if (maybeOptions.isNil()) { + mode = arg1; + } break; } - Encoding encoding = null; - - IRubyObject options = ArgsUtil.getOptionsArg(runtime, maybeOptions); - IOEncodable.ConvConfig ioEncodable = new IOEncodable.ConvConfig(); - if (!options.isNil()) { + if (!maybeOptions.isNil()) { argc--; + } + Object vmodeAndVpermP = VMODE_VPERM_TL.get(); + EncodingUtils.vmode(vmodeAndVpermP, mode); + IOEncodable.ConvConfig ioEncodable = new 
IOEncodable.ConvConfig(); - int[] fmode = {0}; - Object vmodeAndVpermP = VMODE_VPERM_TL.get(); + // switch to per-use oflags if it is ever used in the future + EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, maybeOptions, OFLAGS_UNUSED, FMODE_TL.get()); - // switch to per-use oflags if it is ever used in the future - EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, options, OFLAGS_UNUSED, FMODE_TL.get()); + // clear shared vmodeVperm + EncodingUtils.vmode(vmodeAndVpermP, null); + EncodingUtils.vperm(vmodeAndVpermP, null); - // clear shared vmodeVperm - EncodingUtils.vmode(vmodeAndVpermP, null); - EncodingUtils.vperm(vmodeAndVpermP, null); + ptr.flags = FMODE_TL.get()[0]; - ptr.flags = fmode[0]; - encoding = ioEncodable.enc; + if (!string.isNil()) { + string = string.convertToString(); + } else if (argc == 0) { + string = RubyString.newEmptyString(runtime, runtime.getDefaultInternalEncoding()); } - switch (argc) { - case 2: - mode = arg1; - final boolean trunc; - if (mode instanceof RubyFixnum) { - int flags = RubyFixnum.fix2int(mode); - ptr.flags |= ModeFlags.getOpenFileFlagsFor(flags); - trunc = (flags & ModeFlags.TRUNC) != 0; - } else { - String m = arg1.convertToString().toString(); - ptr.flags |= OpenFile.ioModestrFmode(runtime, m); - trunc = m.length() > 0 && m.charAt(0) == 'w'; - } - string = arg0.convertToString(); - if ((ptr.flags & OpenFile.WRITABLE) != 0 && string.isFrozen()) { - throw runtime.newErrnoEACCESError("Permission denied"); - } - if (trunc) { - string.resize(0); - } - break; - case 1: - string = arg0.convertToString(); - ptr.flags |= string.isFrozen() ? 
OpenFile.READABLE : OpenFile.READWRITE; - break; - case 0: - string = RubyString.newEmptyString(runtime, runtime.getDefaultExternalEncoding()); - ptr.flags |= OpenFile.READWRITE; - break; - default: - // should not be possible - throw runtime.newArgumentError(3, 2); + if (!string.isNil() && string.isFrozen()) { + if ((ptr.flags & OpenFile.WRITABLE) != 0) { + throw runtime.newErrnoEACCESError("read-only string"); + } + } else { + if (mode.isNil()) { + ptr.flags |= OpenFile.WRITABLE; + } + } + if (!string.isNil() && (ptr.flags & OpenFile.TRUNC) != 0) { + ((RubyString) string).clear(); + } + ptr.string = (RubyString) string; + if (argc == 1 && !string.isNil()) { + ptr.enc = ((RubyString) string).getEncoding(); + } else { + ptr.enc = ioEncodable.enc; } - - ptr.string = string; - ptr.enc = encoding; ptr.pos = 0; ptr.lineno = 0; - if ((ptr.flags & OpenFile.SETENC_BY_BOM) != 0) setEncodingByBOM(context); + if ((ptr.flags & OpenFile.SETENC_BY_BOM) != 0) set_encoding_by_bom(context); // funky way of shifting readwrite flags into object flags flags |= (ptr.flags & OpenFile.READWRITE) * (STRIO_READABLE / OpenFile.READABLE); } finally { @@ -669,20 +667,25 @@ public IRubyObject getbyte(ThreadContext context) { return context.runtime.newFixnum(c); } + // MRI: strio_substr // must be called under lock private RubyString strioSubstr(Ruby runtime, int pos, int len, Encoding enc) { StringIOData ptr = this.ptr; final RubyString string = ptr.string; - final ByteList stringBytes = string.getByteList(); int rlen = string.size() - pos; if (len > rlen) len = rlen; if (len < 0) len = 0; - if (len == 0) return RubyString.newEmptyString(runtime, enc); - string.setByteListShared(); // we only share the byte[] buffer but its easier this way - return RubyString.newStringShared(runtime, stringBytes.getUnsafeBytes(), stringBytes.getBegin() + pos, len, enc); + return encSubseq(runtime, string, pos, len, enc); + } + + // MRI: enc_subseq + private static RubyString encSubseq(Ruby runtime, RubyString 
str, int pos, int len, Encoding enc) { + str = str.makeShared(runtime, pos, len); + str.setEncoding(enc); + return str; } private static final int CHAR_BIT = 8; From ae66f006674f1c54f96dbd4d059663d1f9211cbf Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 13:06:41 -0600 Subject: [PATCH 06/23] Handle zero-length pread as in C --- ext/java/org/jruby/ext/stringio/StringIO.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index b25a93f..822fe5b 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1147,10 +1147,6 @@ private RubyString preadCommon(ThreadContext context, int argc, IRubyObject arg0 if (len < 0) { throw runtime.newArgumentError("negative length " + len + " given"); } - - if (offset < 0) { - throw runtime.newErrnoEINVALError("pread: Invalid offset argument"); - } } break; default: @@ -1159,6 +1155,17 @@ private RubyString preadCommon(ThreadContext context, int argc, IRubyObject arg0 boolean locked = lock(context, ptr); try { + if (len == 0) { + if (str.isNil()) { + return RubyString.newEmptyString(runtime); + } + return (RubyString) str; + } + + if (offset < 0) { + throw runtime.newErrnoEINVALError("pread: Invalid offset argument"); + } + RubyString myString = ptr.string; if (offset >= myString.size()) { throw context.runtime.newEOFError(); From a517fd6ff24838f71e0348da04979903c09bc93a Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 13:09:45 -0600 Subject: [PATCH 07/23] Only update buffer encoding if not binary See https://bugs.ruby-lang.org/issues/20418 --- ext/java/org/jruby/ext/stringio/StringIO.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 822fe5b..217f59e 100644 --- 
a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1091,9 +1091,7 @@ private IRubyObject readCommon(ThreadContext context, int argc, IRubyObject arg0 ByteList dataByteList = myString.getByteList(); byte[] dataBytes = dataByteList.getUnsafeBytes(); System.arraycopy(dataBytes, dataByteList.getBegin() + pos, strBytes, strByteList.getBegin(), len); - if (binary) { - string.setEncoding(ASCIIEncoding.INSTANCE); - } else { + if (!binary) { string.setEncoding(myString.getEncoding()); } } From 571a36b1ccd6212755dcf8329ad62d67d64325ed Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 21:38:02 -0600 Subject: [PATCH 08/23] Support the NULL StringIO as in C See https://github.com/ruby/stringio/pull/90 --- ext/java/org/jruby/ext/stringio/StringIO.java | 77 ++++++++++++++----- 1 file changed, 57 insertions(+), 20 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 217f59e..51219ff 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -141,7 +141,7 @@ public Encoding getEncoding() { } RubyString string = ptr.string; - if (!string.isNil()) { + if (string != null && !string.isNil()) { return string.getEncoding(); } @@ -334,8 +334,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, maybeOptions, OFLAGS_UNUSED, FMODE_TL.get()); // clear shared vmodeVperm - EncodingUtils.vmode(vmodeAndVpermP, null); - EncodingUtils.vperm(vmodeAndVpermP, null); + clearVmodeVperm(vmodeAndVpermP); ptr.flags = FMODE_TL.get()[0]; @@ -357,7 +356,9 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO if (!string.isNil() && (ptr.flags & OpenFile.TRUNC) != 0) { ((RubyString) string).clear(); } - ptr.string = (RubyString) string; + if (string instanceof RubyString) { + 
ptr.string = (RubyString) string; + } if (argc == 1 && !string.isNil()) { ptr.enc = ((RubyString) string).getEncoding(); } else { @@ -624,7 +625,7 @@ public IRubyObject eof(ThreadContext context) { } private boolean isEndOfString() { - return ptr.pos >= ptr.string.size(); + return ptr.string == null || ptr.pos >= ptr.string.size(); } @JRubyMethod(name = "getc") @@ -721,21 +722,25 @@ private static int bm_search(byte[] little, int lstart, int llen, byte[] big, in @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context) { + if (ptr.string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding()); } @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context, IRubyObject arg0) { + if (ptr.string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding(), arg0); } @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { + if (ptr.string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding(), arg0, arg1); } @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { + if (ptr.string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding(), arg0, arg1, arg2); } @@ -756,6 +761,8 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { } private static final Getline.Callback GETLINE = (context, self, rs, limit, chomp, block) -> { + if (self.isEndOfString()) return context.nil; + if (limit == 0) { return RubyString.newEmptyString(context.runtime, self.getEncoding()); } @@ -772,6 +779,11 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { private static final Getline.Callback GETLINE_YIELD = (context, self, rs, limit, 
chomp, block) -> { IRubyObject line; + StringIOData ptr = self.ptr; + if (ptr.string == null || ptr.pos > ptr.string.size()) { + return self; + } + if (limit == 0) { throw context.runtime.newArgumentError("invalid limit: 0 for each_line"); } @@ -790,6 +802,11 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { RubyArray ary = (RubyArray) context.runtime.newArray(); IRubyObject line; + StringIOData ptr = self.ptr; + if (ptr.string == null || ptr.pos > ptr.string.size()) { + return null; + } + if (limit == 0) { throw context.runtime.newArgumentError("invalid limit: 0 for readlines"); } @@ -919,10 +936,11 @@ private static int chompNewlineWidth(byte[] bytes, int s, int e) { } @JRubyMethod(name = {"length", "size"}) - public IRubyObject length() { + public IRubyObject length(ThreadContext context) { checkInitialized(); - checkFinalized(); - return getRuntime().newFixnum(ptr.string.size()); + RubyString myString = ptr.string; + if (myString == null) return RubyFixnum.zero(context.runtime); + return getRuntime().newFixnum(myString.size()); } @JRubyMethod(name = "lineno") @@ -995,10 +1013,12 @@ public IRubyObject putc(ThreadContext context, IRubyObject ch) { checkModifiable(); if (ch instanceof RubyString) { + if (ptr.string == null) return context.nil; str = substrString((RubyString) ch, str, runtime); } else { byte c = RubyNumeric.num2chr(ch); + if (ptr.string == null) return context.nil; str = RubyString.newString(runtime, new byte[]{c}); } write(context, str); @@ -1059,6 +1079,10 @@ private IRubyObject readCommon(ThreadContext context, int argc, IRubyObject arg0 break; } case 0: + RubyString myString = ptr.string; + if (myString == null) { + return context.nil; + } len = ptr.string.size(); if (len <= pos) { Encoding enc = binary ? 
ASCIIEncoding.INSTANCE : getEncoding(); @@ -1287,7 +1311,6 @@ public IRubyObject seek(ThreadContext context, IRubyObject arg0, IRubyObject arg private RubyFixnum seekCommon(ThreadContext context, int argc, IRubyObject arg0, IRubyObject arg1) { checkFrozen(); - checkFinalized(); Ruby runtime = context.runtime; @@ -1369,10 +1392,13 @@ public IRubyObject truncate(ThreadContext context, IRubyObject len) { boolean locked = lock(context, ptr); try { - int plen = string.size(); if (l < 0) { throw context.runtime.newErrnoEINVALError("negative legnth"); } + if (string == null) { + return RubyFixnum.zero(context.runtime); + } + int plen = string.size(); string.resize(l); ByteList buf = string.getByteList(); if (plen < l) { @@ -1393,6 +1419,8 @@ public IRubyObject ungetc(ThreadContext context, IRubyObject arg) { checkModifiable(); checkReadable(); + if (ptr.string == null) return context.nil; + if (arg.isNil()) return arg; if (arg instanceof RubyInteger) { int len, cc = RubyNumeric.num2int(arg); @@ -1486,6 +1514,7 @@ public IRubyObject ungetbyte(ThreadContext context, IRubyObject arg) { if (arg.isNil()) return arg; checkModifiable(); + if (ptr.string == null) return context.nil; if (arg instanceof RubyInteger) { ungetbyteCommon(context, ((RubyInteger) ((RubyInteger) arg).op_mod(context, 256)).getIntValue()); @@ -1597,6 +1626,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) boolean locked = lock(context, ptr); try { final Encoding enc = getEncoding(); + if (enc == null) return 0; final Encoding encStr = str.getEncoding(); if (enc != encStr && enc != EncodingUtils.ascii8bitEncoding(runtime) // this is a hack because we don't seem to handle incoming ASCII-8BIT properly in transcoder @@ -1639,11 +1669,19 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) @JRubyMethod public IRubyObject set_encoding(ThreadContext context, IRubyObject ext_enc) { - final Encoding enc; + Encoding enc; if ( ext_enc.isNil() ) { enc = 
EncodingUtils.defaultExternalEncoding(context.runtime); } else { - enc = EncodingUtils.rbToEncoding(context, ext_enc); + enc = context.runtime.getEncodingService().getEncodingFromObjectNoError(ext_enc); + if (enc == null) { + IOEncodable convconfig = new IOEncodable.ConvConfig(); + Object vmodeAndVpermP = VMODE_VPERM_TL.get(); + EncodingUtils.vmode(vmodeAndVpermP, ext_enc.convertToString().prepend(context, context.runtime.newString("r:"))); + EncodingUtils.extractModeEncoding(context, convconfig, vmodeAndVpermP, context.nil, OFLAGS_UNUSED, FMODE_TL.get()); + clearVmodeVperm(vmodeAndVpermP); + enc = convconfig.getEnc2(); + } } StringIOData ptr = this.ptr; @@ -1653,8 +1691,8 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject ext_enc) { ptr.enc = enc; // in read-only mode, StringIO#set_encoding no longer sets the encoding - RubyString string; - if (writable() && (string = ptr.string).getEncoding() != enc) { + RubyString string = ptr.string; + if (string != null && writable() && string.getEncoding() != enc) { string.modify(); string.setEncoding(enc); } @@ -1665,6 +1703,11 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject ext_enc) { return this; } + private static void clearVmodeVperm(Object vmodeAndVpermP) { + EncodingUtils.vmode(vmodeAndVpermP, null); + EncodingUtils.vperm(vmodeAndVpermP, null); + } + @JRubyMethod public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObject ignored) { return set_encoding(context, enc); @@ -2056,12 +2099,6 @@ private void checkInitialized() { } } - private void checkFinalized() { - if (ptr.string == null) { - throw getRuntime().newIOError("not opened"); - } - } - private void checkOpen() { if (closed()) { throw getRuntime().newIOError(RubyIO.CLOSED_STREAM_MSG); From 26436e66a381add48c01001abfad0fc303ae34ed Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 21:40:04 -0600 Subject: [PATCH 09/23] Return zero from truncate See 
https://github.com/ruby/stringio/commit/16847fea32bda8262bd8bc81ad5297de39592c9c --- ext/java/org/jruby/ext/stringio/StringIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 51219ff..e6d66d2 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1409,7 +1409,7 @@ public IRubyObject truncate(ThreadContext context, IRubyObject len) { if (locked) unlock(ptr); } - return len; + return RubyFixnum.zero(context.runtime); } @JRubyMethod(name = "ungetc") From 1bcc551a9a893b4fcc51d8f431fbdb0e7c7c7d25 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 22:07:27 -0600 Subject: [PATCH 10/23] Duplicate unget logic for buffer management from C --- ext/java/org/jruby/ext/stringio/StringIO.java | 61 ++++++++++++------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index e6d66d2..a115f1e 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1431,7 +1431,6 @@ public IRubyObject ungetc(ThreadContext context, IRubyObject arg) { if (len <= 0) EncodingUtils.encUintChr(context, cc, enc); enc.codeToMbc(cc, buf, 0); ungetbyteCommon(context, buf, 0, len); - return context.nil; } else { arg = arg.convertToString(); enc = getEncoding(); @@ -1440,10 +1439,10 @@ public IRubyObject ungetc(ThreadContext context, IRubyObject arg) { if (enc != enc2 && enc != ASCIIEncoding.INSTANCE) { argStr = EncodingUtils.strConvEnc(context, argStr, enc2, enc); } - ByteList argBytes = argStr.getByteList(); - ungetbyteCommon(context, argBytes.unsafeBytes(), argBytes.begin(), argBytes.realSize()); - return context.nil; + ungetbyteCommon(context, argStr); } + + return context.nil; } private void ungetbyteCommon(ThreadContext context, int c) { @@ 
-1475,32 +1474,50 @@ private void ungetbyteCommon(ThreadContext context, RubyString ungetBytes) { ungetbyteCommon(context, ungetByteList.unsafeBytes(), ungetByteList.begin(), ungetByteList.realSize()); } - private void ungetbyteCommon(ThreadContext context, byte[] ungetBytes, int ungetBegin, int ungetLen) { - final int start; // = ptr.pos; - - if (ungetLen == 0) return; + private void ungetbyteCommon(ThreadContext context, byte[] ungetBytes, int cp, int cl) { + if (cl == 0) return; StringIOData ptr = this.ptr; boolean locked = lock(context, ptr); try { - RubyString string = ptr.string; - string.modify(); + int pos = ptr.pos, len, rest; + RubyString str = ptr.string; + ByteList strBytelist; + byte[] strBytes; + int s; - int pos = ptr.pos; - if (ungetLen > pos) { - start = 0; + len = str.size(); + rest = pos - len; + if (cl > pos) { + int ex = cl - (rest < 0 ? pos : len); + str.modifyExpand(len + ex); + strBytelist = str.getByteList(); + strBytes = strBytelist.unsafeBytes(); + s = strBytelist.begin(); + strBytelist.setRealSize(len + ex); + if (rest < 0) System.arraycopy(strBytes, s + pos, strBytes, s + cl, -rest); + pos = 0; + } + else { + if (rest > 0) { + str.modifyExpand(len + rest); + strBytelist = str.getByteList(); + strBytelist.setRealSize(len + rest); + } else { + strBytelist = str.getByteList(); + } + strBytes = strBytelist.unsafeBytes(); + s = strBytelist.begin(); + if (rest > cl) Arrays.fill(strBytes, s + len, s + len + (rest - cl), (byte) 0); /* MRI: memset(s + len, 0, rest - cl); Arrays.fill takes absolute [from, to) indices, so both bounds are offset by the buffer begin s */ + pos -= cl; + } + if (ungetBytes != null) { + System.arraycopy(ungetBytes, cp, strBytes, s + pos, cl); } else { - start = pos - ungetLen; + System.arraycopy(strBytes, s, strBytes, s + pos, cl); } - - ByteList byteList = string.getByteList(); - - if (isEndOfString()) byteList.length(Math.max(pos, ungetLen)); - - byteList.replace(start, pos - start, ungetBytes, ungetBegin, ungetLen); - - ptr.pos = start; + ptr.pos = pos; } finally { if (locked) unlock(ptr); } From 70fbb46d4580a9104bf78ac6840c11efd01a0fbf Mon Sep 17 00:00:00 2001
From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 22:54:16 -0600 Subject: [PATCH 11/23] Use long math to detect int overflow in expand --- ext/java/org/jruby/ext/stringio/StringIO.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index a115f1e..0e1497c 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -984,8 +984,12 @@ private void strioExtend(ThreadContext context, int pos, int len) { try { RubyString string = ptr.string; final int olen = string.size(); - if (pos + len > olen) { - string.resize(pos + len); + long newSize = (long) pos + len; + if (newSize > Integer.MAX_VALUE) { + throw context.runtime.newArgumentError("string size too big"); + } + if (newSize > olen) { + string.resize((int) newSize); if (pos > olen) { modifyString(string); ByteList ptrByteList = string.getByteList(); From 8967fd80af9f2293bcf666db08a2d28368c8c0cd Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 22:59:34 -0600 Subject: [PATCH 12/23] Modify string and clear coderange after write See https://github.com/ruby/stringio/pull/77 --- ext/java/org/jruby/ext/stringio/StringIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 0e1497c..398a699 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1676,7 +1676,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) } } else { strioExtend(context, pos, len); - modifyString(ptr.string); + modifyString(myString); ByteList ptrByteList = myString.getByteList(); System.arraycopy(strByteList.getUnsafeBytes(), strByteList.getBegin(), ptrByteList.getUnsafeBytes(), ptrByteList.begin() + pos, len); } From 
9dbb5758664a42556205b3ee00ac98c5ab8d4988 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 23:32:51 -0600 Subject: [PATCH 13/23] Align write encoding check with C --- ext/java/org/jruby/ext/stringio/StringIO.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 398a699..202919f 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1649,9 +1649,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) final Encoding enc = getEncoding(); if (enc == null) return 0; final Encoding encStr = str.getEncoding(); - if (enc != encStr && enc != EncodingUtils.ascii8bitEncoding(runtime) - // this is a hack because we don't seem to handle incoming ASCII-8BIT properly in transcoder - && encStr != ASCIIEncoding.INSTANCE) { + if (enc != encStr && enc != ASCIIEncoding.INSTANCE && enc != USASCIIEncoding.INSTANCE) { RubyString converted = EncodingUtils.strConvEnc(context, str, encStr, enc); if (converted == str && encStr != ASCIIEncoding.INSTANCE && encStr != USASCIIEncoding.INSTANCE) { /* conversion failed */ ptr.string.checkEncoding(str); From 7a2e141317c5706a78b5c98c130d3a748430b72b Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Tue, 18 Feb 2025 23:40:41 -0600 Subject: [PATCH 14/23] Don't ignore failures on JRuby anymore --- .github/workflows/ubuntu.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index da7da82..75af42a 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -47,7 +47,6 @@ jobs: if: >- matrix.ruby != 'truffleruby-head' run: bundle exec rake - continue-on-error: ${{ startsWith(matrix.ruby, 'jruby') }} - name: Install gem run: | gem install pkg/*.gem From 18c7d46d8c1aba7681f76473e8a4263d87d2692b Mon Sep 17 00:00:00 2001 From: Charles 
Oliver Nutter Date: Tue, 18 Feb 2025 23:40:55 -0600 Subject: [PATCH 15/23] Clarify to which C function these correspond --- ext/java/org/jruby/ext/stringio/StringIO.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 202919f..f5424eb 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1546,13 +1546,14 @@ public IRubyObject ungetbyte(ThreadContext context, IRubyObject arg) { return context.nil; } - // MRI: strio_write + // MRI: strio_write_m @JRubyMethod(name = "write") public IRubyObject write(ThreadContext context, IRubyObject arg) { Ruby runtime = context.runtime; return RubyFixnum.newFixnum(runtime, stringIOWrite(context, runtime, arg)); } + // MRI: strio_write_m @JRubyMethod(name = "write") public IRubyObject write(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { Ruby runtime = context.runtime; @@ -1562,6 +1563,7 @@ public IRubyObject write(ThreadContext context, IRubyObject arg0, IRubyObject ar return RubyFixnum.newFixnum(runtime, len); } + // MRI: strio_write_m @JRubyMethod(name = "write") public IRubyObject write(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { Ruby runtime = context.runtime; @@ -1572,6 +1574,7 @@ public IRubyObject write(ThreadContext context, IRubyObject arg0, IRubyObject ar return RubyFixnum.newFixnum(runtime, len); } + // MRI: strio_write_m @JRubyMethod(name = "write", required = 1, rest = true) public IRubyObject write(ThreadContext context, IRubyObject[] args) { Arity.checkArgumentCount(context, args, 1, -1); From 247173b772da916bb06af432f1f40ec9d312030b Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 09:35:39 -0600 Subject: [PATCH 16/23] Add rb_enc_check that ignores empty str1 The version of this method in JRuby does not support passing an encoding for the first argument, which 
means downstream code in enc_compatible_latter will not reject cases StringIO is expected to reject. This patch hacks a version of rb_enc_check that works with current JRuby versions and exhibits the behavior required. A future JRuby update will improve this API and this hack should become a fallback at that point (for older JRuby versions). --- ext/java/org/jruby/ext/stringio/StringIO.java | 47 ++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index f5424eb..5fa78c0 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -55,6 +55,7 @@ import org.jruby.runtime.marshal.DataType; import org.jruby.util.ArraySupport; import org.jruby.util.ByteList; +import org.jruby.util.CodeRangeable; import org.jruby.util.StringSupport; import org.jruby.util.TypeConverter; import org.jruby.util.func.ObjectObjectIntFunction; @@ -1655,7 +1656,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) if (enc != encStr && enc != ASCIIEncoding.INSTANCE && enc != USASCIIEncoding.INSTANCE) { RubyString converted = EncodingUtils.strConvEnc(context, str, encStr, enc); if (converted == str && encStr != ASCIIEncoding.INSTANCE && encStr != USASCIIEncoding.INSTANCE) { /* conversion failed */ - ptr.string.checkEncoding(str); + rb_enc_check_hack(context, enc, str); } str = converted; } @@ -1689,6 +1690,50 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) return len; } + /* + This hack inlines the JRuby version of rb_enc_check (RubString.checkEncoding) because it only supports str1 being + a true string. This breaks an expectation in StringIO test "test_write_encoding_conversion". 
+ + If the StringIO string is blank, logic downstream from "rb_enc_check" in "encoding_compatible_latter" will simply + choose the second encoding rather than fail as the test expects. + + See discussion in https://github.com/ruby/stringio/pull/116. + */ + private static void rb_enc_check_hack(ThreadContext context, Encoding enc, CodeRangeable str) { + CodeRangeable fakeCodeRangeable = new EncodingOnlyCodeRangeable(enc); + Encoding enc1 = StringSupport.areCompatible(fakeCodeRangeable, str); + if (enc1 == null) throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " + + enc1 + " and " + str.getByteList().getEncoding()); + } + + private static class EncodingOnlyCodeRangeable implements CodeRangeable { + private final Encoding enc; + + public EncodingOnlyCodeRangeable(Encoding enc) {this.enc = enc;} + @Override + public int getCodeRange() {return 0;} + @Override + public int scanForCodeRange() {return 0;} + @Override + public boolean isCodeRangeValid() {return false;} + @Override + public void setCodeRange(int codeRange) {} + @Override + public void clearCodeRange() {} + @Override + public void keepCodeRange() {} + @Override + public void modifyAndKeepCodeRange() {} + @Override + public Encoding checkEncoding(CodeRangeable other) {return null;} + @Override + public ByteList getByteList() {return new ByteList(0, enc);} + @Override + public void modify() {} + @Override + public void modify(int length) {} + } + @JRubyMethod public IRubyObject set_encoding(ThreadContext context, IRubyObject ext_enc) { Encoding enc; From 4ed7b0549c177967a42463914812af640e0f4ba1 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 09:45:16 -0600 Subject: [PATCH 17/23] Add JRuby to windows CI --- .github/workflows/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index d9722d4..5b3d117 100644 --- a/.github/workflows/windows.yml +++ 
b/.github/workflows/windows.yml @@ -9,7 +9,7 @@ jobs: with: engine: cruby min_version: 2.7 - versions: '["mswin", "mingw"]' + versions: '["mswin", "mingw", "jruby-head"]' build: needs: ruby-versions From dbe236ff9bac28995620c27d4770df8c8ad533d8 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 16:01:52 -0600 Subject: [PATCH 18/23] Init with default external encoding --- ext/java/org/jruby/ext/stringio/StringIO.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 5fa78c0..6b2c8e9 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -342,7 +342,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO if (!string.isNil()) { string = string.convertToString(); } else if (argc == 0) { - string = RubyString.newEmptyString(runtime, runtime.getDefaultInternalEncoding()); + string = RubyString.newEmptyString(runtime, runtime.getDefaultExternalEncoding()); } if (!string.isNil() && string.isFrozen()) { From 2f83c479d5520557d65f2e07f672d70d9aa66fd6 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 16:23:47 -0600 Subject: [PATCH 19/23] Minor tweaks to align getline behavior with C --- ext/java/org/jruby/ext/stringio/StringIO.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 6b2c8e9..a396c49 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -699,7 +699,7 @@ private static void bm_init_skip(int[] skip, byte[] pat, int patPtr, int m) { skip[c] = m; } while ((--m) > 0) { - skip[pat[patPtr++]] = m; + skip[Byte.toUnsignedInt(pat[patPtr++])] = m; } } @@ -716,7 +716,7 @@ private static int bm_search(byte[] little, int lstart, int llen, 
byte[] big, in j--; } if (j < 0) return k + 1; - i += skip[big[i + bstart] & 0xFF]; + i += skip[Byte.toUnsignedInt(big[i + bstart])]; } return -1; } @@ -762,9 +762,10 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { } private static final Getline.Callback GETLINE = (context, self, rs, limit, chomp, block) -> { - if (self.isEndOfString()) return context.nil; + self.checkReadable(); if (limit == 0) { + if (self.ptr.string == null) return context.nil; return RubyString.newEmptyString(context.runtime, self.getEncoding()); } @@ -913,7 +914,7 @@ private IRubyObject getline(ThreadContext context, final IRubyObject rs, int lim p = rsByteList.getBegin(); bm_init_skip(skip, rsBytes, p, n); if ((pos2 = bm_search(rsBytes, p, n, stringBytes, s, e - s, skip)) >= 0) { - e = s + pos2 + n; + e = s + pos2 + (chomp ? 0 : n); } } } From 7108323712ca62ba4ea647f3433d09ea897ba620 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 17:04:41 -0600 Subject: [PATCH 20/23] Handle all initialize forms and modes --- ext/java/org/jruby/ext/stringio/StringIO.java | 71 +++++++++++++------ 1 file changed, 51 insertions(+), 20 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index a396c49..645c047 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -262,7 +262,7 @@ protected StringIO(Ruby runtime, RubyClass klass) { super(runtime, klass); } - @JRubyMethod(visibility = PRIVATE) + @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context) { if (ptr == null) { ptr = new StringIOData(); @@ -270,11 +270,11 @@ public IRubyObject initialize(ThreadContext context) { // does not dispatch quite right and is not really necessary for us //Helpers.invokeSuper(context, this, metaClass, "initialize", IRubyObject.NULL_ARRAY, Block.NULL_BLOCK); - strioInit(context, 0, null, null); + 
strioInit(context, 0, null, null, null); return this; } - @JRubyMethod(visibility = PRIVATE) + @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context, IRubyObject arg0) { if (ptr == null) { ptr = new StringIOData(); @@ -282,11 +282,11 @@ public IRubyObject initialize(ThreadContext context, IRubyObject arg0) { // does not dispatch quite right and is not really necessary for us //Helpers.invokeSuper(context, this, metaClass, "initialize", IRubyObject.NULL_ARRAY, Block.NULL_BLOCK); - strioInit(context, 1, arg0, null); + strioInit(context, 1, arg0, null, null); return this; } - @JRubyMethod(visibility = PRIVATE) + @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { if (ptr == null) { ptr = new StringIOData(); @@ -294,15 +294,27 @@ public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObje // does not dispatch quite right and is not really necessary for us //Helpers.invokeSuper(context, this, metaClass, "initialize", IRubyObject.NULL_ARRAY, Block.NULL_BLOCK); - strioInit(context, 2, arg0, arg1); + strioInit(context, 2, arg0, arg1, null); + return this; + } + + @JRubyMethod(visibility = PRIVATE, keywords = true) + public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { + if (ptr == null) { + ptr = new StringIOData(); + } + + // does not dispatch quite right and is not really necessary for us + //Helpers.invokeSuper(context, this, metaClass, "initialize", IRubyObject.NULL_ARRAY, Block.NULL_BLOCK); + strioInit(context, 3, arg0, arg1, arg2); return this; } // MRI: strio_init - private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyObject arg1) { + private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { Ruby runtime = context.runtime; IRubyObject string = context.nil; - IRubyObject 
mode = context.nil; + IRubyObject vmode = context.nil; StringIOData ptr = this.ptr; @@ -320,7 +332,15 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO string = arg0; maybeOptions = ArgsUtil.getOptionsArg(runtime, arg1); if (maybeOptions.isNil()) { - mode = arg1; + vmode = arg1; + } + break; + case 3: + string = arg0; + vmode = arg1; + maybeOptions = ArgsUtil.getOptionsArg(runtime, arg2); + if (maybeOptions.isNil()) { + throw context.runtime.newArgumentError(argc, 0, 2); } break; } @@ -328,17 +348,18 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO argc--; } Object vmodeAndVpermP = VMODE_VPERM_TL.get(); - EncodingUtils.vmode(vmodeAndVpermP, mode); + EncodingUtils.vmode(vmodeAndVpermP, vmode); IOEncodable.ConvConfig ioEncodable = new IOEncodable.ConvConfig(); + int[] fmode = FMODE_TL.get(); // switch to per-use oflags if it is ever used in the future - EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, maybeOptions, OFLAGS_UNUSED, FMODE_TL.get()); + EncodingUtils.extractModeEncoding(context, ioEncodable, vmodeAndVpermP, maybeOptions, OFLAGS_UNUSED, fmode); + ptr.flags = fmode[0]; + vmode = EncodingUtils.vmode(vmodeAndVpermP); // clear shared vmodeVperm clearVmodeVperm(vmodeAndVpermP); - ptr.flags = FMODE_TL.get()[0]; - if (!string.isNil()) { string = string.convertToString(); } else if (argc == 0) { @@ -350,7 +371,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO throw runtime.newErrnoEACCESError("read-only string"); } } else { - if (mode.isNil()) { + if (vmode.isNil()) { ptr.flags |= OpenFile.WRITABLE; } } @@ -1257,15 +1278,15 @@ public IRubyObject readlines(ThreadContext context, IRubyObject[] args) { } // MRI: strio_reopen - @JRubyMethod(name = "reopen") + @JRubyMethod(name = "reopen", keywords = true) public IRubyObject reopen(ThreadContext context) { // reset the state - strioInit(context, 0, null, null); + strioInit(context, 0, null, 
null, null); return this; } // MRI: strio_reopen - @JRubyMethod(name = "reopen") + @JRubyMethod(name = "reopen", keywords = true) public IRubyObject reopen(ThreadContext context, IRubyObject arg0) { checkFrozen(); @@ -1274,17 +1295,27 @@ public IRubyObject reopen(ThreadContext context, IRubyObject arg0) { } // reset the state - strioInit(context, 1, arg0, null); + strioInit(context, 1, arg0, null, null); return this; } // MRI: strio_reopen - @JRubyMethod(name = "reopen") + @JRubyMethod(name = "reopen", keywords = true) public IRubyObject reopen(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { checkFrozen(); // reset the state - strioInit(context, 2, arg0, arg1); + strioInit(context, 2, arg0, arg1, null); + return this; + } + + // MRI: strio_reopen + @JRubyMethod(name = "reopen", keywords = true) + public IRubyObject reopen(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { + checkFrozen(); + + // reset the state + strioInit(context, 3, arg0, arg1, arg2); return this; } From ee25a6da4f2ae22a277db4953377de2c70ac07b8 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 18:07:13 -0600 Subject: [PATCH 21/23] Guard access to ptr behind frozen check --- ext/java/org/jruby/ext/stringio/StringIO.java | 133 +++++++++--------- 1 file changed, 69 insertions(+), 64 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 645c047..6327a17 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -91,7 +91,14 @@ static class StringIOData { int flags; volatile Object owner; } - StringIOData ptr; + private StringIOData ptr; + + // MRI: get_strio, StringIO macro + private StringIOData getPtr() { + // equivalent to rb_io_taint_check without tainting + checkFrozen(); + return ptr; + } private static final String STRINGIO_VERSION = "3.1.4"; @@ -135,7 +142,7 @@ public static RubyClass 
createStringIOClass(final Ruby runtime) { // mri: get_enc public Encoding getEncoding() { - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); Encoding enc = ptr.enc; if (enc != null) { return enc; @@ -150,7 +157,7 @@ public Encoding getEncoding() { } public void setEncoding(Encoding enc) { - ptr.enc = enc; + getPtr().enc = enc; } @JRubyMethod(name = "new", rest = true, meta = true) @@ -250,7 +257,7 @@ private static IRubyObject yieldOrReturn(ThreadContext context, Block block, Str try { val = block.yield(context, strio); } finally { - strio.ptr.string = null; + strio.getPtr().string = null; strio.flags &= ~STRIO_READWRITE; } } @@ -264,7 +271,7 @@ protected StringIO(Ruby runtime, RubyClass klass) { @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context) { - if (ptr == null) { + if (getPtr() == null) { ptr = new StringIOData(); } @@ -276,7 +283,7 @@ public IRubyObject initialize(ThreadContext context) { @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context, IRubyObject arg0) { - if (ptr == null) { + if (getPtr() == null) { ptr = new StringIOData(); } @@ -288,7 +295,7 @@ public IRubyObject initialize(ThreadContext context, IRubyObject arg0) { @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { - if (ptr == null) { + if (getPtr() == null) { ptr = new StringIOData(); } @@ -300,7 +307,7 @@ public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObje @JRubyMethod(visibility = PRIVATE, keywords = true) public IRubyObject initialize(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { - if (ptr == null) { + if (getPtr() == null) { ptr = new StringIOData(); } @@ -316,7 +323,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO IRubyObject string = context.nil; IRubyObject vmode = context.nil; - 
StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -404,7 +411,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject other) { if (this == otherIO) return this; - ptr = otherIO.ptr; + ptr = otherIO.getPtr(); flags = flags & ~STRIO_READWRITE | otherIO.flags & STRIO_READWRITE; return this; @@ -412,7 +419,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject other) { @JRubyMethod public IRubyObject binmode(ThreadContext context) { - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); ptr.enc = EncodingUtils.ascii8bitEncoding(context.runtime); if (writable()) ptr.string.setEncoding(ptr.enc); @@ -471,7 +478,7 @@ public IRubyObject closed_p() { public IRubyObject close_read(ThreadContext context) { // ~ checkReadable() : checkInitialized(); - if ( (ptr.flags & OpenFile.READABLE) == 0 ) { + if ( (getPtr().flags & OpenFile.READABLE) == 0 ) { throw context.runtime.newIOError("not opened for reading"); } int flags = this.flags; @@ -491,7 +498,7 @@ public IRubyObject closed_read_p() { public IRubyObject close_write(ThreadContext context) { // ~ checkWritable() : checkInitialized(); - if ( (ptr.flags & OpenFile.WRITABLE) == 0 ) { + if ( (getPtr().flags & OpenFile.WRITABLE) == 0 ) { throw context.runtime.newIOError("not opened for writing"); } int flags = this.flags; @@ -608,7 +615,7 @@ public IRubyObject each_byte(ThreadContext context, Block block) { if (!block.isGiven()) return enumeratorize(runtime, this, "each_byte"); checkReadable(); - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -642,11 +649,13 @@ public IRubyObject each_char(final ThreadContext context, final Block block) { @JRubyMethod(name = {"eof", "eof?"}) public IRubyObject eof(ThreadContext context) { checkReadable(); + StringIOData ptr = getPtr(); if (ptr.pos < ptr.string.size()) return context.fals; return context.tru; } private 
boolean isEndOfString() { + StringIOData ptr = getPtr(); return ptr.string == null || ptr.pos >= ptr.string.size(); } @@ -656,7 +665,7 @@ public IRubyObject getc(ThreadContext context) { if (isEndOfString()) return context.nil; - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -679,7 +688,7 @@ public IRubyObject getbyte(ThreadContext context) { if (isEndOfString()) return context.nil; int c; - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { c = ptr.string.getByteList().get(ptr.pos++) & 0xFF; @@ -693,7 +702,7 @@ public IRubyObject getbyte(ThreadContext context) { // MRI: strio_substr // must be called under lock private RubyString strioSubstr(Ruby runtime, int pos, int len, Encoding enc) { - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); final RubyString string = ptr.string; int rlen = string.size() - pos; @@ -744,25 +753,25 @@ private static int bm_search(byte[] little, int lstart, int llen, byte[] big, in @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context) { - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding()); } @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context, IRubyObject arg0) { - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding(), arg0); } @JRubyMethod(name = "gets", writes = FrameField.LASTLINE) public IRubyObject gets(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding(), arg0, arg1); } @JRubyMethod(name = "gets", writes = 
FrameField.LASTLINE) public IRubyObject gets(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; return Getline.getlineCall(context, GETLINE, this, getEncoding(), arg0, arg1, arg2); } @@ -786,7 +795,7 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { self.checkReadable(); if (limit == 0) { - if (self.ptr.string == null) return context.nil; + if (self.getPtr().string == null) return context.nil; return RubyString.newEmptyString(context.runtime, self.getEncoding()); } @@ -802,7 +811,7 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { private static final Getline.Callback GETLINE_YIELD = (context, self, rs, limit, chomp, block) -> { IRubyObject line; - StringIOData ptr = self.ptr; + StringIOData ptr = self.getPtr(); if (ptr.string == null || ptr.pos > ptr.string.size()) { return self; } @@ -825,7 +834,7 @@ public IRubyObject gets(ThreadContext context, IRubyObject[] args) { RubyArray ary = (RubyArray) context.runtime.newArray(); IRubyObject line; - StringIOData ptr = self.ptr; + StringIOData ptr = self.getPtr(); if (ptr.string == null || ptr.pos > ptr.string.size()) { return null; } @@ -857,7 +866,7 @@ private IRubyObject getline(ThreadContext context, final IRubyObject rs, int lim return context.nil; } - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); Encoding enc = getEncoding(); boolean locked = lock(context, ptr); @@ -961,19 +970,19 @@ private static int chompNewlineWidth(byte[] bytes, int s, int e) { @JRubyMethod(name = {"length", "size"}) public IRubyObject length(ThreadContext context) { checkInitialized(); - RubyString myString = ptr.string; + RubyString myString = getPtr().string; if (myString == null) return RubyFixnum.zero(context.runtime); return getRuntime().newFixnum(myString.size()); } @JRubyMethod(name = "lineno") public IRubyObject lineno(ThreadContext context) { - 
return context.runtime.newFixnum(ptr.lineno); + return context.runtime.newFixnum(getPtr().lineno); } @JRubyMethod(name = "lineno=", required = 1) public IRubyObject set_lineno(ThreadContext context, IRubyObject arg) { - ptr.lineno = RubyNumeric.fix2int(arg); + getPtr().lineno = RubyNumeric.fix2int(arg); return context.nil; } @@ -982,7 +991,7 @@ public IRubyObject set_lineno(ThreadContext context, IRubyObject arg) { public IRubyObject pos(ThreadContext context) { checkInitialized(); - return context.runtime.newFixnum(ptr.pos); + return context.runtime.newFixnum(getPtr().pos); } @JRubyMethod(name = "pos=", required = 1) @@ -995,13 +1004,13 @@ public IRubyObject set_pos(IRubyObject arg) { if (p > Integer.MAX_VALUE) throw getRuntime().newArgumentError("JRuby does not support StringIO larger than " + Integer.MAX_VALUE + " bytes"); - ptr.pos = (int)p; + getPtr().pos = (int)p; return arg; } private void strioExtend(ThreadContext context, int pos, int len) { - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1040,12 +1049,12 @@ public IRubyObject putc(ThreadContext context, IRubyObject ch) { checkModifiable(); if (ch instanceof RubyString) { - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; str = substrString((RubyString) ch, str, runtime); } else { byte c = RubyNumeric.num2chr(ch); - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; str = RubyString.newString(runtime, new byte[]{c}); } write(context, str); @@ -1077,7 +1086,7 @@ private IRubyObject readCommon(ThreadContext context, int argc, IRubyObject arg0 IRubyObject str = context.nil; boolean binary = false; - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); int pos = ptr.pos; boolean locked = lock(context, ptr); @@ -1173,7 +1182,7 @@ public IRubyObject pread(ThreadContext context, IRubyObject arg0, IRubyObject ar @SuppressWarnings("fallthrough") 
private RubyString preadCommon(ThreadContext context, int argc, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { IRubyObject str = context.nil; - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); Ruby runtime = context.runtime; int offset; final RubyString string; @@ -1288,7 +1297,7 @@ public IRubyObject reopen(ThreadContext context) { // MRI: strio_reopen @JRubyMethod(name = "reopen", keywords = true) public IRubyObject reopen(ThreadContext context, IRubyObject arg0) { - checkFrozen(); + checkModifiable(); if (!(arg0 instanceof RubyString)) { return initialize_copy(context, arg0); @@ -1302,7 +1311,7 @@ public IRubyObject reopen(ThreadContext context, IRubyObject arg0) { // MRI: strio_reopen @JRubyMethod(name = "reopen", keywords = true) public IRubyObject reopen(ThreadContext context, IRubyObject arg0, IRubyObject arg1) { - checkFrozen(); + checkModifiable(); // reset the state strioInit(context, 2, arg0, arg1, null); @@ -1312,7 +1321,7 @@ public IRubyObject reopen(ThreadContext context, IRubyObject arg0, IRubyObject a // MRI: strio_reopen @JRubyMethod(name = "reopen", keywords = true) public IRubyObject reopen(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) { - checkFrozen(); + checkModifiable(); // reset the state strioInit(context, 3, arg0, arg1, arg2); @@ -1323,7 +1332,7 @@ public IRubyObject reopen(ThreadContext context, IRubyObject arg0, IRubyObject a public IRubyObject rewind(ThreadContext context) { checkInitialized(); - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1347,7 +1356,7 @@ public IRubyObject seek(ThreadContext context, IRubyObject arg0, IRubyObject arg } private RubyFixnum seekCommon(ThreadContext context, int argc, IRubyObject arg0, IRubyObject arg1) { - checkFrozen(); + checkModifiable(); Ruby runtime = context.runtime; @@ -1360,7 +1369,7 @@ private RubyFixnum seekCommon(ThreadContext context, int argc, IRubyObject arg0, 
checkOpen(); - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1390,7 +1399,7 @@ private RubyFixnum seekCommon(ThreadContext context, int argc, IRubyObject arg0, @JRubyMethod(name = "string=", required = 1) public IRubyObject set_string(ThreadContext context, IRubyObject arg) { checkFrozen(); - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1407,7 +1416,7 @@ public IRubyObject set_string(ThreadContext context, IRubyObject arg) { @JRubyMethod(name = "string") public IRubyObject string(ThreadContext context) { - RubyString string = ptr.string; + RubyString string = getPtr().string; if (string == null) return context.nil; return string; @@ -1424,7 +1433,7 @@ public IRubyObject truncate(ThreadContext context, IRubyObject len) { checkWritable(); int l = RubyFixnum.fix2int(len); - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); RubyString string = ptr.string; boolean locked = lock(context, ptr); @@ -1456,7 +1465,7 @@ public IRubyObject ungetc(ThreadContext context, IRubyObject arg) { checkModifiable(); checkReadable(); - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; if (arg.isNil()) return arg; if (arg instanceof RubyInteger) { @@ -1483,7 +1492,7 @@ public IRubyObject ungetc(ThreadContext context, IRubyObject arg) { } private void ungetbyteCommon(ThreadContext context, int c) { - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1514,7 +1523,7 @@ private void ungetbyteCommon(ThreadContext context, RubyString ungetBytes) { private void ungetbyteCommon(ThreadContext context, byte[] ungetBytes, int cp, int cl) { if (cl == 0) return; - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1568,7 +1577,7 @@ public IRubyObject ungetbyte(ThreadContext context, 
IRubyObject arg) { if (arg.isNil()) return arg; checkModifiable(); - if (ptr.string == null) return context.nil; + if (getPtr().string == null) return context.nil; if (arg instanceof RubyInteger) { ungetbyteCommon(context, ((RubyInteger) ((RubyInteger) arg).op_mod(context, 256)).getIntValue()); @@ -1678,7 +1687,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) RubyString str = arg.asString(); int len, olen; - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1783,7 +1792,7 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject ext_enc) { } } - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -1819,12 +1828,14 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj @JRubyMethod public IRubyObject set_encoding_by_bom(ThreadContext context) { - if (setEncodingByBOM(context) == null) return context.nil; + StringIOData ptr = getPtr(); + + if (setEncodingByBOM(context, ptr) == null) return context.nil; return context.runtime.getEncodingService().convertEncodingToRubyEncoding(ptr.enc); } - private Encoding setEncodingByBOM(ThreadContext context) { + private Encoding setEncodingByBOM(ThreadContext context, StringIOData ptr) { Encoding enc = detectBOM(context, ptr.string, (ctx, enc2, bomlen) -> { ptr.pos = bomlen; if (writable()) { @@ -1900,7 +1911,7 @@ public IRubyObject each_codepoint(ThreadContext context, Block block) { checkReadable(); - StringIOData ptr = this.ptr; + StringIOData ptr = this.getPtr(); boolean locked = lock(context, ptr); try { @@ -2148,25 +2159,19 @@ public IRubyObject puts(ThreadContext context, IRubyObject[] args) { return GenericWritable.puts(context, this, args); } - /* rb: check_modifiable */ - public void checkFrozen() { - super.checkFrozen(); - checkInitialized(); - } - private boolean readable() { return (flags & STRIO_READABLE) != 0 - && 
(ptr.flags & OpenFile.READABLE) != 0; + && (getPtr().flags & OpenFile.READABLE) != 0; } private boolean writable() { return (flags & STRIO_WRITABLE) != 0 - && (ptr.flags & OpenFile.WRITABLE) != 0; + && (getPtr().flags & OpenFile.WRITABLE) != 0; } private boolean closed() { return !((flags & STRIO_READWRITE) != 0 - && (ptr.flags & OpenFile.READWRITE) != 0); + && (getPtr().flags & OpenFile.READWRITE) != 0); } /* rb: readable */ @@ -2189,11 +2194,11 @@ private void checkWritable() { private void checkModifiable() { checkFrozen(); - if (ptr.string.isFrozen()) throw getRuntime().newIOError("not modifiable string"); + if (getPtr().string.isFrozen()) throw getRuntime().newIOError("not modifiable string"); } private void checkInitialized() { - if (ptr == null) { + if (getPtr() == null) { throw getRuntime().newIOError("uninitialized stream"); } } From ac375b66784b242e8a3abb6a43a2092aba5ffb7c Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 18:14:53 -0600 Subject: [PATCH 22/23] Modify string even if it didn't expand String data might be shared, so modify to ensure we have a writable version. 
--- ext/java/org/jruby/ext/stringio/StringIO.java | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 6327a17..2ac0ebb 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1551,6 +1551,7 @@ private void ungetbyteCommon(ThreadContext context, byte[] ungetBytes, int cp, i strBytelist = str.getByteList(); strBytelist.setRealSize(len + rest); } else { + str.modify(); strBytelist = str.getByteList(); } strBytes = strBytelist.unsafeBytes(); From 88d5e0c33494d00040f953b43ab3925b9f7b2669 Mon Sep 17 00:00:00 2001 From: Charles Oliver Nutter Date: Wed, 19 Feb 2025 18:44:26 -0600 Subject: [PATCH 23/23] Use newer rb_enc_check from JRuby if available This method is added to JRuby in 9.4.13.0 and avoids the overhead of creating a fake CodeRangeable. See jruby/jruby#8643 --- ext/java/org/jruby/ext/stringio/StringIO.java | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/ext/java/org/jruby/ext/stringio/StringIO.java b/ext/java/org/jruby/ext/stringio/StringIO.java index 2ac0ebb..4c88a49 100644 --- a/ext/java/org/jruby/ext/stringio/StringIO.java +++ b/ext/java/org/jruby/ext/stringio/StringIO.java @@ -1633,19 +1633,22 @@ public IRubyObject write(ThreadContext context, IRubyObject[] args) { private static final MethodHandle CAT_WITH_CODE_RANGE; private static final MethodHandle MODIFY_AND_CLEAR_CODE_RANGE; private static final MethodHandle SUBSTR_ENC; + private static final MethodHandle CHECK_ENCODING; static { - MethodHandle cat, modify, substr; + MethodHandle cat, modify, substr, checkEncoding; MethodHandles.Lookup lookup = MethodHandles.lookup(); try { cat = lookup.findVirtual(RubyString.class, "catWithCodeRange", MethodType.methodType(RubyString.class, RubyString.class)); modify = lookup.findVirtual(RubyString.class, "modifyAndClearCodeRange", MethodType.methodType(void.class)); substr = 
lookup.findVirtual(RubyString.class, "substrEnc", MethodType.methodType(IRubyObject.class, Ruby.class, int.class, int.class)); + checkEncoding = lookup.findStatic(RubyEncoding.class, "checkEncoding", MethodType.methodType(void.class, ThreadContext.class, Encoding.class, CodeRangeable.class)); } catch (NoSuchMethodException | IllegalAccessException ex) { try { cat = lookup.findVirtual(RubyString.class, "cat19", MethodType.methodType(RubyString.class, RubyString.class)); modify = lookup.findVirtual(RubyString.class, "modify19", MethodType.methodType(void.class)); substr = lookup.findVirtual(RubyString.class, "substr19", MethodType.methodType(IRubyObject.class, Ruby.class, int.class, int.class)); + checkEncoding = lookup.findStatic(StringIO.class, "checkEncoding", MethodType.methodType(void.class, ThreadContext.class, Encoding.class, CodeRangeable.class)); } catch (NoSuchMethodException | IllegalAccessException ex2) { throw new ExceptionInInitializerError(ex2); } @@ -1654,6 +1657,7 @@ public IRubyObject write(ThreadContext context, IRubyObject[] args) { CAT_WITH_CODE_RANGE = cat; MODIFY_AND_CLEAR_CODE_RANGE = modify; SUBSTR_ENC = substr; + CHECK_ENCODING = checkEncoding; } private static void catString(RubyString myString, RubyString str) { @@ -1698,7 +1702,7 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) if (enc != encStr && enc != ASCIIEncoding.INSTANCE && enc != USASCIIEncoding.INSTANCE) { RubyString converted = EncodingUtils.strConvEnc(context, str, encStr, enc); if (converted == str && encStr != ASCIIEncoding.INSTANCE && encStr != USASCIIEncoding.INSTANCE) { /* conversion failed */ - rb_enc_check_hack(context, enc, str); + rb_enc_check(context, enc, str); } str = converted; } @@ -1732,16 +1736,20 @@ private long stringIOWrite(ThreadContext context, Ruby runtime, IRubyObject arg) return len; } - /* - This hack inlines the JRuby version of rb_enc_check (RubString.checkEncoding) because it only supports str1 being - a true 
string. This breaks an expectation in StringIO test "test_write_encoding_conversion". - - If the StringIO string is blank, logic downstream from "rb_enc_check" in "encoding_compatible_latter" will simply - choose the second encoding rather than fail as the test expects. + private static void rb_enc_check(ThreadContext context, Encoding enc, CodeRangeable str) { + try { + CHECK_ENCODING.invokeExact(context, enc, str); + } catch (Throwable t) { + Helpers.throwException(t); + } + } - See discussion in https://github.com/ruby/stringio/pull/116. + /** + * Fallback version of rb_enc_check logic for JRuby versions prior to 9.4.13.0, which lack a RubyEncoding.checkEncoding variant accepting an Encoding. + * + * See discussion in https://github.com/ruby/stringio/pull/116. */ - private static void rb_enc_check_hack(ThreadContext context, Encoding enc, CodeRangeable str) { + private static void checkEncoding(ThreadContext context, Encoding enc, CodeRangeable str) { CodeRangeable fakeCodeRangeable = new EncodingOnlyCodeRangeable(enc); Encoding enc1 = StringSupport.areCompatible(fakeCodeRangeable, str); if (enc1 == null) throw context.runtime.newEncodingCompatibilityError("incompatible character encodings: " +