[DOC] Enhanced RDoc for some encoding methods (#5598)

In String, treats: #b #scrub #scrub! #unicode_normalize #unicode_normalize! #encode #encode! Also adds a note to IO.new (suggested by @jeremyevans).
Merged-By: BurdetteLamar <BurdetteLamar@Yahoo.com>
2025-08-15 13:39:04 +02:00 · 2022-02-25 13:12:59 -06:00 · 2022-02-25 13:12:59 -06:00 · 26ffda2fd2 · 2022-02-26 04:13:23 +09:00
commit 26ffda2fd2
parent 189ac52bba
3 changed files with 131 additions and 97 deletions
--- a/transcode.c
+++ b/transcode.c
@@ -2801,16 +2801,11 @@ str_encode_associate(VALUE str, int encidx)

 /*
 *  call-seq:
- *     str.encode!(encoding, **options)   -> str
- *     str.encode!(dst_encoding, src_encoding, **options)   -> str
+ *    encode!(dst_encoding = Encoding.default_internal, **enc_opts) -> self
+ *    encode!(dst_encoding, src_encoding, **enc_opts)   -> self
+ *
+ *  Like #encode, but applies encoding changes to +self+; returns +self+.
 *
- *  The first form transcodes the contents of <i>str</i> from
- *  str.encoding to +encoding+.
- *  The second form transcodes the contents of <i>str</i> from
- *  src_encoding to dst_encoding.
- *  The +options+ keyword arguments give details for conversion. See String#encode
- *  for details.
- *  Returns the string even if no changes were made.
 */

 static VALUE
@@ -2837,58 +2832,50 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx);

 /*
 *  call-seq:
- *     str.encode(encoding, **options)   -> str
- *     str.encode(dst_encoding, src_encoding, **options)   -> str
- *     str.encode(**options)   -> str
+ *    encode(dst_encoding = Encoding.default_internal, **enc_opts) -> string
+ *    encode(dst_encoding, src_encoding, **enc_opts)   -> string
 *
- *  The first form returns a copy of +str+ transcoded
- *  to encoding +encoding+.
- *  The second form returns a copy of +str+ transcoded
- *  from src_encoding to dst_encoding.
- *  The last form returns a copy of +str+ transcoded to
- *  <tt>Encoding.default_internal</tt>.
+ *  Returns a copy of +self+ transcoded as determined by +dst_encoding+.
+ *  By default, raises an exception if +self+
+ *  contains an invalid byte or a character not defined in +dst_encoding+;
+ *  that behavior may be modified by encoding options; see below.
 *
- *  By default, the first and second form raise
- *  Encoding::UndefinedConversionError for characters that are
- *  undefined in the destination encoding, and
- *  Encoding::InvalidByteSequenceError for invalid byte sequences
- *  in the source encoding. The last form by default does not raise
- *  exceptions but uses replacement strings.
+ *  With no arguments:
 *
- *  The +options+ keyword arguments give details for conversion.
- *  The arguments are:
+ *  - Uses the same encoding if <tt>Encoding.default_internal</tt> is +nil+
+ *    (the default):
+ *
+ *      Encoding.default_internal # => nil
+ *      s = "Ruby\x99".force_encoding('Windows-1252')
+ *      s.encoding                # => #<Encoding:Windows-1252>
+ *      s.bytes                   # => [82, 117, 98, 121, 153]
+ *      t = s.encode              # => "Ruby\x99"
+ *      t.encoding                # => #<Encoding:Windows-1252>
+ *      t.bytes                   # => [82, 117, 98, 121, 153]
+ *
+ *  - Otherwise, uses the encoding <tt>Encoding.default_internal</tt>:
+ *
+ *      Encoding.default_internal = 'UTF-8'
+ *      t = s.encode              # => "Ruby™"
+ *      t.encoding                # => #<Encoding:UTF-8>
+ *
+ *  With only argument +dst_encoding+ given, uses that encoding:
+ *
+ *    s = "Ruby\x99".force_encoding('Windows-1252')
+ *    s.encoding            # => #<Encoding:Windows-1252>
+ *    t = s.encode('UTF-8') # => "Ruby™"
+ *    t.encoding            # => #<Encoding:UTF-8>
+ *
+ *  With arguments +dst_encoding+ and +src_encoding+ given,
+ *  interprets +self+ using +src_encoding+ and encodes the new string using +dst_encoding+:
+ *
+ *    s = "Ruby\x99"
+ *    t = s.encode('UTF-8', 'Windows-1252') # => "Ruby™"
+ *    t.encoding                            # => #<Encoding:UTF-8>
+ *
+ *  Optional keyword arguments +enc_opts+ specify encoding options;
+ *  see {Encoding Options}[rdoc-ref:encoding.rdoc@Encoding+Options].
 *
- *  :invalid ::
- *    If the value is +:replace+, #encode replaces invalid byte sequences in
- *    +str+ with the replacement character.  The default is to raise the
- *    Encoding::InvalidByteSequenceError exception
- *  :undef ::
- *    If the value is +:replace+, #encode replaces characters which are
- *    undefined in the destination encoding with the replacement character.
- *    The default is to raise the Encoding::UndefinedConversionError.
- *  :replace ::
- *    Sets the replacement string to the given value. The default replacement
- *    string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
- *  :fallback ::
- *    Sets the replacement string by the given object for undefined
- *    character.  The object should be a Hash, a Proc, a Method, or an
- *    object which has [] method.
- *    Its key is an undefined character encoded in the source encoding
- *    of current transcoder. Its value can be any encoding until it
- *    can be converted into the destination encoding of the transcoder.
- *  :xml ::
- *    The value must be +:text+ or +:attr+.
- *    If the value is +:text+ #encode replaces undefined characters with their
- *    (upper-case hexadecimal) numeric character references. '&', '<', and '>'
- *    are converted to "&amp;", "&lt;", and "&gt;", respectively.
- *    If the value is +:attr+, #encode also quotes the replacement result
- *    (using '"'), and replaces '"' with "&quot;".
- *  :cr_newline ::
- *    Replaces LF ("\n") with CR ("\r") if value is true.
- *  :crlf_newline ::
- *    Replaces LF ("\n") with CRLF ("\r\n") if value is true.
- *  :universal_newline ::
- *    Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true.
 */

 static VALUE