[DOC] Tweaks for String#byterindex (#13485)

2025-08-15 13:39:04 +02:00 · 2025-06-25 09:51:45 -05:00 · 2025-06-25 09:51:45 -05:00 · ec071c849f
commit ec071c849f
parent 077dbb8d42
2 changed files with 68 additions and 42 deletions
--- a/string.c
+++ b/string.c
@@ -4954,7 +4954,7 @@ str_ensure_byte_pos(VALUE str, long pos)
 *
 *    s = 'foo'          # => "foo"
 *    s.size             # => 3 # Three 1-byte characters.
-      s.bytesize         # => 3 # Three bytes.
+ *    s.bytesize         # => 3 # Three bytes.
 *    s.byteindex('f')   # => 0
 *    s.byteindex('o')   # => 1
 *    s.byteindex('oo')  # => 1
@@ -5260,65 +5260,90 @@ rb_str_byterindex(VALUE str, VALUE sub, long pos)
    return str_rindex(str, sub, s, enc);
 }
 /*
 *  call-seq:
- *    byterindex(substring, offset = self.bytesize) -> integer or nil
+ *    byterindex(object, offset = self.bytesize) -> integer or nil
 *    byterindex(regexp, offset = self.bytesize) -> integer or nil
 *
- *  Returns the Integer byte-based index of the _last_ occurrence of the given +substring+,
+ *  Returns the 0-based integer index of a substring of +self+
- *  or +nil+ if none found:
+ *  that is the _last_ match for the given +object+ (a string or Regexp) and +offset+,
 *  or +nil+ if there is no such substring;
 *  the returned index is the count of _bytes_ (not characters).
 *
- *    'foo'.byterindex('f') # => 0
+ *  When +object+ is a string,
- *    'foo'.byterindex('o') # => 2
+ *  returns the index of the _last_ found substring equal to +object+:
 *    'foo'.byterindex('oo') # => 1
 *    'foo'.byterindex('ooo') # => nil
 *
- *  Returns the Integer byte-based index of the _last_ match for the given Regexp +regexp+,
+ *    s = 'foo'           # => "foo"
- *  or +nil+ if none found:
+ *    s.size              # => 3 # Three 1-byte characters.
 *    s.bytesize          # => 3 # Three bytes.
 *    s.byterindex('f')   # => 0
 *    s.byterindex('o')   # => 2
 *    s.byterindex('oo')  # => 1
 *    s.byterindex('ooo') # => nil
 *
- *    'foo'.byterindex(/f/) # => 0
+ *  When +object+ is a Regexp,
- *    'foo'.byterindex(/o/) # => 2
+ *  returns the index of the last found substring matching +object+;
- *    'foo'.byterindex(/oo/) # => 1
+ *  updates {Regexp-related global variables}[rdoc-ref:Regexp@Global+Variables]:
 *    'foo'.byterindex(/ooo/) # => nil
 *
- *  The _last_ match means starting at the possible last position, not
+ *    s = 'foo'
- *  the last of longest matches.
+ *    s.byterindex(/f/)   # => 0
 *    $~                  # => #<MatchData "f">
 *    s.byterindex(/o/)   # => 2
 *    s.byterindex(/oo/)  # => 1
 *    s.byterindex(/ooo/) # => nil
 *    $~                  # => nil
 *
- *    'foo'.byterindex(/o+/) # => 2
+ *  The last match means starting at the possible last position,
- *    $~ #=> #<MatchData "o">
+ *  not the last of the longest matches:
 *
- *  To get the last longest match, needs to combine with negative
+ *    s = 'foo'
- *  lookbehind.
+ *    s.byterindex(/o+/) # => 2
 *    $~                 # => #<MatchData "o">
 *
- *    'foo'.byterindex(/(?<!o)o+/) # => 1
+ *  To get the last longest match, use a negative lookbehind:
 *    $~ #=> #<MatchData "oo">
 *
- *  Or String#byteindex with negative lookforward.
+ *    s = 'foo'
 *    s.byterindex(/(?<!o)o+/) # => 1
 *    $~                       # => #<MatchData "oo">
 *
- *    'foo'.byteindex(/o+(?!.*o)/) # => 1
+ *  Or use method #byteindex with negative lookahead:
 *    $~ #=> #<MatchData "oo">
 *
- *  Integer argument +offset+, if given and non-negative, specifies the maximum starting byte-based position in the
+ *    s = 'foo'
- *  string to _end_ the search:
+ *    s.byteindex(/o+(?!.*o)/) # => 1
 *    $~                       # => #<MatchData "oo">
 *
- *    'foo'.byterindex('o', 0) # => nil
+ *  \Integer argument +offset+, if given, specifies the 0-based index
- *    'foo'.byterindex('o', 1) # => 1
+ *  of the byte where searching is to end.
 *    'foo'.byterindex('o', 2) # => 2
 *    'foo'.byterindex('o', 3) # => 2
 *
- *  If +offset+ is a negative Integer, the maximum starting position in the
+ *  When +offset+ is non-negative,
- *  string to _end_ the search is the sum of the string's length and +offset+:
+ *  searching ends at byte position +offset+:
 *
- *    'foo'.byterindex('o', -1) # => 2
+ *    s = 'foo'
- *    'foo'.byterindex('o', -2) # => 1
+ *    s.byterindex('o', 0) # => nil
- *    'foo'.byterindex('o', -3) # => nil
+ *    s.byterindex('o', 1) # => 1
- *    'foo'.byterindex('o', -4) # => nil
+ *    s.byterindex('o', 2) # => 2
 *    s.byterindex('o', 3) # => 2
 *
- *  If +offset+ does not land on character (codepoint) boundary, +IndexError+ is
+ *  When +offset+ is negative, counts backward from the end of +self+:
 *  raised.
 *
- *  Related: String#byteindex.
+ *    s = 'foo'
 *    s.byterindex('o', -1) # => 2
 *    s.byterindex('o', -2) # => 1
 *    s.byterindex('o', -3) # => nil
 *
 *  Raises IndexError if the byte at +offset+ is not the first byte of a character:
 *
 *    s = "\uFFFF\uFFFF"        # => "\uFFFF\uFFFF"
 *    s.size                    # => 2 # Two 3-byte characters.
 *    s.bytesize                # => 6 # Six bytes.
 *    s.byterindex("\uFFFF")    # => 3
 *    s.byterindex("\uFFFF", 1) # Raises IndexError
 *    s.byterindex("\uFFFF", 2) # Raises IndexError
 *    s.byterindex("\uFFFF", 3) # => 3
 *    s.byterindex("\uFFFF", 4) # Raises IndexError
 *    s.byterindex("\uFFFF", 5) # Raises IndexError
 *    s.byterindex("\uFFFF", 6) # => 3
 *
 *  Related: see {Querying}[rdoc-ref:String@Querying].
 */
 static VALUE
--- a/string.rb
+++ b/string.rb
@@ -343,6 +343,7 @@
 # - #=~: Returns the index of the first substring that matches a given
 #   Regexp or other object; returns +nil+ if no match is found.
 # - #byteindex: Returns the byte index of the first occurrence of a given substring.
 # - #byterindex: Returns the byte index of the last occurrence of a given substring.
 # - #index: Returns the index of the _first_ occurrence of a given substring;
 #   returns +nil+ if none found.
 # - #rindex: Returns the index of the _last_ occurrence of a given substring;