diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec
index 132c3747fc..80cfa8ab76 100644
--- a/lib/prism/prism.gemspec
+++ b/lib/prism/prism.gemspec
@@ -88,6 +88,7 @@ Gem::Specification.new do |spec|
     "src/enc/pm_big5.c",
     "src/enc/pm_cp51932.c",
     "src/enc/pm_cp949.c",
+    "src/enc/pm_cp950.c",
     "src/enc/pm_euc_jp.c",
     "src/enc/pm_gbk.c",
     "src/enc/pm_shift_jis.c",
diff --git a/prism/enc/pm_cp950.c b/prism/enc/pm_cp950.c
new file mode 100644
index 0000000000..1b7a0995ac
--- /dev/null
+++ b/prism/enc/pm_cp950.c
@@ -0,0 +1,57 @@
+#include "prism/enc/pm_encoding.h"
+
+static size_t
+pm_encoding_cp950_char_width(const uint8_t *b, ptrdiff_t n) {
+    // Width in bytes of the character starting at b (1, 2, or 0 if unrecognized): a first byte below 0x80 is single-byte.
+    if (*b < 0x80) {
+        return 1;
+    }
+
+    // Double-byte: requires n > 1, a lead byte in 0x81-0xFE, and a trail byte in 0x40-0x7E or 0xA1-0xFE.
+    if (
+        (n > 1) &&
+        ((b[0] >= 0x81 && b[0] <= 0xFE) &&
+        ((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE)))
+    ) {
+        return 2;
+    }
+
+    return 0; // Any other byte sequence is reported as width 0.
+}
+
+static size_t
+pm_encoding_cp950_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (pm_encoding_cp950_char_width(b, n) == 1) { // Only single-byte characters are forwarded to pm_encoding_ascii_alpha_char.
+        return pm_encoding_ascii_alpha_char(b, n);
+    } else {
+        return 0; // Double-byte and unrecognized sequences yield 0.
+    }
+}
+
+static size_t
+pm_encoding_cp950_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (pm_encoding_cp950_char_width(b, n) == 1) { // Only single-byte characters are forwarded to pm_encoding_ascii_alnum_char.
+        return pm_encoding_ascii_alnum_char(b, n);
+    } else {
+        return 0; // Double-byte and unrecognized sequences yield 0.
+    }
+}
+
+static bool
+pm_encoding_cp950_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (pm_encoding_cp950_char_width(b, n) == 1) { // Only single-byte characters are forwarded to pm_encoding_ascii_isupper_char.
+        return pm_encoding_ascii_isupper_char(b, n);
+    } else {
+        return 0; // Converts to false: double-byte and unrecognized sequences are never upper case here.
+    }
+}
+
+/** cp950 encoding: a multibyte encoding registered under the name "cp950", wired to the static callbacks above. */
+pm_encoding_t pm_encoding_cp950 = {
+    .name = "cp950",
+    .char_width = pm_encoding_cp950_char_width,
+    .alnum_char = pm_encoding_cp950_alnum_char,
+    .alpha_char = pm_encoding_cp950_alpha_char,
+    .isupper_char = pm_encoding_cp950_isupper_char,
+    .multibyte = true
+};
diff --git a/prism/enc/pm_encoding.h b/prism/enc/pm_encoding.h
index 698abc8be9..5b79902389 100644
--- a/prism/enc/pm_encoding.h
+++ b/prism/enc/pm_encoding.h
@@ -165,6 +165,7 @@ extern pm_encoding_t pm_encoding_cp850;
 extern pm_encoding_t pm_encoding_cp852;
 extern pm_encoding_t pm_encoding_cp855;
 extern pm_encoding_t pm_encoding_cp949;
+extern pm_encoding_t pm_encoding_cp950;
 extern pm_encoding_t pm_encoding_euc_jp;
 extern pm_encoding_t pm_encoding_gb1988;
 extern pm_encoding_t pm_encoding_gbk;
diff --git a/prism/prism.c b/prism/prism.c
index 881ea89a29..960b652db8 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -6233,6 +6233,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
         ENCODING2("CP932", "csWindows31J", pm_encoding_windows_31j);
         ENCODING1("CP936", pm_encoding_gbk);
         ENCODING1("CP949", pm_encoding_cp949);
+        ENCODING1("CP950", pm_encoding_cp950);
         ENCODING1("CP1250", pm_encoding_windows_1250);
         ENCODING1("CP1251", pm_encoding_windows_1251);
         ENCODING1("CP1252", pm_encoding_windows_1252);
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 463cb95121..28992fcf1b 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -69,6 +69,7 @@ module Prism
       Encoding::Big5_HKSCS => 0x00...0x10000,
       Encoding::Big5_UAO => 0x00...0x10000,
       Encoding::CP949 => 0x00...0x10000,
+      Encoding::CP950 => 0x00...0x10000,
       Encoding::CP51932 => 0x00...0x10000,
       Encoding::GBK => 0x00...0x10000,
       Encoding::Shift_JIS => 0x00...0x10000,