mirror of
https://github.com/ruby/ruby.git
synced 2025-09-15 16:44:01 +02:00
Rename the big5-hkscs stuff to something more generic and add UAO sharing common code.
Merge the Big5 extensions into pm_big5.c
This commit is contained in:
parent
a4003bb8dc
commit
e16ff17374
6 changed files with 66 additions and 55 deletions
|
@ -85,7 +85,6 @@ Gem::Specification.new do |spec|
|
||||||
"lib/prism/visitor.rb",
|
"lib/prism/visitor.rb",
|
||||||
"src/diagnostic.c",
|
"src/diagnostic.c",
|
||||||
"src/enc/pm_big5.c",
|
"src/enc/pm_big5.c",
|
||||||
"src/enc/pm_big5_hkscs.c",
|
|
||||||
"src/enc/pm_cp51932.c",
|
"src/enc/pm_cp51932.c",
|
||||||
"src/enc/pm_euc_jp.c",
|
"src/enc/pm_euc_jp.c",
|
||||||
"src/enc/pm_gbk.c",
|
"src/enc/pm_gbk.c",
|
||||||
|
|
|
@ -15,6 +15,22 @@ pm_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
pm_encoding_big5_star_char_width(const uint8_t *b, ptrdiff_t n) {
|
||||||
|
// These are the single byte characters.
|
||||||
|
if (*b < 0x80) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// These are the double byte characters.
|
||||||
|
if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
|
||||||
|
((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
if (pm_encoding_big5_char_width(b, n) == 1) {
|
if (pm_encoding_big5_char_width(b, n) == 1) {
|
||||||
|
@ -24,6 +40,15 @@ pm_encoding_big5_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
pm_encoding_big5_star_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
|
if (pm_encoding_big5_star_char_width(b, n) == 1) {
|
||||||
|
return pm_encoding_ascii_alpha_char(b, n);
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
if (pm_encoding_big5_char_width(b, n) == 1) {
|
if (pm_encoding_big5_char_width(b, n) == 1) {
|
||||||
|
@ -33,6 +58,15 @@ pm_encoding_big5_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
pm_encoding_big5_star_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
|
if (pm_encoding_big5_star_char_width(b, n) == 1) {
|
||||||
|
return pm_encoding_ascii_alnum_char(b, n);
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
if (pm_encoding_big5_char_width(b, n) == 1) {
|
if (pm_encoding_big5_char_width(b, n) == 1) {
|
||||||
|
@ -42,6 +76,15 @@ pm_encoding_big5_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
pm_encoding_big5_star_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
||||||
|
if (pm_encoding_big5_star_char_width(b, n) == 1) {
|
||||||
|
return pm_encoding_ascii_isupper_char(b, n);
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Big5 encoding */
|
/** Big5 encoding */
|
||||||
pm_encoding_t pm_encoding_big5 = {
|
pm_encoding_t pm_encoding_big5 = {
|
||||||
.name = "big5",
|
.name = "big5",
|
||||||
|
@ -51,3 +94,23 @@ pm_encoding_t pm_encoding_big5 = {
|
||||||
.isupper_char = pm_encoding_big5_isupper_char,
|
.isupper_char = pm_encoding_big5_isupper_char,
|
||||||
.multibyte = true
|
.multibyte = true
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Big5-HKSCS encoding */
|
||||||
|
pm_encoding_t pm_encoding_big5_hkscs = {
|
||||||
|
.name = "big5-hkscs",
|
||||||
|
.char_width = pm_encoding_big5_star_char_width,
|
||||||
|
.alnum_char = pm_encoding_big5_star_alnum_char,
|
||||||
|
.alpha_char = pm_encoding_big5_star_alpha_char,
|
||||||
|
.isupper_char = pm_encoding_big5_star_isupper_char,
|
||||||
|
.multibyte = true
|
||||||
|
};
|
||||||
|
|
||||||
|
/** Big5-UAO encoding */
|
||||||
|
pm_encoding_t pm_encoding_big5_uao = {
|
||||||
|
.name = "big5-uao",
|
||||||
|
.char_width = pm_encoding_big5_star_char_width,
|
||||||
|
.alnum_char = pm_encoding_big5_star_alnum_char,
|
||||||
|
.alpha_char = pm_encoding_big5_star_alpha_char,
|
||||||
|
.isupper_char = pm_encoding_big5_star_isupper_char,
|
||||||
|
.multibyte = true
|
||||||
|
};
|
||||||
|
|
|
@ -1,54 +0,0 @@
|
||||||
#include "prism/enc/pm_encoding.h"
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
pm_encoding_big5_hkscs_char_width(const uint8_t *b, ptrdiff_t n) {
|
|
||||||
// These are the single byte characters.
|
|
||||||
if (*b < 0x80) {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// These are the double byte characters.
|
|
||||||
if ((n > 1) && (b[0] >= 0x87 && b[0] <= 0xFE) &&
|
|
||||||
((b[1] >= 0x40 && b[1] <= 0x7E) || (b[1] >= 0xA1 && b[1] <= 0xFE))) {
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
pm_encoding_big5_hkscs_alpha_char(const uint8_t *b, ptrdiff_t n) {
|
|
||||||
if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
|
|
||||||
return pm_encoding_ascii_alpha_char(b, n);
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
pm_encoding_big5_hkscs_alnum_char(const uint8_t *b, ptrdiff_t n) {
|
|
||||||
if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
|
|
||||||
return pm_encoding_ascii_alnum_char(b, n);
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool
|
|
||||||
pm_encoding_big5_hkscs_isupper_char(const uint8_t *b, ptrdiff_t n) {
|
|
||||||
if (pm_encoding_big5_hkscs_char_width(b, n) == 1) {
|
|
||||||
return pm_encoding_ascii_isupper_char(b, n);
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Big5 encoding */
|
|
||||||
pm_encoding_t pm_encoding_big5_hkscs = {
|
|
||||||
.name = "big5-hkscs",
|
|
||||||
.char_width = pm_encoding_big5_hkscs_char_width,
|
|
||||||
.alnum_char = pm_encoding_big5_hkscs_alnum_char,
|
|
||||||
.alpha_char = pm_encoding_big5_hkscs_alpha_char,
|
|
||||||
.isupper_char = pm_encoding_big5_hkscs_isupper_char,
|
|
||||||
.multibyte = true
|
|
||||||
};
|
|
|
@ -159,6 +159,7 @@ extern pm_encoding_t pm_encoding_ascii;
|
||||||
extern pm_encoding_t pm_encoding_ascii_8bit;
|
extern pm_encoding_t pm_encoding_ascii_8bit;
|
||||||
extern pm_encoding_t pm_encoding_big5;
|
extern pm_encoding_t pm_encoding_big5;
|
||||||
extern pm_encoding_t pm_encoding_big5_hkscs;
|
extern pm_encoding_t pm_encoding_big5_hkscs;
|
||||||
|
extern pm_encoding_t pm_encoding_big5_uao;
|
||||||
extern pm_encoding_t pm_encoding_cp51932;
|
extern pm_encoding_t pm_encoding_cp51932;
|
||||||
extern pm_encoding_t pm_encoding_cp850;
|
extern pm_encoding_t pm_encoding_cp850;
|
||||||
extern pm_encoding_t pm_encoding_cp852;
|
extern pm_encoding_t pm_encoding_cp852;
|
||||||
|
|
|
@ -6091,6 +6091,7 @@ parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *star
|
||||||
ENCODING1("BINARY", pm_encoding_ascii_8bit);
|
ENCODING1("BINARY", pm_encoding_ascii_8bit);
|
||||||
ENCODING1("Big5", pm_encoding_big5);
|
ENCODING1("Big5", pm_encoding_big5);
|
||||||
ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
|
ENCODING1("Big5-HKSCS", pm_encoding_big5_hkscs);
|
||||||
|
ENCODING1("Big5-UAO", pm_encoding_big5_uao);
|
||||||
break;
|
break;
|
||||||
case 'C': case 'c':
|
case 'C': case 'c':
|
||||||
ENCODING1("CP437", pm_encoding_ibm437);
|
ENCODING1("CP437", pm_encoding_ibm437);
|
||||||
|
|
|
@ -65,6 +65,7 @@ module Prism
|
||||||
Encoding::Windows_874 => 0x00...0x100,
|
Encoding::Windows_874 => 0x00...0x100,
|
||||||
Encoding::Big5 => 0x00...0x10000,
|
Encoding::Big5 => 0x00...0x10000,
|
||||||
Encoding::Big5_HKSCS => 0x00...0x10000,
|
Encoding::Big5_HKSCS => 0x00...0x10000,
|
||||||
|
Encoding::Big5_UAO => 0x00...0x10000,
|
||||||
Encoding::CP51932 => 0x00...0x10000,
|
Encoding::CP51932 => 0x00...0x10000,
|
||||||
Encoding::GBK => 0x00...0x10000,
|
Encoding::GBK => 0x00...0x10000,
|
||||||
Encoding::Shift_JIS => 0x00...0x10000,
|
Encoding::Shift_JIS => 0x00...0x10000,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue