diff --git a/ext/standard/html.c b/ext/standard/html.c
index 8109bfe12a8..7cbedc71c99 100644
--- a/ext/standard/html.c
+++ b/ext/standard/html.c
@@ -150,7 +150,7 @@ static entity_table_t ent_uni_greek[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
"thetasym", "upsih",
NULL, NULL, NULL,
- "piv"
+ "piv"
};
static entity_table_t ent_uni_punct[] = {
@@ -160,7 +160,7 @@ static entity_table_t ent_uni_punct[] = {
NULL, NULL, NULL, "ndash", "mdash", NULL, NULL, NULL,
/* 8216 */
"lsquo", "rsquo", "sbquo", NULL, "ldquo", "rdquo", "bdquo", NULL,
- "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip",
+ "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "permil", NULL,
/* 8242 */
"prime", "Prime", NULL, NULL, NULL, NULL, NULL, "lsaquo", "rsaquo", NULL,
@@ -200,39 +200,39 @@ static entity_table_t ent_uni_8592_9002[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8656 (0x21d0) */
- "lArr", "uArr", "rArr", "dArr", "hArr", "vArr", NULL, NULL,
- NULL, NULL, "lAarr", "rAarr", NULL, "rarrw", NULL, NULL,
+ "lArr", "uArr", "rArr", "dArr", "hArr", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8672 (0x21e0) */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8704 (0x2200) */
- "forall", "comp", "part", "exist", "nexist", "empty", NULL, "nabla",
- "isin", "notin", "epsis", "ni", "notni", "bepsi", NULL, "prod",
+ "forall", NULL, "part", "exist", NULL, "empty", NULL, "nabla",
+ "isin", "notin", NULL, "ni", NULL, NULL, NULL, "prod",
/* 8720 (0x2210) */
- "coprod", "sum", "minus", "mnplus", "plusdo", NULL, "setmn", "lowast",
- "compfn", NULL, "radic", NULL, NULL, "prop", "infin", "ang90",
+ NULL, "sum", "minus", NULL, NULL, NULL, NULL, "lowast",
+ NULL, NULL, "radic", NULL, NULL, "prop", "infin", NULL,
/* 8736 (0x2220) */
- "ang", "angmsd", "angsph", "mid", "nmid", "par", "npar", "and",
- "or", "cap", "cup", "int", NULL, NULL, "conint", NULL,
+ "ang", NULL, NULL, NULL, NULL, NULL, NULL, "and",
+ "or", "cap", "cup", "int", NULL, NULL, NULL, NULL,
/* 8752 (0x2230) */
- NULL, NULL, NULL, NULL, "there4", "becaus", NULL, NULL,
- NULL, NULL, NULL, NULL, "sim", "bsim", NULL, NULL,
+ NULL, NULL, NULL, NULL, "there4", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, "sim", NULL, NULL, NULL,
/* 8768 (0x2240) */
- "wreath", "nsim", NULL, "sime", "nsime", "cong", NULL, "ncong",
- "asymp", "nap", "ape", NULL, "bcong", "asymp", "bump", "bumpe",
+ NULL, NULL, NULL, NULL, NULL, "cong", NULL, NULL,
+ "asymp", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8784 (0x2250) */
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8800 (0x2260) */
- "ne", "equiv", NULL, NULL, "le", "ge", "lE", "gE",
- "lnE", "gnE", "Lt", "Gt", "twixt", NULL, "nlt", "ngt",
+ "ne", "equiv", NULL, NULL, "le", "ge", NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8816 (0x2270) */
- "nles", "nges", "lsim", "gsim", NULL, NULL, "lg", "gl",
- NULL, NULL, "pr", "sc", "cupre", "sscue", "prsim", "scsim",
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8832 (0x2280) */
- "npr", "nsc", "sub", "sup", "nsub", "nsup", "sube", "supe",
+ NULL, NULL, "sub", "sup", "nsub", NULL, "sube", "supe",
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* 8848 (0x2290) */
NULL, NULL, NULL, NULL, NULL, "oplus", NULL, "otimes",
diff --git a/ext/standard/tests/strings/html_entity_decode_html4.phpt b/ext/standard/tests/strings/html_entity_decode_html4.phpt
new file mode 100644
index 00000000000..3c92bf6fb79
--- /dev/null
+++ b/ext/standard/tests/strings/html_entity_decode_html4.phpt
@@ -0,0 +1,516 @@
+--TEST--
+html_entity_decode() conformance check (HTML 4)
+--FILE--
+
+--EXPECT--
+22
+26
+3c
+3e
+c2a0
+c2a1
+c2a2
+c2a3
+c2a4
+c2a5
+c2a6
+c2a7
+c2a8
+c2a9
+c2aa
+c2ab
+c2ac
+c2ad
+c2ae
+c2af
+c2b0
+c2b1
+c2b2
+c2b3
+c2b4
+c2b5
+c2b6
+c2b7
+c2b8
+c2b9
+c2ba
+c2bb
+c2bc
+c2bd
+c2be
+c2bf
+c380
+c381
+c382
+c383
+c384
+c385
+c386
+c387
+c388
+c389
+c38a
+c38b
+c38c
+c38d
+c38e
+c38f
+c390
+c391
+c392
+c393
+c394
+c395
+c396
+c397
+c398
+c399
+c39a
+c39b
+c39c
+c39d
+c39e
+c39f
+c3a0
+c3a1
+c3a2
+c3a3
+c3a4
+c3a5
+c3a6
+c3a7
+c3a8
+c3a9
+c3aa
+c3ab
+c3ac
+c3ad
+c3ae
+c3af
+c3b0
+c3b1
+c3b2
+c3b3
+c3b4
+c3b5
+c3b6
+c3b7
+c3b8
+c3b9
+c3ba
+c3bb
+c3bc
+c3bd
+c3be
+c3bf
+c592
+c593
+c5a0
+c5a1
+c5b8
+c692
+cb86
+cb9c
+ce91
+ce92
+ce93
+ce94
+ce95
+ce96
+ce97
+ce98
+ce99
+ce9a
+ce9b
+ce9c
+ce9d
+ce9e
+ce9f
+cea0
+cea1
+cea3
+cea4
+cea5
+cea6
+cea7
+cea8
+cea9
+ceb1
+ceb2
+ceb3
+ceb4
+ceb5
+ceb6
+ceb7
+ceb8
+ceb9
+ceba
+cebb
+cebc
+cebd
+cebe
+cebf
+cf80
+cf81
+cf82
+cf83
+cf84
+cf85
+cf86
+cf87
+cf88
+cf89
+cf91
+cf92
+cf96
+e28082
+e28083
+e28089
+e2808c
+e2808d
+e2808e
+e2808f
+e28093
+e28094
+e28098
+e28099
+e2809a
+e2809c
+e2809d
+e2809e
+e280a0
+e280a1
+e280a2
+e280a6
+e280b0
+e280b2
+e280b3
+e280b9
+e280ba
+e280be
+e28184
+e282ac
+e28491
+e28498
+e2849c
+e284a2
+e284b5
+e28690
+e28691
+e28692
+e28693
+e28694
+e286b5
+e28790
+e28791
+e28792
+e28793
+e28794
+e28880
+e28882
+e28883
+e28885
+e28887
+e28888
+e28889
+e2888b
+e2888f
+e28891
+e28892
+e28897
+e2889a
+e2889d
+e2889e
+e288a0
+e288a7
+e288a8
+e288a9
+e288aa
+e288ab
+e288b4
+e288bc
+e28985
+e28988
+e289a0
+e289a1
+e289a4
+e289a5
+e28a82
+e28a83
+e28a84
+e28a86
+e28a87
+e28a95
+e28a97
+e28aa5
+e28b85
+e28c88
+e28c89
+e28c8a
+e28c8b
+e28ca9
+e28caa
+e2978a
+e299a0
+e299a3
+e299a5
+e299a6
diff --git a/ext/standard/tests/strings/htmlentities_html4.phpt b/ext/standard/tests/strings/htmlentities_html4.phpt
new file mode 100644
index 00000000000..3f700e828a4
--- /dev/null
+++ b/ext/standard/tests/strings/htmlentities_html4.phpt
@@ -0,0 +1,305 @@
+--TEST--
+htmlentities() conformance check (HTML 4)
+--FILE--
+<?php
+function utf32_utf8($k) { // encode the integer code point $k as a UTF-8 byte string
+	if ($k < 0x80) { // 1 byte: plain ASCII
+		$retval = pack('C', $k);
+	} else if ($k < 0x800) { // 2 bytes: 110xxxxx 10xxxxxx
+		$retval = pack('C2',
+			0xc0 | ($k >> 6),
+			0x80 | ($k & 0x3f));
+	} else if ($k < 0x10000) { // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+		$retval = pack('C3',
+			0xe0 | ($k >> 12),
+			0x80 | (($k >> 6) & 0x3f),
+			0x80 | ($k & 0x3f));
+	} else if ($k < 0x200000) { // 4 bytes: 11110xxx followed by three continuation bytes
+		$retval = pack('C4',
+			0xf0 | ($k >> 18),
+			0x80 | (($k >> 12) & 0x3f),
+			0x80 | (($k >> 6) & 0x3f),
+			0x80 | ($k & 0x3f));
+	} else if ($k < 0x4000000) { // 5-byte form; the loop in this test stops below 0x110000, so it is not reached here
+		$retval = pack('C5',
+			0xf8 | ($k >> 24),
+			0x80 | (($k >> 18) & 0x3f),
+			0x80 | (($k >> 12) & 0x3f),
+			0x80 | (($k >> 6) & 0x3f),
+			0x80 | ($k & 0x3f));
+	} else { // 6-byte form; likewise not reached by the loop in this test
+		$retval = pack('C6',
+			0xfc | ($k >> 30),
+			0x80 | (($k >> 24) & 0x3f),
+			0x80 | (($k >> 18) & 0x3f),
+			0x80 | (($k >> 12) & 0x3f),
+			0x80 | (($k >> 6) & 0x3f),
+			0x80 | ($k & 0x3f));
+	}
+	return $retval; // binary string holding the encoded bytes
+}
+
+for ($cp = 0; $cp <= 0x10ffff; ++$cp) { // visit every code point up to U+10FFFF
+	$is_surrogate = (0xd800 <= $cp && $cp <= 0xdfff);
+	if ($is_surrogate) {
+		continue; // U+D800..U+DFFF are skipped before any encoding is attempted
+	}
+	$raw = utf32_utf8($cp);
+	$escaped = htmlentities($raw, ENT_QUOTES, 'UTF-8');
+	if ($raw != $escaped) printf("%s\tU+%05X\n", $escaped, $cp); // report only characters htmlentities() rewrote
+}
+?>
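+--EXPECT--
+&quot;	U+00022
+&amp;	U+00026
+&#039;	U+00027
+&lt;	U+0003C
+&gt;	U+0003E
+&nbsp;	U+000A0
+&iexcl;	U+000A1
+&cent;	U+000A2
+&pound;	U+000A3
+&curren;	U+000A4
+&yen;	U+000A5
+&brvbar;	U+000A6
+&sect;	U+000A7
+&uml;	U+000A8
+&copy;	U+000A9
+&ordf;	U+000AA
+&laquo;	U+000AB
+&not;	U+000AC
+&shy;	U+000AD
+&reg;	U+000AE
+&macr;	U+000AF
+&deg;	U+000B0
+&plusmn;	U+000B1
+&sup2;	U+000B2
+&sup3;	U+000B3
+&acute;	U+000B4
+&micro;	U+000B5
+&para;	U+000B6
+&middot;	U+000B7
+&cedil;	U+000B8
+&sup1;	U+000B9
+&ordm;	U+000BA
+&raquo;	U+000BB
+&frac14;	U+000BC
+&frac12;	U+000BD
+&frac34;	U+000BE
+&iquest;	U+000BF
+&Agrave;	U+000C0
+&Aacute;	U+000C1
+&Acirc;	U+000C2
+&Atilde;	U+000C3
+&Auml;	U+000C4
+&Aring;	U+000C5
+&AElig;	U+000C6
+&Ccedil;	U+000C7
+&Egrave;	U+000C8
+&Eacute;	U+000C9
+&Ecirc;	U+000CA
+&Euml;	U+000CB
+&Igrave;	U+000CC
+&Iacute;	U+000CD
+&Icirc;	U+000CE
+&Iuml;	U+000CF
+&ETH;	U+000D0
+&Ntilde;	U+000D1
+&Ograve;	U+000D2
+&Oacute;	U+000D3
+&Ocirc;	U+000D4
+&Otilde;	U+000D5
+&Ouml;	U+000D6
+&times;	U+000D7
+&Oslash;	U+000D8
+&Ugrave;	U+000D9
+&Uacute;	U+000DA
+&Ucirc;	U+000DB
+&Uuml;	U+000DC
+&Yacute;	U+000DD
+&THORN;	U+000DE
+&szlig;	U+000DF
+&agrave;	U+000E0
+&aacute;	U+000E1
+&acirc;	U+000E2
+&atilde;	U+000E3
+&auml;	U+000E4
+&aring;	U+000E5
+&aelig;	U+000E6
+&ccedil;	U+000E7
+&egrave;	U+000E8
+&eacute;	U+000E9
+&ecirc;	U+000EA
+&euml;	U+000EB
+&igrave;	U+000EC
+&iacute;	U+000ED
+&icirc;	U+000EE
+&iuml;	U+000EF
+&eth;	U+000F0
+&ntilde;	U+000F1
+&ograve;	U+000F2
+&oacute;	U+000F3
+&ocirc;	U+000F4
+&otilde;	U+000F5
+&ouml;	U+000F6
+&divide;	U+000F7
+&oslash;	U+000F8
+&ugrave;	U+000F9
+&uacute;	U+000FA
+&ucirc;	U+000FB
+&uuml;	U+000FC
+&yacute;	U+000FD
+&thorn;	U+000FE
+&yuml;	U+000FF
+&OElig;	U+00152
+&oelig;	U+00153
+&Scaron;	U+00160
+&scaron;	U+00161
+&Yuml;	U+00178
+&fnof;	U+00192
+&circ;	U+002C6
+&tilde;	U+002DC
+&Alpha;	U+00391
+&Beta;	U+00392
+&Gamma;	U+00393
+&Delta;	U+00394
+&Epsilon;	U+00395
+&Zeta;	U+00396
+&Eta;	U+00397
+&Theta;	U+00398
+&Iota;	U+00399
+&Kappa;	U+0039A
+&Lambda;	U+0039B
+&Mu;	U+0039C
+&Nu;	U+0039D
+&Xi;	U+0039E
+&Omicron;	U+0039F
+&Pi;	U+003A0
+&Rho;	U+003A1
+&Sigma;	U+003A3
+&Tau;	U+003A4
+&Upsilon;	U+003A5
+&Phi;	U+003A6
+&Chi;	U+003A7
+&Psi;	U+003A8
+&Omega;	U+003A9
+&alpha;	U+003B1
+&beta;	U+003B2
+&gamma;	U+003B3
+&delta;	U+003B4
+&epsilon;	U+003B5
+&zeta;	U+003B6
+&eta;	U+003B7
+&theta;	U+003B8
+&iota;	U+003B9
+&kappa;	U+003BA
+&lambda;	U+003BB
+&mu;	U+003BC
+&nu;	U+003BD
+&xi;	U+003BE
+&omicron;	U+003BF
+&pi;	U+003C0
+&rho;	U+003C1
+&sigmaf;	U+003C2
+&sigma;	U+003C3
+&tau;	U+003C4
+&upsilon;	U+003C5
+&phi;	U+003C6
+&chi;	U+003C7
+&psi;	U+003C8
+&omega;	U+003C9
+&thetasym;	U+003D1
+&upsih;	U+003D2
+&piv;	U+003D6
+&ensp;	U+02002
+&emsp;	U+02003
+&thinsp;	U+02009
+&zwnj;	U+0200C
+&zwj;	U+0200D
+&lrm;	U+0200E
+&rlm;	U+0200F
+&ndash;	U+02013
+&mdash;	U+02014
+&lsquo;	U+02018
+&rsquo;	U+02019
+&sbquo;	U+0201A
+&ldquo;	U+0201C
+&rdquo;	U+0201D
+&bdquo;	U+0201E
+&dagger;	U+02020
+&Dagger;	U+02021
+&bull;	U+02022
+&hellip;	U+02026
+&permil;	U+02030
+&prime;	U+02032
+&Prime;	U+02033
+&lsaquo;	U+02039
+&rsaquo;	U+0203A
+&oline;	U+0203E
+&frasl;	U+02044
+&euro;	U+020AC
+&image;	U+02111
+&weierp;	U+02118
+&real;	U+0211C
+&trade;	U+02122
+&alefsym;	U+02135
+&larr;	U+02190
+&uarr;	U+02191
+&rarr;	U+02192
+&darr;	U+02193
+&harr;	U+02194
+&crarr;	U+021B5
+&lArr;	U+021D0
+&uArr;	U+021D1
+&rArr;	U+021D2
+&dArr;	U+021D3
+&hArr;	U+021D4
+&forall;	U+02200
+&part;	U+02202
+&exist;	U+02203
+&empty;	U+02205
+&nabla;	U+02207
+&isin;	U+02208
+&notin;	U+02209
+&ni;	U+0220B
+&prod;	U+0220F
+&sum;	U+02211
+&minus;	U+02212
+&lowast;	U+02217
+&radic;	U+0221A
+&prop;	U+0221D
+&infin;	U+0221E
+&ang;	U+02220
+&and;	U+02227
+&or;	U+02228
+&cap;	U+02229
+&cup;	U+0222A
+&int;	U+0222B
+&there4;	U+02234
+&sim;	U+0223C
+&cong;	U+02245
+&asymp;	U+02248
+&ne;	U+02260
+&equiv;	U+02261
+&le;	U+02264
+&ge;	U+02265
+&sub;	U+02282
+&sup;	U+02283
+&nsub;	U+02284
+&sube;	U+02286
+&supe;	U+02287
+&oplus;	U+02295
+&otimes;	U+02297
+&perp;	U+022A5
+&sdot;	U+022C5
+&lceil;	U+02308
+&rceil;	U+02309
+&lfloor;	U+0230A
+&rfloor;	U+0230B
+&lang;	U+02329
+&rang;	U+0232A
+&loz;	U+025CA
+&spades;	U+02660
+&clubs;	U+02663
+&hearts;	U+02665
+&diams;	U+02666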