mirror of
https://github.com/php/php-src.git
synced 2025-08-15 13:38:49 +02:00

* Update libmagic to 5.45
This also cleans up magicdata.patch: changes that are already in upstream file
were removed from that patch file.
There are five (expected) test output changes.
All these were also checked with the file command.
- bug77961.phpt changes because there's now an early error-return in the
`if (ts == FILE_BADSIZE) {` branch.
- cve-2014-1943.phpt and cve-2014-1943-mb.phpt change because now the crafted
data is recognised as a simh file.
- bug71434.phpt now properly recognises it as a Python file.
- ext/fileinfo/tests/finfo_file_basic.phpt now reports a more specific MIME type.
* Adjust memory requirement for s390x fileinfo run
The larger database causes a higher memory usage.
Similar to 962c082a5b.
* [ci skip] NEWS
500 lines
10 KiB
C
500 lines
10 KiB
C
/*-
|
|
* Copyright (c) 2018 Christos Zoulas
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* Parse JSON object serialization format (RFC-7159)
|
|
*/
|
|
|
|
#ifndef TEST
|
|
#include "file.h"
|
|
|
|
#ifndef lint
|
|
FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
|
|
#endif
|
|
|
|
#include "magic.h"
|
|
#else
|
|
#include <stdio.h>
|
|
#include <stddef.h>
|
|
#endif
|
|
#include <string.h>
|
|
|
|
/*
 * Debug tracing helpers.
 *
 * DPRINTF(a, b, c): when DEBUG is defined, print the label `a', the byte
 * that `b' points at (as hex and as a character), and the (b - c) bytes
 * starting at `c'.  The output is indented by `lvl', which is NOT a macro
 * argument: a variable named lvl must be in scope wherever DPRINTF is
 * expanded.  Without DEBUG the macro expands to an empty statement.
 *
 * __file_debugused: attached to parameters that are only read by DPRINTF,
 * so that non-debug builds do not warn about them being unused.
 */
#ifdef DEBUG
#include <stdio.h>
#define DPRINTF(a, b, c)	\
    printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
	(int)((b) - (c)), (const char *)(c))
#define __file_debugused
#else
#define DPRINTF(a, b, c)	do { } while (/*CONSTCOND*/0)
#define __file_debugused	__attribute__((__unused__))
#endif
|
|
|
|
/*
 * Slots of the statistics array (size_t st[JSON_MAX]) that the parser
 * updates.  json_parse() bumps the slot matching the kind of value it just
 * accepted; json_parse_array() additionally bumps JSON_ARRAYN every time an
 * array is closed.
 */
enum {
	JSON_ARRAY	= 0,
	JSON_CONSTANT	= 1,
	JSON_NUMBER	= 2,
	JSON_OBJECT	= 3,
	JSON_STRING	= 4,
	JSON_ARRAYN	= 5,
	JSON_MAX	= 6	/* number of slots, not a slot itself */
};

/*
 * if JSON_COUNT != 0:
 *	count all the objects, require that we have the whole data file
 * otherwise:
 *	stop if we find an object or an array
 *
 * JSON_COUNT may be predefined by the build; it defaults to 0 and must stay
 * a preprocessor macro because it is tested with #if.
 */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
|
|
|
|
/*
 * Forward declaration: the array and object parsers recurse into
 * json_parse(), which is defined after them.
 */
static int json_parse(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl);
|
|
|
|
/*
 * Return 1 if uc is a space, line feed, carriage return or horizontal tab,
 * and 0 for every other byte.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
|
|
|
|
/*
 * Return 1 if uc is one of the decimal digits '0'..'9', otherwise 0.
 * (The C standard guarantees that the digit characters are contiguous.)
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
|
|
|
|
/*
 * Return 1 if uc is a hexadecimal digit (0-9, a-f or A-F), otherwise 0.
 */
static int
json_isxdigit(unsigned char uc)
{
	switch (uc) {
	case 'a': case 'A':
	case 'b': case 'B':
	case 'c': case 'C':
	case 'd': case 'D':
	case 'e': case 'E':
	case 'f': case 'F':
		return 1;
	default:
		/* Not a hex letter: it can still be a decimal digit. */
		return json_isdigit(uc);
	}
}
|
|
|
|
/*
 * Advance uc over any run of JSON whitespace, never moving past ue.
 * Returns the first position that is not whitespace, or ue if the rest of
 * the buffer is blank.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
|
|
|
|
/*ARGSUSED*/
|
|
static int
|
|
json_parse_string(const unsigned char **ucp, const unsigned char *ue,
|
|
size_t lvl __file_debugused)
|
|
{
|
|
const unsigned char *uc = *ucp;
|
|
size_t i;
|
|
|
|
DPRINTF("Parse string: ", uc, *ucp);
|
|
while (uc < ue) {
|
|
switch (*uc++) {
|
|
case '\0':
|
|
goto out;
|
|
case '\\':
|
|
if (uc == ue)
|
|
goto out;
|
|
switch (*uc++) {
|
|
case '\0':
|
|
goto out;
|
|
case '"':
|
|
case '\\':
|
|
case '/':
|
|
case 'b':
|
|
case 'f':
|
|
case 'n':
|
|
case 'r':
|
|
case 't':
|
|
continue;
|
|
case 'u':
|
|
if (ue - uc < 4) {
|
|
uc = ue;
|
|
goto out;
|
|
}
|
|
for (i = 0; i < 4; i++)
|
|
if (!json_isxdigit(*uc++))
|
|
goto out;
|
|
continue;
|
|
default:
|
|
goto out;
|
|
}
|
|
case '"':
|
|
DPRINTF("Good string: ", uc, *ucp);
|
|
*ucp = uc;
|
|
return 1;
|
|
default:
|
|
continue;
|
|
}
|
|
}
|
|
out:
|
|
DPRINTF("Bad string: ", uc, *ucp);
|
|
*ucp = uc;
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
|
|
size_t *st, size_t lvl)
|
|
{
|
|
const unsigned char *uc = *ucp;
|
|
|
|
DPRINTF("Parse array: ", uc, *ucp);
|
|
while (uc < ue) {
|
|
uc = json_skip_space(uc, ue);
|
|
if (uc == ue)
|
|
goto out;
|
|
if (*uc == ']')
|
|
goto done;
|
|
if (!json_parse(&uc, ue, st, lvl + 1))
|
|
goto out;
|
|
if (uc == ue)
|
|
goto out;
|
|
switch (*uc) {
|
|
case ',':
|
|
uc++;
|
|
continue;
|
|
case ']':
|
|
done:
|
|
st[JSON_ARRAYN]++;
|
|
DPRINTF("Good array: ", uc, *ucp);
|
|
*ucp = uc + 1;
|
|
return 1;
|
|
default:
|
|
goto out;
|
|
}
|
|
}
|
|
out:
|
|
DPRINTF("Bad array: ", uc, *ucp);
|
|
*ucp = uc;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Parse the inside of a JSON object.  On entry *ucp points just past the
 * opening brace.
 *
 * Each member must be a double-quoted string, optional whitespace, ':' and
 * a value parsed by json_parse(); members are separated by ','.  A closing
 * brace is accepted wherever a member could start, so an empty object and a
 * trailing comma are both tolerated.
 *
 * Returns 1 with *ucp just past the closing brace on success, 0 with *ucp
 * at the point where parsing stopped otherwise (including when the buffer
 * ends before the object is closed).
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char ch;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;

		/* Either the end of the object or the start of a key. */
		ch = *cur++;
		if (ch == '}')
			goto good;
		if (ch != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue, lvl)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		/* Key/value separator. */
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		/* After a value: another member, or the end of the object. */
		ch = *cur++;
		if (ch == ',')
			continue;
		if (ch == '}')	/* { */
			goto good;
		DPRINTF("not more", cur, *ucp);
		*ucp = cur - 1;
		break;
	}

	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;

good:
	DPRINTF("Good object: ", cur, *ucp);
	*ucp = cur;
	return 1;
}
|
|
|
|
/*ARGSUSED*/
|
|
static int
|
|
json_parse_number(const unsigned char **ucp, const unsigned char *ue,
|
|
size_t lvl __file_debugused)
|
|
{
|
|
const unsigned char *uc = *ucp;
|
|
int got = 0;
|
|
|
|
DPRINTF("Parse number: ", uc, *ucp);
|
|
if (uc == ue)
|
|
return 0;
|
|
if (*uc == '-')
|
|
uc++;
|
|
|
|
for (; uc < ue; uc++) {
|
|
if (!json_isdigit(*uc))
|
|
break;
|
|
got = 1;
|
|
}
|
|
if (uc == ue)
|
|
goto out;
|
|
if (*uc == '.')
|
|
uc++;
|
|
for (; uc < ue; uc++) {
|
|
if (!json_isdigit(*uc))
|
|
break;
|
|
got = 1;
|
|
}
|
|
if (uc == ue)
|
|
goto out;
|
|
if (got && (*uc == 'e' || *uc == 'E')) {
|
|
uc++;
|
|
got = 0;
|
|
if (uc == ue)
|
|
goto out;
|
|
if (*uc == '+' || *uc == '-')
|
|
uc++;
|
|
for (; uc < ue; uc++) {
|
|
if (!json_isdigit(*uc))
|
|
break;
|
|
got = 1;
|
|
}
|
|
}
|
|
out:
|
|
if (!got)
|
|
DPRINTF("Bad number: ", uc, *ucp);
|
|
else
|
|
DPRINTF("Good number: ", uc, *ucp);
|
|
*ucp = uc;
|
|
return got;
|
|
}
|
|
|
|
/*ARGSUSED*/
|
|
static int
|
|
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
|
|
const char *str, size_t len, size_t lvl __file_debugused)
|
|
{
|
|
const unsigned char *uc = *ucp;
|
|
|
|
DPRINTF("Parse const: ", uc, *ucp);
|
|
*ucp += --len - 1;
|
|
if (*ucp > ue)
|
|
*ucp = ue;
|
|
for (; uc < ue && --len;) {
|
|
if (*uc++ != *++str) {
|
|
DPRINTF("Bad const: ", uc, *ucp);
|
|
return 0;
|
|
}
|
|
}
|
|
DPRINTF("Good const: ", uc, *ucp);
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
json_parse(const unsigned char **ucp, const unsigned char *ue,
|
|
size_t *st, size_t lvl)
|
|
{
|
|
const unsigned char *uc, *ouc;
|
|
int rv = 0;
|
|
int t;
|
|
|
|
ouc = uc = json_skip_space(*ucp, ue);
|
|
if (uc == ue)
|
|
goto out;
|
|
|
|
// Avoid recursion
|
|
if (lvl > 500) {
|
|
DPRINTF("Too many levels", uc, *ucp);
|
|
return 0;
|
|
}
|
|
#if JSON_COUNT
|
|
/* bail quickly if not counting */
|
|
if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
|
|
return 1;
|
|
#endif
|
|
|
|
DPRINTF("Parse general: ", uc, *ucp);
|
|
switch (*uc++) {
|
|
case '"':
|
|
rv = json_parse_string(&uc, ue, lvl + 1);
|
|
t = JSON_STRING;
|
|
break;
|
|
case '[':
|
|
rv = json_parse_array(&uc, ue, st, lvl + 1);
|
|
t = JSON_ARRAY;
|
|
break;
|
|
case '{': /* '}' */
|
|
rv = json_parse_object(&uc, ue, st, lvl + 1);
|
|
t = JSON_OBJECT;
|
|
break;
|
|
case 't':
|
|
rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
|
|
t = JSON_CONSTANT;
|
|
break;
|
|
case 'f':
|
|
rv = json_parse_const(&uc, ue, "false", sizeof("false"),
|
|
lvl + 1);
|
|
t = JSON_CONSTANT;
|
|
break;
|
|
case 'n':
|
|
rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
|
|
t = JSON_CONSTANT;
|
|
break;
|
|
default:
|
|
--uc;
|
|
rv = json_parse_number(&uc, ue, lvl + 1);
|
|
t = JSON_NUMBER;
|
|
break;
|
|
}
|
|
if (rv)
|
|
st[t]++;
|
|
uc = json_skip_space(uc, ue);
|
|
out:
|
|
DPRINTF("End general: ", uc, *ucp);
|
|
*ucp = uc;
|
|
if (lvl == 0) {
|
|
if (!rv)
|
|
return 0;
|
|
if (uc == ue)
|
|
return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
|
|
if (*ouc == *uc && json_parse(&uc, ue, st, 1))
|
|
return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
|
|
else
|
|
return 0;
|
|
}
|
|
return rv;
|
|
}
|
|
|
|
#ifndef TEST
|
|
int
|
|
file_is_json(struct magic_set *ms, const struct buffer *b)
|
|
{
|
|
const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
|
|
const unsigned char *ue = uc + b->flen;
|
|
size_t st[JSON_MAX];
|
|
int mime = ms->flags & MAGIC_MIME;
|
|
int jt;
|
|
|
|
|
|
if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
|
|
return 0;
|
|
|
|
memset(st, 0, sizeof(st));
|
|
|
|
if ((jt = json_parse(&uc, ue, st, 0)) == 0)
|
|
return 0;
|
|
|
|
if (mime == MAGIC_MIME_ENCODING)
|
|
return 1;
|
|
if (mime) {
|
|
if (file_printf(ms, "application/%s",
|
|
jt == 1 ? "json" : "x-ndjson") == -1)
|
|
return -1;
|
|
return 1;
|
|
}
|
|
if (file_printf(ms, "%sJSON text data",
|
|
jt == 1 ? "" : "New Line Delimited ") == -1)
|
|
return -1;
|
|
#if JSON_COUNT
|
|
#define P(n) st[n], st[n] > 1 ? "s" : ""
|
|
if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
|
|
"u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
|
|
"u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
|
|
"u >1array%s)",
|
|
P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
|
|
P(JSON_NUMBER), P(JSON_ARRAYN))
|
|
== -1)
|
|
return -1;
|
|
#endif
|
|
return 1;
|
|
}
|
|
|
|
#else
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include <stdio.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <err.h>
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
int fd;
|
|
struct stat st;
|
|
unsigned char *p;
|
|
size_t stats[JSON_MAX];
|
|
|
|
if ((fd = open(argv[1], O_RDONLY)) == -1)
|
|
err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
|
|
|
|
if (fstat(fd, &st) == -1)
|
|
err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
|
|
|
|
if ((p = CAST(char *, malloc(st.st_size))) == NULL)
|
|
err(EXIT_FAILURE, "Can't allocate %jd bytes",
|
|
(intmax_t)st.st_size);
|
|
if (read(fd, p, st.st_size) != st.st_size)
|
|
err(EXIT_FAILURE, "Can't read %jd bytes",
|
|
(intmax_t)st.st_size);
|
|
memset(stats, 0, sizeof(stats));
|
|
printf("is json %d\n", json_parse((const unsigned char **)&p,
|
|
p + st.st_size, stats, 0));
|
|
return 0;
|
|
}
|
|
#endif
|