php-src/ext/fileinfo/libmagic/is_json.c
Niels Dossche b7c5813c98
Update libmagic to 5.45 (#13369)
* Update libmagic to 5.45

This also cleans up magicdata.patch: changes that are already in upstream file
were removed from that patch file.

There are five (expected) test output changes.
All these were also checked with the file command.

  - bug77961.phpt changes because there's now an early error-return in the
    `if (ts == FILE_BADSIZE) {` branch.
  - cve-2014-1943.phpt and cve-2014-1943-mb.phpt change because now the crafted
    data is recognised as a simh file.
  - bug71434.phpt now properly recognises it as a Python file.
  - ext/fileinfo/tests/finfo_file_basic.phpt more specific mime type.

* Adjust memory requirement for s390x fileinfo run

The larger database causes a higher memory usage.
Similar to 962c082a5b.

* [ci skip] NEWS
2024-02-13 21:11:57 +01:00

500 lines
10 KiB
C

/*-
* Copyright (c) 2018 Christos Zoulas
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Parse JSON object serialization format (RFC-7159)
*/
#ifndef TEST
#include "file.h"
#ifndef lint
FILE_RCSID("@(#)$File: is_json.c,v 1.30 2022/09/27 19:12:40 christos Exp $")
#endif
#include "magic.h"
#else
#include <stdio.h>
#include <stddef.h>
#endif
#include <string.h>
#ifdef DEBUG
#include <stdio.h>
/*
 * Debug tracing, compiled in only when DEBUG is defined.
 *
 * DPRINTF(a, b, c):
 *   a - label string to print
 *   b - pointer to the current parse position
 *   c - pointer to the position the current parse step started from
 * The line is indented by `lvl' spaces, so a variable named `lvl' must be
 * in scope wherever the macro is used.  It then prints the label, the byte
 * at b (as hex and as a character) and the (b - c) bytes starting at c.
 */
#define DPRINTF(a, b, c) \
printf("%*s%s [%.2x/%c] %.*s\n", (int)lvl, "", (a), *(b), *(b), \
(int)(b - c), (const char *)(c))
/* With DEBUG the `lvl' parameters are used by DPRINTF: no attribute needed. */
#define __file_debugused
#else
/* Without DEBUG the trace macro expands to an empty statement. */
#define DPRINTF(a, b, c) do { } while (/*CONSTCOND*/0)
/*
 * Marks parameters that are referenced only by DPRINTF as unused so that
 * non-debug builds do not warn about them.
 */
#define __file_debugused __attribute__((__unused__))
#endif
/*
 * Indices into the statistics array (`size_t st[JSON_MAX]') that is passed
 * through the parser.  json_parse() increments the slot matching the kind of
 * value it has just parsed successfully (array, constant, number, object or
 * string); json_parse_array() additionally increments JSON_ARRAYN each time
 * it reaches the closing ']' of an array.  JSON_MAX is the number of slots.
 */
#define JSON_ARRAY 0
#define JSON_CONSTANT 1
#define JSON_NUMBER 2
#define JSON_OBJECT 3
#define JSON_STRING 4
#define JSON_ARRAYN 5
#define JSON_MAX 6
/*
* if JSON_COUNT != 0:
* count all the objects, require that we have the whole data file
* otherwise:
* stop if we find an object or an array
*/
/* Default to "not counting" unless the build defines JSON_COUNT itself. */
#ifndef JSON_COUNT
#define JSON_COUNT 0
#endif
/*
 * Forward declaration: json_parse() is called by the array and object
 * parsers below, which json_parse() in turn calls, so it has to be declared
 * before they are defined.
 */
static int json_parse(const unsigned char **, const unsigned char *, size_t *,
size_t);
/*
 * Tell whether a byte is one of the four whitespace characters this parser
 * skips between tokens: space, line feed, carriage return, horizontal tab.
 *
 * Returns 1 for whitespace, 0 for anything else.
 */
static int
json_isspace(const unsigned char uc)
{
	return uc == ' ' || uc == '\n' || uc == '\r' || uc == '\t';
}
/*
 * Tell whether a byte is a decimal digit '0'..'9'.
 *
 * Returns 1 for a digit, 0 for anything else.  The range test is valid
 * because C guarantees that the ten digit characters are contiguous.
 */
static int
json_isdigit(unsigned char uc)
{
	return uc >= '0' && uc <= '9';
}
/*
 * Tell whether a byte is a hexadecimal digit: '0'..'9', 'a'..'f' or
 * 'A'..'F'.
 *
 * Returns 1 for a hex digit, 0 for anything else.  The letter range tests
 * rely on a..f and A..F being contiguous in the execution character set,
 * which holds for ASCII and EBCDIC.
 */
static int
json_isxdigit(unsigned char uc)
{
	return json_isdigit(uc) ||
	    (uc >= 'a' && uc <= 'f') ||
	    (uc >= 'A' && uc <= 'F');
}
/*
 * Advance past any run of whitespace (as defined by json_isspace()).
 *
 *   uc - current position
 *   ue - one past the last valid byte
 *
 * Returns the first position in [uc, ue) that is not whitespace, or ue if
 * only whitespace remains.
 */
static const unsigned char *
json_skip_space(const unsigned char *uc, const unsigned char *ue)
{
	for (; uc < ue; uc++) {
		if (!json_isspace(*uc))
			break;
	}
	return uc;
}
/*
 * Parse the remainder of a string whose opening '"' has already been
 * consumed by the caller.
 *
 *   ucp - in: first byte after the opening quote;
 *         out: position where parsing stopped (just past the closing quote
 *         on success)
 *   ue  - one past the last valid byte
 *   lvl - nesting level, used only for debug output
 *
 * Accepted escapes are \" \\ \/ \b \f \n \r \t and \u followed by exactly
 * four hex digits.  A NUL byte, an unknown escape, or running out of input
 * before the closing quote makes the string invalid.
 *
 * Returns 1 for a well-formed string, 0 otherwise.
 */
/*ARGSUSED*/
static int
json_parse_string(const unsigned char **ucp, const unsigned char *ue,
    size_t lvl __file_debugused)
{
	const unsigned char *cur = *ucp;
	unsigned char ch;
	size_t ndigits;
	int ok = 0;

	DPRINTF("Parse string: ", cur, *ucp);
	while (cur < ue) {
		ch = *cur++;
		if (ch == '"') {
			/* Closing quote: the only successful exit. */
			ok = 1;
			break;
		}
		if (ch == '\0')
			break;
		if (ch != '\\')
			continue;

		/* Backslash: the escape character must follow. */
		if (cur == ue)
			break;
		ch = *cur++;
		if (ch == 'u') {
			if (ue - cur < 4) {
				/* Truncated \uXXXX: give up at the end. */
				cur = ue;
				break;
			}
			for (ndigits = 0; ndigits < 4; ndigits++) {
				if (!json_isxdigit(*cur++))
					break;
			}
			if (ndigits < 4)
				break;
			continue;
		}
		if (ch == '"' || ch == '\\' || ch == '/' || ch == 'b' ||
		    ch == 'f' || ch == 'n' || ch == 'r' || ch == 't')
			continue;
		/* NUL or any other byte is not a valid escape. */
		break;
	}
	if (ok)
		DPRINTF("Good string: ", cur, *ucp);
	else
		DPRINTF("Bad string: ", cur, *ucp);
	*ucp = cur;
	return ok;
}
/*
 * Parse the remainder of an array whose opening '[' has already been
 * consumed by the caller.
 *
 *   ucp - in: first byte after '['; out: position where parsing stopped
 *         (just past the closing ']' on success)
 *   ue  - one past the last valid byte
 *   st  - statistics array; st[JSON_ARRAYN] is incremented on success
 *   lvl - nesting level, passed on (plus one) to json_parse()
 *
 * Elements are parsed with json_parse() and must be separated by ','.
 * A ']' is accepted wherever an element could start, so both an empty
 * array and a trailing comma are tolerated.
 *
 * Returns 1 when the closing ']' is reached, 0 on any error or when the
 * input ends first.
 */
static int
json_parse_array(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	int closed = 0;

	DPRINTF("Parse array: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur == ']') {
			closed = 1;
			break;
		}
		if (!json_parse(&cur, ue, st, lvl + 1))
			break;
		if (cur == ue)
			break;
		if (*cur == ',') {
			cur++;
			continue;
		}
		/* Anything but ']' after an element is an error. */
		closed = (*cur == ']');
		break;
	}
	if (!closed) {
		DPRINTF("Bad array: ", cur, *ucp);
		*ucp = cur;
		return 0;
	}
	st[JSON_ARRAYN]++;
	DPRINTF("Good array: ", cur, *ucp);
	/* cur still points at the ']'; step over it for the caller. */
	*ucp = cur + 1;
	return 1;
}
/*
 * Parse the remainder of an object whose opening '{' has already been
 * consumed by the caller.
 *
 *   ucp - in: first byte after '{'; out: position where parsing stopped
 *         (just past the closing '}' on success)
 *   ue  - one past the last valid byte
 *   st  - statistics array, handed on to json_parse() for member values
 *   lvl - nesting level; values are parsed at lvl + 1
 *
 * Each member must be a quoted string, optional whitespace, ':' and a value
 * parsed by json_parse(); members are separated by ','.  A '}' is accepted
 * wherever a member name could start, so an empty object and a trailing
 * comma are tolerated.
 *
 * Returns 1 when the closing '}' is reached, 0 on any error or when the
 * input ends first.
 */
static int
json_parse_object(const unsigned char **ucp, const unsigned char *ue,
    size_t *st, size_t lvl)
{
	const unsigned char *cur = *ucp;
	unsigned char sep;
	int ok = 0;

	DPRINTF("Parse object: ", cur, *ucp);
	while (cur < ue) {
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur == '}') {
			cur++;
			ok = 1;
			break;
		}

		/* Member name. */
		if (*cur++ != '"') {
			DPRINTF("not string", cur, *ucp);
			break;
		}
		DPRINTF("next field", cur, *ucp);
		if (!json_parse_string(&cur, ue, lvl)) {
			DPRINTF("not string", cur, *ucp);
			break;
		}

		/* Name/value separator. */
		cur = json_skip_space(cur, ue);
		if (cur == ue)
			break;
		if (*cur++ != ':') {
			DPRINTF("not colon", cur, *ucp);
			break;
		}

		/* Member value. */
		if (!json_parse(&cur, ue, st, lvl + 1)) {
			DPRINTF("not json", cur, *ucp);
			break;
		}
		if (cur == ue)
			break;

		sep = *cur++;
		if (sep == ',')
			continue;
		if (sep == '}') {	/* { */
			ok = 1;
			break;
		}
		DPRINTF("not more", cur, *ucp);
		/*
		 * This store is overwritten below; it only changes what the
		 * "Bad object" debug trace prints.
		 */
		*ucp = cur - 1;
		break;
	}
	if (ok) {
		DPRINTF("Good object: ", cur, *ucp);
		*ucp = cur;
		return 1;
	}
	DPRINTF("Bad object: ", cur, *ucp);
	*ucp = cur;
	return 0;
}
/*
 * Parse a number starting at *ucp.
 *
 *   ucp - in: first byte of the candidate number; out: position where
 *         parsing stopped
 *   ue  - one past the last valid byte
 *   lvl - nesting level, used only for debug output
 *
 * Accepted shape: optional '-', a run of digits, optional '.', another run
 * of digits, and, if at least one digit was seen, an optional exponent
 * ('e' or 'E', optional sign, digits).  At least one digit is required in
 * the mantissa, and at least one in the exponent when an exponent is
 * present.
 *
 * Returns 1 if a number was recognised, 0 otherwise (including empty input).
 */
/*ARGSUSED*/
static int
json_parse_number(const unsigned char **ucp, const unsigned char *ue,
    size_t lvl __file_debugused)
{
	const unsigned char *cur = *ucp;
	int seen = 0;

	DPRINTF("Parse number: ", cur, *ucp);
	if (cur == ue)
		return 0;

	if (*cur == '-')
		cur++;
	/* Integer part. */
	while (cur < ue && json_isdigit(*cur)) {
		seen = 1;
		cur++;
	}
	if (cur < ue) {
		/* Fraction. */
		if (*cur == '.')
			cur++;
		while (cur < ue && json_isdigit(*cur)) {
			seen = 1;
			cur++;
		}
		if (cur < ue && seen && (*cur == 'e' || *cur == 'E')) {
			/* Exponent: needs its own digits to be valid. */
			cur++;
			seen = 0;
			if (cur < ue) {
				if (*cur == '+' || *cur == '-')
					cur++;
				while (cur < ue && json_isdigit(*cur)) {
					seen = 1;
					cur++;
				}
			}
		}
	}

	if (!seen)
		DPRINTF("Bad number: ", cur, *ucp);
	else
		DPRINTF("Good number: ", cur, *ucp);
	*ucp = cur;
	return seen;
}
/*
 * Match the tail of a literal ("true", "false" or "null") whose first byte
 * has already been consumed by the caller.
 *
 *   ucp - in: position just after the literal's first byte (must not be
 *         beyond ue); out: position just past where the whole literal would
 *         end, clamped to ue.  It is set before matching, so it is advanced
 *         even when the match fails.
 *   ue  - one past the last valid byte
 *   str - the full literal, including its first character
 *   len - sizeof the literal, i.e. its length plus the terminating NUL;
 *         callers always pass at least 2
 *   lvl - nesting level, used only for debug output
 *
 * If the input ends in the middle of the literal, the bytes that are present
 * are compared and a matching prefix is accepted.
 *
 * Returns 1 on a match, 0 on the first differing byte.
 */
/*ARGSUSED*/
static int
json_parse_const(const unsigned char **ucp, const unsigned char *ue,
    const char *str, size_t len, size_t lvl __file_debugused)
{
	const unsigned char *uc = *ucp;
	size_t avail = (size_t)(ue - uc);
	size_t skip;

	DPRINTF("Parse const: ", uc, *ucp);
	--len;			/* drop the NUL counted by sizeof */
	skip = len - 1;		/* first byte was consumed by the caller */
	/*
	 * Clamp the distance before forming the pointer: adding first and
	 * comparing against ue afterwards could create a pointer more than
	 * one past the end of the buffer, which is undefined behaviour.
	 */
	*ucp = uc + (skip < avail ? skip : avail);
	for (; uc < ue && --len;) {
		if (*uc++ != *++str) {
			DPRINTF("Bad const: ", uc, *ucp);
			return 0;
		}
	}
	DPRINTF("Good const: ", uc, *ucp);
	return 1;
}
/*
 * Parse one JSON value and record its kind in the statistics array.
 *
 *   ucp - in: where to start; out: position after the value and any
 *         whitespace that follows it (left untouched when the depth limit
 *         is hit)
 *   ue  - one past the last valid byte
 *   st  - statistics array indexed by the JSON_* constants; the slot for
 *         the parsed value's kind is incremented on success
 *   lvl - nesting level; 0 marks the top-level call
 *
 * For lvl != 0 the result is 1 if a value was parsed and 0 otherwise.
 *
 * For lvl == 0 the result is:
 *   0 - the value did not parse, or no array/object was counted, or
 *       unexpected data follows the first value
 *   1 - one value consumed the whole buffer and at least one array or
 *       object was counted
 *   2 - the first value is followed by a byte equal to the first value's
 *       opening byte, a second value parses there, and at least one array
 *       or object was counted (file_is_json() reports this as
 *       newline-delimited JSON)
 */
static int
json_parse(const unsigned char **ucp, const unsigned char *ue,
size_t *st, size_t lvl)
{
const unsigned char *uc, *ouc;
int rv = 0;
int t;
/* Skip leading whitespace; ouc remembers where the value starts. */
ouc = uc = json_skip_space(*ucp, ue);
if (uc == ue)
goto out;
/*
 * Cap the recursion depth: lvl grows by one on every nested call made by
 * this function and by the array/object parsers.
 */
if (lvl > 500) {
DPRINTF("Too many levels", uc, *ucp);
return 0;
}
#if JSON_COUNT
/*
 * bail quickly if not counting
 * NOTE(review): the wording above says "not counting", yet this block is
 * compiled only when JSON_COUNT is non-zero, and it returns without
 * advancing *ucp -- confirm the intent before enabling JSON_COUNT.
 */
if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAYN]))
return 1;
#endif
DPRINTF("Parse general: ", uc, *ucp);
/*
 * Dispatch on the first byte.  The byte is consumed here, so the helpers
 * start just after it; the exception is the number parser, which needs to
 * see it again.
 */
switch (*uc++) {
case '"':
rv = json_parse_string(&uc, ue, lvl + 1);
t = JSON_STRING;
break;
case '[':
rv = json_parse_array(&uc, ue, st, lvl + 1);
t = JSON_ARRAY;
break;
case '{': /* '}' */
rv = json_parse_object(&uc, ue, st, lvl + 1);
t = JSON_OBJECT;
break;
case 't':
rv = json_parse_const(&uc, ue, "true", sizeof("true"), lvl + 1);
t = JSON_CONSTANT;
break;
case 'f':
rv = json_parse_const(&uc, ue, "false", sizeof("false"),
lvl + 1);
t = JSON_CONSTANT;
break;
case 'n':
rv = json_parse_const(&uc, ue, "null", sizeof("null"), lvl + 1);
t = JSON_CONSTANT;
break;
default:
/* Anything else can only be a number: give the byte back. */
--uc;
rv = json_parse_number(&uc, ue, lvl + 1);
t = JSON_NUMBER;
break;
}
/* Count the value only if it parsed; t is always set when we get here. */
if (rv)
st[t]++;
uc = json_skip_space(uc, ue);
out:
DPRINTF("End general: ", uc, *ucp);
*ucp = uc;
/* Top-level call: turn the parse result into the 0/1/2 verdict. */
if (lvl == 0) {
if (!rv)
return 0;
/* Whole buffer consumed: JSON only if it held an array or object. */
if (uc == ue)
return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 1 : 0;
/*
 * More data follows.  Accept it only if it opens with the same byte as
 * the first value and a second value parses (at level 1).
 */
if (*ouc == *uc && json_parse(&uc, ue, st, 1))
return (st[JSON_ARRAYN] || st[JSON_OBJECT]) ? 2 : 0;
else
return 0;
}
return rv;
}
#ifndef TEST
/*
 * Decide whether a buffer holds JSON and, if so, print its description.
 *
 *   ms - magic set; its flags select the output form and it receives the
 *        text through file_printf()
 *   b  - buffer to examine (b->fbuf, b->flen bytes)
 *
 * Returns 0 if the buffer is not JSON or if MAGIC_APPLE or MAGIC_EXTENSION
 * is requested, 1 if it was recognised, and -1 if file_printf() failed.
 * When only MAGIC_MIME_ENCODING is requested nothing is printed.
 */
int
file_is_json(struct magic_set *ms, const struct buffer *b)
{
	const unsigned char *pos = CAST(const unsigned char *, b->fbuf);
	const unsigned char *end = pos + b->flen;
	size_t st[JSON_MAX];
	int mimeflags;
	int kind;

	if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
		return 0;
	mimeflags = ms->flags & MAGIC_MIME;

	memset(st, 0, sizeof(st));
	/* kind: 0 = not JSON, 1 = single document, otherwise line-delimited. */
	kind = json_parse(&pos, end, st, 0);
	if (kind == 0)
		return 0;

	if (mimeflags == MAGIC_MIME_ENCODING)
		return 1;

	if (mimeflags) {
		const char *subtype = (kind == 1) ? "json" : "x-ndjson";

		if (file_printf(ms, "application/%s", subtype) == -1)
			return -1;
		return 1;
	}

	if (file_printf(ms, "%sJSON text data",
	    kind == 1 ? "" : "New Line Delimited ") == -1)
		return -1;
#if JSON_COUNT
	/* Expands to a counter and the plural suffix that goes with it. */
#define P(n) st[n], st[n] > 1 ? "s" : ""
	if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
	    "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
	    "u constant%s, %" SIZE_T_FORMAT "u number%s, %" SIZE_T_FORMAT
	    "u >1array%s)",
	    P(JSON_OBJECT), P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT),
	    P(JSON_NUMBER), P(JSON_ARRAYN))
	    == -1)
		return -1;
#endif
	return 1;
}
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <err.h>
/*
 * Stand-alone driver, built only when TEST is defined: read the file named
 * by the single command-line argument into memory, run json_parse() over
 * it and print the verdict as "is json N".
 *
 * Exits with EXIT_FAILURE (via err()/errx()) on a usage error or if the
 * file cannot be opened, examined, buffered or read completely; returns 0
 * otherwise, whatever the verdict.
 */
int
main(int argc, char *argv[])
{
	int fd;
	int verdict;
	struct stat st;
	unsigned char *buf;
	const unsigned char *cur;
	size_t stats[JSON_MAX];
	size_t len;
	ssize_t nr;

	if (argc != 2)
		errx(EXIT_FAILURE, "exactly one file argument is required");

	if ((fd = open(argv[1], O_RDONLY)) == -1)
		err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
	if (fstat(fd, &st) == -1)
		err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
	if (st.st_size < 0)
		errx(EXIT_FAILURE, "Bad size for `%s'", argv[1]);
	len = (size_t)st.st_size;

	/*
	 * malloc(0) is allowed to return NULL, so always ask for at least
	 * one byte; an empty file is then simply reported as not JSON.
	 */
	if ((buf = malloc(len != 0 ? len : 1)) == NULL)
		err(EXIT_FAILURE, "Can't allocate %jd bytes",
		    (intmax_t)st.st_size);

	nr = read(fd, buf, len);
	if (nr == -1)
		err(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	/* A short read leaves errno meaningless, so do not print it. */
	if ((size_t)nr != len)
		errx(EXIT_FAILURE, "Can't read %jd bytes",
		    (intmax_t)st.st_size);
	(void)close(fd);

	memset(stats, 0, sizeof(stats));
	/*
	 * json_parse() advances the pointer it is handed, so give it a
	 * separate cursor and keep buf intact for free().
	 */
	cur = buf;
	verdict = json_parse(&cur, buf + len, stats, 0);
	printf("is json %d\n", verdict);

	free(buf);
	return 0;
}
#endif