php-src/ext/standard/file.h
Christoph M. Becker 3b0f051193 Allow empty $escape to eschew escaping CSV
Albeit CSV is still a widespread data exchange format, it has never been
officially standardized.  There exists, however, the “informational” RFC
4180[1] which has no notion of escape characters, but rather defines
`escaped` as strings enclosed in double-quotes where contained
double-quotes have to be doubled.  While this concept is supported by
PHP's implementation (`$enclosure`), the `$escape` sometimes interferes,
so that `fgetcsv()` is unable to correctly parse externally generated
CSV, and `fputcsv()` is sometimes generating non-compliant CSV.  Since
PHP's `$escape` concept is availble for many years, we cannot drop it
for BC reasons (even though many consider it as bug).  Instead we allow
to pass an empty string as `$escape` parameter to the respective
functions, which results in ignoring/omitting any escaping, and as such
is more inline with RFC 4180.  It is noteworthy that this is almost no
userland BC break, since formerly most functions did not accept an empty
string, and failed in this case.  The only exception was `str_getcsv()`
which did accept an empty string, and used a backslash as escape
character then (which appears to be unintended behavior, anyway).

The changed functions are `fputcsv()`, `fgetcsv()` and `str_getcsv()`,
and also the `::setCsvControl()`, `::getCsvControl()`, `::fputcsv()`,
and `::fgetcsv()` methods of `SplFileObject`.

The implementation also changes the type of the escape parameter of the
PHP_APIs `php_fgetcsv()` and `php_fputcsv()` from `char` to `int`, where
`PHP_CSV_NO_ESCAPE` means to ignore/omit escaping.  The parameter
accepts the same values as `isalpha()` and friends, i.e. “the value of
which shall be representable as an `unsigned char` or shall equal the
value of the macro `EOF`.  If the argument has any other value, the
behavior is undefined.”  This is a subtle BC break, since the character
`chr(128)` has the value `-1` if `char` is signed, and so likely would
be confused with `EOF` when converted to `int`.  We consider this BC
break to be acceptable, since it's rather unlikely that anybody uses
`chr(128)` as escape character, and it easily can be fixed by casting
all `escape` arguments to `unsigned char`.

This patch implements the feature requests 38301[2] and 51496[3].

[1] <https://tools.ietf.org/html/rfc4180>
[2] <https://bugs.php.net/bug.php?id=38301>
[3] <https://bugs.php.net/bug.php?id=51496>
2018-12-15 14:38:15 +01:00

146 lines
4.6 KiB
C

/*
+----------------------------------------------------------------------+
| PHP Version 7 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2018 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Author: Rasmus Lerdorf <rasmus@lerdorf.on.ca> |
+----------------------------------------------------------------------+
*/
#ifndef FILE_H
#define FILE_H
#include "php_network.h"
PHP_MINIT_FUNCTION(file);
PHP_MSHUTDOWN_FUNCTION(file);
PHP_FUNCTION(tempnam);
PHP_NAMED_FUNCTION(php_if_tmpfile);
PHP_NAMED_FUNCTION(php_if_fopen);
PHPAPI PHP_FUNCTION(fclose);
PHP_FUNCTION(popen);
PHP_FUNCTION(pclose);
PHPAPI PHP_FUNCTION(feof);
PHPAPI PHP_FUNCTION(fread);
PHPAPI PHP_FUNCTION(fgetc);
PHPAPI PHP_FUNCTION(fgets);
PHP_FUNCTION(fscanf);
PHPAPI PHP_FUNCTION(fgetss);
PHP_FUNCTION(fgetcsv);
PHP_FUNCTION(fputcsv);
PHPAPI PHP_FUNCTION(fwrite);
PHPAPI PHP_FUNCTION(fflush);
PHPAPI PHP_FUNCTION(rewind);
PHPAPI PHP_FUNCTION(ftell);
PHPAPI PHP_FUNCTION(fseek);
PHP_FUNCTION(mkdir);
PHP_FUNCTION(rmdir);
PHPAPI PHP_FUNCTION(fpassthru);
PHP_FUNCTION(readfile);
PHP_FUNCTION(umask);
PHP_FUNCTION(rename);
PHP_FUNCTION(unlink);
PHP_FUNCTION(copy);
PHP_FUNCTION(file);
PHP_FUNCTION(file_get_contents);
PHP_FUNCTION(file_put_contents);
PHP_FUNCTION(get_meta_tags);
PHP_FUNCTION(flock);
PHP_FUNCTION(fd_set);
PHP_FUNCTION(fd_isset);
#if HAVE_REALPATH || defined(ZTS)
PHP_FUNCTION(realpath);
#endif
#ifdef HAVE_FNMATCH
PHP_FUNCTION(fnmatch);
#endif
PHP_NAMED_FUNCTION(php_if_ftruncate);
PHP_NAMED_FUNCTION(php_if_fstat);
PHP_FUNCTION(sys_get_temp_dir);
PHP_MINIT_FUNCTION(user_streams);
PHPAPI int php_le_stream_context(void);
PHPAPI int php_set_sock_blocking(php_socket_t socketd, int block);
PHPAPI int php_copy_file(const char *src, const char *dest);
PHPAPI int php_copy_file_ex(const char *src, const char *dest, int src_chk);
PHPAPI int php_copy_file_ctx(const char *src, const char *dest, int src_chk, php_stream_context *ctx);
PHPAPI int php_mkdir_ex(const char *dir, zend_long mode, int options);
PHPAPI int php_mkdir(const char *dir, zend_long mode);
#define PHP_CSV_NO_ESCAPE EOF
PHPAPI void php_fgetcsv(php_stream *stream, char delimiter, char enclosure, int escape_char, size_t buf_len, char *buf, zval *return_value);
PHPAPI size_t php_fputcsv(php_stream *stream, zval *fields, char delimiter, char enclosure, int escape_char);
#define META_DEF_BUFSIZE 8192
#define PHP_FILE_USE_INCLUDE_PATH 1
#define PHP_FILE_IGNORE_NEW_LINES 2
#define PHP_FILE_SKIP_EMPTY_LINES 4
#define PHP_FILE_APPEND 8
#define PHP_FILE_NO_DEFAULT_CONTEXT 16
typedef enum _php_meta_tags_token {
TOK_EOF = 0,
TOK_OPENTAG,
TOK_CLOSETAG,
TOK_SLASH,
TOK_EQUAL,
TOK_SPACE,
TOK_ID,
TOK_STRING,
TOK_OTHER
} php_meta_tags_token;
typedef struct _php_meta_tags_data {
php_stream *stream;
int ulc;
int lc;
char *input_buffer;
char *token_data;
int token_len;
int in_meta;
} php_meta_tags_data;
php_meta_tags_token php_next_meta_token(php_meta_tags_data *);
typedef struct {
int pclose_ret;
size_t def_chunk_size;
zend_long auto_detect_line_endings;
zend_long default_socket_timeout;
char *user_agent; /* for the http wrapper */
char *from_address; /* for the ftp and http wrappers */
const char *user_stream_current_filename; /* for simple recursion protection */
php_stream_context *default_context;
HashTable *stream_wrappers; /* per-request copy of url_stream_wrappers_hash */
HashTable *stream_filters; /* per-request copy of stream_filters_hash */
HashTable *wrapper_errors; /* key: wrapper address; value: linked list of char* */
int pclose_wait;
#if defined(HAVE_GETHOSTBYNAME_R)
struct hostent tmp_host_info;
char *tmp_host_buf;
size_t tmp_host_buf_len;
#endif
} php_file_globals;
#ifdef ZTS
#define FG(v) ZEND_TSRMG(file_globals_id, php_file_globals *, v)
extern PHPAPI int file_globals_id;
#else
#define FG(v) (file_globals.v)
extern PHPAPI php_file_globals file_globals;
#endif
#endif /* FILE_H */