LCOV - differential code coverage report
Current view: top level - src/backend/commands - copyfromparse.c (source / functions) Coverage Total Hit LBC UIC UBC GBC GIC GNC CBC EUB ECB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 75.4 % 651 491 21 64 75 21 183 30 257 64 210 3
Current Date: 2023-04-08 15:15:32 Functions: 88.9 % 18 16 1 1 6 10 1 6
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * copyfromparse.c
       4                 :  *      Parse CSV/text/binary format for COPY FROM.
       5                 :  *
       6                 :  * This file contains routines to parse the text, CSV and binary input
       7                 :  * formats.  The main entry point is NextCopyFrom(), which parses the
       8                 :  * next input line and returns it as Datums.
       9                 :  *
      10                 :  * In text/CSV mode, the parsing happens in multiple stages:
      11                 :  *
      12                 :  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
      13                 :  *                1.          2.            3.           4.
      14                 :  *
      15                 :  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
      16                 :  *    places it into 'raw_buf'.
      17                 :  *
      18                 :  * 2. CopyConvertBuf() calls the encoding conversion function to convert
      19                 :  *    the data in 'raw_buf' from client to server encoding, placing the
      20                 :  *    converted result in 'input_buf'.
      21                 :  *
      22                 :  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
      23                 :  *    It is responsible for finding the next newline marker, taking quote and
      24                 :  *    escape characters into account according to the COPY options.  The line
      25                 :  *    is copied into 'line_buf', with quotes and escape characters still
      26                 :  *    intact.
      27                 :  *
      28                 :  * 4. The CopyReadAttributesText/CSV() functions take the input line from
      29                 :  *    'line_buf', and splits it into fields, unescaping the data as required.
      30                 :  *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
      31                 :  *    pointers to each field.
      32                 :  *
      33                 :  * If encoding conversion is not required, a shortcut is taken in step 2 to
      34                 :  * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
      35                 :  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
      36                 :  * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
      37                 :  * the data is valid in the current encoding.
      38                 :  *
      39                 :  * In binary mode, the pipeline is much simpler.  Input is loaded into
      40                 :  * 'raw_buf', and encoding conversion is done in the datatype-specific
      41                 :  * receive functions, if required.  'input_buf' and 'line_buf' are not used,
      42                 :  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
      43                 :  * data when it's passed to the receive function.
      44                 :  *
      45                 :  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
      46                 :  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
      47                 :  * and 'attribute_buf' are expanded on demand, to hold the longest line
      48                 :  * encountered so far.
      49                 :  *
      50                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
      51                 :  * Portions Copyright (c) 1994, Regents of the University of California
      52                 :  *
      53                 :  *
      54                 :  * IDENTIFICATION
      55                 :  *    src/backend/commands/copyfromparse.c
      56                 :  *
      57                 :  *-------------------------------------------------------------------------
      58                 :  */
      59                 : #include "postgres.h"
      60                 : 
      61                 : #include <ctype.h>
      62                 : #include <unistd.h>
      63                 : #include <sys/stat.h>
      64                 : 
      65                 : #include "commands/copy.h"
      66                 : #include "commands/copyfrom_internal.h"
      67                 : #include "commands/progress.h"
      68                 : #include "executor/executor.h"
      69                 : #include "libpq/libpq.h"
      70                 : #include "libpq/pqformat.h"
      71                 : #include "mb/pg_wchar.h"
      72                 : #include "miscadmin.h"
      73                 : #include "pgstat.h"
      74                 : #include "port/pg_bswap.h"
      75                 : #include "utils/builtins.h"
      76                 : #include "utils/memutils.h"
      77                 : #include "utils/rel.h"
      78                 : 
      79                 : #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
      80                 : #define OCTVALUE(c) ((c) - '0')
      81                 : 
      82                 : /*
      83                 :  * These macros centralize code used to process line_buf and input_buf buffers.
      84                 :  * They are macros because they often do continue/break control and to avoid
      85                 :  * function call overhead in tight COPY loops.
      86                 :  *
      87                 :  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
      88                 :  * prevent the continue/break processing from working.  We end the "if (1)"
      89                 :  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
      90                 :  * any "else" in the calling code, and to avoid any compiler warnings about
      91                 :  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
      92                 :  */
      93                 : 
      94                 : /*
      95                 :  * This keeps the character read at the top of the loop in the buffer
      96                 :  * even if there is more than one read-ahead.
      97                 :  */
      98                 : #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
      99                 : if (1) \
     100                 : { \
     101                 :     if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
     102                 :     { \
     103                 :         input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
     104                 :         need_data = true; \
     105                 :         continue; \
     106                 :     } \
     107                 : } else ((void) 0)
     108                 : 
     109                 : /* This consumes the remainder of the buffer and breaks */
     110                 : #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
     111                 : if (1) \
     112                 : { \
     113                 :     if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
     114                 :     { \
     115                 :         if (extralen) \
     116                 :             input_buf_ptr = copy_buf_len; /* consume the partial character */ \
     117                 :         /* backslash just before EOF, treat as data char */ \
     118                 :         result = true; \
     119                 :         break; \
     120                 :     } \
     121                 : } else ((void) 0)
     122                 : 
     123                 : /*
     124                 :  * Transfer any approved data to line_buf; must do this to be sure
     125                 :  * there is some room in input_buf.
     126                 :  */
     127                 : #define REFILL_LINEBUF \
     128                 : if (1) \
     129                 : { \
     130                 :     if (input_buf_ptr > cstate->input_buf_index) \
     131                 :     { \
     132                 :         appendBinaryStringInfo(&cstate->line_buf, \
     133                 :                              cstate->input_buf + cstate->input_buf_index, \
     134                 :                                input_buf_ptr - cstate->input_buf_index); \
     135                 :         cstate->input_buf_index = input_buf_ptr; \
     136                 :     } \
     137                 : } else ((void) 0)
     138                 : 
     139                 : /* Undo any read-ahead and jump out of the block. */
     140                 : #define NO_END_OF_COPY_GOTO \
     141                 : if (1) \
     142                 : { \
     143                 :     input_buf_ptr = prev_raw_ptr + 1; \
     144                 :     goto not_end_of_copy; \
     145                 : } else ((void) 0)
     146                 : 
     147                 : /* NOTE: there's a copy of this in copyto.c */
     148                 : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
     149                 : 
     150                 : 
     151                 : /* non-export function prototypes */
     152                 : static bool CopyReadLine(CopyFromState cstate);
     153                 : static bool CopyReadLineText(CopyFromState cstate);
     154                 : static int  CopyReadAttributesText(CopyFromState cstate);
     155                 : static int  CopyReadAttributesCSV(CopyFromState cstate);
     156                 : static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
     157                 :                                      Oid typioparam, int32 typmod,
     158                 :                                      bool *isnull);
     159                 : 
     160                 : 
     161                 : /* Low-level communications functions */
     162                 : static int  CopyGetData(CopyFromState cstate, void *databuf,
     163                 :                         int minread, int maxread);
     164                 : static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
     165                 : static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
     166                 : static void CopyLoadInputBuf(CopyFromState cstate);
     167                 : static int  CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
     168                 : 
     169                 : void
     170 CBC         411 : ReceiveCopyBegin(CopyFromState cstate)
     171                 : {
     172                 :     StringInfoData buf;
     173             411 :     int         natts = list_length(cstate->attnumlist);
     174             411 :     int16       format = (cstate->opts.binary ? 1 : 0);
     175                 :     int         i;
     176                 : 
     177             411 :     pq_beginmessage(&buf, 'G');
     178             411 :     pq_sendbyte(&buf, format);  /* overall format */
     179             411 :     pq_sendint16(&buf, natts);
     180            1368 :     for (i = 0; i < natts; i++)
     181             957 :         pq_sendint16(&buf, format); /* per-column formats */
     182             411 :     pq_endmessage(&buf);
     183             411 :     cstate->copy_src = COPY_FRONTEND;
     184             411 :     cstate->fe_msgbuf = makeStringInfo();
     185                 :     /* We *must* flush here to ensure FE knows it can send. */
     186             411 :     pq_flush();
     187             411 : }
     188                 : 
     189                 : void
     190               7 : ReceiveCopyBinaryHeader(CopyFromState cstate)
     191                 : {
     192                 :     char        readSig[11];
     193                 :     int32       tmp;
     194                 : 
     195                 :     /* Signature */
     196               7 :     if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
     197               7 :         memcmp(readSig, BinarySignature, 11) != 0)
     198 UBC           0 :         ereport(ERROR,
     199                 :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     200                 :                  errmsg("COPY file signature not recognized")));
     201                 :     /* Flags field */
     202 CBC           7 :     if (!CopyGetInt32(cstate, &tmp))
     203 UBC           0 :         ereport(ERROR,
     204                 :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     205                 :                  errmsg("invalid COPY file header (missing flags)")));
     206 CBC           7 :     if ((tmp & (1 << 16)) != 0)
     207 UBC           0 :         ereport(ERROR,
     208                 :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     209                 :                  errmsg("invalid COPY file header (WITH OIDS)")));
     210 CBC           7 :     tmp &= ~(1 << 16);
     211               7 :     if ((tmp >> 16) != 0)
     212 UBC           0 :         ereport(ERROR,
     213                 :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     214                 :                  errmsg("unrecognized critical flags in COPY file header")));
     215                 :     /* Header extension length */
     216 CBC           7 :     if (!CopyGetInt32(cstate, &tmp) ||
     217               7 :         tmp < 0)
     218 UBC           0 :         ereport(ERROR,
     219                 :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     220                 :                  errmsg("invalid COPY file header (missing length)")));
     221                 :     /* Skip extension header, if present */
     222 CBC           7 :     while (tmp-- > 0)
     223                 :     {
     224 UBC           0 :         if (CopyReadBinaryData(cstate, readSig, 1) != 1)
     225               0 :             ereport(ERROR,
     226                 :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     227                 :                      errmsg("invalid COPY file header (wrong length)")));
     228                 :     }
     229 CBC           7 : }
     230                 : 
     231                 : /*
     232                 :  * CopyGetData reads data from the source (file, frontend, or callback)
     233                 :  *
     234                 :  * We attempt to read at least minread, and at most maxread, bytes from
     235                 :  * the source.  The actual number of bytes read is returned; if this is
     236                 :  * less than minread, EOF was detected.
     237                 :  *
     238                 :  * Note: when copying from the frontend, we expect a proper EOF mark per
     239                 :  * protocol; if the frontend simply drops the connection, we raise an error.
     240                 :  * It seems unwise to allow the COPY IN to complete normally in that case.
     241                 :  *
     242                 :  * NB: no data conversion is applied here.
     243                 :  */
     244                 : static int
     245          215790 : CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
     246                 : {
     247          215790 :     int         bytesread = 0;
     248                 : 
     249          215790 :     switch (cstate->copy_src)
     250                 :     {
     251            1003 :         case COPY_FILE:
     252            1003 :             bytesread = fread(databuf, 1, maxread, cstate->copy_file);
     253            1003 :             if (ferror(cstate->copy_file))
     254 UBC           0 :                 ereport(ERROR,
     255                 :                         (errcode_for_file_access(),
     256                 :                          errmsg("could not read from COPY file: %m")));
     257 CBC        1003 :             if (bytesread == 0)
     258             446 :                 cstate->raw_reached_eof = true;
     259            1003 :             break;
     260          200922 :         case COPY_FRONTEND:
     261          401470 :             while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
     262                 :             {
     263                 :                 int         avail;
     264                 : 
     265          401430 :                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
     266                 :                 {
     267                 :                     /* Try to receive another message */
     268                 :                     int         mtype;
     269                 :                     int         maxmsglen;
     270                 : 
     271          200882 :             readmessage:
     272          200882 :                     HOLD_CANCEL_INTERRUPTS();
     273          200882 :                     pq_startmsgread();
     274          200882 :                     mtype = pq_getbyte();
     275          200882 :                     if (mtype == EOF)
     276 UBC           0 :                         ereport(ERROR,
     277                 :                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     278                 :                                  errmsg("unexpected EOF on client connection with an open transaction")));
     279                 :                     /* Validate message type and set packet size limit */
     280                 :                     switch (mtype)
     281                 :                     {
     282 CBC      200548 :                         case 'd':   /* CopyData */
     283          200548 :                             maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
     284          200548 :                             break;
     285             334 :                         case 'c':   /* CopyDone */
     286                 :                         case 'f':   /* CopyFail */
     287                 :                         case 'H':   /* Flush */
     288                 :                         case 'S':   /* Sync */
     289             334 :                             maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
     290             334 :                             break;
     291 UBC           0 :                         default:
     292               0 :                             ereport(ERROR,
     293                 :                                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
     294                 :                                      errmsg("unexpected message type 0x%02X during COPY from stdin",
     295                 :                                             mtype)));
     296                 :                             maxmsglen = 0;  /* keep compiler quiet */
     297                 :                             break;
     298                 :                     }
     299                 :                     /* Now collect the message body */
     300 CBC      200882 :                     if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
     301 UBC           0 :                         ereport(ERROR,
     302                 :                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     303                 :                                  errmsg("unexpected EOF on client connection with an open transaction")));
     304 CBC      200882 :                     RESUME_CANCEL_INTERRUPTS();
     305                 :                     /* ... and process it */
     306                 :                     switch (mtype)
     307                 :                     {
     308          200548 :                         case 'd':   /* CopyData */
     309          200548 :                             break;
     310             334 :                         case 'c':   /* CopyDone */
     311                 :                             /* COPY IN correctly terminated by frontend */
     312             334 :                             cstate->raw_reached_eof = true;
     313             334 :                             return bytesread;
     314 UBC           0 :                         case 'f':   /* CopyFail */
     315               0 :                             ereport(ERROR,
     316                 :                                     (errcode(ERRCODE_QUERY_CANCELED),
     317                 :                                      errmsg("COPY from stdin failed: %s",
     318                 :                                             pq_getmsgstring(cstate->fe_msgbuf))));
     319                 :                             break;
     320               0 :                         case 'H':   /* Flush */
     321                 :                         case 'S':   /* Sync */
     322                 : 
     323                 :                             /*
     324                 :                              * Ignore Flush/Sync for the convenience of client
     325                 :                              * libraries (such as libpq) that may send those
     326                 :                              * without noticing that the command they just
     327                 :                              * sent was COPY.
     328                 :                              */
     329               0 :                             goto readmessage;
     330               0 :                         default:
     331               0 :                             Assert(false);  /* NOT REACHED */
     332                 :                     }
     333                 :                 }
     334 CBC      200548 :                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
     335          200548 :                 if (avail > maxread)
     336 UBC           0 :                     avail = maxread;
     337 CBC      200548 :                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
     338          200548 :                 databuf = (void *) ((char *) databuf + avail);
     339          200548 :                 maxread -= avail;
     340          200548 :                 bytesread += avail;
     341                 :             }
     342          200588 :             break;
     343           13865 :         case COPY_CALLBACK:
     344           13865 :             bytesread = cstate->data_source_cb(databuf, minread, maxread);
     345           13865 :             break;
     346                 :     }
     347                 : 
     348          215456 :     return bytesread;
     349                 : }
     350                 : 
     351                 : 
     352                 : /*
     353                 :  * These functions do apply some data conversion
     354                 :  */
     355                 : 
     356                 : /*
     357                 :  * CopyGetInt32 reads an int32 that appears in network byte order
     358                 :  *
     359                 :  * Returns true if OK, false if EOF
     360                 :  */
     361                 : static inline bool
     362              93 : CopyGetInt32(CopyFromState cstate, int32 *val)
     363                 : {
     364                 :     uint32      buf;
     365                 : 
     366              93 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     367                 :     {
     368 UBC           0 :         *val = 0;               /* suppress compiler warning */
     369               0 :         return false;
     370                 :     }
     371 CBC          93 :     *val = (int32) pg_ntoh32(buf);
     372              93 :     return true;
     373                 : }
     374                 : 
     375                 : /*
     376                 :  * CopyGetInt16 reads an int16 that appears in network byte order
     377                 :  */
     378                 : static inline bool
     379              21 : CopyGetInt16(CopyFromState cstate, int16 *val)
     380                 : {
     381                 :     uint16      buf;
     382                 : 
     383              21 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     384                 :     {
     385 UBC           0 :         *val = 0;               /* suppress compiler warning */
     386               0 :         return false;
     387                 :     }
     388 CBC          21 :     *val = (int16) pg_ntoh16(buf);
     389              21 :     return true;
     390                 : }
     391                 : 
     392                 : 
     393                 : /*
     394                 :  * Perform encoding conversion on data in 'raw_buf', writing the converted
     395                 :  * data into 'input_buf'.
     396                 :  *
     397                 :  * On entry, there must be some data to convert in 'raw_buf'.
     398                 :  */
     399                 : static void
     400          430876 : CopyConvertBuf(CopyFromState cstate)
     401                 : {
     402                 :     /*
     403                 :      * If the file and server encoding are the same, no encoding conversion is
     404                 :      * required.  However, we still need to verify that the input is valid for
     405                 :      * the encoding.
     406                 :      */
     407          430876 :     if (!cstate->need_transcoding)
     408                 :     {
     409                 :         /*
     410                 :          * When conversion is not required, input_buf and raw_buf are the
     411                 :          * same.  raw_buf_len is the total number of bytes in the buffer, and
     412                 :          * input_buf_len tracks how many of those bytes have already been
     413                 :          * verified.
     414                 :          */
     415          430876 :         int         preverifiedlen = cstate->input_buf_len;
     416          430876 :         int         unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
     417                 :         int         nverified;
     418                 : 
     419          430876 :         if (unverifiedlen == 0)
     420                 :         {
     421                 :             /*
     422                 :              * If no more raw data is coming, report the EOF to the caller.
     423                 :              */
     424          216071 :             if (cstate->raw_reached_eof)
     425             633 :                 cstate->input_reached_eof = true;
     426          216071 :             return;
     427                 :         }
     428                 : 
     429                 :         /*
     430                 :          * Verify the new data, including any residual unverified bytes from
     431                 :          * previous round.
     432                 :          */
     433          214805 :         nverified = pg_encoding_verifymbstr(cstate->file_encoding,
     434          214805 :                                             cstate->raw_buf + preverifiedlen,
     435                 :                                             unverifiedlen);
     436          214805 :         if (nverified == 0)
     437                 :         {
     438                 :             /*
     439                 :              * Could not verify anything.
     440                 :              *
     441                 :              * If there is no more raw input data coming, it means that there
     442                 :              * was an incomplete multi-byte sequence at the end.  Also, if
     443                 :              * there's "enough" input left, we should be able to verify at
     444                 :              * least one character, and a failure to do so means that we've
     445                 :              * hit an invalid byte sequence.
     446                 :              */
     447 UBC           0 :             if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
     448               0 :                 cstate->input_reached_error = true;
     449               0 :             return;
     450                 :         }
     451 CBC      214805 :         cstate->input_buf_len += nverified;
     452                 :     }
     453                 :     else
     454                 :     {
     455                 :         /*
     456                 :          * Encoding conversion is needed.
     457                 :          */
     458                 :         int         nbytes;
     459                 :         unsigned char *src;
     460                 :         int         srclen;
     461                 :         unsigned char *dst;
     462                 :         int         dstlen;
     463                 :         int         convertedlen;
     464                 : 
     465 UBC           0 :         if (RAW_BUF_BYTES(cstate) == 0)
     466                 :         {
     467                 :             /*
     468                 :              * If no more raw data is coming, report the EOF to the caller.
     469                 :              */
     470               0 :             if (cstate->raw_reached_eof)
     471               0 :                 cstate->input_reached_eof = true;
     472               0 :             return;
     473                 :         }
     474                 : 
     475                 :         /*
     476                 :          * First, copy down any unprocessed data.
     477                 :          */
     478               0 :         nbytes = INPUT_BUF_BYTES(cstate);
     479               0 :         if (nbytes > 0 && cstate->input_buf_index > 0)
     480               0 :             memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
     481                 :                     nbytes);
     482               0 :         cstate->input_buf_index = 0;
     483               0 :         cstate->input_buf_len = nbytes;
     484               0 :         cstate->input_buf[nbytes] = '\0';
     485                 : 
     486               0 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     487               0 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     488               0 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     489               0 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     490                 : 
     491                 :         /*
     492                 :          * Do the conversion.  This might stop short, if there is an invalid
     493                 :          * byte sequence in the input.  We'll convert as much as we can in
     494                 :          * that case.
     495                 :          *
     496                 :          * Note: Even if we hit an invalid byte sequence, we don't report the
     497                 :          * error until all the valid bytes have been consumed.  The input
     498                 :          * might contain an end-of-input marker (\.), and we don't want to
     499                 :          * report an error if the invalid byte sequence is after the
     500                 :          * end-of-input marker.  We might unnecessarily convert some data
     501                 :          * after the end-of-input marker as long as it's valid for the
     502                 :          * encoding, but that's harmless.
     503                 :          */
     504               0 :         convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
     505                 :                                                      cstate->file_encoding,
     506                 :                                                      GetDatabaseEncoding(),
     507                 :                                                      src, srclen,
     508                 :                                                      dst, dstlen,
     509                 :                                                      true);
     510               0 :         if (convertedlen == 0)
     511                 :         {
     512                 :             /*
     513                 :              * Could not convert anything.  If there is no more raw input data
     514                 :              * coming, it means that there was an incomplete multi-byte
     515                 :              * sequence at the end.  Also, if there is plenty of input left,
     516                 :              * we should be able to convert at least one character, so a
     517                 :              * failure to do so must mean that we've hit a byte sequence
     518                 :              * that's invalid.
     519                 :              */
     520               0 :             if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
     521               0 :                 cstate->input_reached_error = true;
     522               0 :             return;
     523                 :         }
     524               0 :         cstate->raw_buf_index += convertedlen;
     525               0 :         cstate->input_buf_len += strlen((char *) dst);
     526                 :     }
     527                 : }
     528                 : 
     529                 : /*
     530                 :  * Report an encoding or conversion error.
     531                 :  */
     532                 : static void
     533               0 : CopyConversionError(CopyFromState cstate)
     534                 : {
     535               0 :     Assert(cstate->raw_buf_len > 0);
     536               0 :     Assert(cstate->input_reached_error);
     537                 : 
     538               0 :     if (!cstate->need_transcoding)
     539                 :     {
     540                 :         /*
     541                 :          * Everything up to input_buf_len was successfully verified, and
     542                 :          * input_buf_len points to the invalid or incomplete character.
     543                 :          */
     544               0 :         report_invalid_encoding(cstate->file_encoding,
     545               0 :                                 cstate->raw_buf + cstate->input_buf_len,
     546               0 :                                 cstate->raw_buf_len - cstate->input_buf_len);
     547                 :     }
     548                 :     else
     549                 :     {
     550                 :         /*
     551                 :          * raw_buf_index points to the invalid or untranslatable character. We
     552                 :          * let the conversion routine report the error, because it can provide
     553                 :          * a more specific error message than we could here.  An earlier call
     554                 :          * to the conversion routine in CopyConvertBuf() detected that there
     555                 :          * is an error, now we call the conversion routine again with
     556                 :          * noError=false, to have it throw the error.
     557                 :          */
     558                 :         unsigned char *src;
     559                 :         int         srclen;
     560                 :         unsigned char *dst;
     561                 :         int         dstlen;
     562                 : 
     563               0 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     564               0 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     565               0 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     566               0 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     567                 : 
     568               0 :         (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
     569                 :                                              cstate->file_encoding,
     570                 :                                              GetDatabaseEncoding(),
     571                 :                                              src, srclen,
     572                 :                                              dst, dstlen,
     573                 :                                              false);
     574                 : 
     575                 :         /*
     576                 :          * The conversion routine should have reported an error, so this
     577                 :          * should not be reached.
     578                 :          */
     579               0 :         elog(ERROR, "encoding conversion failed without error");
     580                 :     }
     581                 : }
     582                 : 
     583                 : /*
     584                 :  * Load more data from data source to raw_buf.
     585                 :  *
     586                 :  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
     587                 :  * beginning of the buffer, and we load new data after that.
     588                 :  */
     589                 : static void
     590 CBC      215456 : CopyLoadRawBuf(CopyFromState cstate)
     591                 : {
     592                 :     int         nbytes;
     593                 :     int         inbytes;
     594                 : 
     595                 :     /*
     596                 :      * In text mode, if encoding conversion is not required, raw_buf and
     597                 :      * input_buf point to the same buffer.  Their len/index better agree, too.
     598                 :      */
     599          215456 :     if (cstate->raw_buf == cstate->input_buf)
     600                 :     {
     601          215438 :         Assert(!cstate->need_transcoding);
     602          215438 :         Assert(cstate->raw_buf_index == cstate->input_buf_index);
     603          215438 :         Assert(cstate->input_buf_len <= cstate->raw_buf_len);
     604                 :     }
     605                 : 
     606                 :     /*
     607                 :      * Copy down the unprocessed data if any.
     608                 :      */
     609          215456 :     nbytes = RAW_BUF_BYTES(cstate);
     610          215456 :     if (nbytes > 0 && cstate->raw_buf_index > 0)
     611 UBC           0 :         memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
     612                 :                 nbytes);
     613 CBC      215456 :     cstate->raw_buf_len -= cstate->raw_buf_index;
     614          215456 :     cstate->raw_buf_index = 0;
     615                 : 
     616                 :     /*
     617                 :      * If raw_buf and input_buf are in fact the same buffer, adjust the
     618                 :      * input_buf variables, too.
     619                 :      */
     620          215456 :     if (cstate->raw_buf == cstate->input_buf)
     621                 :     {
     622          215438 :         cstate->input_buf_len -= cstate->input_buf_index;
     623          215438 :         cstate->input_buf_index = 0;
     624                 :     }
     625                 : 
     626                 :     /* Load more data */
     627          215456 :     inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
     628          215456 :                           1, RAW_BUF_SIZE - cstate->raw_buf_len);
     629          215456 :     nbytes += inbytes;
     630          215456 :     cstate->raw_buf[nbytes] = '\0';
     631          215456 :     cstate->raw_buf_len = nbytes;
     632                 : 
     633          215456 :     cstate->bytes_processed += inbytes;
     634          215456 :     pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
     635                 : 
     636          215456 :     if (inbytes == 0)
     637             639 :         cstate->raw_reached_eof = true;
     638          215456 : }
     639                 : 
     640                 : /*
     641                 :  * CopyLoadInputBuf loads some more data into input_buf
     642                 :  *
     643                 :  * On return, at least one more input character is loaded into
     644                 :  * input_buf, or input_reached_eof is set.
     645                 :  *
     646                 :  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
     647                 :  * of the buffer and then we load more data after that.
     648                 :  */
     649                 : static void
     650          215438 : CopyLoadInputBuf(CopyFromState cstate)
     651                 : {
     652          215438 :     int         nbytes = INPUT_BUF_BYTES(cstate);
     653                 : 
     654                 :     /*
     655                 :      * The caller has updated input_buf_index to indicate how much of the
     656                 :      * input has been consumed and isn't needed anymore.  If input_buf is the
     657                 :      * same physical area as raw_buf, update raw_buf_index accordingly.
     658                 :      */
     659          215438 :     if (cstate->raw_buf == cstate->input_buf)
     660                 :     {
     661          215438 :         Assert(!cstate->need_transcoding);
     662          215438 :         Assert(cstate->input_buf_index >= cstate->raw_buf_index);
     663          215438 :         cstate->raw_buf_index = cstate->input_buf_index;
     664                 :     }
     665                 : 
     666                 :     for (;;)
     667                 :     {
     668                 :         /* If we now have some unconverted data, try to convert it */
     669          430876 :         CopyConvertBuf(cstate);
     670                 : 
     671                 :         /* If we now have some more input bytes ready, return them */
     672          430876 :         if (INPUT_BUF_BYTES(cstate) > nbytes)
     673          214805 :             return;
     674                 : 
     675                 :         /*
     676                 :          * If we reached an invalid byte sequence, or we're at an incomplete
     677                 :          * multi-byte character but there is no more raw input data, report
     678                 :          * conversion error.
     679                 :          */
     680          216071 :         if (cstate->input_reached_error)
     681 UBC           0 :             CopyConversionError(cstate);
     682                 : 
     683                 :         /* no more input, and everything has been converted */
     684 CBC      216071 :         if (cstate->input_reached_eof)
     685             633 :             break;
     686                 : 
     687                 :         /* Try to load more raw data */
     688          215438 :         Assert(!cstate->raw_reached_eof);
     689          215438 :         CopyLoadRawBuf(cstate);
     690                 :     }
     691                 : }
     692                 : 
     693                 : /*
     694                 :  * CopyReadBinaryData
     695                 :  *
     696                 :  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
     697                 :  * and writes them to 'dest'.  Returns the number of bytes read (which
     698                 :  * would be less than 'nbytes' only if we reach EOF).
     699                 :  */
     700                 : static int
     701             191 : CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
     702                 : {
     703             191 :     int         copied_bytes = 0;
     704                 : 
     705             191 :     if (RAW_BUF_BYTES(cstate) >= nbytes)
     706                 :     {
     707                 :         /* Enough bytes are present in the buffer. */
     708             173 :         memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
     709             173 :         cstate->raw_buf_index += nbytes;
     710             173 :         copied_bytes = nbytes;
     711                 :     }
     712                 :     else
     713                 :     {
     714                 :         /*
     715                 :          * Not enough bytes in the buffer, so must read from the file.  Need
     716                 :          * to loop since 'nbytes' could be larger than the buffer size.
     717                 :          */
     718                 :         do
     719                 :         {
     720                 :             int         copy_bytes;
     721                 : 
     722                 :             /* Load more data if buffer is empty. */
     723              18 :             if (RAW_BUF_BYTES(cstate) == 0)
     724                 :             {
     725              18 :                 CopyLoadRawBuf(cstate);
     726              18 :                 if (cstate->raw_reached_eof)
     727               6 :                     break;      /* EOF */
     728                 :             }
     729                 : 
     730                 :             /* Transfer some bytes. */
     731              12 :             copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
     732              12 :             memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
     733              12 :             cstate->raw_buf_index += copy_bytes;
     734              12 :             dest += copy_bytes;
     735              12 :             copied_bytes += copy_bytes;
     736              12 :         } while (copied_bytes < nbytes);
     737                 :     }
     738                 : 
     739             191 :     return copied_bytes;
     740                 : }
     741                 : 
     742                 : /*
     743                 :  * Read raw fields in the next line for COPY FROM in text or csv mode.
     744                 :  * Return false if no more lines.
     745                 :  *
     746                 :  * An internal temporary buffer is returned via 'fields'. It is valid until
     747                 :  * the next call of the function. Since the function returns all raw fields
     748                 :  * in the input file, 'nfields' could be different from the number of columns
     749                 :  * in the relation.
     750                 :  *
     751                 :  * NOTE: force_not_null option are not applied to the returned fields.
     752                 :  */
     753                 : bool
     754          894910 : NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
     755                 : {
     756                 :     int         fldct;
     757                 :     bool        done;
     758                 : 
     759                 :     /* only available for text or csv input */
     760          894910 :     Assert(!cstate->opts.binary);
     761                 : 
     762                 :     /* on input check that the header line is correct if needed */
     763          894910 :     if (cstate->cur_lineno == 0 && cstate->opts.header_line)
     764                 :     {
     765                 :         ListCell   *cur;
     766                 :         TupleDesc   tupDesc;
     767                 : 
     768              55 :         tupDesc = RelationGetDescr(cstate->rel);
     769                 : 
     770              55 :         cstate->cur_lineno++;
     771              55 :         done = CopyReadLine(cstate);
     772                 : 
     773              55 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     774                 :         {
     775                 :             int         fldnum;
     776                 : 
     777              38 :             if (cstate->opts.csv_mode)
     778               5 :                 fldct = CopyReadAttributesCSV(cstate);
     779                 :             else
     780              33 :                 fldct = CopyReadAttributesText(cstate);
     781                 : 
     782              38 :             if (fldct != list_length(cstate->attnumlist))
     783              12 :                 ereport(ERROR,
     784                 :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     785                 :                          errmsg("wrong number of fields in header line: got %d, expected %d",
     786                 :                                 fldct, list_length(cstate->attnumlist))));
     787                 : 
     788              26 :             fldnum = 0;
     789              79 :             foreach(cur, cstate->attnumlist)
     790                 :             {
     791              63 :                 int         attnum = lfirst_int(cur);
     792                 :                 char       *colName;
     793              63 :                 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
     794                 : 
     795              63 :                 Assert(fldnum < cstate->max_fields);
     796                 : 
     797              63 :                 colName = cstate->raw_fields[fldnum++];
     798              63 :                 if (colName == NULL)
     799               3 :                     ereport(ERROR,
     800                 :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     801                 :                              errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
     802                 :                                     fldnum, cstate->opts.null_print, NameStr(attr->attname))));
     803                 : 
     804              60 :                 if (namestrcmp(&attr->attname, colName) != 0)
     805                 :                 {
     806               7 :                     ereport(ERROR,
     807                 :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     808                 :                              errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
     809                 :                                     fldnum, colName, NameStr(attr->attname))));
     810                 :                 }
     811                 :             }
     812                 :         }
     813                 : 
     814              33 :         if (done)
     815 UBC           0 :             return false;
     816                 :     }
     817                 : 
     818 CBC      894888 :     cstate->cur_lineno++;
     819                 : 
     820                 :     /* Actually read the line into memory here */
     821          894888 :     done = CopyReadLine(cstate);
     822                 : 
     823                 :     /*
     824                 :      * EOF at start of line means we're done.  If we see EOF after some
     825                 :      * characters, we act as though it was newline followed by EOF, ie,
     826                 :      * process the line and then exit loop on next iteration.
     827                 :      */
     828          894888 :     if (done && cstate->line_buf.len == 0)
     829             927 :         return false;
     830                 : 
     831                 :     /* Parse the line into de-escaped field values */
     832          893961 :     if (cstate->opts.csv_mode)
     833             188 :         fldct = CopyReadAttributesCSV(cstate);
     834                 :     else
     835          893773 :         fldct = CopyReadAttributesText(cstate);
     836                 : 
     837          893955 :     *fields = cstate->raw_fields;
     838          893955 :     *nfields = fldct;
     839          893955 :     return true;
     840                 : }
     841                 : 
     842                 : /*
     843                 :  * Read next tuple from file for COPY FROM. Return false if no more tuples.
     844                 :  *
     845                 :  * 'econtext' is used to evaluate default expression for each column that is
     846                 :  * either not read from the file or is using the DEFAULT option of COPY FROM.
     847                 :  * It can be NULL when no default values are used, i.e. when all columns are
     848                 :  * read from the file, and DEFAULT option is unset.
     849                 :  *
     850                 :  * 'values' and 'nulls' arrays must be the same length as columns of the
     851                 :  * relation passed to BeginCopyFrom. This function fills the arrays.
     852                 :  */
     853                 : bool
     854 GIC      894931 : NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
     855 ECB             :              Datum *values, bool *nulls)
     856                 : {
     857                 :     TupleDesc   tupDesc;
     858                 :     AttrNumber  num_phys_attrs,
     859                 :                 attr_count,
     860 GIC      894931 :                 num_defaults = cstate->num_defaults;
     861 CBC      894931 :     FmgrInfo   *in_functions = cstate->in_functions;
     862          894931 :     Oid        *typioparams = cstate->typioparams;
     863 ECB             :     int         i;
     864 GIC      894931 :     int        *defmap = cstate->defmap;
     865 CBC      894931 :     ExprState **defexprs = cstate->defexprs;
     866 ECB             : 
     867 GIC      894931 :     tupDesc = RelationGetDescr(cstate->rel);
     868 CBC      894931 :     num_phys_attrs = tupDesc->natts;
     869          894931 :     attr_count = list_length(cstate->attnumlist);
     870 ECB             : 
     871                 :     /* Initialize all values for row to NULL */
     872 GIC     4580403 :     MemSet(values, 0, num_phys_attrs * sizeof(Datum));
     873 CBC      894931 :     MemSet(nulls, true, num_phys_attrs * sizeof(bool));
     874 GNC      894931 :     cstate->defaults = (bool *) palloc0(num_phys_attrs * sizeof(bool));
     875 ECB             : 
     876 CBC      894931 :     if (!cstate->opts.binary)
     877                 :     {
     878 ECB             :         char      **field_strings;
     879                 :         ListCell   *cur;
     880                 :         int         fldct;
     881                 :         int         fieldno;
     882                 :         char       *string;
     883                 : 
     884                 :         /* read raw fields in the next line */
     885 GIC      894910 :         if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
     886             927 :             return false;
     887 ECB             : 
     888                 :         /* check for overflowing fields */
     889 GIC      893955 :         if (attr_count > 0 && fldct > attr_count)
     890               6 :             ereport(ERROR,
     891 ECB             :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     892                 :                      errmsg("extra data after last expected column")));
     893                 : 
     894 GIC      893949 :         fieldno = 0;
     895                 : 
     896 ECB             :         /* Loop to read the user attributes on the line. */
     897 GIC     4346464 :         foreach(cur, cstate->attnumlist)
     898                 :         {
     899 CBC     3452534 :             int         attnum = lfirst_int(cur);
     900 GIC     3452534 :             int         m = attnum - 1;
     901 CBC     3452534 :             Form_pg_attribute att = TupleDescAttr(tupDesc, m);
     902 ECB             : 
     903 CBC     3452534 :             if (fieldno >= fldct)
     904 GIC           6 :                 ereport(ERROR,
     905 ECB             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     906                 :                          errmsg("missing data for column \"%s\"",
     907                 :                                 NameStr(att->attname))));
     908 GIC     3452528 :             string = field_strings[fieldno++];
     909                 : 
     910 CBC     3452528 :             if (cstate->convert_select_flags &&
     911 GIC          10 :                 !cstate->convert_select_flags[m])
     912 ECB             :             {
     913                 :                 /* ignore input field, leaving column as NULL */
     914 GIC           5 :                 continue;
     915                 :             }
     916 ECB             : 
     917 GIC     3452523 :             if (cstate->opts.csv_mode)
     918                 :             {
     919 CBC         399 :                 if (string == NULL &&
     920 GIC          16 :                     cstate->opts.force_notnull_flags[m])
     921 ECB             :                 {
     922                 :                     /*
     923                 :                      * FORCE_NOT_NULL option is set and column is NULL -
     924                 :                      * convert it to the NULL string.
     925                 :                      */
     926 GIC           8 :                     string = cstate->opts.null_print;
     927                 :                 }
     928 CBC         391 :                 else if (string != NULL && cstate->opts.force_null_flags[m]
     929 GIC          10 :                          && strcmp(string, cstate->opts.null_print) == 0)
     930 ECB             :                 {
     931                 :                     /*
     932                 :                      * FORCE_NULL option is set and column matches the NULL
     933                 :                      * string. It must have been quoted, or otherwise the
     934                 :                      * string would already have been set to NULL. Convert it
     935                 :                      * to NULL as specified.
     936                 :                      */
     937 GIC           7 :                     string = NULL;
     938                 :                 }
     939 ECB             :             }
     940                 : 
     941 GIC     3452523 :             cstate->cur_attname = NameStr(att->attname);
     942         3452523 :             cstate->cur_attval = string;
     943                 : 
     944 CBC     3452523 :             if (string != NULL)
     945 GIC     3450135 :                 nulls[m] = false;
     946                 : 
     947 GNC     3452523 :             if (cstate->defaults[m])
     948                 :             {
     949                 :                 /*
     950                 :                  * The caller must supply econtext and have switched into the
     951                 :                  * per-tuple memory context in it.
     952                 :                  */
     953              30 :                 Assert(econtext != NULL);
     954              30 :                 Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
     955                 : 
     956              30 :                 values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
     957                 :             }
     958                 :             else
     959         3452480 :                 values[m] = InputFunctionCall(&in_functions[m],
     960                 :                                               string,
     961         3452493 :                                               typioparams[m],
     962                 :                                               att->atttypmod);
     963                 : 
     964 CBC     3452510 :             cstate->cur_attname = NULL;
     965 GIC     3452510 :             cstate->cur_attval = NULL;
     966                 :         }
     967                 : 
     968          893930 :         Assert(fieldno == attr_count);
     969                 :     }
     970 ECB             :     else
     971                 :     {
     972                 :         /* binary */
     973                 :         int16       fld_count;
     974                 :         ListCell   *cur;
     975                 : 
     976 CBC          21 :         cstate->cur_lineno++;
     977                 : 
     978              21 :         if (!CopyGetInt16(cstate, &fld_count))
     979                 :         {
     980                 :             /* EOF detected (end of file, or protocol-level EOF) */
     981               6 :             return false;
     982 ECB             :         }
     983                 : 
     984 GIC          21 :         if (fld_count == -1)
     985 ECB             :         {
     986                 :             /*
     987                 :              * Received EOF marker.  Wait for the protocol-level EOF, and
     988                 :              * complain if it doesn't come immediately.  In COPY FROM STDIN,
     989                 :              * this ensures that we correctly handle CopyFail, if client
     990                 :              * chooses to send that now.  When copying from file, we could
     991                 :              * ignore the rest of the file like in text mode, but we choose to
     992                 :              * be consistent with the COPY FROM STDIN case.
     993                 :              */
     994                 :             char        dummy;
     995                 : 
     996 GIC           6 :             if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
     997 UIC           0 :                 ereport(ERROR,
     998 ECB             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     999                 :                          errmsg("received copy data after EOF marker")));
    1000 GIC           6 :             return false;
    1001 ECB             :         }
    1002                 : 
    1003 GIC          15 :         if (fld_count != attr_count)
    1004 UIC           0 :             ereport(ERROR,
    1005                 :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1006                 :                      errmsg("row field count is %d, expected %d",
    1007                 :                             (int) fld_count, attr_count)));
    1008                 : 
    1009 GIC          93 :         foreach(cur, cstate->attnumlist)
    1010                 :         {
    1011              79 :             int         attnum = lfirst_int(cur);
    1012              79 :             int         m = attnum - 1;
    1013 CBC          79 :             Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1014 EUB             : 
    1015 GIC          79 :             cstate->cur_attname = NameStr(att->attname);
    1016             157 :             values[m] = CopyReadBinaryAttribute(cstate,
    1017 CBC          79 :                                                 &in_functions[m],
    1018 GIC          79 :                                                 typioparams[m],
    1019                 :                                                 att->atttypmod,
    1020 ECB             :                                                 &nulls[m]);
    1021 GBC          78 :             cstate->cur_attname = NULL;
    1022                 :         }
    1023                 :     }
    1024                 : 
    1025                 :     /*
    1026 ECB             :      * Now compute and insert any defaults available for the columns not
    1027                 :      * provided by the input data.  Anything not processed here or above will
    1028                 :      * remain NULL.
    1029                 :      */
    1030 CBC      894209 :     for (i = 0; i < num_defaults; i++)
    1031                 :     {
    1032 ECB             :         /*
    1033                 :          * The caller must supply econtext and have switched into the
    1034                 :          * per-tuple memory context in it.
    1035                 :          */
    1036 GIC         265 :         Assert(econtext != NULL);
    1037             265 :         Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
    1038 ECB             : 
    1039 GNC         265 :         values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
    1040 GIC         265 :                                          &nulls[defmap[i]]);
    1041                 :     }
    1042                 : 
    1043 GNC      893944 :     pfree(cstate->defaults);
    1044                 : 
    1045 GIC      893944 :     return true;
    1046                 : }
    1047                 : 
    1048                 : /*
    1049 ECB             :  * Read the next input line and stash it in line_buf.
    1050                 :  *
    1051                 :  * Result is true if read was terminated by EOF, false if terminated
    1052                 :  * by newline.  The terminating newline or EOF marker is not included
    1053                 :  * in the final value of line_buf.
    1054                 :  */
    1055                 : static bool
    1056 CBC      894943 : CopyReadLine(CopyFromState cstate)
    1057                 : {
    1058 ECB             :     bool        result;
    1059                 : 
    1060 GIC      894943 :     resetStringInfo(&cstate->line_buf);
    1061          894943 :     cstate->line_buf_valid = false;
    1062 ECB             : 
    1063                 :     /* Parse data and transfer into line_buf */
    1064 CBC      894943 :     result = CopyReadLineText(cstate);
    1065                 : 
    1066 GIC      894943 :     if (result)
    1067                 :     {
    1068                 :         /*
    1069                 :          * Reached EOF.  In protocol version 3, we should ignore anything
    1070                 :          * after \. up to the protocol end of copy data.  (XXX maybe better
    1071                 :          * not to treat \. as special?)
    1072                 :          */
    1073             927 :         if (cstate->copy_src == COPY_FRONTEND)
    1074                 :         {
    1075 ECB             :             int         inbytes;
    1076                 : 
    1077                 :             do
    1078                 :             {
    1079 CBC         334 :                 inbytes = CopyGetData(cstate, cstate->input_buf,
    1080 ECB             :                                       1, INPUT_BUF_SIZE);
    1081 GIC         334 :             } while (inbytes > 0);
    1082             334 :             cstate->input_buf_index = 0;
    1083 CBC         334 :             cstate->input_buf_len = 0;
    1084 GIC         334 :             cstate->raw_buf_index = 0;
    1085 CBC         334 :             cstate->raw_buf_len = 0;
    1086                 :         }
    1087                 :     }
    1088                 :     else
    1089                 :     {
    1090                 :         /*
    1091                 :          * If we didn't hit EOF, then we must have transferred the EOL marker
    1092 ECB             :          * to line_buf along with the data.  Get rid of it.
    1093                 :          */
    1094 GIC      894016 :         switch (cstate->eol_type)
    1095                 :         {
    1096          894016 :             case EOL_NL:
    1097          894016 :                 Assert(cstate->line_buf.len >= 1);
    1098 CBC      894016 :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1099 GIC      894016 :                 cstate->line_buf.len--;
    1100 CBC      894016 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1101          894016 :                 break;
    1102 LBC           0 :             case EOL_CR:
    1103               0 :                 Assert(cstate->line_buf.len >= 1);
    1104               0 :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
    1105 UIC           0 :                 cstate->line_buf.len--;
    1106               0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1107               0 :                 break;
    1108               0 :             case EOL_CRNL:
    1109               0 :                 Assert(cstate->line_buf.len >= 2);
    1110               0 :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
    1111               0 :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1112               0 :                 cstate->line_buf.len -= 2;
    1113 LBC           0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1114 UIC           0 :                 break;
    1115 LBC           0 :             case EOL_UNKNOWN:
    1116 ECB             :                 /* shouldn't get here */
    1117 LBC           0 :                 Assert(false);
    1118 ECB             :                 break;
    1119                 :         }
    1120                 :     }
    1121 EUB             : 
    1122                 :     /* Now it's safe to use the buffer in error messages */
    1123 GBC      894943 :     cstate->line_buf_valid = true;
    1124 EUB             : 
    1125 GBC      894943 :     return result;
    1126 EUB             : }
    1127                 : 
    1128                 : /*
    1129                 :  * CopyReadLineText - inner loop of CopyReadLine for text mode
    1130                 :  */
    1131                 : static bool
    1132 GBC      894943 : CopyReadLineText(CopyFromState cstate)
    1133 EUB             : {
    1134                 :     char       *copy_input_buf;
    1135                 :     int         input_buf_ptr;
    1136                 :     int         copy_buf_len;
    1137 GIC      894943 :     bool        need_data = false;
    1138          894943 :     bool        hit_eof = false;
    1139          894943 :     bool        result = false;
    1140                 : 
    1141                 :     /* CSV variables */
    1142 CBC      894943 :     bool        first_char_in_line = true;
    1143 GIC      894943 :     bool        in_quote = false,
    1144 CBC      894943 :                 last_was_esc = false;
    1145 GIC      894943 :     char        quotec = '\0';
    1146          894943 :     char        escapec = '\0';
    1147                 : 
    1148          894943 :     if (cstate->opts.csv_mode)
    1149                 :     {
    1150             283 :         quotec = cstate->opts.quote[0];
    1151 CBC         283 :         escapec = cstate->opts.escape[0];
    1152                 :         /* ignore special escape processing if it's the same as quotec */
    1153 GIC         283 :         if (quotec == escapec)
    1154             215 :             escapec = '\0';
    1155                 :     }
    1156 ECB             : 
    1157                 :     /*
    1158                 :      * The objective of this loop is to transfer the entire next input line
    1159                 :      * into line_buf.  Hence, we only care for detecting newlines (\r and/or
    1160                 :      * \n) and the end-of-copy marker (\.).
    1161                 :      *
    1162                 :      * In CSV mode, \r and \n inside a quoted field are just part of the data
    1163                 :      * value and are put in line_buf.  We keep just enough state to know if we
    1164                 :      * are currently in a quoted field or not.
    1165                 :      *
    1166                 :      * These four characters, and the CSV escape and quote characters, are
    1167                 :      * assumed the same in frontend and backend encodings.
    1168                 :      *
    1169                 :      * The input has already been converted to the database encoding.  All
    1170                 :      * supported server encodings have the property that all bytes in a
    1171                 :      * multi-byte sequence have the high bit set, so a multibyte character
    1172                 :      * cannot contain any newline or escape characters embedded in the
    1173                 :      * multibyte sequence.  Therefore, we can process the input byte-by-byte,
    1174                 :      * regardless of the encoding.
    1175                 :      *
    1176                 :      * For speed, we try to move data from input_buf to line_buf in chunks
    1177                 :      * rather than one character at a time.  input_buf_ptr points to the next
    1178                 :      * character to examine; any characters from input_buf_index to
    1179                 :      * input_buf_ptr have been determined to be part of the line, but not yet
    1180                 :      * transferred to line_buf.
    1181                 :      *
    1182                 :      * For a little extra speed within the loop, we copy input_buf and
    1183                 :      * input_buf_len into local variables.
    1184                 :      */
    1185 GIC      894943 :     copy_input_buf = cstate->input_buf;
    1186          894943 :     input_buf_ptr = cstate->input_buf_index;
    1187          894943 :     copy_buf_len = cstate->input_buf_len;
    1188                 : 
    1189                 :     for (;;)
    1190        21722128 :     {
    1191                 :         int         prev_raw_ptr;
    1192                 :         char        c;
    1193                 : 
    1194                 :         /*
    1195                 :          * Load more data if needed.
    1196                 :          *
    1197                 :          * TODO: We could just force four bytes of read-ahead and avoid the
    1198                 :          * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
    1199                 :          * unsafe with the old v2 COPY protocol, but we don't support that
    1200                 :          * anymore.
    1201                 :          */
    1202        22617071 :         if (input_buf_ptr >= copy_buf_len || need_data)
    1203                 :         {
    1204 CBC      215438 :             REFILL_LINEBUF;
    1205 ECB             : 
    1206 CBC      215438 :             CopyLoadInputBuf(cstate);
    1207                 :             /* update our local variables */
    1208 GIC      215438 :             hit_eof = cstate->input_reached_eof;
    1209 CBC      215438 :             input_buf_ptr = cstate->input_buf_index;
    1210 GIC      215438 :             copy_buf_len = cstate->input_buf_len;
    1211                 : 
    1212                 :             /*
    1213                 :              * If we are completely out of data, break out of the loop,
    1214                 :              * reporting EOF.
    1215                 :              */
    1216          215438 :             if (INPUT_BUF_BYTES(cstate) <= 0)
    1217                 :             {
    1218             633 :                 result = true;
    1219             633 :                 break;
    1220                 :             }
    1221 CBC      214805 :             need_data = false;
    1222                 :         }
    1223 ECB             : 
    1224                 :         /* OK to fetch a character */
    1225 CBC    22616438 :         prev_raw_ptr = input_buf_ptr;
    1226 GIC    22616438 :         c = copy_input_buf[input_buf_ptr++];
    1227 ECB             : 
    1228 CBC    22616438 :         if (cstate->opts.csv_mode)
    1229 ECB             :         {
    1230                 :             /*
    1231                 :              * If character is '\\' or '\r', we may need to look ahead below.
    1232                 :              * Force fetch of the next character if we don't already have it.
    1233                 :              * We need to do this before changing CSV state, in case one of
    1234                 :              * these characters is also the quote or escape character.
    1235                 :              */
    1236 GIC        2357 :             if (c == '\\' || c == '\r')
    1237 ECB             :             {
    1238 CBC         150 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1239                 :             }
    1240 ECB             : 
    1241                 :             /*
    1242                 :              * Dealing with quotes and escapes here is mildly tricky. If the
    1243                 :              * quote char is also the escape char, there's no problem - we
    1244                 :              * just use the char as a toggle. If they are different, we need
    1245                 :              * to ensure that we only take account of an escape inside a
    1246                 :              * quoted field and immediately preceding a quote char, and not
    1247                 :              * the second in an escape-escape sequence.
    1248                 :              */
    1249 GIC        2357 :             if (in_quote && c == escapec)
    1250              24 :                 last_was_esc = !last_was_esc;
    1251            2357 :             if (c == quotec && !last_was_esc)
    1252             180 :                 in_quote = !in_quote;
    1253            2357 :             if (c != escapec)
    1254            2330 :                 last_was_esc = false;
    1255 ECB             : 
    1256                 :             /*
    1257                 :              * Updating the line count for embedded CR and/or LF chars is
    1258                 :              * necessarily a little fragile - this test is probably about the
    1259                 :              * best we can do.  (XXX it's arguable whether we should do this
    1260                 :              * at all --- is cur_lineno a physical or logical count?)
    1261                 :              */
    1262 GIC        2357 :             if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
    1263              18 :                 cstate->cur_lineno++;
    1264                 :         }
    1265                 : 
    1266                 :         /* Process \r */
    1267        22616438 :         if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
    1268 ECB             :         {
    1269                 :             /* Check for \r\n on first line, _and_ handle \r\n. */
    1270 LBC           0 :             if (cstate->eol_type == EOL_UNKNOWN ||
    1271               0 :                 cstate->eol_type == EOL_CRNL)
    1272 ECB             :             {
    1273                 :                 /*
    1274                 :                  * If need more data, go back to loop top to load it.
    1275                 :                  *
    1276                 :                  * Note that if we are at EOF, c will wind up as '\0' because
    1277                 :                  * of the guaranteed pad of input_buf.
    1278                 :                  */
    1279 UIC           0 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1280                 : 
    1281 ECB             :                 /* get next char */
    1282 LBC           0 :                 c = copy_input_buf[input_buf_ptr];
    1283                 : 
    1284 UIC           0 :                 if (c == '\n')
    1285                 :                 {
    1286 LBC           0 :                     input_buf_ptr++;    /* eat newline */
    1287 UIC           0 :                     cstate->eol_type = EOL_CRNL; /* in case not set yet */
    1288                 :                 }
    1289 EUB             :                 else
    1290                 :                 {
    1291                 :                     /* found \r, but no \n */
    1292 UIC           0 :                     if (cstate->eol_type == EOL_CRNL)
    1293               0 :                         ereport(ERROR,
    1294                 :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1295                 :                                  !cstate->opts.csv_mode ?
    1296                 :                                  errmsg("literal carriage return found in data") :
    1297                 :                                  errmsg("unquoted carriage return found in data"),
    1298 EUB             :                                  !cstate->opts.csv_mode ?
    1299                 :                                  errhint("Use \"\\r\" to represent carriage return.") :
    1300                 :                                  errhint("Use quoted CSV field to represent carriage return.")));
    1301                 : 
    1302                 :                     /*
    1303                 :                      * if we got here, it is the first line and we didn't find
    1304                 :                      * \n, so don't consume the peeked character
    1305                 :                      */
    1306 UBC           0 :                     cstate->eol_type = EOL_CR;
    1307                 :                 }
    1308                 :             }
    1309 UIC           0 :             else if (cstate->eol_type == EOL_NL)
    1310               0 :                 ereport(ERROR,
    1311 EUB             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1312                 :                          !cstate->opts.csv_mode ?
    1313                 :                          errmsg("literal carriage return found in data") :
    1314                 :                          errmsg("unquoted carriage return found in data"),
    1315                 :                          !cstate->opts.csv_mode ?
    1316                 :                          errhint("Use \"\\r\" to represent carriage return.") :
    1317                 :                          errhint("Use quoted CSV field to represent carriage return.")));
    1318                 :             /* If reach here, we have found the line terminator */
    1319 UIC           0 :             break;
    1320                 :         }
    1321                 : 
    1322                 :         /* Process \n */
    1323 GIC    22616438 :         if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
    1324                 :         {
    1325 GBC      894016 :             if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
    1326 UIC           0 :                 ereport(ERROR,
    1327                 :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1328 EUB             :                          !cstate->opts.csv_mode ?
    1329                 :                          errmsg("literal newline found in data") :
    1330                 :                          errmsg("unquoted newline found in data"),
    1331                 :                          !cstate->opts.csv_mode ?
    1332                 :                          errhint("Use \"\\n\" to represent newline.") :
    1333                 :                          errhint("Use quoted CSV field to represent newline.")));
    1334 GIC      894016 :             cstate->eol_type = EOL_NL;   /* in case not set yet */
    1335                 :             /* If reach here, we have found the line terminator */
    1336          894016 :             break;
    1337                 :         }
    1338 EUB             : 
    1339                 :         /*
    1340                 :          * In CSV mode, we only recognize \. alone on a line.  This is because
    1341                 :          * \. is a valid CSV data value.
    1342 ECB             :          */
    1343 GIC    21722422 :         if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
    1344 ECB             :         {
    1345 EUB             :             char        c2;
    1346                 : 
    1347 GIC        4255 :             IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1348            4255 :             IF_NEED_REFILL_AND_EOF_BREAK(0);
    1349                 : 
    1350                 :             /* -----
    1351                 :              * get next character
    1352                 :              * Note: we do not change c so if it isn't \., we can fall
    1353 ECB             :              * through and continue processing.
    1354                 :              * -----
    1355                 :              */
    1356 GIC        4255 :             c2 = copy_input_buf[input_buf_ptr];
    1357                 : 
    1358            4255 :             if (c2 == '.')
    1359                 :             {
    1360             297 :                 input_buf_ptr++;    /* consume the '.' */
    1361                 : 
    1362 ECB             :                 /*
    1363                 :                  * Note: if we loop back for more data here, it does not
    1364                 :                  * matter that the CSV state change checks are re-executed; we
    1365                 :                  * will come back here with no important state changed.
    1366                 :                  */
    1367 CBC         297 :                 if (cstate->eol_type == EOL_CRNL)
    1368                 :                 {
    1369                 :                     /* Get the next character */
    1370 UIC           0 :                     IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1371                 :                     /* if hit_eof, c2 will become '\0' */
    1372               0 :                     c2 = copy_input_buf[input_buf_ptr++];
    1373                 : 
    1374               0 :                     if (c2 == '\n')
    1375 ECB             :                     {
    1376 UIC           0 :                         if (!cstate->opts.csv_mode)
    1377 LBC           0 :                             ereport(ERROR,
    1378                 :                                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1379 ECB             :                                      errmsg("end-of-copy marker does not match previous newline style")));
    1380                 :                         else
    1381 UIC           0 :                             NO_END_OF_COPY_GOTO;
    1382                 :                     }
    1383               0 :                     else if (c2 != '\r')
    1384                 :                     {
    1385               0 :                         if (!cstate->opts.csv_mode)
    1386 LBC           0 :                             ereport(ERROR,
    1387                 :                                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1388                 :                                      errmsg("end-of-copy marker corrupt")));
    1389 EUB             :                         else
    1390 UIC           0 :                             NO_END_OF_COPY_GOTO;
    1391 EUB             :                     }
    1392                 :                 }
    1393                 : 
    1394                 :                 /* Get the next character */
    1395 GBC         297 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1396 EUB             :                 /* if hit_eof, c2 will become '\0' */
    1397 GIC         297 :                 c2 = copy_input_buf[input_buf_ptr++];
    1398                 : 
    1399             297 :                 if (c2 != '\r' && c2 != '\n')
    1400 EUB             :                 {
    1401 GIC           3 :                     if (!cstate->opts.csv_mode)
    1402 UBC           0 :                         ereport(ERROR,
    1403                 :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1404 EUB             :                                  errmsg("end-of-copy marker corrupt")));
    1405                 :                     else
    1406 GIC           3 :                         NO_END_OF_COPY_GOTO;
    1407                 :                 }
    1408                 : 
    1409 GBC         294 :                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
    1410 GIC         294 :                     (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
    1411             294 :                     (cstate->eol_type == EOL_CR && c2 != '\r'))
    1412                 :                 {
    1413 UIC           0 :                     ereport(ERROR,
    1414 ECB             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1415                 :                              errmsg("end-of-copy marker does not match previous newline style")));
    1416                 :                 }
    1417                 : 
    1418                 :                 /*
    1419                 :                  * Transfer only the data before the \. into line_buf, then
    1420                 :                  * discard the data and the \. sequence.
    1421 EUB             :                  */
    1422 GIC         294 :                 if (prev_raw_ptr > cstate->input_buf_index)
    1423 UIC           0 :                     appendBinaryStringInfo(&cstate->line_buf,
    1424               0 :                                            cstate->input_buf + cstate->input_buf_index,
    1425 LBC           0 :                                            prev_raw_ptr - cstate->input_buf_index);
    1426 GIC         294 :                 cstate->input_buf_index = input_buf_ptr;
    1427             294 :                 result = true;  /* report EOF */
    1428 CBC         294 :                 break;
    1429 ECB             :             }
    1430 CBC        3958 :             else if (!cstate->opts.csv_mode)
    1431                 :             {
    1432 EUB             :                 /*
    1433                 :                  * If we are here, it means we found a backslash followed by
    1434                 :                  * something other than a period.  In non-CSV mode, anything
    1435                 :                  * after a backslash is special, so we skip over that second
    1436                 :                  * character too.  If we didn't do that \\. would be
    1437                 :                  * considered an eof-of copy, while in non-CSV mode it is a
    1438                 :                  * literal backslash followed by a period.  In CSV mode,
    1439                 :                  * backslashes are not special, so we want to process the
    1440                 :                  * character after the backslash just like a normal character,
    1441 ECB             :                  * so we don't increment in those cases.
    1442 EUB             :                  */
    1443 GBC        3955 :                 input_buf_ptr++;
    1444 EUB             :             }
    1445 ECB             :         }
    1446                 : 
    1447                 :         /*
    1448                 :          * This label is for CSV cases where \. appears at the start of a
    1449                 :          * line, but there is more text after it, meaning it was a data value.
    1450                 :          * We are more strict for \. in CSV mode because \. could be a data
    1451                 :          * value, while in non-CSV mode, \. cannot be a data value.
    1452                 :          */
    1453 GIC    21718170 : not_end_of_copy:
    1454        21722128 :         first_char_in_line = false;
    1455                 :     }                           /* end of outer loop */
    1456                 : 
    1457                 :     /*
    1458                 :      * Transfer any still-uncopied data to line_buf.
    1459                 :      */
    1460          894943 :     REFILL_LINEBUF;
    1461                 : 
    1462 CBC      894943 :     return result;
    1463                 : }
    1464                 : 
    1465                 : /*
    1466                 :  *  Return decimal value for a hexadecimal digit
    1467                 :  */
    1468                 : static int
    1469 UIC           0 : GetDecimalFromHex(char hex)
    1470                 : {
    1471               0 :     if (isdigit((unsigned char) hex))
    1472 LBC           0 :         return hex - '0';
    1473 ECB             :     else
    1474 UIC           0 :         return tolower((unsigned char) hex) - 'a' + 10;
    1475                 : }
    1476                 : 
    1477                 : /*
    1478                 :  * Parse the current line into separate attributes (fields),
    1479 ECB             :  * performing de-escaping as needed.
    1480                 :  *
    1481                 :  * The input is in line_buf.  We use attribute_buf to hold the result
    1482                 :  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
    1483                 :  * string, or NULL when the input matches the null marker string.
    1484                 :  * This array is expanded as necessary.
    1485                 :  *
    1486                 :  * (Note that the caller cannot check for nulls since the returned
    1487                 :  * string would be the post-de-escaping equivalent, which may look
    1488 EUB             :  * the same as some valid data string.)
    1489                 :  *
    1490                 :  * delim is the column delimiter string (must be just one byte for now).
    1491                 :  * null_print is the null marker string.  Note that this is compared to
    1492                 :  * the pre-de-escaped input string.
    1493                 :  *
    1494                 :  * The return value is the number of fields actually read.
    1495                 :  */
    1496                 : static int
    1497 GIC      893806 : CopyReadAttributesText(CopyFromState cstate)
    1498                 : {
    1499          893806 :     char        delimc = cstate->opts.delim[0];
    1500                 :     int         fieldno;
    1501                 :     char       *output_ptr;
    1502                 :     char       *cur_ptr;
    1503                 :     char       *line_end_ptr;
    1504                 : 
    1505                 :     /*
    1506                 :      * We need a special case for zero-column tables: check that the input
    1507                 :      * line is empty, and return.
    1508                 :      */
    1509          893806 :     if (cstate->max_fields <= 0)
    1510                 :     {
    1511               3 :         if (cstate->line_buf.len != 0)
    1512 UIC           0 :             ereport(ERROR,
    1513                 :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1514                 :                      errmsg("extra data after last expected column")));
    1515 GIC           3 :         return 0;
    1516 ECB             :     }
    1517                 : 
    1518 CBC      893803 :     resetStringInfo(&cstate->attribute_buf);
    1519                 : 
    1520                 :     /*
    1521                 :      * The de-escaped attributes will certainly not be longer than the input
    1522                 :      * data line, so we can just force attribute_buf to be large enough and
    1523                 :      * then transfer data without any checks for enough space.  We need to do
    1524                 :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1525                 :      * pointers already stored into cstate->raw_fields[].
    1526                 :      */
    1527 GIC      893803 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1528 CBC           4 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1529 GIC      893803 :     output_ptr = cstate->attribute_buf.data;
    1530 ECB             : 
    1531 EUB             :     /* set pointer variables for loop */
    1532 GIC      893803 :     cur_ptr = cstate->line_buf.data;
    1533          893803 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1534 ECB             : 
    1535                 :     /* Outer loop iterates over fields */
    1536 GIC      893803 :     fieldno = 0;
    1537 ECB             :     for (;;)
    1538 GIC     2558447 :     {
    1539         3452250 :         bool        found_delim = false;
    1540                 :         char       *start_ptr;
    1541                 :         char       *end_ptr;
    1542                 :         int         input_len;
    1543         3452250 :         bool        saw_non_ascii = false;
    1544                 : 
    1545                 :         /* Make sure there is enough space for the next value */
    1546 CBC     3452250 :         if (fieldno >= cstate->max_fields)
    1547 ECB             :         {
    1548 CBC          15 :             cstate->max_fields *= 2;
    1549 GIC          15 :             cstate->raw_fields =
    1550              15 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1551 ECB             :         }
    1552                 : 
    1553                 :         /* Remember start of field on both input and output sides */
    1554 GIC     3452250 :         start_ptr = cur_ptr;
    1555 CBC     3452250 :         cstate->raw_fields[fieldno] = output_ptr;
    1556                 : 
    1557 ECB             :         /*
    1558                 :          * Scan data for field.
    1559                 :          *
    1560                 :          * Note that in this loop, we are scanning to locate the end of field
    1561                 :          * and also speculatively performing de-escaping.  Once we find the
    1562                 :          * end-of-field, we can match the raw field contents against the null
    1563                 :          * marker string.  Only after that comparison fails do we know that
    1564                 :          * de-escaping is actually the right thing to do; therefore we *must
    1565                 :          * not* throw any syntax errors before we've done the null-marker
    1566                 :          * check.
    1567                 :          */
    1568                 :         for (;;)
    1569 CBC    19161333 :         {
    1570                 :             char        c;
    1571                 : 
    1572 GIC    22613583 :             end_ptr = cur_ptr;
    1573 CBC    22613583 :             if (cur_ptr >= line_end_ptr)
    1574          893800 :                 break;
    1575 GIC    21719783 :             c = *cur_ptr++;
    1576        21719783 :             if (c == delimc)
    1577                 :             {
    1578         2558450 :                 found_delim = true;
    1579         2558450 :                 break;
    1580                 :             }
    1581        19161333 :             if (c == '\\')
    1582                 :             {
    1583            3955 :                 if (cur_ptr >= line_end_ptr)
    1584 UIC           0 :                     break;
    1585 GIC        3955 :                 c = *cur_ptr++;
    1586            3955 :                 switch (c)
    1587                 :                 {
    1588 CBC           6 :                     case '0':
    1589                 :                     case '1':
    1590                 :                     case '2':
    1591 ECB             :                     case '3':
    1592                 :                     case '4':
    1593                 :                     case '5':
    1594                 :                     case '6':
    1595                 :                     case '7':
    1596                 :                         {
    1597                 :                             /* handle \013 */
    1598                 :                             int         val;
    1599                 : 
    1600 CBC           6 :                             val = OCTVALUE(c);
    1601 GIC           6 :                             if (cur_ptr < line_end_ptr)
    1602 ECB             :                             {
    1603 GBC           3 :                                 c = *cur_ptr;
    1604 CBC           3 :                                 if (ISOCTAL(c))
    1605 ECB             :                                 {
    1606 UIC           0 :                                     cur_ptr++;
    1607 LBC           0 :                                     val = (val << 3) + OCTVALUE(c);
    1608 UIC           0 :                                     if (cur_ptr < line_end_ptr)
    1609                 :                                     {
    1610               0 :                                         c = *cur_ptr;
    1611               0 :                                         if (ISOCTAL(c))
    1612                 :                                         {
    1613               0 :                                             cur_ptr++;
    1614               0 :                                             val = (val << 3) + OCTVALUE(c);
    1615                 :                                         }
    1616                 :                                     }
    1617                 :                                 }
    1618                 :                             }
    1619 CBC           6 :                             c = val & 0377;
    1620               6 :                             if (c == '\0' || IS_HIGHBIT_SET(c))
    1621 GIC           6 :                                 saw_non_ascii = true;
    1622 ECB             :                         }
    1623 CBC           6 :                         break;
    1624 GIC           6 :                     case 'x':
    1625 EUB             :                         /* Handle \x3F */
    1626 GBC           6 :                         if (cur_ptr < line_end_ptr)
    1627 EUB             :                         {
    1628 GIC           3 :                             char        hexchar = *cur_ptr;
    1629 EUB             : 
    1630 GBC           3 :                             if (isxdigit((unsigned char) hexchar))
    1631                 :                             {
    1632 UBC           0 :                                 int         val = GetDecimalFromHex(hexchar);
    1633 EUB             : 
    1634 UIC           0 :                                 cur_ptr++;
    1635               0 :                                 if (cur_ptr < line_end_ptr)
    1636                 :                                 {
    1637               0 :                                     hexchar = *cur_ptr;
    1638 LBC           0 :                                     if (isxdigit((unsigned char) hexchar))
    1639 ECB             :                                     {
    1640 LBC           0 :                                         cur_ptr++;
    1641 UIC           0 :                                         val = (val << 4) + GetDecimalFromHex(hexchar);
    1642 ECB             :                                     }
    1643                 :                                 }
    1644 UIC           0 :                                 c = val & 0xff;
    1645 LBC           0 :                                 if (c == '\0' || IS_HIGHBIT_SET(c))
    1646 UIC           0 :                                     saw_non_ascii = true;
    1647 ECB             :                             }
    1648                 :                         }
    1649 CBC           6 :                         break;
    1650 UIC           0 :                     case 'b':
    1651 UBC           0 :                         c = '\b';
    1652 UIC           0 :                         break;
    1653 UBC           0 :                     case 'f':
    1654               0 :                         c = '\f';
    1655 UIC           0 :                         break;
    1656 GBC        1525 :                     case 'n':
    1657            1525 :                         c = '\n';
    1658 GIC        1525 :                         break;
    1659 UBC           0 :                     case 'r':
    1660               0 :                         c = '\r';
    1661 UIC           0 :                         break;
    1662               0 :                     case 't':
    1663 UBC           0 :                         c = '\t';
    1664               0 :                         break;
    1665               0 :                     case 'v':
    1666 UIC           0 :                         c = '\v';
    1667               0 :                         break;
    1668 ECB             : 
    1669 EUB             :                         /*
    1670                 :                          * in all other cases, take the char after '\'
    1671                 :                          * literally
    1672                 :                          */
    1673                 :                 }
    1674                 :             }
    1675 ECB             : 
    1676                 :             /* Add c to output string */
    1677 CBC    19161333 :             *output_ptr++ = c;
    1678 EUB             :         }
    1679                 : 
    1680                 :         /* Check whether raw input matched null marker */
    1681 GBC     3452250 :         input_len = end_ptr - start_ptr;
    1682         3452250 :         if (input_len == cstate->opts.null_print_len &&
    1683          246279 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1684            2376 :             cstate->raw_fields[fieldno] = NULL;
    1685                 :         /* Check whether raw input matched default marker */
    1686 GNC     3449874 :         else if (fieldno < list_length(cstate->attnumlist) &&
    1687         3449856 :                  cstate->opts.default_print &&
    1688              57 :                  input_len == cstate->opts.default_print_len &&
    1689              15 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1690              12 :         {
    1691                 :             /* fieldno is 0-indexed and attnum is 1-indexed */
    1692              15 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1693                 : 
    1694              15 :             if (cstate->defexprs[m] != NULL)
    1695                 :             {
    1696                 :                 /* defaults contain entries for all physical attributes */
    1697              12 :                 cstate->defaults[m] = true;
    1698                 :             }
    1699                 :             else
    1700                 :             {
    1701               3 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    1702               3 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1703                 : 
    1704               3 :                 ereport(ERROR,
    1705                 :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1706                 :                          errmsg("unexpected DEFAULT in COPY data"),
    1707                 :                          errdetail("Column \"%s\" has no DEFAULT value.",
    1708                 :                                    NameStr(att->attname))));
    1709                 :             }
    1710                 :         }
    1711 EUB             :         else
    1712                 :         {
    1713                 :             /*
    1714                 :              * At this point we know the field is supposed to contain data.
    1715                 :              *
    1716                 :              * If we de-escaped any non-7-bit-ASCII chars, make sure the
    1717                 :              * resulting string is valid data for the db encoding.
    1718                 :              */
    1719 GIC     3449859 :             if (saw_non_ascii)
    1720                 :             {
    1721 UIC           0 :                 char       *fld = cstate->raw_fields[fieldno];
    1722 ECB             : 
    1723 UIC           0 :                 pg_verifymbstr(fld, output_ptr - fld, false);
    1724                 :             }
    1725                 :         }
    1726 ECB             : 
    1727                 :         /* Terminate attribute value in output area */
    1728 CBC     3452247 :         *output_ptr++ = '\0';
    1729 ECB             : 
    1730 GIC     3452247 :         fieldno++;
    1731 ECB             :         /* Done if we hit EOL instead of a delim */
    1732 CBC     3452247 :         if (!found_delim)
    1733          893800 :             break;
    1734 ECB             :     }
    1735                 : 
    1736                 :     /* Clean up state of attribute_buf */
    1737 CBC      893800 :     output_ptr--;
    1738 GIC      893800 :     Assert(*output_ptr == '\0');
    1739 CBC      893800 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    1740                 : 
    1741 GIC      893800 :     return fieldno;
    1742 ECB             : }
    1743                 : 
    1744                 : /*
    1745                 :  * Parse the current line into separate attributes (fields),
    1746                 :  * performing de-escaping as needed.  This has exactly the same API as
    1747                 :  * CopyReadAttributesText, except we parse the fields according to
    1748                 :  * "standard" (i.e. common) CSV usage.
    1749                 :  */
    1750                 : static int
    1751 GIC         193 : CopyReadAttributesCSV(CopyFromState cstate)
    1752                 : {
    1753             193 :     char        delimc = cstate->opts.delim[0];
    1754             193 :     char        quotec = cstate->opts.quote[0];
    1755             193 :     char        escapec = cstate->opts.escape[0];
    1756                 :     int         fieldno;
    1757                 :     char       *output_ptr;
    1758                 :     char       *cur_ptr;
    1759                 :     char       *line_end_ptr;
    1760                 : 
    1761                 :     /*
    1762                 :      * We need a special case for zero-column tables: check that the input
    1763                 :      * line is empty, and return.
    1764 ECB             :      */
    1765 GIC         193 :     if (cstate->max_fields <= 0)
    1766 EUB             :     {
    1767 UIC           0 :         if (cstate->line_buf.len != 0)
    1768 UBC           0 :             ereport(ERROR,
    1769                 :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1770                 :                      errmsg("extra data after last expected column")));
    1771 UIC           0 :         return 0;
    1772                 :     }
    1773 ECB             : 
    1774 GIC         193 :     resetStringInfo(&cstate->attribute_buf);
    1775 ECB             : 
    1776                 :     /*
    1777                 :      * The de-escaped attributes will certainly not be longer than the input
    1778                 :      * data line, so we can just force attribute_buf to be large enough and
    1779                 :      * then transfer data without any checks for enough space.  We need to do
    1780                 :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1781                 :      * pointers already stored into cstate->raw_fields[].
    1782                 :      */
    1783 CBC         193 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1784 LBC           0 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1785 GIC         193 :     output_ptr = cstate->attribute_buf.data;
    1786 ECB             : 
    1787                 :     /* set pointer variables for loop */
    1788 GIC         193 :     cur_ptr = cstate->line_buf.data;
    1789             193 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1790                 : 
    1791                 :     /* Outer loop iterates over fields */
    1792             193 :     fieldno = 0;
    1793                 :     for (;;)
    1794             227 :     {
    1795             420 :         bool        found_delim = false;
    1796 CBC         420 :         bool        saw_quote = false;
    1797                 :         char       *start_ptr;
    1798 ECB             :         char       *end_ptr;
    1799                 :         int         input_len;
    1800                 : 
    1801                 :         /* Make sure there is enough space for the next value */
    1802 GIC         420 :         if (fieldno >= cstate->max_fields)
    1803                 :         {
    1804 UIC           0 :             cstate->max_fields *= 2;
    1805               0 :             cstate->raw_fields =
    1806               0 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1807                 :         }
    1808                 : 
    1809                 :         /* Remember start of field on both input and output sides */
    1810 CBC         420 :         start_ptr = cur_ptr;
    1811 GIC         420 :         cstate->raw_fields[fieldno] = output_ptr;
    1812 EUB             : 
    1813                 :         /*
    1814                 :          * Scan data for field,
    1815                 :          *
    1816                 :          * The loop starts in "not quote" mode and then toggles between that
    1817                 :          * and "in quote" mode. The loop exits normally if it is in "not
    1818                 :          * quote" mode and a delimiter or line end is seen.
    1819 ECB             :          */
    1820                 :         for (;;)
    1821 GIC          79 :         {
    1822                 :             char        c;
    1823                 : 
    1824                 :             /* Not in quote */
    1825                 :             for (;;)
    1826                 :             {
    1827            1385 :                 end_ptr = cur_ptr;
    1828 CBC        1385 :                 if (cur_ptr >= line_end_ptr)
    1829 GBC         190 :                     goto endfield;
    1830 CBC        1195 :                 c = *cur_ptr++;
    1831                 :                 /* unquoted field delimiter */
    1832 GIC        1195 :                 if (c == delimc)
    1833 ECB             :                 {
    1834 CBC         230 :                     found_delim = true;
    1835 GIC         230 :                     goto endfield;
    1836                 :                 }
    1837 ECB             :                 /* start of quoted field (or part of field) */
    1838 GIC         965 :                 if (c == quotec)
    1839 ECB             :                 {
    1840 CBC          79 :                     saw_quote = true;
    1841              79 :                     break;
    1842                 :                 }
    1843                 :                 /* Add c to output string */
    1844 GIC         886 :                 *output_ptr++ = c;
    1845                 :             }
    1846                 : 
    1847 ECB             :             /* In quote */
    1848                 :             for (;;)
    1849 EUB             :             {
    1850 GBC         570 :                 end_ptr = cur_ptr;
    1851             570 :                 if (cur_ptr >= line_end_ptr)
    1852 UIC           0 :                     ereport(ERROR,
    1853                 :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1854                 :                              errmsg("unterminated CSV quoted field")));
    1855 ECB             : 
    1856 CBC         570 :                 c = *cur_ptr++;
    1857                 : 
    1858                 :                 /* escape within a quoted field */
    1859 GIC         570 :                 if (c == escapec)
    1860                 :                 {
    1861                 :                     /*
    1862                 :                      * peek at the next char if available, and escape it if it
    1863                 :                      * is an escape char or a quote char
    1864                 :                      */
    1865              47 :                     if (cur_ptr < line_end_ptr)
    1866 ECB             :                     {
    1867 GIC          33 :                         char        nextc = *cur_ptr;
    1868                 : 
    1869              33 :                         if (nextc == escapec || nextc == quotec)
    1870                 :                         {
    1871              12 :                             *output_ptr++ = nextc;
    1872 CBC          12 :                             cur_ptr++;
    1873              12 :                             continue;
    1874 ECB             :                         }
    1875                 :                     }
    1876                 :                 }
    1877                 : 
    1878                 :                 /*
    1879                 :                  * end of quoted field. Must do this test after testing for
    1880                 :                  * escape in case quote char and escape char are the same
    1881                 :                  * (which is the common case).
    1882                 :                  */
    1883 CBC         558 :                 if (c == quotec)
    1884 GIC          79 :                     break;
    1885 ECB             : 
    1886                 :                 /* Add c to output string */
    1887 GIC         479 :                 *output_ptr++ = c;
    1888                 :             }
    1889 ECB             :         }
    1890 GIC         420 : endfield:
    1891                 : 
    1892                 :         /* Terminate attribute value in output area */
    1893             420 :         *output_ptr++ = '\0';
    1894                 : 
    1895 ECB             :         /* Check whether raw input matched null marker */
    1896 CBC         420 :         input_len = end_ptr - start_ptr;
    1897 GBC         420 :         if (!saw_quote && input_len == cstate->opts.null_print_len &&
    1898 GIC          16 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1899              16 :             cstate->raw_fields[fieldno] = NULL;
    1900                 :         /* Check whether raw input matched default marker */
    1901 GNC         404 :         else if (fieldno < list_length(cstate->attnumlist) &&
    1902             404 :                  cstate->opts.default_print &&
    1903              75 :                  input_len == cstate->opts.default_print_len &&
    1904              21 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1905                 :         {
    1906                 :             /* fieldno is 0-index and attnum is 1-index */
    1907              21 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1908                 : 
    1909              21 :             if (cstate->defexprs[m] != NULL)
    1910                 :             {
    1911                 :                 /* defaults contain entries for all physical attributes */
    1912              18 :                 cstate->defaults[m] = true;
    1913                 :             }
    1914                 :             else
    1915                 :             {
    1916               3 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    1917               3 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1918                 : 
    1919               3 :                 ereport(ERROR,
    1920                 :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1921                 :                          errmsg("unexpected DEFAULT in COPY data"),
    1922                 :                          errdetail("Column \"%s\" has no DEFAULT value.",
    1923                 :                                    NameStr(att->attname))));
    1924                 :             }
    1925                 :         }
    1926                 : 
    1927 CBC         417 :         fieldno++;
    1928                 :         /* Done if we hit EOL instead of a delim */
    1929 GIC         417 :         if (!found_delim)
    1930 CBC         190 :             break;
    1931                 :     }
    1932                 : 
    1933                 :     /* Clean up state of attribute_buf */
    1934 GIC         190 :     output_ptr--;
    1935             190 :     Assert(*output_ptr == '\0');
    1936 CBC         190 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    1937                 : 
    1938             190 :     return fieldno;
    1939                 : }
    1940 ECB             : 
    1941                 : 
    1942                 : /*
    1943                 :  * Read a binary attribute
    1944                 :  */
    1945                 : static Datum
    1946 GIC          79 : CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
    1947                 :                         Oid typioparam, int32 typmod,
    1948                 :                         bool *isnull)
    1949                 : {
    1950                 :     int32       fld_size;
    1951                 :     Datum       result;
    1952                 : 
    1953              79 :     if (!CopyGetInt32(cstate, &fld_size))
    1954 LBC           0 :         ereport(ERROR,
    1955 ECB             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1956                 :                  errmsg("unexpected EOF in COPY data")));
    1957 GIC          79 :     if (fld_size == -1)
    1958 ECB             :     {
    1959 GIC          15 :         *isnull = true;
    1960              15 :         return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
    1961 ECB             :     }
    1962 GIC          64 :     if (fld_size < 0)
    1963 UIC           0 :         ereport(ERROR,
    1964 ECB             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1965                 :                  errmsg("invalid field size")));
    1966                 : 
    1967                 :     /* reset attribute_buf to empty, and load raw data in it */
    1968 CBC          64 :     resetStringInfo(&cstate->attribute_buf);
    1969 ECB             : 
    1970 CBC          64 :     enlargeStringInfo(&cstate->attribute_buf, fld_size);
    1971 GIC          64 :     if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
    1972 CBC          64 :                            fld_size) != fld_size)
    1973 LBC           0 :         ereport(ERROR,
    1974 ECB             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1975                 :                  errmsg("unexpected EOF in COPY data")));
    1976                 : 
    1977 GIC          64 :     cstate->attribute_buf.len = fld_size;
    1978 CBC          64 :     cstate->attribute_buf.data[fld_size] = '\0';
    1979                 : 
    1980 ECB             :     /* Call the column type's binary input converter */
    1981 GIC          64 :     result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
    1982                 :                                  typioparam, typmod);
    1983 ECB             : 
    1984                 :     /* Trouble if it didn't eat the whole buffer */
    1985 GIC          64 :     if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
    1986               1 :         ereport(ERROR,
    1987 ECB             :                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
    1988                 :                  errmsg("incorrect binary data format")));
    1989                 : 
    1990 CBC          63 :     *isnull = false;
    1991 GIC          63 :     return result;
    1992                 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a