LCOV - differential code coverage report
Current view: top level - src/backend/storage/file - buffile.c (source / functions) Coverage Total Hit UNC LBC UIC UBC GBC GIC GNC CBC EUB ECB DUB DCB
Current: Differential Code Coverage HEAD vs 15 Lines: 77.4 % 327 253 7 11 44 12 19 157 15 62 42 175 1 5
Current Date: 2023-04-08 15:15:32 Functions: 92.0 % 25 23 1 1 19 4 2 23
Baseline: 15
Baseline Date: 2023-04-08 15:09:40
Legend: Lines: hit not hit

           TLA  Line data    Source code
       1                 : /*-------------------------------------------------------------------------
       2                 :  *
       3                 :  * buffile.c
       4                 :  *    Management of large buffered temporary files.
       5                 :  *
       6                 :  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
       7                 :  * Portions Copyright (c) 1994, Regents of the University of California
       8                 :  *
       9                 :  * IDENTIFICATION
      10                 :  *    src/backend/storage/file/buffile.c
      11                 :  *
      12                 :  * NOTES:
      13                 :  *
      14                 :  * BufFiles provide a very incomplete emulation of stdio atop virtual Files
      15                 :  * (as managed by fd.c).  Currently, we only support the buffered-I/O
      16                 :  * aspect of stdio: a read or write of the low-level File occurs only
      17                 :  * when the buffer is filled or emptied.  This is an even bigger win
      18                 :  * for virtual Files than for ordinary kernel files, since reducing the
      19                 :  * frequency with which a virtual File is touched reduces "thrashing"
      20                 :  * of opening/closing file descriptors.
      21                 :  *
      22                 :  * Note that BufFile structs are allocated with palloc(), and therefore
      23                 :  * will go away automatically at query/transaction end.  Since the underlying
      24                 :  * virtual Files are made with OpenTemporaryFile, all resources for
      25                 :  * the file are certain to be cleaned up even if processing is aborted
      26                 :  * by ereport(ERROR).  The data structures required are made in the
      27                 :  * palloc context that was current when the BufFile was created, and
      28                 :  * any external resources such as temp files are owned by the ResourceOwner
      29                 :  * that was current at that time.
      30                 :  *
      31                 :  * BufFile also supports temporary files that exceed the OS file size limit
      32                 :  * (by opening multiple fd.c temporary files).  This is an essential feature
      33                 :  * for sorts and hashjoins on large amounts of data.
      34                 :  *
      35                 :  * BufFile supports temporary files that can be shared with other backends, as
      36                 :  * infrastructure for parallel execution.  Such files need to be created as a
      37                 :  * member of a SharedFileSet that all participants are attached to.
      38                 :  *
      39                 :  * BufFile also supports temporary files that can be used by a single backend
      40                 :  * when the corresponding files need to survive across transactions and
      41                 :  * need to be opened and closed multiple times.  Such files need to be created
      42                 :  * as a member of a FileSet.
      43                 :  *-------------------------------------------------------------------------
      44                 :  */
      45                 : 
      46                 : #include "postgres.h"
      47                 : 
      48                 : #include "commands/tablespace.h"
      49                 : #include "executor/instrument.h"
      50                 : #include "miscadmin.h"
      51                 : #include "pgstat.h"
      52                 : #include "storage/buf_internals.h"
      53                 : #include "storage/buffile.h"
      54                 : #include "storage/fd.h"
      55                 : #include "utils/resowner.h"
      56                 : 
      57                 : /*
      58                 :  * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE.
      59                 :  * The reason is that we'd like large BufFiles to be spread across multiple
      60                 :  * tablespaces when available.
      61                 :  */
      62                 : #define MAX_PHYSICAL_FILESIZE   0x40000000
      63                 : #define BUFFILE_SEG_SIZE        (MAX_PHYSICAL_FILESIZE / BLCKSZ)
      64                 : 
      65                 : /*
      66                 :  * This data structure represents a buffered file that consists of one or
      67                 :  * more physical files (each accessed through a virtual file descriptor
      68                 :  * managed by fd.c).
      69                 :  */
      70                 : struct BufFile
      71                 : {
      72                 :     int         numFiles;       /* number of physical files in set */
      73                 :     /* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
      74                 :     File       *files;          /* palloc'd array with numFiles entries */
      75                 : 
      76                 :     bool        isInterXact;    /* keep open over transactions? */
      77                 :     bool        dirty;          /* does buffer need to be written? */
      78                 :     bool        readOnly;       /* has the file been set to read only? */
      79                 : 
      80                 :     FileSet    *fileset;        /* space for fileset based segment files */
      81                 :     const char *name;           /* name of fileset based BufFile */
      82                 : 
      83                 :     /*
      84                 :      * resowner is the ResourceOwner to use for underlying temp files.  (We
      85                 :      * don't need to remember the memory context we're using explicitly,
      86                 :      * because after creation we only repalloc our arrays larger.)
      87                 :      */
      88                 :     ResourceOwner resowner;
      89                 : 
      90                 :     /*
      91                 :      * "current pos" is position of start of buffer within the logical file.
      92                 :      * Position as seen by user of BufFile is (curFile, curOffset + pos).
      93                 :      */
      94                 :     int         curFile;        /* file index (0..n) part of current pos */
      95                 :     off_t       curOffset;      /* offset part of current pos */
      96                 :     int         pos;            /* next read/write position in buffer */
      97                 :     int         nbytes;         /* total # of valid bytes in buffer */
      98                 : 
      99                 :     /*
     100                 :      * XXX Should ideally use PGIOAlignedBlock, but might need a way to avoid
     101                 :      * wasting per-file alignment padding when some users create many
     102                 :      * files.
     103                 :      */
     104                 :     PGAlignedBlock buffer;
     105                 : };
     106                 : 
     107                 : static BufFile *makeBufFileCommon(int nfiles);
     108                 : static BufFile *makeBufFile(File firstfile);
     109                 : static void extendBufFile(BufFile *file);
     110                 : static void BufFileLoadBuffer(BufFile *file);
     111                 : static void BufFileDumpBuffer(BufFile *file);
     112                 : static void BufFileFlush(BufFile *file);
     113                 : static File MakeNewFileSetSegment(BufFile *buffile, int segment);
     114                 : 
     115                 : /*
     116                 :  * Create BufFile and perform the common initialization.
     117                 :  */
     118                 : static BufFile *
     119 GIC        5116 : makeBufFileCommon(int nfiles)
     120                 : {
     121            5116 :     BufFile    *file = (BufFile *) palloc(sizeof(BufFile));
     122                 : 
     123            5116 :     file->numFiles = nfiles;
     124            5116 :     file->isInterXact = false;
     125 CBC        5116 :     file->dirty = false;
     126 GIC        5116 :     file->resowner = CurrentResourceOwner;
     127 CBC        5116 :     file->curFile = 0;
     128 GNC        5116 :     file->curOffset = 0;
     129 CBC        5116 :     file->pos = 0;
     130            5116 :     file->nbytes = 0;
     131 ECB             : 
     132 CBC        5116 :     return file;
     133 ECB             : }
     134                 : 
     135                 : /*
     136                 :  * Create a BufFile given the first underlying physical file.
     137                 :  * NOTE: caller must set isInterXact if appropriate.
     138                 :  */
     139                 : static BufFile *
     140 GIC        1783 : makeBufFile(File firstfile)
     141                 : {
     142            1783 :     BufFile    *file = makeBufFileCommon(1);
     143                 : 
     144            1783 :     file->files = (File *) palloc(sizeof(File));
     145            1783 :     file->files[0] = firstfile;
     146 CBC        1783 :     file->readOnly = false;
     147 GIC        1783 :     file->fileset = NULL;
     148 CBC        1783 :     file->name = NULL;
     149                 : 
     150            1783 :     return file;
     151 ECB             : }
     152                 : 
     153                 : /*
     154                 :  * Add another component temp file.
     155                 :  */
     156                 : static void
     157 UIC           0 : extendBufFile(BufFile *file)
     158                 : {
     159                 :     File        pfile;
     160                 :     ResourceOwner oldowner;
     161                 : 
     162                 :     /* Be sure to associate the file with the BufFile's resource owner */
     163 UBC           0 :     oldowner = CurrentResourceOwner;
     164 UIC           0 :     CurrentResourceOwner = file->resowner;
     165                 : 
     166               0 :     if (file->fileset == NULL)
     167               0 :         pfile = OpenTemporaryFile(file->isInterXact);
     168                 :     else
     169 UBC           0 :         pfile = MakeNewFileSetSegment(file, file->numFiles);
     170 EUB             : 
     171 UIC           0 :     Assert(pfile >= 0);
     172 EUB             : 
     173 UBC           0 :     CurrentResourceOwner = oldowner;
     174                 : 
     175               0 :     file->files = (File *) repalloc(file->files,
     176 UIC           0 :                                     (file->numFiles + 1) * sizeof(File));
     177 UBC           0 :     file->files[file->numFiles] = pfile;
     178 UIC           0 :     file->numFiles++;
     179 UBC           0 : }
     180                 : 
     181 EUB             : /*
     182                 :  * Create a BufFile for a new temporary file (which will expand to become
     183                 :  * multiple temporary files if more than MAX_PHYSICAL_FILESIZE bytes are
     184                 :  * written to it).
     185                 :  *
     186                 :  * If interXact is true, the temp file will not be automatically deleted
     187                 :  * at end of transaction.
     188                 :  *
     189                 :  * Note: if interXact is true, the caller had better be calling us in a
     190                 :  * memory context, and with a resource owner, that will survive across
     191                 :  * transaction boundaries.
     192                 :  */
     193                 : BufFile *
     194 GIC        1783 : BufFileCreateTemp(bool interXact)
     195                 : {
     196                 :     BufFile    *file;
     197                 :     File        pfile;
     198                 : 
     199                 :     /*
     200 ECB             :      * Ensure that temp tablespaces are set up for OpenTemporaryFile to use.
     201                 :      * Possibly the caller will have done this already, but it seems useful to
     202                 :      * double-check here.  Failure to do this at all would result in the temp
     203                 :      * files always getting placed in the default tablespace, which is a
     204                 :      * pretty hard-to-detect bug.  Callers may prefer to do it earlier if they
     205                 :      * want to be sure that any required catalog access is done in some other
     206                 :      * resource context.
     207                 :      */
     208 GIC        1783 :     PrepareTempTablespaces();
     209                 : 
     210            1783 :     pfile = OpenTemporaryFile(interXact);
     211            1783 :     Assert(pfile >= 0);
     212                 : 
     213            1783 :     file = makeBufFile(pfile);
     214 CBC        1783 :     file->isInterXact = interXact;
     215                 : 
     216            1783 :     return file;
     217 ECB             : }
     218                 : 
     219                 : /*
     220                 :  * Build the name for a given segment of a given BufFile.
     221                 :  */
     222                 : static void
     223 GIC        7326 : FileSetSegmentName(char *name, const char *buffile_name, int segment)
     224                 : {
     225            7326 :     snprintf(name, MAXPGPATH, "%s.%d", buffile_name, segment);
     226            7326 : }
     227                 : 
     228                 : /*
     229 ECB             :  * Create a new segment file backing a fileset based BufFile.
     230                 :  */
     231                 : static File
     232 CBC        1439 : MakeNewFileSetSegment(BufFile *buffile, int segment)
     233                 : {
     234                 :     char        name[MAXPGPATH];
     235                 :     File        file;
     236                 : 
     237                 :     /*
     238 ECB             :      * It is possible that there are files left over from before a crash
     239                 :      * restart with the same name.  In order for BufFileOpenFileSet() not to
     240                 :      * get confused about how many segments there are, we'll unlink the next
     241                 :      * segment number if it already exists.
     242                 :      */
     243 GIC        1439 :     FileSetSegmentName(name, buffile->name, segment + 1);
     244            1439 :     FileSetDelete(buffile->fileset, name, true);
     245                 : 
     246                 :     /* Create the new segment. */
     247            1439 :     FileSetSegmentName(name, buffile->name, segment);
     248            1439 :     file = FileSetCreate(buffile->fileset, name);
     249 ECB             : 
     250                 :     /* FileSetCreate would've errored out */
     251 GIC        1439 :     Assert(file > 0);
     252                 : 
     253 CBC        1439 :     return file;
     254 ECB             : }
     255                 : 
     256                 : /*
     257                 :  * Create a BufFile that can be discovered and opened read-only by other
     258                 :  * backends that are attached to the same SharedFileSet using the same name.
     259                 :  *
     260                 :  * The naming scheme for fileset based BufFiles is left up to the calling code.
     261                 :  * The name will appear as part of one or more filenames on disk, and might
     262                 :  * provide clues to administrators about which subsystem is generating
     263                 :  * temporary file data.  Since each SharedFileSet object is backed by one or
     264                 :  * more uniquely named temporary directories, names don't conflict with
     265                 :  * unrelated SharedFileSet objects.
     266                 :  */
     267                 : BufFile *
     268 GIC        1439 : BufFileCreateFileSet(FileSet *fileset, const char *name)
     269                 : {
     270                 :     BufFile    *file;
     271                 : 
     272            1439 :     file = makeBufFileCommon(1);
     273            1439 :     file->fileset = fileset;
     274 CBC        1439 :     file->name = pstrdup(name);
     275 GIC        1439 :     file->files = (File *) palloc(sizeof(File));
     276            1439 :     file->files[0] = MakeNewFileSetSegment(file, 0);
     277            1439 :     file->readOnly = false;
     278 ECB             : 
     279 CBC        1439 :     return file;
     280 ECB             : }
     281                 : 
     282                 : /*
     283                 :  * Open a file that was previously created in another backend (or this one)
     284                 :  * with BufFileCreateFileSet in the same FileSet using the same name.
     285                 :  * The backend that created the file must have called BufFileClose() or
     286                 :  * BufFileExportFileSet() to make sure that it is ready to be opened by other
     287                 :  * backends and render it read-only.  If missing_ok is true, which indicates
     288                 :  * that missing files can be safely ignored, then return NULL if the BufFile
     289                 :  * with the given name is not found, otherwise, throw an error.
     290                 :  */
     291                 : BufFile *
     292 GIC        2165 : BufFileOpenFileSet(FileSet *fileset, const char *name, int mode,
     293                 :                    bool missing_ok)
     294                 : {
     295                 :     BufFile    *file;
     296                 :     char        segment_name[MAXPGPATH];
     297            2165 :     Size        capacity = 16;
     298 ECB             :     File       *files;
     299 GIC        2165 :     int         nfiles = 0;
     300                 : 
     301            2165 :     files = palloc(sizeof(File) * capacity);
     302                 : 
     303 ECB             :     /*
     304                 :      * We don't know how many segments there are, so we'll probe the
     305                 :      * filesystem to find out.
     306                 :      */
     307                 :     for (;;)
     308                 :     {
     309                 :         /* See if we need to expand our file segment array. */
     310 GIC        4059 :         if (nfiles + 1 > capacity)
     311                 :         {
     312 UIC           0 :             capacity *= 2;
     313               0 :             files = repalloc(files, sizeof(File) * capacity);
     314                 :         }
     315                 :         /* Try to load a segment. */
     316 CBC        4059 :         FileSetSegmentName(segment_name, name, nfiles);
     317 GIC        4059 :         files[nfiles] = FileSetOpen(fileset, segment_name, mode);
     318 GBC        4059 :         if (files[nfiles] <= 0)
     319            2165 :             break;
     320 GIC        1894 :         ++nfiles;
     321                 : 
     322 CBC        1894 :         CHECK_FOR_INTERRUPTS();
     323 ECB             :     }
     324                 : 
     325                 :     /*
     326                 :      * If we didn't find any files at all, then no BufFile exists with this
     327                 :      * name.
     328                 :      */
     329 GIC        2165 :     if (nfiles == 0)
     330                 :     {
     331                 :         /* free the memory */
     332             271 :         pfree(files);
     333                 : 
     334             271 :         if (missing_ok)
     335 CBC         271 :             return NULL;
     336                 : 
     337 UIC           0 :         ereport(ERROR,
     338 ECB             :                 (errcode_for_file_access(),
     339                 :                  errmsg("could not open temporary file \"%s\" from BufFile \"%s\": %m",
     340                 :                         segment_name, name)));
     341                 :     }
     342                 : 
     343 GBC        1894 :     file = makeBufFileCommon(nfiles);
     344 GIC        1894 :     file->files = files;
     345            1894 :     file->readOnly = (mode == O_RDONLY);
     346            1894 :     file->fileset = fileset;
     347            1894 :     file->name = pstrdup(name);
     348                 : 
     349 CBC        1894 :     return file;
     350 ECB             : }
     351                 : 
     352                 : /*
     353                 :  * Delete a BufFile that was created by BufFileCreateFileSet in the given
     354                 :  * FileSet using the given name.
     355                 :  *
     356                 :  * It is not necessary to delete files explicitly with this function.  It is
     357                 :  * provided only as a way to delete files proactively, rather than waiting for
     358                 :  * the FileSet to be cleaned up.
     359                 :  *
     360                 :  * Only one backend should attempt to delete a given name, and should know
     361                 :  * that it exists and has been exported or closed; otherwise missing_ok should
     362                 :  * be passed as true.
     363                 :  */
     364                 : void
     365 GIC         350 : BufFileDeleteFileSet(FileSet *fileset, const char *name, bool missing_ok)
     366                 : {
     367                 :     char        segment_name[MAXPGPATH];
     368             350 :     int         segment = 0;
     369             350 :     bool        found = false;
     370                 : 
     371 ECB             :     /*
     372                 :      * We don't know how many segments the file has.  We'll keep deleting
     373                 :      * until we run out.  If we don't manage to find even an initial segment,
     374                 :      * raise an error unless missing_ok is true.
     375                 :      */
     376                 :     for (;;)
     377                 :     {
     378 GIC         389 :         FileSetSegmentName(segment_name, name, segment);
     379             389 :         if (!FileSetDelete(fileset, segment_name, true))
     380             350 :             break;
     381              39 :         found = true;
     382              39 :         ++segment;
     383                 : 
     384 CBC          39 :         CHECK_FOR_INTERRUPTS();
     385 ECB             :     }
     386                 : 
     387 CBC         350 :     if (!found && !missing_ok)
     388 LBC           0 :         elog(ERROR, "could not delete unknown BufFile \"%s\"", name);
     389 GIC         350 : }
     390 ECB             : 
     391                 : /*
     392                 :  * BufFileExportFileSet --- flush and make read-only, in preparation for sharing.
     393                 :  */
     394 EUB             : void
     395 CBC         206 : BufFileExportFileSet(BufFile *file)
     396                 : {
     397                 :     /* Must be a file belonging to a FileSet. */
     398 GIC         206 :     Assert(file->fileset != NULL);
     399                 : 
     400                 :     /* It's probably a bug if someone calls this twice. */
     401 CBC         206 :     Assert(!file->readOnly);
     402                 : 
     403 GIC         206 :     BufFileFlush(file);
     404 CBC         206 :     file->readOnly = true;
     405 GIC         206 : }
     406                 : 
     407 ECB             : /*
     408                 :  * Close a BufFile
     409                 :  *
     410                 :  * Like fclose(), this also implicitly FileCloses the underlying File.
     411                 :  */
     412                 : void
     413 GIC        5036 : BufFileClose(BufFile *file)
     414                 : {
     415                 :     int         i;
     416                 : 
     417                 :     /* flush any unwritten data */
     418            5036 :     BufFileFlush(file);
     419 ECB             :     /* close and delete the underlying file(s) */
     420 GIC       10143 :     for (i = 0; i < file->numFiles; i++)
     421            5107 :         FileClose(file->files[i]);
     422                 :     /* release the buffer space */
     423            5036 :     pfree(file->files);
     424 CBC        5036 :     pfree(file);
     425 GIC        5036 : }
     426 ECB             : 
     427                 : /*
     428                 :  * BufFileLoadBuffer
     429                 :  *
     430                 :  * Load some data into buffer, if possible, starting from curOffset.
     431                 :  * At call, must have dirty = false, pos and nbytes = 0.
     432                 :  * On exit, nbytes is number of bytes loaded.
     433                 :  */
     434                 : static void
     435 GIC       54301 : BufFileLoadBuffer(BufFile *file)
     436                 : {
     437                 :     File        thisfile;
     438                 :     instr_time  io_start;
     439                 :     instr_time  io_time;
     440                 : 
     441 ECB             :     /*
     442                 :      * Advance to next component file if necessary and possible.
     443                 :      */
     444 GIC       54301 :     if (file->curOffset >= MAX_PHYSICAL_FILESIZE &&
     445 UIC           0 :         file->curFile + 1 < file->numFiles)
     446                 :     {
     447               0 :         file->curFile++;
     448 UNC           0 :         file->curOffset = 0;
     449                 :     }
     450 ECB             : 
     451 GBC       54301 :     thisfile = file->files[file->curFile];
     452                 : 
     453           54301 :     if (track_io_timing)
     454 UBC           0 :         INSTR_TIME_SET_CURRENT(io_start);
     455                 :     else
     456 GNC       54301 :         INSTR_TIME_SET_ZERO(io_start);
     457                 : 
     458                 :     /*
     459 ECB             :      * Read whatever we can get, up to a full bufferload.
     460                 :      */
     461 CBC      108602 :     file->nbytes = FileRead(thisfile,
     462 GBC       54301 :                             file->buffer.data,
     463                 :                             sizeof(file->buffer),
     464 ECB             :                             file->curOffset,
     465                 :                             WAIT_EVENT_BUFFILE_READ);
     466 GIC       54301 :     if (file->nbytes < 0)
     467                 :     {
     468 UIC           0 :         file->nbytes = 0;
     469 LBC           0 :         ereport(ERROR,
     470 ECB             :                 (errcode_for_file_access(),
     471                 :                  errmsg("could not read file \"%s\": %m",
     472                 :                         FilePathName(thisfile))));
     473                 :     }
     474                 : 
     475 GIC       54301 :     if (track_io_timing)
     476 EUB             :     {
     477 UBC           0 :         INSTR_TIME_SET_CURRENT(io_time);
     478 UNC           0 :         INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_read_time, io_time, io_start);
     479                 :     }
     480                 : 
     481                 :     /* we choose not to advance curOffset here */
     482 ECB             : 
     483 GIC       54301 :     if (file->nbytes > 0)
     484 GBC       52717 :         pgBufferUsage.temp_blks_read++;
     485           54301 : }
     486                 : 
     487                 : /*
     488                 :  * BufFileDumpBuffer
     489                 :  *
     490 ECB             :  * Dump buffer contents starting at curOffset.
     491                 :  * At call, should have dirty = true, nbytes > 0.
     492                 :  * On exit, dirty is cleared if successful write, and curOffset is advanced.
     493                 :  */
     494                 : static void
     495 GIC       59021 : BufFileDumpBuffer(BufFile *file)
     496                 : {
     497           59021 :     int         wpos = 0;
     498                 :     int         bytestowrite;
     499                 :     File        thisfile;
     500                 : 
     501                 :     /*
     502 ECB             :      * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it
     503                 :      * crosses a component-file boundary; so we need a loop.
     504                 :      */
     505 GIC      118042 :     while (wpos < file->nbytes)
     506                 :     {
     507                 :         off_t       availbytes;
     508                 :         instr_time  io_start;
     509                 :         instr_time  io_time;
     510                 : 
     511                 :         /*
     512 ECB             :          * Advance to next component file if necessary and possible.
     513                 :          */
     514 GIC       59021 :         if (file->curOffset >= MAX_PHYSICAL_FILESIZE)
     515                 :         {
     516 UIC           0 :             while (file->curFile + 1 >= file->numFiles)
     517               0 :                 extendBufFile(file);
     518               0 :             file->curFile++;
     519 UNC           0 :             file->curOffset = 0;
     520                 :         }
     521 ECB             : 
     522                 :         /*
     523 EUB             :          * Determine how much we need to write into this file.
     524                 :          */
     525 GBC       59021 :         bytestowrite = file->nbytes - wpos;
     526           59021 :         availbytes = MAX_PHYSICAL_FILESIZE - file->curOffset;
     527                 : 
     528 GIC       59021 :         if ((off_t) bytestowrite > availbytes)
     529 UIC           0 :             bytestowrite = (int) availbytes;
     530                 : 
     531 GIC       59021 :         thisfile = file->files[file->curFile];
     532 ECB             : 
     533 CBC       59021 :         if (track_io_timing)
     534 UIC           0 :             INSTR_TIME_SET_CURRENT(io_start);
     535                 :         else
     536 GNC       59021 :             INSTR_TIME_SET_ZERO(io_start);
     537 ECB             : 
     538 GBC       59021 :         bytestowrite = FileWrite(thisfile,
     539 GIC       59021 :                                  file->buffer.data + wpos,
     540 ECB             :                                  bytestowrite,
     541                 :                                  file->curOffset,
     542                 :                                  WAIT_EVENT_BUFFILE_WRITE);
     543 GBC       59021 :         if (bytestowrite <= 0)
     544 UIC           0 :             ereport(ERROR,
     545 ECB             :                     (errcode_for_file_access(),
     546                 :                      errmsg("could not write to file \"%s\": %m",
     547                 :                             FilePathName(thisfile))));
     548                 : 
     549 GIC       59021 :         if (track_io_timing)
     550                 :         {
     551 UIC           0 :             INSTR_TIME_SET_CURRENT(io_time);
     552 UNC           0 :             INSTR_TIME_ACCUM_DIFF(pgBufferUsage.temp_blk_write_time, io_time, io_start);
     553                 :         }
     554                 : 
     555 GIC       59021 :         file->curOffset += bytestowrite;
     556           59021 :         wpos += bytestowrite;
     557 ECB             : 
     558 GIC       59021 :         pgBufferUsage.temp_blks_written++;
     559 EUB             :     }
     560 GBC       59021 :     file->dirty = false;
     561                 : 
     562                 :     /*
     563 ECB             :      * At this point, curOffset has been advanced to the end of the buffer,
     564                 :      * ie, its original value + nbytes.  We need to make it point to the
     565                 :      * logical file position, ie, original value + pos, in case that is less
     566                 :      * (as could happen due to a small backwards seek in a dirty buffer!)
     567                 :      */
     568 CBC       59021 :     file->curOffset -= (file->nbytes - file->pos);
     569 GIC       59021 :     if (file->curOffset < 0)  /* handle possible segment crossing */
     570                 :     {
     571 UIC           0 :         file->curFile--;
     572               0 :         Assert(file->curFile >= 0);
     573               0 :         file->curOffset += MAX_PHYSICAL_FILESIZE;
     574                 :     }
     575                 : 
     576 ECB             :     /*
     577                 :      * Now we can set the buffer empty without changing the logical position
     578                 :      */
     579 GBC       59021 :     file->pos = 0;
     580           59021 :     file->nbytes = 0;
     581           59021 : }
     582                 : 
     583                 : /*
     584                 :  * BufFileRead variants
     585                 :  *
     586                 :  * Like fread() except we assume 1-byte element size and report I/O errors via
     587 ECB             :  * ereport().
     588                 :  *
     589                 :  * If 'exact' is true, then an error is also raised if the number of bytes
     590                 :  * read is not exactly 'size' (no short reads).  If 'exact' and 'eofOK' are
     591                 :  * true, then reading zero bytes is ok.
     592                 :  */
     593                 : static size_t
     594 GNC    16167292 : BufFileReadCommon(BufFile *file, void *ptr, size_t size, bool exact, bool eofOK)
     595                 : {
     596        16167292 :     size_t      start_size = size;
     597 GIC    16167292 :     size_t      nread = 0;
     598                 :     size_t      nthistime;
     599                 : 
     600        16167292 :     BufFileFlush(file);
     601                 : 
     602        32348235 :     while (size > 0)
     603                 :     {
     604        16182527 :         if (file->pos >= file->nbytes)
     605                 :         {
     606                 :             /* Try to load more data into buffer. */
     607 CBC       54301 :             file->curOffset += file->pos;
     608 GIC       54301 :             file->pos = 0;
     609 CBC       54301 :             file->nbytes = 0;
     610           54301 :             BufFileLoadBuffer(file);
     611 GIC       54301 :             if (file->nbytes <= 0)
     612            1584 :                 break;          /* no more data available */
     613 ECB             :         }
     614                 : 
     615 CBC    16180943 :         nthistime = file->nbytes - file->pos;
     616 GIC    16180943 :         if (nthistime > size)
     617 CBC    16129816 :             nthistime = size;
     618 GIC    16180943 :         Assert(nthistime > 0);
     619                 : 
     620 CBC    16180943 :         memcpy(ptr, file->buffer.data + file->pos, nthistime);
     621 ECB             : 
     622 CBC    16180943 :         file->pos += nthistime;
     623 GNC    16180943 :         ptr = (char *) ptr + nthistime;
     624 CBC    16180943 :         size -= nthistime;
     625        16180943 :         nread += nthistime;
     626                 :     }
     627                 : 
     628 GNC    16167292 :     if (exact &&
     629            1584 :         (nread != start_size && !(nread == 0 && eofOK)))
     630 UNC           0 :         ereport(ERROR,
     631                 :                 errcode_for_file_access(),
     632                 :                 file->name ?
     633                 :                 errmsg("could not read from file set \"%s\": read only %zu of %zu bytes",
     634                 :                        file->name, nread, start_size) :
     635                 :                 errmsg("could not read from temporary file: read only %zu of %zu bytes",
     636                 :                        nread, start_size));
     637                 : 
     638 CBC    16167292 :     return nread;
     639 ECB             : }
     640                 : 
     641                 : /*
     642                 :  * Legacy interface where the caller needs to check for end of file or short
     643                 :  * reads.
     644                 :  */
     645                 : size_t
     646 UNC           0 : BufFileRead(BufFile *file, void *ptr, size_t size)
     647                 : {
     648               0 :     return BufFileReadCommon(file, ptr, size, false, false);
     649                 : }
     650                 : 
     651                 : /*
     652                 :  * Require read of exactly the specified size.
     653                 :  */
     654                 : void
     655 GNC     9988299 : BufFileReadExact(BufFile *file, void *ptr, size_t size)
     656                 : {
     657         9988299 :     BufFileReadCommon(file, ptr, size, true, false);
     658         9988299 : }
     659                 : 
     660                 : /*
     661                 :  * Require read of exactly the specified size, but optionally allow end of
     662                 :  * file (in which case 0 is returned).
     663                 :  */
     664                 : size_t
     665         6178993 : BufFileReadMaybeEOF(BufFile *file, void *ptr, size_t size, bool eofOK)
     666                 : {
     667         6178993 :     return BufFileReadCommon(file, ptr, size, true, eofOK);
     668                 : }
     669                 : 
     670 ECB             : /*
     671                 :  * BufFileWrite
     672                 :  *
     673                 :  * Like fwrite() except we assume 1-byte element size and report errors via
     674                 :  * ereport().
     675                 :  */
     676                 : void
     677 GNC    13197936 : BufFileWrite(BufFile *file, const void *ptr, size_t size)
     678                 : {
     679                 :     size_t      nthistime;
     680 ECB             : 
     681 CBC    13197936 :     Assert(!file->readOnly);
     682 EUB             : 
     683 GIC    26418619 :     while (size > 0)
     684                 :     {
     685        13220683 :         if (file->pos >= BLCKSZ)
     686                 :         {
     687                 :             /* Buffer full, dump it out */
     688           38068 :             if (file->dirty)
     689           37600 :                 BufFileDumpBuffer(file);
     690 ECB             :             else
     691                 :             {
     692                 :                 /* Hmm, went directly from reading to writing? */
     693 GIC         468 :                 file->curOffset += file->pos;
     694             468 :                 file->pos = 0;
     695             468 :                 file->nbytes = 0;
     696                 :             }
     697                 :         }
     698 EUB             : 
     699 GIC    13220683 :         nthistime = BLCKSZ - file->pos;
     700 GBC    13220683 :         if (nthistime > size)
     701 GIC    13163779 :             nthistime = size;
     702        13220683 :         Assert(nthistime > 0);
     703                 : 
     704        13220683 :         memcpy(file->buffer.data + file->pos, ptr, nthistime);
     705                 : 
     706        13220683 :         file->dirty = true;
     707 CBC    13220683 :         file->pos += nthistime;
     708 GIC    13220683 :         if (file->nbytes < file->pos)
     709 CBC    13218748 :             file->nbytes = file->pos;
     710 GNC    13220683 :         ptr = (const char *) ptr + nthistime;
     711 GIC    13220683 :         size -= nthistime;
     712                 :     }
     713        13197936 : }
     714                 : 
     715                 : /*
     716                 :  * BufFileFlush
     717 ECB             :  *
     718                 :  * Like fflush(), except that I/O errors are reported with ereport().
     719                 :  */
     720                 : static void
     721 GIC    16201345 : BufFileFlush(BufFile *file)
     722                 : {
     723        16201345 :     if (file->dirty)
     724           21421 :         BufFileDumpBuffer(file);
     725                 : 
     726        16201345 :     Assert(!file->dirty);
     727        16201345 : }
     728                 : 
     729 ECB             : /*
     730                 :  * BufFileSeek
     731                 :  *
     732                 :  * Like fseek(), except that target position needs two values in order to
     733                 :  * work when logical filesize exceeds maximum value representable by off_t.
     734                 :  * We do not support relative seeks across more than that, however.
     735                 :  * I/O errors are reported by ereport().
     736                 :  *
     737                 :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     738                 :  * impossible seek is attempted.
     739                 :  */
     740                 : int
     741 CBC       58492 : BufFileSeek(BufFile *file, int fileno, off_t offset, int whence)
     742                 : {
     743                 :     int         newFile;
     744                 :     off_t       newOffset;
     745 ECB             : 
     746 CBC       58492 :     switch (whence)
     747 ECB             :     {
     748 GIC       58144 :         case SEEK_SET:
     749           58144 :             if (fileno < 0)
     750 UIC           0 :                 return EOF;
     751 CBC       58144 :             newFile = fileno;
     752           58144 :             newOffset = offset;
     753           58144 :             break;
     754              15 :         case SEEK_CUR:
     755                 : 
     756 ECB             :             /*
     757                 :              * Relative seek considers only the signed offset, ignoring
     758                 :              * fileno. Note that large offsets (> 1 GB) risk overflow in this
     759                 :              * add, unless we have 64-bit off_t.
     760                 :              */
     761 CBC          15 :             newFile = file->curFile;
     762              15 :             newOffset = (file->curOffset + file->pos) + offset;
     763              15 :             break;
     764 GIC         333 :         case SEEK_END:
     765 ECB             : 
     766                 :             /*
     767                 :              * The file size of the last file gives us the end offset of that
     768                 :              * file.
     769                 :              */
     770 GIC         333 :             newFile = file->numFiles - 1;
     771             333 :             newOffset = FileSize(file->files[file->numFiles - 1]);
     772             333 :             if (newOffset < 0)
     773 LBC           0 :                 ereport(ERROR,
     774                 :                         (errcode_for_file_access(),
     775 ECB             :                          errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     776                 :                                 FilePathName(file->files[file->numFiles - 1]),
     777                 :                                 file->name)));
     778 CBC         333 :             break;
     779 LBC           0 :         default:
     780 UIC           0 :             elog(ERROR, "invalid whence: %d", whence);
     781                 :             return EOF;
     782                 :     }
     783 GIC       58492 :     while (newOffset < 0)
     784                 :     {
     785 UIC           0 :         if (--newFile < 0)
     786               0 :             return EOF;
     787               0 :         newOffset += MAX_PHYSICAL_FILESIZE;
     788                 :     }
     789 GIC       58492 :     if (newFile == file->curFile &&
     790           58421 :         newOffset >= file->curOffset &&
     791           42591 :         newOffset <= file->curOffset + file->nbytes)
     792                 :     {
     793 ECB             :         /*
     794                 :          * Seek is to a point within existing buffer; we can just adjust
     795                 :          * pos-within-buffer, without flushing buffer.  Note this is OK
     796                 :          * whether reading or writing, but buffer remains dirty if we were
     797                 :          * writing.
     798                 :          */
     799 GIC       29681 :         file->pos = (int) (newOffset - file->curOffset);
     800 CBC       29681 :         return 0;
     801 ECB             :     }
     802 EUB             :     /* Otherwise, must reposition buffer, so flush any dirty data */
     803 CBC       28811 :     BufFileFlush(file);
     804 ECB             : 
     805                 :     /*
     806                 :      * At this point and no sooner, check for seek past last segment. The
     807                 :      * above flush could have created a new segment, so checking sooner would
     808                 :      * not work (at least not with this code).
     809                 :      */
     810                 : 
     811                 :     /* convert seek to "start of next seg" to "end of last seg" */
     812 GIC       28811 :     if (newFile == file->numFiles && newOffset == 0)
     813 ECB             :     {
     814 LBC           0 :         newFile--;
     815               0 :         newOffset = MAX_PHYSICAL_FILESIZE;
     816 ECB             :     }
     817 GIC       28811 :     while (newOffset > MAX_PHYSICAL_FILESIZE)
     818                 :     {
     819 UIC           0 :         if (++newFile >= file->numFiles)
     820               0 :             return EOF;
     821               0 :         newOffset -= MAX_PHYSICAL_FILESIZE;
     822 ECB             :     }
     823 CBC       28811 :     if (newFile >= file->numFiles)
     824 LBC           0 :         return EOF;
     825 EUB             :     /* Seek is OK! */
     826 GIC       28811 :     file->curFile = newFile;
     827           28811 :     file->curOffset = newOffset;
     828           28811 :     file->pos = 0;
     829           28811 :     file->nbytes = 0;
     830 CBC       28811 :     return 0;
     831 EUB             : }
     832                 : 
     833                 : void
     834 GIC       88619 : BufFileTell(BufFile *file, int *fileno, off_t *offset)
     835 ECB             : {
     836 GIC       88619 :     *fileno = file->curFile;
     837 GBC       88619 :     *offset = file->curOffset + file->pos;
     838           88619 : }
     839 EUB             : 
     840                 : /*
     841 ECB             :  * BufFileSeekBlock --- block-oriented seek
     842                 :  *
     843                 :  * Performs absolute seek to the start of the n'th BLCKSZ-sized block of
     844                 :  * the file.  Note that users of this interface will fail if their files
     845                 :  * exceed BLCKSZ * LONG_MAX bytes, but that is quite a lot; we don't work
     846                 :  * with tables bigger than that, either...
     847                 :  *
     848                 :  * Result is 0 if OK, EOF if not.  Logical position is not moved if an
     849                 :  * impossible seek is attempted.
     850                 :  */
     851                 : int
     852 CBC       56461 : BufFileSeekBlock(BufFile *file, long blknum)
     853                 : {
     854 GIC      112922 :     return BufFileSeek(file,
     855 CBC       56461 :                        (int) (blknum / BUFFILE_SEG_SIZE),
     856 GIC       56461 :                        (off_t) (blknum % BUFFILE_SEG_SIZE) * BLCKSZ,
     857                 :                        SEEK_SET);
     858                 : }
     859                 : 
     860                 : #ifdef NOT_USED
     861                 : /*
     862                 :  * BufFileTellBlock --- block-oriented tell
     863                 :  *
     864 ECB             :  * Any fractional part of a block in the current seek position is ignored.
     865                 :  */
     866 EUB             : long
     867                 : BufFileTellBlock(BufFile *file)
     868                 : {
     869 ECB             :     long        blknum;
     870                 : 
     871 EUB             :     blknum = (file->curOffset + file->pos) / BLCKSZ;
     872                 :     blknum += file->curFile * BUFFILE_SEG_SIZE;
     873                 :     return blknum;
     874                 : }
     875 ECB             : 
     876 EUB             : #endif
     877                 : 
     878 ECB             : /*
     879                 :  * Return the current fileset based BufFile size.
     880                 :  *
     881                 :  * Counts any holes left behind by BufFileAppend as part of the size.
     882                 :  * ereport()s on failure.
     883                 :  */
     884                 : int64
     885 GIC         142 : BufFileSize(BufFile *file)
     886 ECB             : {
     887                 :     int64       lastFileSize;
     888                 : 
     889 CBC         142 :     Assert(file->fileset != NULL);
     890 ECB             : 
     891                 :     /* Get the size of the last physical file. */
     892 GIC         142 :     lastFileSize = FileSize(file->files[file->numFiles - 1]);
     893             142 :     if (lastFileSize < 0)
     894 UIC           0 :         ereport(ERROR,
     895                 :                 (errcode_for_file_access(),
     896                 :                  errmsg("could not determine size of temporary file \"%s\" from BufFile \"%s\": %m",
     897                 :                         FilePathName(file->files[file->numFiles - 1]),
     898                 :                         file->name)));
     899                 : 
     900 GIC         142 :     return ((file->numFiles - 1) * (int64) MAX_PHYSICAL_FILESIZE) +
     901                 :         lastFileSize;
     902                 : }
     903                 : 
     904 ECB             : /*
     905                 :  * Append the contents of source file (managed within fileset) to
     906                 :  * end of target file (managed within same fileset).
     907                 :  *
     908                 :  * Note that operation subsumes ownership of underlying resources from
     909                 :  * "source".  Caller should never call BufFileClose against source having
     910                 :  * called here first.  Resource owners for source and target must match,
     911                 :  * too.
     912                 :  *
     913                 :  * This operation works by manipulating lists of segment files, so the
     914                 :  * file content is always appended at a MAX_PHYSICAL_FILESIZE-aligned
     915                 :  * boundary, typically creating empty holes before the boundary.  These
     916                 :  * areas do not contain any interesting data, and cannot be read from by
     917                 :  * caller.
     918                 :  *
     919                 :  * Returns the block number within target where the contents of source
     920                 :  * begins.  Caller should apply this as an offset when working off block
     921                 :  * positions that are in terms of the original BufFile space.
     922                 :  */
     923                 : long
     924 GIC          71 : BufFileAppend(BufFile *target, BufFile *source)
     925                 : {
     926              71 :     long        startBlock = target->numFiles * BUFFILE_SEG_SIZE;
     927              71 :     int         newNumFiles = target->numFiles + source->numFiles;
     928                 :     int         i;
     929                 : 
     930              71 :     Assert(target->fileset != NULL);
     931              71 :     Assert(source->readOnly);
     932              71 :     Assert(!source->dirty);
     933              71 :     Assert(source->fileset != NULL);
     934                 : 
     935              71 :     if (target->resowner != source->resowner)
     936 UIC           0 :         elog(ERROR, "could not append BufFile with non-matching resource owner");
     937 ECB             : 
     938 GIC          71 :     target->files = (File *)
     939              71 :         repalloc(target->files, sizeof(File) * newNumFiles);
     940             142 :     for (i = target->numFiles; i < newNumFiles; i++)
     941 CBC          71 :         target->files[i] = source->files[i - target->numFiles];
     942 GIC          71 :     target->numFiles = newNumFiles;
     943                 : 
     944 CBC          71 :     return startBlock;
     945 ECB             : }
     946 EUB             : 
     947                 : /*
     948                 :  * Truncate a BufFile created by BufFileCreateFileSet up to the given fileno
     949                 :  * and the offset.
     950                 :  */
     951                 : void
     952 CBC           9 : BufFileTruncateFileSet(BufFile *file, int fileno, off_t offset)
     953                 : {
     954 GIC           9 :     int         numFiles = file->numFiles;
     955               9 :     int         newFile = fileno;
     956               9 :     off_t       newOffset = file->curOffset;
     957                 :     char        segment_name[MAXPGPATH];
     958                 :     int         i;
     959                 : 
     960                 :     /*
     961                 :      * Loop over all the files up to the given fileno and remove the files
     962                 :      * that are greater than the fileno and truncate the given file up to the
     963                 :      * offset. Note that we also remove the given fileno if the offset is 0
     964                 :      * provided it is not the first file in which we truncate it.
     965                 :      */
     966              18 :     for (i = file->numFiles - 1; i >= fileno; i--)
     967                 :     {
     968               9 :         if ((i != fileno || offset == 0) && i != 0)
     969                 :         {
     970 UIC           0 :             FileSetSegmentName(segment_name, file->name, i);
     971               0 :             FileClose(file->files[i]);
     972               0 :             if (!FileSetDelete(file->fileset, segment_name, true))
     973               0 :                 ereport(ERROR,
     974                 :                         (errcode_for_file_access(),
     975                 :                          errmsg("could not delete fileset \"%s\": %m",
     976 ECB             :                                 segment_name)));
     977 UIC           0 :             numFiles--;
     978 LBC           0 :             newOffset = MAX_PHYSICAL_FILESIZE;
     979 ECB             : 
     980                 :             /*
     981                 :              * This is required to indicate that we have deleted the given
     982                 :              * fileno.
     983                 :              */
     984 LBC           0 :             if (i == fileno)
     985               0 :                 newFile--;
     986                 :         }
     987 ECB             :         else
     988 EUB             :         {
     989 GIC           9 :             if (FileTruncate(file->files[i], offset,
     990 ECB             :                              WAIT_EVENT_BUFFILE_TRUNCATE) < 0)
     991 LBC           0 :                 ereport(ERROR,
     992 ECB             :                         (errcode_for_file_access(),
     993                 :                          errmsg("could not truncate file \"%s\": %m",
     994                 :                                 FilePathName(file->files[i]))));
     995 GIC           9 :             newOffset = offset;
     996 ECB             :         }
     997                 :     }
     998                 : 
     999 GIC           9 :     file->numFiles = numFiles;
    1000                 : 
    1001                 :     /*
    1002                 :      * If the truncate point is within existing buffer then we can just adjust
    1003                 :      * pos within buffer.
    1004 ECB             :      */
    1005 GIC           9 :     if (newFile == file->curFile &&
    1006 CBC           9 :         newOffset >= file->curOffset &&
    1007               9 :         newOffset <= file->curOffset + file->nbytes)
    1008 ECB             :     {
    1009                 :         /* No need to reset the current pos if the new pos is greater. */
    1010 UIC           0 :         if (newOffset <= file->curOffset + file->pos)
    1011               0 :             file->pos = (int) (newOffset - file->curOffset);
    1012                 : 
    1013                 :         /* Adjust the nbytes for the current buffer. */
    1014               0 :         file->nbytes = (int) (newOffset - file->curOffset);
    1015                 :     }
    1016 GIC           9 :     else if (newFile == file->curFile &&
    1017               9 :              newOffset < file->curOffset)
    1018 ECB             :     {
    1019                 :         /*
    1020                 :          * The truncate point is within the existing file but prior to the
    1021                 :          * current position, so we can forget the current buffer and reset the
    1022 EUB             :          * current position.
    1023                 :          */
    1024 UBC           0 :         file->curOffset = newOffset;
    1025               0 :         file->pos = 0;
    1026 UIC           0 :         file->nbytes = 0;
    1027                 :     }
    1028 GIC           9 :     else if (newFile < file->curFile)
    1029 EUB             :     {
    1030                 :         /*
    1031                 :          * The truncate point is prior to the current file, so need to reset
    1032                 :          * the current position accordingly.
    1033                 :          */
    1034 UIC           0 :         file->curFile = newFile;
    1035               0 :         file->curOffset = newOffset;
    1036 UBC           0 :         file->pos = 0;
    1037               0 :         file->nbytes = 0;
    1038                 :     }
    1039                 :     /* Nothing to do, if the truncate point is beyond current file. */
    1040 GIC           9 : }
        

Generated by: LCOV version v1.16-55-g56c0a2a