NOTICE: This version of the NSF Unidata web site (archive.unidata.ucar.edu) is no longer being updated.
Current content can be found at unidata.ucar.edu.
To learn about what's going on, see About the Archive Site.
Dear all, using the classical NetCDF format here at the German Climate Computing Center, we observed that we could not fully utilize the theoretical performance of our hardware. So we took a close look at the reasons for this and found that the cause of this behavior is the caching scheme of NetCDF. In our use case it has a few distinct effects: It divides our large write calls into a large number of small operations, and, due to the large size of the records, it forces the fill values for a record to be written to disk before they are read back page by page and overwritten with sensible data. This combination achieves only 10% of the available single-stream performance.
The problem can be reduced sufficiently on our system by setting the filling policy to NC_NOFILL. Apparently, this enables our parallel filesystem to silently aggregate the many small write calls into larger ones. Since all modern systems already provide a transparent cache, utilizing the entire free RAM, the question arises in which scenarios the userland caching scheme is superior.
So we hacked the NetCDF source code to pass all reads and writes directly on to the OS, with a quite notable acceleration on our GPFS file system. This is what we measured with the attached benchmark:
With cache: 88 - 157 MiB/s (mean: 116)
Without cache: 274 - 435 MiB/s (mean: 371)
With cache and NC_NOFILL: 887 - 1041 MiB/s (mean: 981)
Without cache and NC_NOFILL: 967 - 1025 MiB/s (mean: 994)
Using a different benchmark, one that sets the cache page size to 100 MiB, we saw the following improvement:
With cache and NC_NOFILL: 316 - 397 MiB/s (mean: 351)
Without cache and NC_NOFILL: 725 - 839 MiB/s (mean: 780)
~80% of the improvement in the latter benchmark is due to less time being spent in the I/O syscalls. Not only do the unsatisfiable read requests drop out of the schedule, but the write requests are also handled faster and with more consistent timing.
Since there are probably quite a lot of scientists who use NetCDF in similar ways as our users do, we have prepared a patch for you to look at (you'll find it attached to this email). It passes all the tests that come with the NetCDF source code and allows switching between the caching and the non-caching version via the definition of a preprocessor macro in include/ncio.h. Therefore, the user can decide at compile time whether he/she wants to use the cache.
Maybe it would be nice to allow the user to turn off the cache at runtime, too. Our patch, however, tries to be as minimal as possible, so we left out that part. We hope it will be useful to the community the way it is.
Please let us know what you think about these issues. Maybe there is a better solution to this problem?
Nathanael Hübbe & Julian Kunkel http://wr.informatik.uni-hamburg.de/people/start http://wr.informatik.uni-hamburg.de/research/projects/icomex/cachelessnetcdf
diff -Naurb -x netcdf_f77.3 -x man4 -x ncgenyy.c -x putget.c netcdf-4.1.3/include/ncio.h netcdf-4.1.3-cacheless/include/ncio.h --- netcdf-4.1.3/include/ncio.h 2011-07-01 01:22:19.000000000 +0200 +++ netcdf-4.1.3-cacheless/include/ncio.h 2012-04-17 10:53:41.694628205 +0200 @@ -11,6 +11,13 @@ #include <sys/types.h> /* off_t */ #include "netcdf.h" +//Comment out to use the cache for output as well as for input. +#define DONT_USE_CACHE_FOR_OUTPUT + + +//Uncomment to get size and offset of all readin/writeout calls +//#define DEBUG_READIN_WRITEOUT + typedef struct ncio ncio; /* forward reference */ /* diff -Naurb -x netcdf_f77.3 -x man4 -x ncgenyy.c -x putget.c netcdf-4.1.3/libsrc/nc.c netcdf-4.1.3-cacheless/libsrc/nc.c --- netcdf-4.1.3/libsrc/nc.c 2011-07-01 01:22:22.000000000 +0200 +++ netcdf-4.1.3-cacheless/libsrc/nc.c 2012-04-16 17:15:28.338222474 +0200 @@ -315,7 +315,7 @@ assert(!NC_readonly(ncp)); assert(!NC_indef(ncp)); - + #ifndef DONT_USE_CACHE_FOR_OUTPUT status = ncp->nciop->get(ncp->nciop, NC_NUMRECS_OFFSET, NC_NUMRECS_EXTENT, RGN_WRITE, &xp); if(status != NC_NOERR) @@ -332,6 +332,14 @@ fClr(ncp->flags, NC_NDIRTY); return status; + #else + { + size_t nrecs = NC_get_numrecs(ncp), externNrecs; + size_t* writePtr = &externNrecs; + status = ncx_put_size_t(&writePtr, &nrecs); + return writeout(ncp->nciop->fd, NC_NUMRECS_OFFSET, NC_NUMRECS_EXTENT, &externNrecs); + } + #endif } @@ -783,7 +791,11 @@ fClr(ncp->flags, NC_CREAT | NC_INDEF); + #ifdef DONT_USE_CACHE_FOR_OUTPUT + return NC_NOERR; + #else return ncp->nciop->sync(ncp->nciop); + #endif } #ifdef LOCKNUMREC @@ -1109,7 +1121,9 @@ { status = NC_sync(ncp); /* flush buffers before any filesize comparisons */ +#ifndef DONT_USE_CACHE_FOR_OUTPUT (void) ncp->nciop->sync(ncp->nciop); +#endif } /* diff -Naurb -x netcdf_f77.3 -x man4 -x ncgenyy.c -x putget.c netcdf-4.1.3/libsrc/posixio.c netcdf-4.1.3-cacheless/libsrc/posixio.c --- netcdf-4.1.3/libsrc/posixio.c 2011-07-01 01:22:22.000000000 +0200 +++ 
netcdf-4.1.3-cacheless/libsrc/posixio.c 2012-04-16 17:31:26.590192472 +0200 @@ -762,6 +762,7 @@ ncio_px_move(ncio *const nciop, off_t to, off_t from, size_t nbytes, int rflags) { +#ifndef DONT_USE_CACHE_FOR_OUTPUT ncio_px *const pxp = (ncio_px *)nciop->pvt; int status = ENOERR; off_t lower; @@ -769,6 +770,10 @@ char *base; size_t diff; size_t extent; +#else + void* buffer; + int status = ENOERR; +#endif if(to == from) return ENOERR; /* NOOP */ @@ -776,6 +781,7 @@ if(fIsSet(rflags, RGN_WRITE) && !fIsSet(nciop->ioflags, NC_WRITE)) return EPERM; /* attempt to write readonly file */ +#ifndef DONT_USE_CACHE_FOR_OUTPUT rflags &= RGN_NOLOCK; /* filter unwanted flags */ if(to > from) @@ -859,6 +865,15 @@ (void) px_rel(pxp, lower, RGN_MODIFIED); return status; +#else + buffer = malloc(nbytes); + if(!buffer) return NC_ENOMEM; + if((status = readin(nciop->fd, from, nbytes, buffer))) goto cleanup; + if((status = writeout(nciop->fd, to, nbytes, buffer))) goto cleanup; +cleanup: + free(buffer); + return status; +#endif } @@ -873,9 +888,16 @@ if(fIsSet(pxp->bf_rflags, RGN_MODIFIED)) { assert(pxp->bf_refcount <= 0); + #ifndef DONT_USE_CACHE_FOR_OUTPUT status = px_pgout(nciop, pxp->bf_offset, pxp->bf_cnt, pxp->bf_base, &pxp->pos); + #else + status = writeout(nciop->fd, pxp->bf_offset, + pxp->bf_cnt, + pxp->bf_base); + #endif + if(status != ENOERR) return status; pxp->bf_rflags = 0; @@ -956,12 +978,14 @@ assert(pxp->bf_base == NULL); + pxp->bf_cnt = 0; + /* this is separate allocation because it may grow */ +#ifndef DONT_USE_CACHE_FOR_OUTPUT pxp->bf_base = malloc(bufsz); if(pxp->bf_base == NULL) return ENOMEM; /* else */ - pxp->bf_cnt = 0; if(isNew) { /* save a read */ @@ -970,6 +994,7 @@ pxp->bf_extent = bufsz; (void) memset(pxp->bf_base, 0, pxp->bf_extent); } +#endif return ENOERR; } @@ -1102,11 +1127,8 @@ #ifdef X_ALIGN rem = (size_t)(offset % X_ALIGN); - if(rem != 0) - { offset -= rem; extent += rem; - } { const size_t rndup = extent % X_ALIGN; @@ -1337,12 +1359,14 @@ 
assert(pxp->bf_base == NULL); /* this is separate allocation because it may grow */ +#ifndef DONT_USE_CACHE_FOR_OUTPUT pxp->bf_base = malloc(pxp->bf_extent); if(pxp->bf_base == NULL) { pxp->bf_extent = 0; return ENOMEM; } +#endif /* else */ return ENOERR; } @@ -1718,9 +1742,11 @@ if(!fIsSet(nciop->ioflags, NC_WRITE)) return EPERM; /* attempt to write readonly file */ +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = nciop->sync(nciop); if(status != ENOERR) return status; +#endif status = fgrow2(nciop->fd, length); if(status != ENOERR) @@ -1747,7 +1773,9 @@ if(nciop == NULL) return EINVAL; +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = nciop->sync(nciop); +#endif (void) close(nciop->fd); diff -Naurb -x netcdf_f77.3 -x man4 -x ncgenyy.c -x putget.c netcdf-4.1.3/libsrc/putget.m4 netcdf-4.1.3-cacheless/libsrc/putget.m4 --- netcdf-4.1.3/libsrc/putget.m4 2011-07-01 01:22:22.000000000 +0200 +++ netcdf-4.1.3-cacheless/libsrc/putget.m4 2012-04-16 17:40:15.990175897 +0200 @@ -62,6 +62,46 @@ } #endif /* ODEBUG */ +#ifdef DONT_USE_CACHE_FOR_OUTPUT + + int writeout(int fileDescriptor, off_t offset, size_t extent, const void *buffer) { + const char* cBuffer = (const char*)buffer; + ssize_t partial; + while((partial = pwrite(fileDescriptor, buffer, extent, offset)) != -1) { + #ifdef DEBUG_READIN_WRITEOUT + fprintf(stderr, "Writing... size: %lld offset:%lld \n", (long long)extent, (long long)offset); + #endif + + if(partial == extent) break; + cBuffer += partial; + offset += partial; + extent -= partial; + } + if(partial == -1) return errno; + return ENOERR; + } + + int readin(int fileDescriptor, off_t offset, size_t extent, void *buffer) { + char* cBuffer = (char*)buffer; + ssize_t partial; + while((partial = pread(fileDescriptor, buffer, extent, offset)) != -1) { + #ifdef DEBUG_READIN_WRITEOUT + fprintf(stderr, "Reading... size: %lld offset:%lld \n", (long long)extent, (long long)offset); + #endif + if(!partial) { //Handle reads beyond the end of file by faking to have read zeros. 
+ for(; extent > 0; cBuffer++, offset++, extent--) *cBuffer = 0; + break; + } + if(partial == extent) break; + cBuffer += partial; + offset += partial; + extent -= partial; + } + if(partial == -1) return errno; + return ENOERR; + } + +#endif /* Begin fill */ /* @@ -152,6 +192,10 @@ void *xp; int status = NC_NOERR; + + + + /* * Set up fill value */ @@ -233,17 +277,42 @@ } assert(remaining > 0); - for(;;) + #ifdef DONT_USE_CACHE_FOR_OUTPUT { + void* outputBuffer = malloc(ncp->chunk); + char* xp = (char*)outputBuffer; + size_t ii, rem; + + // fill the chunksz buffer in units of xsz + for(ii = 0; ii < ncp->chunk/xsz; ii++) { + (void) memcpy(xp, xfillp, xsz); + xp += xsz; + } + // Deal with any remainder + rem = ncp->chunk - ii*xsz; + if(rem != 0) (void) memcpy(xp, xfillp, rem); + + // Write the data to disk. + for(;;) { const size_t chunksz = MIN(remaining, ncp->chunk); - size_t ii; - status = ncp->nciop->get(ncp->nciop, offset, chunksz, - RGN_WRITE, &xp); - if(status != NC_NOERR) - { - return status; + status = writeout(ncp->nciop->fd, offset, chunksz, outputBuffer); + + if(status != NC_NOERR) break; + + remaining -= chunksz; + if(remaining == 0) break; /* normal loop exit */ + offset += chunksz; } + free(outputBuffer); + } + #else + for(;;) { + const size_t chunksz = MIN(remaining, ncp->chunk); + size_t ii; + + status = ncp->nciop->get(ncp->nciop, offset, chunksz, RGN_WRITE, &xp); + if(status != NC_NOERR) return status; /* * fill the chunksz buffer in units of xsz @@ -279,6 +348,7 @@ offset += chunksz; } + #endif return status; } @@ -654,18 +724,31 @@ `dnl static int putNCvx_$1_$2(NC *ncp, const NC_var *varp, - const size_t *start, size_t nelems, const $2 *value) + const size_t *start, size_t nelems, const $2 *data) { + int status = NC_NOERR; off_t offset = NC_varoffset(ncp, varp, start); + if(nelems == 0) return NC_NOERR; + assert(data != NULL); + + #ifdef DONT_USE_CACHE_FOR_OUTPUT + { + size_t dataSize = varp->xsz * nelems; + $1* const buffer = 
($1*)malloc(dataSize); + void* bufferPointer = (void*)buffer; + int tempStatus; + + // The ncx_putn_XXX methods only return one type of error (NC_ERANGE), which is expected not to be fatal. + status |= tempStatus = ncx_putn_$1_$2(&bufferPointer, nelems, data); + assert(!tempStatus || tempStatus == NC_ERANGE); //If this fails, an ncx_putn_XXX method has been changed to break the assumption above. + if(!(status = writeout(ncp->nciop->fd, offset, dataSize, buffer))) status = tempStatus; + free(buffer); + } + #else + { size_t remaining = varp->xsz * nelems; - int status = NC_NOERR; void *xp; - if(nelems == 0) - return NC_NOERR; - - assert(value != NULL); - for(;;) { size_t extent = MIN(remaining, ncp->chunk); @@ -676,7 +759,7 @@ if(lstatus != NC_NOERR) return lstatus; - lstatus = ncx_putn_$1_$2(&xp, nput, value); + lstatus = ncx_putn_$1_$2(&xp, nput, data); if(lstatus != NC_NOERR && status == NC_NOERR) { /* not fatal to the loop */ @@ -690,10 +773,11 @@ if(remaining == 0) break; /* normal loop exit */ offset += extent; - value += nput; + data += nput; } - + } + #endif return status; } ')dnl @@ -772,7 +856,17 @@ return NC_NOERR; assert(value != NULL); + #ifdef DONT_USE_CACHE_FOR_OUTPUT + { + $1* const buffer = ($1*)malloc(remaining); + void* bufferPointer = (void*)buffer; + status = readin(ncp->nciop->fd, offset, remaining, buffer); + status = ncx_getn_$1_$2(&bufferPointer, nelems, value); + + free(buffer); + } + #else for(;;) { size_t extent = MIN(remaining, ncp->chunk); @@ -795,6 +889,7 @@ offset += extent; value += nget; } + #endif return status; } diff -Naurb -x netcdf_f77.3 -x man4 -x ncgenyy.c -x putget.c netcdf-4.1.3/libsrc/v1hpg.c netcdf-4.1.3-cacheless/libsrc/v1hpg.c --- netcdf-4.1.3/libsrc/v1hpg.c 2011-07-01 01:22:22.000000000 +0200 +++ netcdf-4.1.3-cacheless/libsrc/v1hpg.c 2012-04-17 10:53:27.770628641 +0200 @@ -29,11 +29,12 @@ /* - * "magic number" at beginning of file: 0x43444601 (big endian) + * "magic number" at beginning of file: 0x43444601 (big endian) 
or 0x43444602 (for 64 Bit offset files) * assert(sizeof(ncmagic) % X_ALIGN == 0); */ -static const schar ncmagic[] = {'C', 'D', 'F', 0x02}; -static const schar ncmagic1[] = {'C', 'D', 'F', 0x01}; +#define NC_MAGIC_NUMBER_SIZE 4 +static const schar ncmagic[NC_MAGIC_NUMBER_SIZE] = {'C', 'D', 'F', 0x02}; +static const schar ncmagic1[NC_MAGIC_NUMBER_SIZE] = {'C', 'D', 'F', 0x01}; /* @@ -74,6 +75,7 @@ return status; } +#ifndef DONT_USE_CACHE_FOR_OUTPUT /* * Release the current chunk and get the next one. @@ -99,6 +101,7 @@ status = gsp->nciop->get(gsp->nciop, gsp->offset, gsp->extent, gsp->flags, &gsp->base); + if(status) return status; @@ -127,16 +130,32 @@ return fault_v1hs(gsp, nextread); } +#endif + /* End v1hs */ +#define returnErrors(functionCall) {\ + int status = functionCall;\ + if(status) return status;\ +} + /* Write a size_t to the header */ static int v1h_put_size_t(v1hs *psp, const size_t *sp) { +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status = check_v1hs(psp, X_SIZEOF_SIZE_T); if(status != ENOERR) return status; return ncx_put_size_t(&psp->pos, sp); +#else + size_t externalValue; + size_t* externalPointer = &externalValue; + returnErrors(ncx_put_size_t((void**)&externalPointer, sp)); //Do byte reversal, if necessary. + returnErrors(writeout(psp->nciop->fd, psp->pos, X_SIZEOF_SIZE_T, (void*)&externalValue)); //Write it out. + psp->pos = (void*)((char*)psp->pos + X_SIZEOF_SIZE_T); //Update the write position. + return ENOERR; +#endif } @@ -144,10 +163,19 @@ static int v1h_get_size_t(v1hs *gsp, size_t *sp) { +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status = check_v1hs(gsp, X_SIZEOF_SIZE_T); if(status != ENOERR) return status; return ncx_get_size_t((const void **)(&gsp->pos), sp); +#else + size_t externalValue; + size_t* externalPointer = &externalValue; + returnErrors(readin(gsp->nciop->fd, gsp->pos, X_SIZEOF_SIZE_T, (void*)&externalValue)); //Get the value from disk. 
+ returnErrors(ncx_get_size_t((const void **)&externalPointer, sp)); //Do byte reversal, if necessary. + gsp->pos = (void*)((char*)gsp->pos + X_SIZEOF_SIZE_T); //Update the read position. + return ENOERR; +#endif } @@ -160,12 +188,20 @@ v1h_put_nc_type(v1hs *psp, const nc_type *typep) { const int itype = (int) *typep; +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status = check_v1hs(psp, X_SIZEOF_INT); if(status != ENOERR) return status; status = ncx_put_int_int(psp->pos, &itype); psp->pos = (void *)((char *)psp->pos + X_SIZEOF_INT); return status; +#else + int externalValue; + returnErrors(ncx_put_int_int(&externalValue, &itype)); //Do byte reversal, if necessary. + returnErrors(writeout(psp->nciop->fd, psp->pos, X_SIZEOF_INT, (void*)&externalValue)); //Write it out. + psp->pos = (void*)((char*)psp->pos + X_SIZEOF_INT); //Update the write position. + return ENOERR; +#endif } @@ -174,6 +210,7 @@ v1h_get_nc_type(v1hs *gsp, nc_type *typep) { int type = 0; +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status = check_v1hs(gsp, X_SIZEOF_INT); if(status != ENOERR) return status; @@ -181,6 +218,12 @@ gsp->pos = (void *)((char *)gsp->pos + X_SIZEOF_INT); if(status != ENOERR) return status; +#else + int externalValue; + returnErrors(readin(gsp->nciop->fd, gsp->pos, X_SIZEOF_INT, (void*)&externalValue)); //Get the value from disk. + returnErrors(ncx_get_int_int(&externalValue, &type)); //Do byte reversal, if necessary. + gsp->pos = (void*)((char*)gsp->pos + X_SIZEOF_INT); //Update the read position. +#endif assert(type == NC_BYTE || type == NC_CHAR @@ -205,12 +248,20 @@ v1h_put_NCtype(v1hs *psp, NCtype type) { const int itype = (int) type; +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status = check_v1hs(psp, X_SIZEOF_INT); if(status != ENOERR) return status; status = ncx_put_int_int(psp->pos, &itype); psp->pos = (void *)((char *)psp->pos + X_SIZEOF_INT); return status; +#else + int externalValue; + returnErrors(ncx_put_int_int((void*)&externalValue, &itype)); //Do byte reversal, if necessary. 
+ returnErrors(writeout(psp->nciop->fd, psp->pos, X_SIZEOF_INT, (void*)&externalValue)); //Write it out. + psp->pos = (void*)((char*)psp->pos + X_SIZEOF_INT); //Update the write position. + return ENOERR; +#endif } /* Read a NCtype from the header */ @@ -218,6 +269,7 @@ v1h_get_NCtype(v1hs *gsp, NCtype *typep) { int type = 0; +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status = check_v1hs(gsp, X_SIZEOF_INT); if(status != ENOERR) return status; @@ -225,6 +277,12 @@ gsp->pos = (void *)((char *)gsp->pos + X_SIZEOF_INT); if(status != ENOERR) return status; +#else + int externalValue; + returnErrors(readin(gsp->nciop->fd, gsp->pos, X_SIZEOF_INT, (void*)&externalValue)); //Get the value from disk. + returnErrors(ncx_get_int_int(&externalValue, &type)); //Do byte reversal, if necessary. + gsp->pos = (void*)((char*)gsp->pos + X_SIZEOF_INT); //Update the read position. +#endif /* else */ *typep = (NCtype) type; return ENOERR; @@ -262,6 +320,7 @@ static int v1h_put_NC_string(v1hs *psp, const NC_string *ncstrp) { +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status; #if 0 @@ -279,6 +338,18 @@ return status; return ENOERR; +#else + size_t externalSize = ncx_len_NC_string(ncstrp); + char buffer[externalSize]; + //Write the external representation into the buffer. + void* writePointer = (void*)buffer; + returnErrors(ncx_put_size_t(&writePointer, &ncstrp->nchars)); + returnErrors(ncx_pad_putn_text(&writePointer, ncstrp->nchars, ncstrp->cp)); + //Write it out. + returnErrors(writeout(psp->nciop->fd, psp->pos, externalSize, (void*)buffer)); + psp->pos = (void*)((char*)psp->pos + externalSize); //Update the write position. 
+ return ENOERR; +#endif } @@ -300,7 +371,7 @@ return NC_ENOMEM; } - +#ifndef DONT_USE_CACHE_FOR_OUTPUT #if 0 /* assert(ncstrp->nchars == nchars || ncstrp->nchars - nchars < X_ALIGN); */ assert(ncstrp->nchars % X_ALIGN == 0); @@ -316,9 +387,12 @@ nchars, ncstrp->cp); if(status != ENOERR) goto unwind_alloc; +#else + returnErrors(readin(gsp->nciop->fd, gsp->pos, nchars, (void*)ncstrp->cp)); //Get the string from disk. + gsp->pos = (void*)((char*)gsp->pos + _RNDUP(nchars, X_ALIGN)); //Update the read position. +#endif *ncstrpp = ncstrp; - return ENOERR; unwind_alloc: @@ -456,7 +530,6 @@ return ENOERR; } /* else */ - status = v1h_put_NCtype(psp, NC_DIMENSION); if(status != ENOERR) return status; @@ -563,6 +636,7 @@ static int v1h_put_NC_attrV(v1hs *psp, const NC_attr *attrp) { +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status; const size_t perchunk = psp->extent; size_t remaining = attrp->xsz; @@ -587,6 +661,11 @@ } while(remaining != 0); return ENOERR; +#else // I love replacements like this :-) + returnErrors(writeout(psp->nciop->fd, psp->pos, attrp->xsz, attrp->xvalue)); //Write it out. + psp->pos = (void*)((char*)psp->pos + attrp->xsz); //Update the write position. + return ENOERR; +#endif } /* Write a NC_attr to the header */ @@ -623,6 +702,7 @@ static int v1h_get_NC_attrV(v1hs *gsp, NC_attr *attrp) { +#ifndef DONT_USE_CACHE_FOR_OUTPUT int status; const size_t perchunk = gsp->extent; size_t remaining = attrp->xsz; @@ -645,6 +725,11 @@ } while(remaining != 0); return ENOERR; +#else + returnErrors(readin(gsp->nciop->fd, gsp->pos, attrp->xsz, attrp->xvalue)); //Get the string from disk. + gsp->pos = (void*)((char*)gsp->pos + attrp->xsz); //Update the read position. 
+ return ENOERR; +#endif } @@ -859,6 +944,7 @@ if(status != ENOERR) return status; +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = check_v1hs(psp, ncx_len_int(varp->ndims)); if(status != ENOERR) return status; @@ -866,6 +952,16 @@ varp->ndims, varp->dimids); if(status != ENOERR) return status; +#else + { + size_t bufferSize = ncx_len_int(varp->ndims); + char buffer[bufferSize]; + void* writePointer = (void*)buffer; + returnErrors(ncx_putn_int_int(&writePointer, varp->ndims, varp->dimids)); + returnErrors(writeout(psp->nciop->fd, psp->pos, bufferSize, buffer)); //Write it out. + psp->pos = (void*)((char*)psp->pos + bufferSize); //Update the write position. + } +#endif status = v1h_put_NC_attrarray(psp, &varp->attrs); if(status != ENOERR) @@ -879,12 +975,23 @@ if(status != ENOERR) return status; +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = check_v1hs(psp, psp->version == 1 ? 4 : 8); if(status != ENOERR) return status; status = ncx_put_off_t(&psp->pos, &varp->begin, psp->version == 1 ? 4 : 8); if(status != ENOERR) return status; +#else + { + size_t bufferSize = (psp->version == 1) ? 4 : 8; + char buffer[bufferSize]; + void* writePointer = (void*)buffer; + returnErrors(ncx_put_off_t(&writePointer, &varp->begin, bufferSize)); + returnErrors(writeout(psp->nciop->fd, psp->pos, bufferSize, buffer)); //Write it out. + psp->pos = (void*)((char*)psp->pos + bufferSize); //Update the write position. + } +#endif return ENOERR; } @@ -914,6 +1021,7 @@ goto unwind_name; } +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = check_v1hs(gsp, ncx_len_int(ndims)); if(status != ENOERR) goto unwind_alloc; @@ -921,6 +1029,18 @@ ndims, varp->dimids); if(status != ENOERR) goto unwind_alloc; +#else + { + size_t bufferSize = ncx_len_int(varp->ndims); + char buffer[bufferSize]; + void* readPointer = (void*)buffer; + status = readin(gsp->nciop->fd, gsp->pos, bufferSize, (void*)buffer); //Get the value from disk. 
+ if(status) goto unwind_alloc; + status = ncx_getn_int_int((const void **)&readPointer, ndims, varp->dimids); //Do byte reversal, if necessary. + if(status) goto unwind_alloc; + gsp->pos = (void*)((char*)gsp->pos + bufferSize); //Update the read position. + } +#endif status = v1h_get_NC_attrarray(gsp, &varp->attrs); if(status != ENOERR) @@ -934,6 +1054,7 @@ if(status != ENOERR) goto unwind_alloc; +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = check_v1hs(gsp, gsp->version == 1 ? 4 : 8); if(status != ENOERR) goto unwind_alloc; @@ -941,6 +1062,18 @@ &varp->begin, gsp->version == 1 ? 4 : 8); if(status != ENOERR) goto unwind_alloc; +#else + { + size_t bufferSize = (gsp->version == 1) ? 4 : 8; + char buffer[bufferSize]; + void* readPointer = (void*)buffer; + status = readin(gsp->nciop->fd, gsp->pos, bufferSize, (void*)buffer); //Get the value from disk. + if(status) goto unwind_alloc; + status = ncx_get_off_t((const void **)&readPointer, &varp->begin, bufferSize); //Do byte reversal, if necessary. + if(status) goto unwind_alloc; + gsp->pos = (void*)((char*)gsp->pos + bufferSize); //Update the read position. + } +#endif *varpp = varp; return ENOERR; @@ -1218,36 +1351,55 @@ ps.offset = 0; ps.extent = extent; ps.base = NULL; - ps.pos = ps.base; +#ifndef DONT_USE_CACHE_FOR_OUTPUT + ps.pos = ps.base; status = fault_v1hs(&ps, extent); if(status) return status; +#else + ps.pos = NULL; +#endif } else { ps.offset = offset; ps.extent = extent; ps.base = *xpp; - ps.pos = ps.base; ps.end = (char *)ps.base + ps.extent; +#ifndef DONT_USE_CACHE_FOR_OUTPUT + ps.pos = ps.base; +#else + ps.pos = NULL; +#endif } - if (ps.version == 2) - status = ncx_putn_schar_schar(&ps.pos, sizeof(ncmagic), ncmagic); - else - status = ncx_putn_schar_schar(&ps.pos, sizeof(ncmagic1), ncmagic1); +#ifndef DONT_USE_CACHE_FOR_OUTPUT + status = ncx_putn_schar_schar(&ps.pos, NC_MAGIC_NUMBER_SIZE, (ps.version == 2) ? 
ncmagic : ncmagic1); if(status != ENOERR) goto release; +#else + { + status = writeout(ps.nciop->fd, ps.pos, NC_MAGIC_NUMBER_SIZE, (ps.version == 2) ? ncmagic : ncmagic1); //Write it out. + if(status) goto release; + ps.pos = (void*)((char*)ps.pos + NC_MAGIC_NUMBER_SIZE); //Update the write position. + } +#endif { const size_t nrecs = NC_get_numrecs(ncp); +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = ncx_put_size_t(&ps.pos, &nrecs); +#else + status = v1h_put_size_t(&ps, &nrecs); +#endif if(status != ENOERR) goto release; } +#ifndef DONT_USE_CACHE_FOR_OUTPUT assert((char *)ps.pos < (char *)ps.end); +#endif status = v1h_put_NC_dimarray(&ps, &ncp->dims); if(status != ENOERR) @@ -1262,8 +1414,9 @@ goto release; release: +#ifndef DONT_USE_CACHE_FOR_OUTPUT (void) rel_v1hs(&ps); - +#endif return status; } @@ -1285,7 +1438,11 @@ gs.flags = 0; gs.version = 0; gs.base = NULL; +#ifndef DONT_USE_CACHE_FOR_OUTPUT gs.pos = gs.base; +#else + gs.pos = NULL; +#endif { /* @@ -1323,6 +1480,7 @@ * Invalidate the I/O buffers to force a read of the header * region. */ +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = gs.nciop->sync(gs.nciop); if(status) return status; @@ -1330,19 +1488,25 @@ status = fault_v1hs(&gs, extent); if(status) return status; +#endif } /* get the header from the stream gs */ { /* Get & check magic number */ - schar magic[sizeof(ncmagic)]; - (void) memset(magic, 0, sizeof(magic)); - + schar magic[NC_MAGIC_NUMBER_SIZE]; + (void) memset(magic, 0, NC_MAGIC_NUMBER_SIZE); +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = ncx_getn_schar_schar( (const void **)(&gs.pos), sizeof(magic), magic); if(status != ENOERR) goto unwind_get; +#else + status = readin(gs.nciop->fd, gs.pos, NC_MAGIC_NUMBER_SIZE, (void*)magic); //Get the value from disk. + if(status) goto unwind_get; + gs.pos = (void*)((char*)gs.pos + NC_MAGIC_NUMBER_SIZE); //Update the read position. 
+#endif if(memcmp(magic, ncmagic, sizeof(ncmagic)-1) != 0) { @@ -1369,13 +1533,19 @@ { size_t nrecs = 0; +#ifndef DONT_USE_CACHE_FOR_OUTPUT status = ncx_get_size_t((const void **)(&gs.pos), &nrecs); +#else + status = v1h_get_size_t(&gs, &nrecs); +#endif if(status != ENOERR) goto unwind_get; NC_set_numrecs(ncp, nrecs); } +#ifndef DONT_USE_CACHE_FOR_OUTPUT assert((char *)gs.pos < (char *)gs.end); +#endif status = v1h_get_NC_dimarray(&gs, &ncp->dims); if(status != ENOERR) @@ -1396,6 +1566,8 @@ goto unwind_get; unwind_get: +#ifndef DONT_USE_CACHE_FOR_OUTPUT (void) rel_v1hs(&gs); +#endif return status; }
#define _GNU_SOURCE #include <stdlib.h> #include <stdio.h> #include <sys/time.h> #include <netcdf.h> #include <ncio.h> const int kDimensionCount = 3; const int kVariableCount = 40; const int kRecordCount = 5; const size_t kDimensionSizes[] = {1, 20000, 100}; int main(){ int outputFile; int ret, intTrash; size_t chunkSize = 8*1000*1000; int varIds[kVariableCount], dimIds[kDimensionCount]; size_t startCoords[kDimensionCount]; long i, floatCount; char name[100]; //I know this is bad, but I can't use glibc on the blizzard, so there's no asprintf available. struct timeval startTime, endTime; double elapsedTime; #ifdef DONT_USE_CACHE_FOR_OUTPUT printf("Not using cache for output.\n"); #else printf("Using cache.\n"); #endif #ifdef DONT_INITIALIZE_DATA printf("Not initializing data.\n"); #else printf("Initializing data.\n"); #endif if(gettimeofday(&startTime, NULL)) return -1; // ret = nc__create("bench.nc", NC_CLASSIC_MODEL, 1024*1024, &chunkSize, &outputFile); ret = nc_create("bench.nc", NC_CLASSIC_MODEL, &outputFile); // ret = nc_set_fill(outputFile, NC_NOFILL, &intTrash); printf("Using NC_NOFILL\n"); //write the dimension descriptions floatCount = 1; for(i = 0; i < kDimensionCount; i++) { int outputDim; if(snprintf(name, sizeof(name), "dimension %li", i+1) <= 0) return -1; if(i) { if(nc_def_dim(outputFile, name, kDimensionSizes[i], &dimIds[i])) return -1; floatCount *= kDimensionSizes[i]; } else { if(nc_def_dim(outputFile, name, NC_UNLIMITED, &dimIds[i])) return -1; } } //write the variable descriptions for(i = 0; i < kVariableCount; i++) { if(snprintf(name, sizeof(name), "variable %li", i+1) <= 0) return -1; if(nc_def_var(outputFile, name, NC_FLOAT, kDimensionCount, dimIds, &varIds[i])) return -1; } if(nc_enddef(outputFile)) return -1; //fake some data float* data = (float*)malloc(sizeof(float)*floatCount); for(i = 0; i < floatCount; i++) data[i] = (float)i; //write it many times for(i = 0; i < kDimensionCount; i++) startCoords[i] = 0; for(i = 0; i < kRecordCount; 
i++) { startCoords[0] = i; long curVar; for(curVar = 0; curVar < kVariableCount; curVar++) { nc_put_vara_float(outputFile, varIds[curVar], startCoords, kDimensionSizes, data); } } nc_close(outputFile); if(gettimeofday(&endTime, NULL)) return -1; elapsedTime = (double)(endTime.tv_sec - startTime.tv_sec) + (endTime.tv_usec - startTime.tv_usec)/(double)1000000; printf("Elapsed time: %.6fs\n", elapsedTime); printf("Speed: %.3fMiByte/s\n", floatCount*kRecordCount*kVariableCount*4/(1024*1024*elapsedTime)); }
netcdfgroup
archives: