root/lib/tdb/common/io.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. tdb_oob
  2. tdb_write
  3. tdb_convert
  4. tdb_read
  5. tdb_next_hash_chain
  6. tdb_munmap
  7. tdb_mmap
  8. tdb_expand_file
  9. tdb_expand
  10. tdb_ofs_read
  11. tdb_ofs_write
  12. tdb_alloc_read
  13. tdb_parse_data
  14. tdb_rec_read
  15. tdb_rec_write
  16. tdb_io_init

   1  /* 
   2    Unix SMB/CIFS implementation.
   3 
   4    trivial database library
   5 
   6    Copyright (C) Andrew Tridgell              1999-2005
   7    Copyright (C) Paul `Rusty' Russell              2000
   8    Copyright (C) Jeremy Allison                    2000-2003
   9    
  10      ** NOTE! The following LGPL license applies to the tdb
  11      ** library. This does NOT imply that all of Samba is released
  12      ** under the LGPL
  13    
  14    This library is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU Lesser General Public
  16    License as published by the Free Software Foundation; either
  17    version 3 of the License, or (at your option) any later version.
  18 
  19    This library is distributed in the hope that it will be useful,
  20    but WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    Lesser General Public License for more details.
  23 
  24    You should have received a copy of the GNU Lesser General Public
  25    License along with this library; if not, see <http://www.gnu.org/licenses/>.
  26 */
  27 
  28 
  29 #include "tdb_private.h"
  30 
  31 /* check for an out of bounds access - if it is out of bounds then
  32    see if the database has been expanded by someone else and expand
  33    if necessary 
  34    note that "len" is the minimum length needed for the db
  35 */
  36 static int tdb_oob(struct tdb_context *tdb, tdb_off_t len, int probe)
     /* [<][>][^][v][top][bottom][index][help] */
  37 {
  38         struct stat st;
  39         if (len <= tdb->map_size)
  40                 return 0;
  41         if (tdb->flags & TDB_INTERNAL) {
  42                 if (!probe) {
  43                         /* Ensure ecode is set for log fn. */
  44                         tdb->ecode = TDB_ERR_IO;
  45                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond internal malloc size %d\n",
  46                                  (int)len, (int)tdb->map_size));
  47                 }
  48                 return TDB_ERRCODE(TDB_ERR_IO, -1);
  49         }
  50 
  51         if (fstat(tdb->fd, &st) == -1) {
  52                 return TDB_ERRCODE(TDB_ERR_IO, -1);
  53         }
  54 
  55         if (st.st_size < (size_t)len) {
  56                 if (!probe) {
  57                         /* Ensure ecode is set for log fn. */
  58                         tdb->ecode = TDB_ERR_IO;
  59                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_oob len %d beyond eof at %d\n",
  60                                  (int)len, (int)st.st_size));
  61                 }
  62                 return TDB_ERRCODE(TDB_ERR_IO, -1);
  63         }
  64 
  65         /* Unmap, update size, remap */
  66         if (tdb_munmap(tdb) == -1)
  67                 return TDB_ERRCODE(TDB_ERR_IO, -1);
  68         tdb->map_size = st.st_size;
  69         tdb_mmap(tdb);
  70         return 0;
  71 }
  72 
  73 /* write a lump of data at a specified offset */
  74 static int tdb_write(struct tdb_context *tdb, tdb_off_t off, 
     /* [<][>][^][v][top][bottom][index][help] */
  75                      const void *buf, tdb_len_t len)
  76 {
  77         if (len == 0) {
  78                 return 0;
  79         }
  80 
  81         if (tdb->read_only || tdb->traverse_read) {
  82                 tdb->ecode = TDB_ERR_RDONLY;
  83                 return -1;
  84         }
  85 
  86         if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0)
  87                 return -1;
  88 
  89         if (tdb->map_ptr) {
  90                 memcpy(off + (char *)tdb->map_ptr, buf, len);
  91         } else {
  92                 ssize_t written = pwrite(tdb->fd, buf, len, off);
  93                 if ((written != (ssize_t)len) && (written != -1)) {
  94                         /* try once more */
  95                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: wrote only "
  96                                  "%d of %d bytes at %d, trying once more\n",
  97                                  (int)written, len, off));
  98                         errno = ENOSPC;
  99                         written = pwrite(tdb->fd, (const void *)((const char *)buf+written),
 100                                          len-written,
 101                                          off+written);
 102                 }
 103                 if (written == -1) {
 104                 /* Ensure ecode is set for log fn. */
 105                 tdb->ecode = TDB_ERR_IO;
 106                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_write failed at %d "
 107                                  "len=%d (%s)\n", off, len, strerror(errno)));
 108                         return TDB_ERRCODE(TDB_ERR_IO, -1);
 109                 } else if (written != (ssize_t)len) {
 110                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "tdb_write: failed to "
 111                                  "write %d bytes at %d in two attempts\n",
 112                                  len, off));
 113                         errno = ENOSPC;
 114                 return TDB_ERRCODE(TDB_ERR_IO, -1);
 115         }
 116         }
 117         return 0;
 118 }
 119 
 120 /* Endian conversion: we only ever deal with 4 byte quantities */
 121 void *tdb_convert(void *buf, uint32_t size)
     /* [<][>][^][v][top][bottom][index][help] */
 122 {
 123         uint32_t i, *p = (uint32_t *)buf;
 124         for (i = 0; i < size / 4; i++)
 125                 p[i] = TDB_BYTEREV(p[i]);
 126         return buf;
 127 }
 128 
 129 
 130 /* read a lump of data at a specified offset, maybe convert */
 131 static int tdb_read(struct tdb_context *tdb, tdb_off_t off, void *buf, 
     /* [<][>][^][v][top][bottom][index][help] */
 132                     tdb_len_t len, int cv)
 133 {
 134         if (tdb->methods->tdb_oob(tdb, off + len, 0) != 0) {
 135                 return -1;
 136         }
 137 
 138         if (tdb->map_ptr) {
 139                 memcpy(buf, off + (char *)tdb->map_ptr, len);
 140         } else {
 141                 ssize_t ret = pread(tdb->fd, buf, len, off);
 142                 if (ret != (ssize_t)len) {
 143                         /* Ensure ecode is set for log fn. */
 144                         tdb->ecode = TDB_ERR_IO;
 145                         TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_read failed at %d "
 146                                  "len=%d ret=%d (%s) map_size=%d\n",
 147                                  (int)off, (int)len, (int)ret, strerror(errno),
 148                                  (int)tdb->map_size));
 149                         return TDB_ERRCODE(TDB_ERR_IO, -1);
 150                 }
 151         }
 152         if (cv) {
 153                 tdb_convert(buf, len);
 154         }
 155         return 0;
 156 }
 157 
 158 
 159 
 160 /*
 161   do an unlocked scan of the hash table heads to find the next non-zero head. The value
 162   will then be confirmed with the lock held
 163 */              
 164 static void tdb_next_hash_chain(struct tdb_context *tdb, uint32_t *chain)
     /* [<][>][^][v][top][bottom][index][help] */
 165 {
 166         uint32_t h = *chain;
 167         if (tdb->map_ptr) {
 168                 for (;h < tdb->header.hash_size;h++) {
 169                         if (0 != *(uint32_t *)(TDB_HASH_TOP(h) + (unsigned char *)tdb->map_ptr)) {
 170                                 break;
 171                         }
 172                 }
 173         } else {
 174                 uint32_t off=0;
 175                 for (;h < tdb->header.hash_size;h++) {
 176                         if (tdb_ofs_read(tdb, TDB_HASH_TOP(h), &off) != 0 || off != 0) {
 177                                 break;
 178                         }
 179                 }
 180         }
 181         (*chain) = h;
 182 }
 183 
 184 
 185 int tdb_munmap(struct tdb_context *tdb)
     /* [<][>][^][v][top][bottom][index][help] */
 186 {
 187         if (tdb->flags & TDB_INTERNAL)
 188                 return 0;
 189 
 190 #ifdef HAVE_MMAP
 191         if (tdb->map_ptr) {
 192                 int ret;
 193 
 194                 ret = munmap(tdb->map_ptr, tdb->map_size);
 195                 if (ret != 0)
 196                         return ret;
 197         }
 198 #endif
 199         tdb->map_ptr = NULL;
 200         return 0;
 201 }
 202 
 203 void tdb_mmap(struct tdb_context *tdb)
     /* [<][>][^][v][top][bottom][index][help] */
 204 {
 205         if (tdb->flags & TDB_INTERNAL)
 206                 return;
 207 
 208 #ifdef HAVE_MMAP
 209         if (!(tdb->flags & TDB_NOMMAP)) {
 210                 tdb->map_ptr = mmap(NULL, tdb->map_size, 
 211                                     PROT_READ|(tdb->read_only? 0:PROT_WRITE), 
 212                                     MAP_SHARED|MAP_FILE, tdb->fd, 0);
 213 
 214                 /*
 215                  * NB. When mmap fails it returns MAP_FAILED *NOT* NULL !!!!
 216                  */
 217 
 218                 if (tdb->map_ptr == MAP_FAILED) {
 219                         tdb->map_ptr = NULL;
 220                         TDB_LOG((tdb, TDB_DEBUG_WARNING, "tdb_mmap failed for size %d (%s)\n", 
 221                                  tdb->map_size, strerror(errno)));
 222                 }
 223         } else {
 224                 tdb->map_ptr = NULL;
 225         }
 226 #else
 227         tdb->map_ptr = NULL;
 228 #endif
 229 }
 230 
 231 /* expand a file.  we prefer to use ftruncate, as that is what posix
 232   says to use for mmap expansion */
 233 static int tdb_expand_file(struct tdb_context *tdb, tdb_off_t size, tdb_off_t addition)
     /* [<][>][^][v][top][bottom][index][help] */
 234 {
 235         char buf[8192];
 236 
 237         if (tdb->read_only || tdb->traverse_read) {
 238                 tdb->ecode = TDB_ERR_RDONLY;
 239                 return -1;
 240         }
 241 
 242         if (ftruncate(tdb->fd, size+addition) == -1) {
 243                 char b = 0;
 244                 ssize_t written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
 245                 if (written == 0) {
 246                         /* try once more, potentially revealing errno */
 247                         written = pwrite(tdb->fd,  &b, 1, (size+addition) - 1);
 248                 }
 249                 if (written == 0) {
 250                         /* again - give up, guessing errno */
 251                         errno = ENOSPC;
 252                 }
 253                 if (written != 1) {
 254                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file to %d failed (%s)\n", 
 255                                  size+addition, strerror(errno)));
 256                         return -1;
 257                 }
 258         }
 259 
 260         /* now fill the file with something. This ensures that the
 261            file isn't sparse, which would be very bad if we ran out of
 262            disk. This must be done with write, not via mmap */
 263         memset(buf, TDB_PAD_BYTE, sizeof(buf));
 264         while (addition) {
 265                 size_t n = addition>sizeof(buf)?sizeof(buf):addition;
 266                 ssize_t written = pwrite(tdb->fd, buf, n, size);
 267                 if (written == 0) {
 268                         /* prevent infinite loops: try _once_ more */
 269                         written = pwrite(tdb->fd, buf, n, size);
 270                 }
 271                 if (written == 0) {
 272                         /* give up, trying to provide a useful errno */
 273                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write "
 274                                 "returned 0 twice: giving up!\n"));
 275                         errno = ENOSPC;
 276                         return -1;
 277                 } else if (written == -1) {
 278                         TDB_LOG((tdb, TDB_DEBUG_FATAL, "expand_file write of "
 279                                  "%d bytes failed (%s)\n", (int)n,
 280                                  strerror(errno)));
 281                         return -1;
 282                 } else if (written != n) {
 283                         TDB_LOG((tdb, TDB_DEBUG_WARNING, "expand_file: wrote "
 284                                  "only %d of %d bytes - retrying\n", (int)written,
 285                                  (int)n));
 286                 }
 287                 addition -= written;
 288                 size += written;
 289         }
 290         return 0;
 291 }
 292 
 293 
 294 /* expand the database at least size bytes by expanding the underlying
 295    file and doing the mmap again if necessary */
 296 int tdb_expand(struct tdb_context *tdb, tdb_off_t size)
     /* [<][>][^][v][top][bottom][index][help] */
 297 {
 298         struct list_struct rec;
 299         tdb_off_t offset, new_size;     
 300 
 301         if (tdb_lock(tdb, -1, F_WRLCK) == -1) {
 302                 TDB_LOG((tdb, TDB_DEBUG_ERROR, "lock failed in tdb_expand\n"));
 303                 return -1;
 304         }
 305 
 306         /* must know about any previous expansions by another process */
 307         tdb->methods->tdb_oob(tdb, tdb->map_size + 1, 1);
 308 
 309         /* always make room for at least 100 more records, and at
 310            least 25% more space. Round the database up to a multiple
 311            of the page size */
 312         new_size = MAX(tdb->map_size + size*100, tdb->map_size * 1.25);
 313         size = TDB_ALIGN(new_size, tdb->page_size) - tdb->map_size;
 314 
 315         if (!(tdb->flags & TDB_INTERNAL))
 316                 tdb_munmap(tdb);
 317 
 318         /*
 319          * We must ensure the file is unmapped before doing this
 320          * to ensure consistency with systems like OpenBSD where
 321          * writes and mmaps are not consistent.
 322          */
 323 
 324         /* expand the file itself */
 325         if (!(tdb->flags & TDB_INTERNAL)) {
 326                 if (tdb->methods->tdb_expand_file(tdb, tdb->map_size, size) != 0)
 327                         goto fail;
 328         }
 329 
 330         tdb->map_size += size;
 331 
 332         if (tdb->flags & TDB_INTERNAL) {
 333                 char *new_map_ptr = (char *)realloc(tdb->map_ptr,
 334                                                     tdb->map_size);
 335                 if (!new_map_ptr) {
 336                         tdb->map_size -= size;
 337                         goto fail;
 338                 }
 339                 tdb->map_ptr = new_map_ptr;
 340         } else {
 341                 /*
 342                  * We must ensure the file is remapped before adding the space
 343                  * to ensure consistency with systems like OpenBSD where
 344                  * writes and mmaps are not consistent.
 345                  */
 346 
 347                 /* We're ok if the mmap fails as we'll fallback to read/write */
 348                 tdb_mmap(tdb);
 349         }
 350 
 351         /* form a new freelist record */
 352         memset(&rec,'\0',sizeof(rec));
 353         rec.rec_len = size - sizeof(rec);
 354 
 355         /* link it into the free list */
 356         offset = tdb->map_size - size;
 357         if (tdb_free(tdb, offset, &rec) == -1)
 358                 goto fail;
 359 
 360         tdb_unlock(tdb, -1, F_WRLCK);
 361         return 0;
 362  fail:
 363         tdb_unlock(tdb, -1, F_WRLCK);
 364         return -1;
 365 }
 366 
 367 /* read/write a tdb_off_t */
 368 int tdb_ofs_read(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
     /* [<][>][^][v][top][bottom][index][help] */
 369 {
 370         return tdb->methods->tdb_read(tdb, offset, (char*)d, sizeof(*d), DOCONV());
 371 }
 372 
 373 int tdb_ofs_write(struct tdb_context *tdb, tdb_off_t offset, tdb_off_t *d)
     /* [<][>][^][v][top][bottom][index][help] */
 374 {
 375         tdb_off_t off = *d;
 376         return tdb->methods->tdb_write(tdb, offset, CONVERT(off), sizeof(*d));
 377 }
 378 
 379 
 380 /* read a lump of data, allocating the space for it */
 381 unsigned char *tdb_alloc_read(struct tdb_context *tdb, tdb_off_t offset, tdb_len_t len)
     /* [<][>][^][v][top][bottom][index][help] */
 382 {
 383         unsigned char *buf;
 384 
 385         /* some systems don't like zero length malloc */
 386         if (len == 0) {
 387                 len = 1;
 388         }
 389 
 390         if (!(buf = (unsigned char *)malloc(len))) {
 391                 /* Ensure ecode is set for log fn. */
 392                 tdb->ecode = TDB_ERR_OOM;
 393                 TDB_LOG((tdb, TDB_DEBUG_ERROR,"tdb_alloc_read malloc failed len=%d (%s)\n",
 394                            len, strerror(errno)));
 395                 return TDB_ERRCODE(TDB_ERR_OOM, buf);
 396         }
 397         if (tdb->methods->tdb_read(tdb, offset, buf, len, 0) == -1) {
 398                 SAFE_FREE(buf);
 399                 return NULL;
 400         }
 401         return buf;
 402 }
 403 
 404 /* Give a piece of tdb data to a parser */
 405 
 406 int tdb_parse_data(struct tdb_context *tdb, TDB_DATA key,
     /* [<][>][^][v][top][bottom][index][help] */
 407                    tdb_off_t offset, tdb_len_t len,
 408                    int (*parser)(TDB_DATA key, TDB_DATA data,
 409                                  void *private_data),
 410                    void *private_data)
 411 {
 412         TDB_DATA data;
 413         int result;
 414 
 415         data.dsize = len;
 416 
 417         if ((tdb->transaction == NULL) && (tdb->map_ptr != NULL)) {
 418                 /*
 419                  * Optimize by avoiding the malloc/memcpy/free, point the
 420                  * parser directly at the mmap area.
 421                  */
 422                 if (tdb->methods->tdb_oob(tdb, offset+len, 0) != 0) {
 423                         return -1;
 424                 }
 425                 data.dptr = offset + (unsigned char *)tdb->map_ptr;
 426                 return parser(key, data, private_data);
 427         }
 428 
 429         if (!(data.dptr = tdb_alloc_read(tdb, offset, len))) {
 430                 return -1;
 431         }
 432 
 433         result = parser(key, data, private_data);
 434         free(data.dptr);
 435         return result;
 436 }
 437 
 438 /* read/write a record */
 439 int tdb_rec_read(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
     /* [<][>][^][v][top][bottom][index][help] */
 440 {
 441         if (tdb->methods->tdb_read(tdb, offset, rec, sizeof(*rec),DOCONV()) == -1)
 442                 return -1;
 443         if (TDB_BAD_MAGIC(rec)) {
 444                 /* Ensure ecode is set for log fn. */
 445                 tdb->ecode = TDB_ERR_CORRUPT;
 446                 TDB_LOG((tdb, TDB_DEBUG_FATAL,"tdb_rec_read bad magic 0x%x at offset=%d\n", rec->magic, offset));
 447                 return TDB_ERRCODE(TDB_ERR_CORRUPT, -1);
 448         }
 449         return tdb->methods->tdb_oob(tdb, rec->next+sizeof(*rec), 0);
 450 }
 451 
 452 int tdb_rec_write(struct tdb_context *tdb, tdb_off_t offset, struct list_struct *rec)
     /* [<][>][^][v][top][bottom][index][help] */
 453 {
 454         struct list_struct r = *rec;
 455         return tdb->methods->tdb_write(tdb, offset, CONVERT(r), sizeof(r));
 456 }
 457 
 458 static const struct tdb_methods io_methods = {
 459         tdb_read,
 460         tdb_write,
 461         tdb_next_hash_chain,
 462         tdb_oob,
 463         tdb_expand_file,
 464         tdb_brlock
 465 };
 466 
 467 /*
 468   initialise the default methods table
 469 */
 470 void tdb_io_init(struct tdb_context *tdb)
     /* [<][>][^][v][top][bottom][index][help] */
 471 {
 472         tdb->methods = &io_methods;
 473 }

/* [<][>][^][v][top][bottom][index][help] */