Projects
Kolab:3.4
cyrus-imapd
cyrus-imapd-2.5-revert-safe-skiplist.patch
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File cyrus-imapd-2.5-revert-safe-skiplist.patch of Package cyrus-imapd (Revision 118)
Currently displaying revision 118, Show latest
diff --git a/lib/cyrusdb_skiplist.c b/lib/cyrusdb_skiplist.c index 18d3ca3..fc540e0 100644 --- a/lib/cyrusdb_skiplist.c +++ b/lib/cyrusdb_skiplist.c @@ -399,7 +399,7 @@ static unsigned RECSIZE_safe(struct dbengine *db, const char *ptr) case ADD: level = LEVEL_safe(db, ptr); if (!level) { - syslog(LOG_ERR, "IOERROR: skiplist RECSIZE not safe %s, offset %u", + syslog(LOG_ERR, "IOERROR: skiplist2 RECSIZE_safe not safe %s, offset %u", db->fname, (unsigned)(ptr - db->map_base)); return 0; } @@ -413,20 +413,50 @@ static unsigned RECSIZE_safe(struct dbengine *db, const char *ptr) break; case DELETE: - if (!is_safe(db, ptr+8)) { - syslog(LOG_ERR, "IOERROR: skiplist RECSIZE not safe %s, offset %u", - db->fname, (unsigned)(ptr - db->map_base)); - return 0; - } ret += 8; break; case COMMIT: - if (!is_safe(db, ptr+4)) { - syslog(LOG_ERR, "IOERROR: skiplist RECSIZE not safe %s, offset %u", - db->fname, (unsigned)(ptr - db->map_base)); - return 0; - } + ret += 4; + break; + } + + return ret; +} + +/* how many levels does this record have? */ +static unsigned LEVEL(const char *ptr) +{ + const uint32_t *p, *q; + + assert(TYPE(ptr) == DUMMY || TYPE(ptr) == INORDER || TYPE(ptr) == ADD); + p = q = (uint32_t *) FIRSTPTR(ptr); + while (*p != (uint32_t)-1) p++; + return (p - q); +} + +/* how big is this record? 
*/ +static unsigned RECSIZE(const char *ptr) +{ + int ret = 0; + switch (TYPE(ptr)) { + case DUMMY: + case INORDER: + case ADD: + ret += 4; /* tag */ + ret += 4; /* keylen */ + ret += ROUNDUP(KEYLEN(ptr)); /* key */ + ret += 4; /* datalen */ + ret += ROUNDUP(DATALEN(ptr)); /* data */ + ret += 4 * LEVEL(ptr); /* pointers */ + ret += 4; /* padding */ + break; + + case DELETE: + ret += 8; + break; + + case COMMIT: ret += 4; break; } @@ -494,12 +524,7 @@ static int newtxn(struct dbengine *db, struct txn **tidptr) } -static unsigned PADDING_safe(struct dbengine *db, const char *ptr) -{ - unsigned size = RECSIZE_safe(db, ptr); - if (!size) return 0; - return ntohl(*((uint32_t *)((ptr) + size - 4))); -} +#define PADDING(ptr) (ntohl(*((uint32_t *)((ptr) + RECSIZE(ptr) - 4)))) /* given an open, mapped db, read in the header information */ static int read_header(struct dbengine *db) @@ -570,9 +595,9 @@ static int read_header(struct dbengine *db) db->fname); r = CYRUSDB_IOERROR; } - if (!r && LEVEL_safe(db, dptr) != db->maxlevel) { + if (!r && LEVEL(dptr) != db->maxlevel) { syslog(LOG_ERR, "DBERROR: %s: DUMMY level(%d) != db->maxlevel(%d)", - db->fname, LEVEL_safe(db, dptr), db->maxlevel); + db->fname, LEVEL(dptr), db->maxlevel); r = CYRUSDB_IOERROR; } @@ -1281,7 +1306,7 @@ static int mystore(struct dbengine *db, return CYRUSDB_EXISTS; } else { /* replace with an equal height node */ - lvl = LEVEL_safe(db, ptr); + lvl = LEVEL(ptr); /* log a removal */ WRITEV_ADD_TO_IOVEC(iov, num_iov, (char *) &delrectype, 4); @@ -1575,8 +1600,8 @@ static int myabort(struct dbengine *db, struct txn *tid) /* find the last log entry */ for (offset = tid->logstart, ptr = db->map_base + offset; - offset + RECSIZE_safe(db, ptr) != (uint32_t) tid->logend; - offset += RECSIZE_safe(db, ptr), ptr = db->map_base + offset) ; + offset + RECSIZE(ptr) != (uint32_t) tid->logend; + offset += RECSIZE(ptr), ptr = db->map_base + offset) ; offset = ptr - db->map_base; @@ -1613,7 +1638,7 @@ static int 
myabort(struct dbengine *db, struct txn *tid) /* re-add this record. it can't exist right now. */ netnewoffset = *((uint32_t *)(ptr + 4)); q = db->map_base + ntohl(netnewoffset); - lvl = LEVEL_safe(db, q); + lvl = LEVEL(q); (void) find_node(db, KEY(q), KEYLEN(q), updateoffsets); for (i = 0; i < lvl; i++) { /* the current pointers FROM this node are correct, @@ -1628,7 +1653,7 @@ static int myabort(struct dbengine *db, struct txn *tid) } /* remove looking at this */ - tid->logend -= RECSIZE_safe(db, ptr); + tid->logend -= RECSIZE(ptr); } /* truncate the file to remove log entries */ @@ -1742,13 +1767,13 @@ static int mycheckpoint(struct dbengine *db) uint32_t netnewoffset; ptr = db->map_base + offset; - lvl = LEVEL_safe(db, ptr); + lvl = LEVEL(ptr); db->listsize++; num_iov = 0; WRITEV_ADD_TO_IOVEC(iov, num_iov, (char *) &iorectype, 4); /* copy all but the rectype from the record */ - WRITEV_ADD_TO_IOVEC(iov, num_iov, (char *) ptr + 4, RECSIZE_safe(db, ptr) - 4); + WRITEV_ADD_TO_IOVEC(iov, num_iov, (char *) ptr + 4, RECSIZE(ptr) - 4); newoffset = lseek(db->fd, 0, SEEK_END); netnewoffset = htonl(newoffset); @@ -1915,9 +1940,9 @@ static int dump(struct dbengine *db, int detail __attribute__((unused))) case INORDER: case ADD: printf("kl=%d dl=%d lvl=%d\n", - KEYLEN(ptr), DATALEN(ptr), LEVEL_safe(db, ptr)); + KEYLEN(ptr), DATALEN(ptr), LEVEL(ptr)); printf("\t"); - for (i = 0; i < LEVEL_safe(db, ptr); i++) { + for (i = 0; i < LEVEL(ptr); i++) { printf("%04X ", FORWARD(ptr, i)); } printf("\n"); @@ -1932,7 +1957,7 @@ static int dump(struct dbengine *db, int detail __attribute__((unused))) break; } - ptr += RECSIZE_safe(db, ptr); + ptr += RECSIZE(ptr); } unlock(db); @@ -1961,7 +1986,7 @@ static int myconsistent(struct dbengine *db, struct txn *tid, int locked) ptr = db->map_base + offset; - for (i = 0; i < LEVEL_safe(db, ptr); i++) { + for (i = 0; i < LEVEL(ptr); i++) { offset = FORWARD(ptr, i); if (offset > db->map_size) { @@ -2005,11 +2030,9 @@ static int 
myconsistent(struct dbengine *db, struct txn *tid, int locked) static int recovery(struct dbengine *db, int flags) { const char *ptr, *keyptr; - unsigned filesize = db->map_size; unsigned updateoffsets[SKIPLIST_MAXLEVEL+1]; uint32_t offset, offsetnet, myoff = 0; - int r = 0; - int need_checkpoint = libcyrus_config_getswitch(CYRUSOPT_SKIPLIST_ALWAYS_CHECKPOINT); + int r = 0, need_checkpoint = 0; time_t start = time(NULL); unsigned i; @@ -2063,11 +2086,11 @@ static int recovery(struct dbengine *db, int flags) } /* pointers for db->maxlevel */ - if (!r && LEVEL_safe(db, ptr) != db->maxlevel) { + if (!r && LEVEL(ptr) != db->maxlevel) { r = CYRUSDB_IOERROR; syslog(LOG_ERR, "DBERROR: skiplist recovery %s: dummy node level: %d != %d", - db->fname, LEVEL_safe(db, ptr), db->maxlevel); + db->fname, LEVEL(ptr), db->maxlevel); } for (i = 0; i < db->maxlevel; i++) { @@ -2078,7 +2101,7 @@ static int recovery(struct dbengine *db, int flags) /* reset the data that was written INORDER by the last checkpoint */ offset = DUMMY_OFFSET(db) + DUMMY_SIZE(db); - while (!r && (offset < filesize) + while (!r && (offset < db->map_size) && TYPE(db->map_base + offset) == INORDER) { ptr = db->map_base + offset; offsetnet = htonl(offset); @@ -2088,9 +2111,9 @@ static int recovery(struct dbengine *db, int flags) /* xxx check \0 fill on key */ /* xxx check \0 fill on data */ - + /* update previous pointers, record these for updating */ - for (i = 0; !r && i < LEVEL_safe(db, ptr); i++) { + for (i = 0; !r && i < LEVEL(ptr); i++) { r = lseek(db->fd, updateoffsets[i], SEEK_SET); if (r < 0) { syslog(LOG_ERR, "DBERROR: lseek %s: %m", db->fname); @@ -2113,23 +2136,15 @@ static int recovery(struct dbengine *db, int flags) updateoffsets[i] = offset + (PTR(ptr, i) - ptr); } - if (!r) { - unsigned size = RECSIZE_safe(db, ptr); - if (!size) { - syslog(LOG_ERR, "skiplist recovery %s: damaged record at %u, truncating here", - db->fname, offset); - filesize = offset; - break; - } - - if (PADDING_safe(db, ptr) != 
(uint32_t) -1) { - syslog(LOG_ERR, "DBERROR: %s: offset %04X padding not -1", - db->fname, offset); - filesize = offset; - break; - } + /* check padding */ + if (!r && PADDING(ptr) != (uint32_t) -1) { + syslog(LOG_ERR, "DBERROR: %s: offset %04X padding not -1", + db->fname, offset); + r = CYRUSDB_IOERROR; + } - offset += size; + if (!r) { + offset += RECSIZE(ptr); } } @@ -2164,7 +2179,7 @@ static int recovery(struct dbengine *db, int flags) } /* replay the log */ - while (!r && offset < filesize) { + while (!r && offset < db->map_size) { const char *p, *q; /* refresh map, so we see the writes we've just done */ @@ -2176,7 +2191,7 @@ static int recovery(struct dbengine *db, int flags) /* bugs in recovery truncates could have left some bogus zeros here */ if (TYPE(ptr) == 0) { int orig = offset; - while (TYPE(ptr) == 0 && offset < filesize) { + while (TYPE(ptr) == 0 && offset < db->map_size) { offset += 4; ptr = db->map_base + offset; } @@ -2189,7 +2204,7 @@ static int recovery(struct dbengine *db, int flags) /* if this is a commit, we've processed everything in this txn */ if (TYPE(ptr) == COMMIT) { - offset += RECSIZE_safe(db, ptr); + offset += RECSIZE(ptr); continue; } @@ -2203,7 +2218,7 @@ static int recovery(struct dbengine *db, int flags) } /* look ahead for a commit */ - q = db->map_base + filesize; + q = db->map_base + db->map_size; p = ptr; for (;;) { if (RECSIZE_safe(db, p) <= 0) { @@ -2215,7 +2230,7 @@ static int recovery(struct dbengine *db, int flags) p = q; break; } - p += RECSIZE_safe(db, p); + p += RECSIZE(p); if (p >= q) break; if (TYPE(p) == COMMIT) break; } @@ -2224,7 +2239,16 @@ static int recovery(struct dbengine *db, int flags) "skiplist recovery %s: found partial txn, not replaying", db->fname); - filesize = offset; + /* no commit, we should truncate */ + if (ftruncate(db->fd, offset) < 0) { + syslog(LOG_ERR, + "DBERROR: skiplist recovery %s: ftruncate: %m", + db->fname); + r = CYRUSDB_IOERROR; + } + + /* set the map size back as well */ + 
db->map_size = offset; break; } @@ -2290,7 +2314,7 @@ static int recovery(struct dbengine *db, int flags) } offsetnet = htonl(offset); - lvl = LEVEL_safe(db, ptr); + lvl = LEVEL(ptr); if (lvl > SKIPLIST_MAXLEVEL) { syslog(LOG_ERR, "DBERROR: skiplist recovery %s: node claims level %d (greater than max %d)", @@ -2299,15 +2323,15 @@ static int recovery(struct dbengine *db, int flags) } else { /* NOTE - in the bogus case where a record with the same key already * exists, there are three possible cases: - * lvl == LEVEL_safe(db, keyptr) + * lvl == LEVEL(keyptr) * * trivial: all to me, all mine to keyptr's FORWARD - * lvl > LEVEL_safe(db, keyptr) - + * lvl > LEVEL(keyptr) - * * all updateoffsets values should point to me - * * up until LEVEL_safe(db, keyptr) set to keyptr's next values + * * up until LEVEL(keyptr) set to keyptr's next values * (updateoffsets[i] should be keyptr in these cases) * then point all my higher pointers are updateoffsets[i]'s * FORWARD instead. - * lvl < LEVEL_safe(db, keyptr) + * lvl < LEVEL(keyptr) * * updateoffsets values up to lvl should point to me * * all mine should point to keyptr's next values * * from lvl up, all updateoffsets[i] should point to @@ -2319,7 +2343,7 @@ static int recovery(struct dbengine *db, int flags) */ for (i = 0; i < lvl; i++) { /* set our next pointers */ - if (keyptr && i < LEVEL_safe(db, keyptr)) { + if (keyptr && i < LEVEL(keyptr)) { /* need to replace the matching record key */ newoffsets[i] = htonl(FORWARD(keyptr, i)); @@ -2338,9 +2362,9 @@ static int recovery(struct dbengine *db, int flags) lseek(db->fd, FIRSTPTR(ptr) - db->map_base, SEEK_SET); retry_write(db->fd, (char *) newoffsets, 4 * lvl); - if (keyptr && lvl < LEVEL_safe(db, keyptr)) { + if (keyptr && lvl < LEVEL(keyptr)) { uint32_t newoffsetnet; - for (i = lvl; i < LEVEL_safe(db, keyptr); i++) { + for (i = lvl; i < LEVEL(keyptr); i++) { newoffsetnet = htonl(FORWARD(keyptr, i)); /* replace 'updateoffsets' to point onwards */ lseek(db->fd, @@ -2356,22 
+2380,21 @@ static int recovery(struct dbengine *db, int flags) } /* move to next record */ - unsigned size = RECSIZE_safe(db, ptr); - if (!size) break; - offset += size; + offset += RECSIZE(ptr); } - /* didn't read the exact end? We should truncate */ - if (offset < db->map_size) { - if (ftruncate(db->fd, offset) < 0) { - syslog(LOG_ERR, - "DBERROR: skiplist recovery %s: ftruncate: %m", - db->fname); - r = CYRUSDB_IOERROR; - } + if (libcyrus_config_getswitch(CYRUSOPT_SKIPLIST_ALWAYS_CHECKPOINT)) { + /* refresh map, so we see the writes we've just done */ + map_refresh(db->fd, 0, &db->map_base, &db->map_len, db->map_size, + db->fname, 0); - /* set the map size back as well */ - db->map_size = offset; + r = mycheckpoint(db); + + if (r || !(flags & RECOVERY_CALLER_LOCKED)) { + unlock(db); + } + + return r; } /* fsync the recovered database */ @@ -2404,16 +2427,13 @@ static int recovery(struct dbengine *db, int flags) } if (!r && need_checkpoint) { - /* refresh map, so we see the writes we've just done */ - map_refresh(db->fd, 0, &db->map_base, &db->map_len, db->map_size, - db->fname, 0); r = mycheckpoint(db); } - if (r || !(flags & RECOVERY_CALLER_LOCKED)) { + if(r || !(flags & RECOVERY_CALLER_LOCKED)) { unlock(db); } - + return r; }
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS) is an openSUSE project.