This includes a fix for soft database corruption that would cause Docker
to fail to start if the daemon died in the middle of a transaction
write.
Signed-off-by: Aleksa Sarai <asarai@suse.de>
| ... | ... |
@@ -44,7 +44,7 @@ clone git github.com/coreos/etcd v2.2.0 |
| 44 | 44 |
fix_rewritten_imports github.com/coreos/etcd |
| 45 | 45 |
clone git github.com/ugorji/go 5abd4e96a45c386928ed2ca2a7ef63e2533e18ec |
| 46 | 46 |
clone git github.com/hashicorp/consul v0.5.2 |
| 47 |
-clone git github.com/boltdb/bolt v1.2.0 |
|
| 47 |
+clone git github.com/boltdb/bolt v1.2.1 |
|
| 48 | 48 |
clone git github.com/miekg/dns 75e6e86cc601825c5dbcd4e0c209eab180997cd7 |
| 49 | 49 |
|
| 50 | 50 |
# get graph and distribution packages |
| ... | ... |
@@ -1,4 +1,4 @@ |
| 1 |
-Bolt [Build Status](https://drone.io/github.com/boltdb/bolt/latest) [Coverage Status](https://coveralls.io/r/boltdb/bolt?branch=master) [GoDoc](https://godoc.org/github.com/boltdb/bolt)  |
|
| 1 |
+Bolt [Coverage Status](https://coveralls.io/r/boltdb/bolt?branch=master) [GoDoc](https://godoc.org/github.com/boltdb/bolt)  |
|
| 2 | 2 |
==== |
| 3 | 3 |
|
| 4 | 4 |
Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] |
| ... | ... |
@@ -427,6 +427,8 @@ db.View(func(tx *bolt.Tx) error {
|
| 427 | 427 |
}) |
| 428 | 428 |
``` |
| 429 | 429 |
|
| 430 |
+Note that, while RFC3339 is sortable, the Golang implementation of RFC3339Nano does not use a fixed number of digits after the decimal point and is therefore not sortable. |
|
| 431 |
+ |
|
| 430 | 432 |
|
| 431 | 433 |
#### ForEach() |
| 432 | 434 |
|
| ... | ... |
@@ -437,7 +439,7 @@ all the keys in a bucket: |
| 437 | 437 |
db.View(func(tx *bolt.Tx) error {
|
| 438 | 438 |
// Assume bucket exists and has keys |
| 439 | 439 |
b := tx.Bucket([]byte("MyBucket"))
|
| 440 |
- |
|
| 440 |
+ |
|
| 441 | 441 |
b.ForEach(func(k, v []byte) error {
|
| 442 | 442 |
fmt.Printf("key=%s, value=%s\n", k, v)
|
| 443 | 443 |
return nil |
| ... | ... |
@@ -617,7 +619,7 @@ Boltmobiledemo.BoltDB boltDB = Boltmobiledemo.NewBoltDB(path) |
| 617 | 617 |
{
|
| 618 | 618 |
NSURL* URL= [NSURL fileURLWithPath: filePathString]; |
| 619 | 619 |
assert([[NSFileManager defaultManager] fileExistsAtPath: [URL path]]); |
| 620 |
- |
|
| 620 |
+ |
|
| 621 | 621 |
NSError *error = nil; |
| 622 | 622 |
BOOL success = [URL setResourceValue: [NSNumber numberWithBool: YES] |
| 623 | 623 |
forKey: NSURLIsExcludedFromBackupKey error: &error]; |
| ... | ... |
@@ -840,5 +842,9 @@ Below is a list of public, open source projects that use Bolt: |
| 840 | 840 |
* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service. |
| 841 | 841 |
* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners. |
| 842 | 842 |
* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. |
| 843 |
+* [Storm](https://github.com/asdine/storm) - A simple ORM around BoltDB. |
|
| 844 |
+* [GoWebApp](https://github.com/josephspurrier/gowebapp) - A basic MVC web application in Go using BoltDB. |
|
| 845 |
+* [SimpleBolt](https://github.com/xyproto/simplebolt) - A simple way to use BoltDB. Deals mainly with strings. |
|
| 846 |
+* [Algernon](https://github.com/xyproto/algernon) - A HTTP/2 web server with built-in support for Lua. Uses BoltDB as the default database backend. |
|
| 843 | 847 |
|
| 844 | 848 |
If you are using Bolt in a project please send a pull request to add it to the list. |
| ... | ... |
@@ -36,6 +36,9 @@ const ( |
| 36 | 36 |
DefaultAllocSize = 16 * 1024 * 1024 |
| 37 | 37 |
) |
| 38 | 38 |
|
| 39 |
+// default page size for db is set to the OS page size. |
|
| 40 |
+var defaultPageSize = os.Getpagesize() |
|
| 41 |
+ |
|
| 39 | 42 |
// DB represents a collection of buckets persisted to a file on disk. |
| 40 | 43 |
// All data access is performed through transactions which can be obtained through the DB. |
| 41 | 44 |
// All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. |
| ... | ... |
@@ -94,7 +97,7 @@ type DB struct {
|
| 94 | 94 |
path string |
| 95 | 95 |
file *os.File |
| 96 | 96 |
lockfile *os.File // windows only |
| 97 |
- dataref []byte // mmap'ed readonly, write throws SEGV |
|
| 97 |
+ dataref []byte // mmap'ed readonly, write throws SEGV |
|
| 98 | 98 |
data *[maxMapSize]byte |
| 99 | 99 |
datasz int |
| 100 | 100 |
filesz int // current on disk file size |
| ... | ... |
@@ -107,6 +110,8 @@ type DB struct {
|
| 107 | 107 |
freelist *freelist |
| 108 | 108 |
stats Stats |
| 109 | 109 |
|
| 110 |
+ pagePool sync.Pool |
|
| 111 |
+ |
|
| 110 | 112 |
batchMu sync.Mutex |
| 111 | 113 |
batch *batch |
| 112 | 114 |
|
| ... | ... |
@@ -200,12 +205,27 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
| 200 | 200 |
if _, err := db.file.ReadAt(buf[:], 0); err == nil {
|
| 201 | 201 |
m := db.pageInBuffer(buf[:], 0).meta() |
| 202 | 202 |
if err := m.validate(); err != nil {
|
| 203 |
- return nil, err |
|
| 203 |
+ // If we can't read the page size, we can assume it's the same |
|
| 204 |
+ // as the OS -- since that's how the page size was chosen in the |
|
| 205 |
+ // first place. |
|
| 206 |
+ // |
|
| 207 |
+ // If the first page is invalid and this OS uses a different |
|
| 208 |
+ // page size than what the database was created with then we |
|
| 209 |
+ // are out of luck and cannot access the database. |
|
| 210 |
+ db.pageSize = os.Getpagesize() |
|
| 211 |
+ } else {
|
|
| 212 |
+ db.pageSize = int(m.pageSize) |
|
| 204 | 213 |
} |
| 205 |
- db.pageSize = int(m.pageSize) |
|
| 206 | 214 |
} |
| 207 | 215 |
} |
| 208 | 216 |
|
| 217 |
+ // Initialize page pool. |
|
| 218 |
+ db.pagePool = sync.Pool{
|
|
| 219 |
+ New: func() interface{} {
|
|
| 220 |
+ return make([]byte, db.pageSize) |
|
| 221 |
+ }, |
|
| 222 |
+ } |
|
| 223 |
+ |
|
| 209 | 224 |
// Memory map the data file. |
| 210 | 225 |
if err := db.mmap(options.InitialMmapSize); err != nil {
|
| 211 | 226 |
_ = db.close() |
| ... | ... |
@@ -262,12 +282,13 @@ func (db *DB) mmap(minsz int) error {
|
| 262 | 262 |
db.meta0 = db.page(0).meta() |
| 263 | 263 |
db.meta1 = db.page(1).meta() |
| 264 | 264 |
|
| 265 |
- // Validate the meta pages. |
|
| 266 |
- if err := db.meta0.validate(); err != nil {
|
|
| 267 |
- return err |
|
| 268 |
- } |
|
| 269 |
- if err := db.meta1.validate(); err != nil {
|
|
| 270 |
- return err |
|
| 265 |
+ // Validate the meta pages. We only return an error if both meta pages fail |
|
| 266 |
+ // validation, since meta0 failing validation means that it wasn't saved |
|
| 267 |
+ // properly -- but we can recover using meta1. And vice-versa. |
|
| 268 |
+ err0 := db.meta0.validate() |
|
| 269 |
+ err1 := db.meta1.validate() |
|
| 270 |
+ if err0 != nil && err1 != nil {
|
|
| 271 |
+ return err0 |
|
| 271 | 272 |
} |
| 272 | 273 |
|
| 273 | 274 |
return nil |
| ... | ... |
@@ -339,6 +360,7 @@ func (db *DB) init() error {
|
| 339 | 339 |
m.root = bucket{root: 3}
|
| 340 | 340 |
m.pgid = 4 |
| 341 | 341 |
m.txid = txid(i) |
| 342 |
+ m.checksum = m.sum64() |
|
| 342 | 343 |
} |
| 343 | 344 |
|
| 344 | 345 |
// Write an empty freelist at page 3. |
| ... | ... |
@@ -383,11 +405,10 @@ func (db *DB) close() error {
|
| 383 | 383 |
if !db.opened {
|
| 384 | 384 |
return nil |
| 385 | 385 |
} |
| 386 |
- |
|
| 386 |
+ |
|
| 387 | 387 |
db.opened = false |
| 388 | 388 |
|
| 389 | 389 |
db.freelist = nil |
| 390 |
- db.path = "" |
|
| 391 | 390 |
|
| 392 | 391 |
// Clear ops. |
| 393 | 392 |
db.ops.writeAt = nil |
| ... | ... |
@@ -414,6 +435,7 @@ func (db *DB) close() error {
|
| 414 | 414 |
db.file = nil |
| 415 | 415 |
} |
| 416 | 416 |
|
| 417 |
+ db.path = "" |
|
| 417 | 418 |
return nil |
| 418 | 419 |
} |
| 419 | 420 |
|
| ... | ... |
@@ -778,16 +800,37 @@ func (db *DB) pageInBuffer(b []byte, id pgid) *page {
|
| 778 | 778 |
|
| 779 | 779 |
// meta retrieves the current meta page reference. |
| 780 | 780 |
func (db *DB) meta() *meta {
|
| 781 |
- if db.meta0.txid > db.meta1.txid {
|
|
| 782 |
- return db.meta0 |
|
| 781 |
+ // We have to return the meta with the highest txid which doesn't fail |
|
| 782 |
+ // validation. Otherwise, we can cause errors when in fact the database is |
|
| 783 |
+ // in a consistent state. metaA is the one with the higher txid. |
|
| 784 |
+ metaA := db.meta0 |
|
| 785 |
+ metaB := db.meta1 |
|
| 786 |
+ if db.meta1.txid > db.meta0.txid {
|
|
| 787 |
+ metaA = db.meta1 |
|
| 788 |
+ metaB = db.meta0 |
|
| 789 |
+ } |
|
| 790 |
+ |
|
| 791 |
+ // Use higher meta page if valid. Otherwise fallback to previous, if valid. |
|
| 792 |
+ if err := metaA.validate(); err == nil {
|
|
| 793 |
+ return metaA |
|
| 794 |
+ } else if err := metaB.validate(); err == nil {
|
|
| 795 |
+ return metaB |
|
| 783 | 796 |
} |
| 784 |
- return db.meta1 |
|
| 797 |
+ |
|
| 798 |
+ // This should never be reached, because both meta1 and meta0 were validated |
|
| 799 |
+ // on mmap() and we do fsync() on every write. |
|
| 800 |
+ panic("bolt.DB.meta(): invalid meta pages")
|
|
| 785 | 801 |
} |
| 786 | 802 |
|
| 787 | 803 |
// allocate returns a contiguous block of memory starting at a given page. |
| 788 | 804 |
func (db *DB) allocate(count int) (*page, error) {
|
| 789 | 805 |
// Allocate a temporary buffer for the page. |
| 790 |
- buf := make([]byte, count*db.pageSize) |
|
| 806 |
+ var buf []byte |
|
| 807 |
+ if count == 1 {
|
|
| 808 |
+ buf = db.pagePool.Get().([]byte) |
|
| 809 |
+ } else {
|
|
| 810 |
+ buf = make([]byte, count*db.pageSize) |
|
| 811 |
+ } |
|
| 791 | 812 |
p := (*page)(unsafe.Pointer(&buf[0])) |
| 792 | 813 |
p.overflow = uint32(count - 1) |
| 793 | 814 |
|
| ... | ... |
@@ -937,12 +980,12 @@ type meta struct {
|
| 937 | 937 |
|
| 938 | 938 |
// validate checks the marker bytes and version of the meta page to ensure it matches this binary. |
| 939 | 939 |
func (m *meta) validate() error {
|
| 940 |
- if m.checksum != 0 && m.checksum != m.sum64() {
|
|
| 941 |
- return ErrChecksum |
|
| 942 |
- } else if m.magic != magic {
|
|
| 940 |
+ if m.magic != magic {
|
|
| 943 | 941 |
return ErrInvalid |
| 944 | 942 |
} else if m.version != version {
|
| 945 | 943 |
return ErrVersionMismatch |
| 944 |
+ } else if m.checksum != 0 && m.checksum != m.sum64() {
|
|
| 945 |
+ return ErrChecksum |
|
| 946 | 946 |
} |
| 947 | 947 |
return nil |
| 948 | 948 |
} |
| ... | ... |
@@ -12,7 +12,8 @@ var ( |
| 12 | 12 |
// already open. |
| 13 | 13 |
ErrDatabaseOpen = errors.New("database already open")
|
| 14 | 14 |
|
| 15 |
- // ErrInvalid is returned when a data file is not a Bolt-formatted database. |
|
| 15 |
+ // ErrInvalid is returned when both meta pages on a database are invalid. |
|
| 16 |
+ // This typically occurs when a file is not a bolt database. |
|
| 16 | 17 |
ErrInvalid = errors.New("invalid database")
|
| 17 | 18 |
|
| 18 | 19 |
// ErrVersionMismatch is returned when the data file was created with a |
| ... | ... |
@@ -111,13 +111,13 @@ type leafPageElement struct {
|
| 111 | 111 |
// key returns a byte slice of the node key. |
| 112 | 112 |
func (n *leafPageElement) key() []byte {
|
| 113 | 113 |
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) |
| 114 |
- return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize] |
|
| 114 |
+ return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize:n.ksize] |
|
| 115 | 115 |
} |
| 116 | 116 |
|
| 117 | 117 |
// value returns a byte slice of the node value. |
| 118 | 118 |
func (n *leafPageElement) value() []byte {
|
| 119 | 119 |
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n)) |
| 120 |
- return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize] |
|
| 120 |
+ return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize:n.vsize] |
|
| 121 | 121 |
} |
| 122 | 122 |
|
| 123 | 123 |
// PageInfo represents human readable information about a page. |
| ... | ... |
@@ -473,6 +473,8 @@ func (tx *Tx) write() error {
|
| 473 | 473 |
for _, p := range tx.pages {
|
| 474 | 474 |
pages = append(pages, p) |
| 475 | 475 |
} |
| 476 |
+ // Clear out page cache early. |
|
| 477 |
+ tx.pages = make(map[pgid]*page) |
|
| 476 | 478 |
sort.Sort(pages) |
| 477 | 479 |
|
| 478 | 480 |
// Write pages to disk in order. |
| ... | ... |
@@ -517,8 +519,22 @@ func (tx *Tx) write() error {
|
| 517 | 517 |
} |
| 518 | 518 |
} |
| 519 | 519 |
|
| 520 |
- // Clear out page cache. |
|
| 521 |
- tx.pages = make(map[pgid]*page) |
|
| 520 |
+ // Put small pages back to page pool. |
|
| 521 |
+ for _, p := range pages {
|
|
| 522 |
+ // Ignore page sizes over 1 page. |
|
| 523 |
+ // These are allocated using make() instead of the page pool. |
|
| 524 |
+ if int(p.overflow) != 0 {
|
|
| 525 |
+ continue |
|
| 526 |
+ } |
|
| 527 |
+ |
|
| 528 |
+ buf := (*[maxAllocSize]byte)(unsafe.Pointer(p))[:tx.db.pageSize] |
|
| 529 |
+ |
|
| 530 |
+ // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 |
|
| 531 |
+ for i := range buf {
|
|
| 532 |
+ buf[i] = 0 |
|
| 533 |
+ } |
|
| 534 |
+ tx.db.pagePool.Put(buf) |
|
| 535 |
+ } |
|
| 522 | 536 |
|
| 523 | 537 |
return nil |
| 524 | 538 |
} |