Signed-off-by: Alessandro Boch <aboch@docker.com>
| ... | ... |
@@ -44,7 +44,7 @@ clone git github.com/coreos/etcd v2.2.0 |
| 44 | 44 |
fix_rewritten_imports github.com/coreos/etcd |
| 45 | 45 |
clone git github.com/ugorji/go 5abd4e96a45c386928ed2ca2a7ef63e2533e18ec |
| 46 | 46 |
clone git github.com/hashicorp/consul v0.5.2 |
| 47 |
-clone git github.com/boltdb/bolt v1.1.0 |
|
| 47 |
+clone git github.com/boltdb/bolt v1.2.0 |
|
| 48 | 48 |
clone git github.com/miekg/dns 75e6e86cc601825c5dbcd4e0c209eab180997cd7 |
| 49 | 49 |
|
| 50 | 50 |
# get graph and distribution packages |
| ... | ... |
@@ -1,54 +1,18 @@ |
| 1 |
-TEST=. |
|
| 2 |
-BENCH=. |
|
| 3 |
-COVERPROFILE=/tmp/c.out |
|
| 4 | 1 |
BRANCH=`git rev-parse --abbrev-ref HEAD` |
| 5 | 2 |
COMMIT=`git rev-parse --short HEAD` |
| 6 | 3 |
GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" |
| 7 | 4 |
|
| 8 | 5 |
default: build |
| 9 | 6 |
|
| 10 |
-bench: |
|
| 11 |
- go test -v -test.run=NOTHINCONTAINSTHIS -test.bench=$(BENCH) |
|
| 12 |
- |
|
| 13 |
-# http://cloc.sourceforge.net/ |
|
| 14 |
-cloc: |
|
| 15 |
- @cloc --not-match-f='Makefile|_test.go' . |
|
| 16 |
- |
|
| 17 |
-cover: fmt |
|
| 18 |
- go test -coverprofile=$(COVERPROFILE) -test.run=$(TEST) $(COVERFLAG) . |
|
| 19 |
- go tool cover -html=$(COVERPROFILE) |
|
| 20 |
- rm $(COVERPROFILE) |
|
| 21 |
- |
|
| 22 |
-cpuprofile: fmt |
|
| 23 |
- @go test -c |
|
| 24 |
- @./bolt.test -test.v -test.run=$(TEST) -test.cpuprofile cpu.prof |
|
| 7 |
+race: |
|
| 8 |
+ @go test -v -race -test.run="TestSimulate_(100op|1000op)" |
|
| 25 | 9 |
|
| 26 | 10 |
# go get github.com/kisielk/errcheck |
| 27 | 11 |
errcheck: |
| 28 |
- @echo "=== errcheck ===" |
|
| 29 |
- @errcheck github.com/boltdb/bolt |
|
| 12 |
+ @errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt |
|
| 30 | 13 |
|
| 31 |
-fmt: |
|
| 32 |
- @go fmt ./... |
|
| 33 |
- |
|
| 34 |
-get: |
|
| 35 |
- @go get -d ./... |
|
| 36 |
- |
|
| 37 |
-build: get |
|
| 38 |
- @mkdir -p bin |
|
| 39 |
- @go build -ldflags=$(GOLDFLAGS) -a -o bin/bolt ./cmd/bolt |
|
| 40 |
- |
|
| 41 |
-test: fmt |
|
| 42 |
- @go get github.com/stretchr/testify/assert |
|
| 43 |
- @echo "=== TESTS ===" |
|
| 44 |
- @go test -v -cover -test.run=$(TEST) |
|
| 45 |
- @echo "" |
|
| 46 |
- @echo "" |
|
| 47 |
- @echo "=== CLI ===" |
|
| 48 |
- @go test -v -test.run=$(TEST) ./cmd/bolt |
|
| 49 |
- @echo "" |
|
| 50 |
- @echo "" |
|
| 51 |
- @echo "=== RACE DETECTOR ===" |
|
| 52 |
- @go test -v -race -test.run="TestSimulate_(100op|1000op)" |
|
| 14 |
+test: |
|
| 15 |
+ @go test -v -cover . |
|
| 16 |
+ @go test -v ./cmd/bolt |
|
| 53 | 17 |
|
| 54 |
-.PHONY: bench cloc cover cpuprofile fmt memprofile test |
|
| 18 |
+.PHONY: fmt test |
| ... | ... |
@@ -1,8 +1,8 @@ |
| 1 |
-Bolt [](https://drone.io/github.com/boltdb/bolt/latest) [](https://coveralls.io/r/boltdb/bolt?branch=master) [](https://godoc.org/github.com/boltdb/bolt)  |
|
| 1 |
+Bolt [](https://drone.io/github.com/boltdb/bolt/latest) [](https://coveralls.io/r/boltdb/bolt?branch=master) [](https://godoc.org/github.com/boltdb/bolt)  |
|
| 2 | 2 |
==== |
| 3 | 3 |
|
| 4 |
-Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] and |
|
| 5 |
-the [LMDB project][lmdb]. The goal of the project is to provide a simple, |
|
| 4 |
+Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] |
|
| 5 |
+[LMDB project][lmdb]. The goal of the project is to provide a simple, |
|
| 6 | 6 |
fast, and reliable database for projects that don't require a full database |
| 7 | 7 |
server such as Postgres or MySQL. |
| 8 | 8 |
|
| ... | ... |
@@ -13,7 +13,6 @@ and setting values. That's it. |
| 13 | 13 |
[hyc_symas]: https://twitter.com/hyc_symas |
| 14 | 14 |
[lmdb]: http://symas.com/mdb/ |
| 15 | 15 |
|
| 16 |
- |
|
| 17 | 16 |
## Project Status |
| 18 | 17 |
|
| 19 | 18 |
Bolt is stable and the API is fixed. Full unit test coverage and randomized |
| ... | ... |
@@ -22,6 +21,36 @@ Bolt is currently in high-load production environments serving databases as |
| 22 | 22 |
large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed |
| 23 | 23 |
services every day. |
| 24 | 24 |
|
| 25 |
+## Table of Contents |
|
| 26 |
+ |
|
| 27 |
+- [Getting Started](#getting-started) |
|
| 28 |
+ - [Installing](#installing) |
|
| 29 |
+ - [Opening a database](#opening-a-database) |
|
| 30 |
+ - [Transactions](#transactions) |
|
| 31 |
+ - [Read-write transactions](#read-write-transactions) |
|
| 32 |
+ - [Read-only transactions](#read-only-transactions) |
|
| 33 |
+ - [Batch read-write transactions](#batch-read-write-transactions) |
|
| 34 |
+ - [Managing transactions manually](#managing-transactions-manually) |
|
| 35 |
+ - [Using buckets](#using-buckets) |
|
| 36 |
+ - [Using key/value pairs](#using-keyvalue-pairs) |
|
| 37 |
+ - [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket) |
|
| 38 |
+ - [Iterating over keys](#iterating-over-keys) |
|
| 39 |
+ - [Prefix scans](#prefix-scans) |
|
| 40 |
+ - [Range scans](#range-scans) |
|
| 41 |
+ - [ForEach()](#foreach) |
|
| 42 |
+ - [Nested buckets](#nested-buckets) |
|
| 43 |
+ - [Database backups](#database-backups) |
|
| 44 |
+ - [Statistics](#statistics) |
|
| 45 |
+ - [Read-Only Mode](#read-only-mode) |
|
| 46 |
+ - [Mobile Use (iOS/Android)](#mobile-use-iosandroid) |
|
| 47 |
+- [Resources](#resources) |
|
| 48 |
+- [Comparison with other databases](#comparison-with-other-databases) |
|
| 49 |
+ - [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases) |
|
| 50 |
+ - [LevelDB, RocksDB](#leveldb-rocksdb) |
|
| 51 |
+ - [LMDB](#lmdb) |
|
| 52 |
+- [Caveats & Limitations](#caveats--limitations) |
|
| 53 |
+- [Reading the Source](#reading-the-source) |
|
| 54 |
+- [Other Projects Using Bolt](#other-projects-using-bolt) |
|
| 25 | 55 |
|
| 26 | 56 |
## Getting Started |
| 27 | 57 |
|
| ... | ... |
@@ -180,8 +209,8 @@ and then safely close your transaction if an error is returned. This is the |
| 180 | 180 |
recommended way to use Bolt transactions. |
| 181 | 181 |
|
| 182 | 182 |
However, sometimes you may want to manually start and end your transactions. |
| 183 |
-You can use the `Tx.Begin()` function directly but _please_ be sure to close the |
|
| 184 |
-transaction. |
|
| 183 |
+You can use the `Tx.Begin()` function directly but **please** be sure to close |
|
| 184 |
+the transaction. |
|
| 185 | 185 |
|
| 186 | 186 |
```go |
| 187 | 187 |
// Start a writable transaction. |
| ... | ... |
@@ -269,7 +298,7 @@ then you must use `copy()` to copy it to another byte slice. |
| 269 | 269 |
|
| 270 | 270 |
|
| 271 | 271 |
### Autoincrementing integer for the bucket |
| 272 |
-By using the NextSequence() function, you can let Bolt determine a sequence |
|
| 272 |
+By using the `NextSequence()` function, you can let Bolt determine a sequence |
|
| 273 | 273 |
which can be used as the unique identifier for your key/value pairs. See the |
| 274 | 274 |
example below. |
| 275 | 275 |
|
| ... | ... |
@@ -309,7 +338,6 @@ type User struct {
|
| 309 | 309 |
ID int |
| 310 | 310 |
... |
| 311 | 311 |
} |
| 312 |
- |
|
| 313 | 312 |
``` |
| 314 | 313 |
|
| 315 | 314 |
### Iterating over keys |
| ... | ... |
@@ -320,7 +348,9 @@ iteration over these keys extremely fast. To iterate over keys we'll use a |
| 320 | 320 |
|
| 321 | 321 |
```go |
| 322 | 322 |
db.View(func(tx *bolt.Tx) error {
|
| 323 |
+ // Assume bucket exists and has keys |
|
| 323 | 324 |
b := tx.Bucket([]byte("MyBucket"))
|
| 325 |
+ |
|
| 324 | 326 |
c := b.Cursor() |
| 325 | 327 |
|
| 326 | 328 |
for k, v := c.First(); k != nil; k, v = c.Next() {
|
| ... | ... |
@@ -344,10 +374,15 @@ Next() Move to the next key. |
| 344 | 344 |
Prev() Move to the previous key. |
| 345 | 345 |
``` |
| 346 | 346 |
|
| 347 |
-When you have iterated to the end of the cursor then `Next()` will return `nil`. |
|
| 348 |
-You must seek to a position using `First()`, `Last()`, or `Seek()` before |
|
| 349 |
-calling `Next()` or `Prev()`. If you do not seek to a position then these |
|
| 350 |
-functions will return `nil`. |
|
| 347 |
+Each of those functions has a return signature of `(key []byte, value []byte)`. |
|
| 348 |
+When you have iterated to the end of the cursor then `Next()` will return a |
|
| 349 |
+`nil` key. You must seek to a position using `First()`, `Last()`, or `Seek()` |
|
| 350 |
+before calling `Next()` or `Prev()`. If you do not seek to a position then |
|
| 351 |
+these functions will return a `nil` key. |
|
| 352 |
+ |
|
| 353 |
+During iteration, if the key is non-`nil` but the value is `nil`, that means |
|
| 354 |
+the key refers to a bucket rather than a value. Use `Bucket.Bucket()` to |
|
| 355 |
+access the sub-bucket. |
|
| 351 | 356 |
|
| 352 | 357 |
|
| 353 | 358 |
#### Prefix scans |
| ... | ... |
@@ -356,6 +391,7 @@ To iterate over a key prefix, you can combine `Seek()` and `bytes.HasPrefix()`: |
| 356 | 356 |
|
| 357 | 357 |
```go |
| 358 | 358 |
db.View(func(tx *bolt.Tx) error {
|
| 359 |
+ // Assume bucket exists and has keys |
|
| 359 | 360 |
c := tx.Bucket([]byte("MyBucket")).Cursor()
|
| 360 | 361 |
|
| 361 | 362 |
prefix := []byte("1234")
|
| ... | ... |
@@ -375,7 +411,7 @@ date range like this: |
| 375 | 375 |
|
| 376 | 376 |
```go |
| 377 | 377 |
db.View(func(tx *bolt.Tx) error {
|
| 378 |
- // Assume our events bucket has RFC3339 encoded time keys. |
|
| 378 |
+ // Assume our events bucket exists and has RFC3339 encoded time keys. |
|
| 379 | 379 |
c := tx.Bucket([]byte("Events")).Cursor()
|
| 380 | 380 |
|
| 381 | 381 |
// Our time range spans the 90's decade. |
| ... | ... |
@@ -399,7 +435,9 @@ all the keys in a bucket: |
| 399 | 399 |
|
| 400 | 400 |
```go |
| 401 | 401 |
db.View(func(tx *bolt.Tx) error {
|
| 402 |
+ // Assume bucket exists and has keys |
|
| 402 | 403 |
b := tx.Bucket([]byte("MyBucket"))
|
| 404 |
+ |
|
| 403 | 405 |
b.ForEach(func(k, v []byte) error {
|
| 404 | 406 |
fmt.Printf("key=%s, value=%s\n", k, v)
|
| 405 | 407 |
return nil |
| ... | ... |
@@ -426,8 +464,11 @@ func (*Bucket) DeleteBucket(key []byte) error |
| 426 | 426 |
Bolt is a single file so it's easy to back up. You can use the `Tx.WriteTo()` |
| 427 | 427 |
function to write a consistent view of the database to a writer. If you call |
| 428 | 428 |
this from a read-only transaction, it will perform a hot backup and not block |
| 429 |
-your other database reads and writes. It will also use `O_DIRECT` when available |
|
| 430 |
-to prevent page cache trashing. |
|
| 429 |
+your other database reads and writes. |
|
| 430 |
+ |
|
| 431 |
+By default, it will use a regular file handle which will utilize the operating |
|
| 432 |
+system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx) |
|
| 433 |
+documentation for information about optimizing for larger-than-RAM datasets. |
|
| 431 | 434 |
|
| 432 | 435 |
One common use case is to back up over HTTP so you can use tools like `cURL` to |
| 433 | 436 |
do database backups: |
| ... | ... |
@@ -509,6 +550,84 @@ if err != nil {
|
| 509 | 509 |
} |
| 510 | 510 |
``` |
| 511 | 511 |
|
| 512 |
+### Mobile Use (iOS/Android) |
|
| 513 |
+ |
|
| 514 |
+Bolt is able to run on mobile devices by leveraging the binding feature of the |
|
| 515 |
+[gomobile](https://github.com/golang/mobile) tool. Create a struct that will |
|
| 516 |
+contain your database logic and a reference to a `*bolt.DB` with an initializing |
|
| 517 |
+constructor that takes in a filepath where the database file will be stored. |
|
| 518 |
+Neither Android nor iOS requires extra permissions or cleanup from using this method. |
|
| 519 |
+ |
|
| 520 |
+```go |
|
| 521 |
+func NewBoltDB(filepath string) *BoltDB {
|
|
| 522 |
+ db, err := bolt.Open(filepath+"/demo.db", 0600, nil) |
|
| 523 |
+ if err != nil {
|
|
| 524 |
+ log.Fatal(err) |
|
| 525 |
+ } |
|
| 526 |
+ |
|
| 527 |
+ return &BoltDB{db}
|
|
| 528 |
+} |
|
| 529 |
+ |
|
| 530 |
+type BoltDB struct {
|
|
| 531 |
+ db *bolt.DB |
|
| 532 |
+ ... |
|
| 533 |
+} |
|
| 534 |
+ |
|
| 535 |
+func (b *BoltDB) Path() string {
|
|
| 536 |
+ return b.db.Path() |
|
| 537 |
+} |
|
| 538 |
+ |
|
| 539 |
+func (b *BoltDB) Close() {
|
|
| 540 |
+ b.db.Close() |
|
| 541 |
+} |
|
| 542 |
+``` |
|
| 543 |
+ |
|
| 544 |
+Database logic should be defined as methods on this wrapper struct. |
|
| 545 |
+ |
|
| 546 |
+To initialize this struct from the native language (both platforms now sync |
|
| 547 |
+their local storage to the cloud; these snippets disable that functionality for the |
|
| 548 |
+database file): |
|
| 549 |
+ |
|
| 550 |
+#### Android |
|
| 551 |
+ |
|
| 552 |
+```java |
|
| 553 |
+String path; |
|
| 554 |
+if (android.os.Build.VERSION.SDK_INT >=android.os.Build.VERSION_CODES.LOLLIPOP){
|
|
| 555 |
+ path = getNoBackupFilesDir().getAbsolutePath(); |
|
| 556 |
+} else{
|
|
| 557 |
+ path = getFilesDir().getAbsolutePath(); |
|
| 558 |
+} |
|
| 559 |
+Boltmobiledemo.BoltDB boltDB = Boltmobiledemo.NewBoltDB(path) |
|
| 560 |
+``` |
|
| 561 |
+ |
|
| 562 |
+#### iOS |
|
| 563 |
+ |
|
| 564 |
+```objc |
|
| 565 |
+- (void)demo {
|
|
| 566 |
+ NSString* path = [NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, |
|
| 567 |
+ NSUserDomainMask, |
|
| 568 |
+ YES) objectAtIndex:0]; |
|
| 569 |
+ GoBoltmobiledemoBoltDB * demo = GoBoltmobiledemoNewBoltDB(path); |
|
| 570 |
+ [self addSkipBackupAttributeToItemAtPath:demo.path]; |
|
| 571 |
+ //Some DB Logic would go here |
|
| 572 |
+ [demo close]; |
|
| 573 |
+} |
|
| 574 |
+ |
|
| 575 |
+- (BOOL)addSkipBackupAttributeToItemAtPath:(NSString *) filePathString |
|
| 576 |
+{
|
|
| 577 |
+ NSURL* URL= [NSURL fileURLWithPath: filePathString]; |
|
| 578 |
+ assert([[NSFileManager defaultManager] fileExistsAtPath: [URL path]]); |
|
| 579 |
+ |
|
| 580 |
+ NSError *error = nil; |
|
| 581 |
+ BOOL success = [URL setResourceValue: [NSNumber numberWithBool: YES] |
|
| 582 |
+ forKey: NSURLIsExcludedFromBackupKey error: &error]; |
|
| 583 |
+ if(!success){
|
|
| 584 |
+ NSLog(@"Error excluding %@ from backup %@", [URL lastPathComponent], error); |
|
| 585 |
+ } |
|
| 586 |
+ return success; |
|
| 587 |
+} |
|
| 588 |
+ |
|
| 589 |
+``` |
|
| 512 | 590 |
|
| 513 | 591 |
## Resources |
| 514 | 592 |
|
| ... | ... |
@@ -544,7 +663,7 @@ they are libraries bundled into the application, however, their underlying |
| 544 | 544 |
structure is a log-structured merge-tree (LSM tree). An LSM tree optimizes |
| 545 | 545 |
random writes by using a write ahead log and multi-tiered, sorted files called |
| 546 | 546 |
SSTables. Bolt uses a B+tree internally and only a single file. Both approaches |
| 547 |
-have trade offs. |
|
| 547 |
+have trade-offs. |
|
| 548 | 548 |
|
| 549 | 549 |
If you require a high random write throughput (>10,000 w/sec) or you need to use |
| 550 | 550 |
spinning disks then LevelDB could be a good choice. If your application is |
| ... | ... |
@@ -580,9 +699,8 @@ It's important to pick the right tool for the job and Bolt is no exception. |
| 580 | 580 |
Here are a few things to note when evaluating and using Bolt: |
| 581 | 581 |
|
| 582 | 582 |
* Bolt is good for read intensive workloads. Sequential write performance is |
| 583 |
- also fast but random writes can be slow. You can add a write-ahead log or |
|
| 584 |
- [transaction coalescer](https://github.com/boltdb/coalescer) in front of Bolt |
|
| 585 |
- to mitigate this issue. |
|
| 583 |
+ also fast but random writes can be slow. You can use `DB.Batch()` or add a |
|
| 584 |
+ write-ahead log to help mitigate this issue. |
|
| 586 | 585 |
|
| 587 | 586 |
* Bolt uses a B+tree internally so there can be a lot of random page access. |
| 588 | 587 |
SSDs provide a significant performance boost over spinning disks. |
| ... | ... |
@@ -618,7 +736,7 @@ Here are a few things to note when evaluating and using Bolt: |
| 618 | 618 |
|
| 619 | 619 |
* The data structures in the Bolt database are memory mapped so the data file |
| 620 | 620 |
will be endian specific. This means that you cannot copy a Bolt file from a |
| 621 |
- little endian machine to a big endian machine and have it work. For most |
|
| 621 |
+ little endian machine to a big endian machine and have it work. For most |
|
| 622 | 622 |
users this is not a concern since most modern CPUs are little endian. |
| 623 | 623 |
|
| 624 | 624 |
* Because of the way pages are laid out on disk, Bolt cannot truncate data files |
| ... | ... |
@@ -633,6 +751,56 @@ Here are a few things to note when evaluating and using Bolt: |
| 633 | 633 |
[page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638 |
| 634 | 634 |
|
| 635 | 635 |
|
| 636 |
+## Reading the Source |
|
| 637 |
+ |
|
| 638 |
+Bolt is a relatively small code base (<3KLOC) for an embedded, serializable, |
|
| 639 |
+transactional key/value database so it can be a good starting point for people |
|
| 640 |
+interested in how databases work. |
|
| 641 |
+ |
|
| 642 |
+The best places to start are the main entry points into Bolt: |
|
| 643 |
+ |
|
| 644 |
+- `Open()` - Initializes the reference to the database. It's responsible for |
|
| 645 |
+ creating the database if it doesn't exist, obtaining an exclusive lock on the |
|
| 646 |
+ file, reading the meta pages, & memory-mapping the file. |
|
| 647 |
+ |
|
| 648 |
+- `DB.Begin()` - Starts a read-only or read-write transaction depending on the |
|
| 649 |
+ value of the `writable` argument. This requires briefly obtaining the "meta" |
|
| 650 |
+ lock to keep track of open transactions. Only one read-write transaction can |
|
| 651 |
+ exist at a time so the "rwlock" is acquired during the life of a read-write |
|
| 652 |
+ transaction. |
|
| 653 |
+ |
|
| 654 |
+- `Bucket.Put()` - Writes a key/value pair into a bucket. After validating the |
|
| 655 |
+ arguments, a cursor is used to traverse the B+tree to the page and position |
|
| 656 |
+ where the key & value will be written. Once the position is found, the bucket |
|
| 657 |
+ materializes the underlying page and the page's parent pages into memory as |
|
| 658 |
+ "nodes". These nodes are where mutations occur during read-write transactions. |
|
| 659 |
+ These changes get flushed to disk during commit. |
|
| 660 |
+ |
|
| 661 |
+- `Bucket.Get()` - Retrieves a key/value pair from a bucket. This uses a cursor |
|
| 662 |
+ to move to the page & position of a key/value pair. During a read-only |
|
| 663 |
+ transaction, the key and value data is returned as a direct reference to the |
|
| 664 |
+ underlying mmap file so there's no allocation overhead. For read-write |
|
| 665 |
+ transactions, this data may reference the mmap file or one of the in-memory |
|
| 666 |
+ node values. |
|
| 667 |
+ |
|
| 668 |
+- `Cursor` - This object is simply for traversing the B+tree of on-disk pages |
|
| 669 |
+ or in-memory nodes. It can seek to a specific key, move to the first or last |
|
| 670 |
+ value, or it can move forward or backward. The cursor handles the movement up |
|
| 671 |
+ and down the B+tree transparently to the end user. |
|
| 672 |
+ |
|
| 673 |
+- `Tx.Commit()` - Converts the in-memory dirty nodes and the list of free pages |
|
| 674 |
+ into pages to be written to disk. Writing to disk then occurs in two phases. |
|
| 675 |
+ First, the dirty pages are written to disk and an `fsync()` occurs. Second, a |
|
| 676 |
+ new meta page with an incremented transaction ID is written and another |
|
| 677 |
+ `fsync()` occurs. This two phase write ensures that partially written data |
|
| 678 |
+ pages are ignored in the event of a crash since the meta page pointing to them |
|
| 679 |
+ is never written. Partially written meta pages are invalidated because they |
|
| 680 |
+ are written with a checksum. |
|
| 681 |
+ |
|
| 682 |
+If you have additional notes that could be helpful for others, please submit |
|
| 683 |
+them via pull request. |
|
| 684 |
+ |
|
| 685 |
+ |
|
| 636 | 686 |
## Other Projects Using Bolt |
| 637 | 687 |
|
| 638 | 688 |
Below is a list of public, open source projects that use Bolt: |
| ... | ... |
@@ -643,21 +811,21 @@ Below is a list of public, open source projects that use Bolt: |
| 643 | 643 |
* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics. |
| 644 | 644 |
* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects. |
| 645 | 645 |
* [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday. |
| 646 |
-* [ChainStore](https://github.com/nulayer/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations. |
|
| 646 |
+* [ChainStore](https://github.com/pressly/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations. |
|
| 647 | 647 |
* [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite. |
| 648 | 648 |
* [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin". |
| 649 | 649 |
* [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka. |
| 650 | 650 |
* [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed. |
| 651 | 651 |
* [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt. |
| 652 |
-* [photosite/session](http://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site. |
|
| 652 |
+* [photosite/session](https://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site. |
|
| 653 | 653 |
* [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage. |
| 654 | 654 |
* [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters. |
| 655 | 655 |
* [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend. |
| 656 | 656 |
* [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend. |
| 657 | 657 |
* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server. |
| 658 | 658 |
* [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database. |
| 659 |
-* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read. |
|
| 660 |
-* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics. |
|
| 659 |
+* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read. |
|
| 660 |
+* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics. |
|
| 661 | 661 |
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data. |
| 662 | 662 |
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system. |
| 663 | 663 |
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware. |
| ... | ... |
@@ -667,5 +835,10 @@ Below is a list of public, open source projects that use Bolt: |
| 667 | 667 |
backed by boltdb. |
| 668 | 668 |
* [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining |
| 669 | 669 |
simple tx and key scans. |
| 670 |
+* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. |
|
| 671 |
+* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service |
|
| 672 |
+* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service. |
|
| 673 |
+* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners. |
|
| 674 |
+* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. |
|
| 670 | 675 |
|
| 671 | 676 |
If you are using Bolt in a project please send a pull request to add it to the list. |
| 672 | 677 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,18 @@ |
| 0 |
+version: "{build}"
|
|
| 1 |
+ |
|
| 2 |
+os: Windows Server 2012 R2 |
|
| 3 |
+ |
|
| 4 |
+clone_folder: c:\gopath\src\github.com\boltdb\bolt |
|
| 5 |
+ |
|
| 6 |
+environment: |
|
| 7 |
+ GOPATH: c:\gopath |
|
| 8 |
+ |
|
| 9 |
+install: |
|
| 10 |
+ - echo %PATH% |
|
| 11 |
+ - echo %GOPATH% |
|
| 12 |
+ - go version |
|
| 13 |
+ - go env |
|
| 14 |
+ - go get -v -t ./... |
|
| 15 |
+ |
|
| 16 |
+build_script: |
|
| 17 |
+ - go test -v ./... |
| 0 | 18 |
deleted file mode 100644 |
| ... | ... |
@@ -1,138 +0,0 @@ |
| 1 |
-package bolt |
|
| 2 |
- |
|
| 3 |
-import ( |
|
| 4 |
- "errors" |
|
| 5 |
- "fmt" |
|
| 6 |
- "sync" |
|
| 7 |
- "time" |
|
| 8 |
-) |
|
| 9 |
- |
|
| 10 |
-// Batch calls fn as part of a batch. It behaves similar to Update, |
|
| 11 |
-// except: |
|
| 12 |
-// |
|
| 13 |
-// 1. concurrent Batch calls can be combined into a single Bolt |
|
| 14 |
-// transaction. |
|
| 15 |
-// |
|
| 16 |
-// 2. the function passed to Batch may be called multiple times, |
|
| 17 |
-// regardless of whether it returns error or not. |
|
| 18 |
-// |
|
| 19 |
-// This means that Batch function side effects must be idempotent and |
|
| 20 |
-// take permanent effect only after a successful return is seen in |
|
| 21 |
-// caller. |
|
| 22 |
-// |
|
| 23 |
-// The maximum batch size and delay can be adjusted with DB.MaxBatchSize |
|
| 24 |
-// and DB.MaxBatchDelay, respectively. |
|
| 25 |
-// |
|
| 26 |
-// Batch is only useful when there are multiple goroutines calling it. |
|
| 27 |
-func (db *DB) Batch(fn func(*Tx) error) error {
|
|
| 28 |
- errCh := make(chan error, 1) |
|
| 29 |
- |
|
| 30 |
- db.batchMu.Lock() |
|
| 31 |
- if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
|
|
| 32 |
- // There is no existing batch, or the existing batch is full; start a new one. |
|
| 33 |
- db.batch = &batch{
|
|
| 34 |
- db: db, |
|
| 35 |
- } |
|
| 36 |
- db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) |
|
| 37 |
- } |
|
| 38 |
- db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
|
|
| 39 |
- if len(db.batch.calls) >= db.MaxBatchSize {
|
|
| 40 |
- // wake up batch, it's ready to run |
|
| 41 |
- go db.batch.trigger() |
|
| 42 |
- } |
|
| 43 |
- db.batchMu.Unlock() |
|
| 44 |
- |
|
| 45 |
- err := <-errCh |
|
| 46 |
- if err == trySolo {
|
|
| 47 |
- err = db.Update(fn) |
|
| 48 |
- } |
|
| 49 |
- return err |
|
| 50 |
-} |
|
| 51 |
- |
|
| 52 |
-type call struct {
|
|
| 53 |
- fn func(*Tx) error |
|
| 54 |
- err chan<- error |
|
| 55 |
-} |
|
| 56 |
- |
|
| 57 |
-type batch struct {
|
|
| 58 |
- db *DB |
|
| 59 |
- timer *time.Timer |
|
| 60 |
- start sync.Once |
|
| 61 |
- calls []call |
|
| 62 |
-} |
|
| 63 |
- |
|
| 64 |
-// trigger runs the batch if it hasn't already been run. |
|
| 65 |
-func (b *batch) trigger() {
|
|
| 66 |
- b.start.Do(b.run) |
|
| 67 |
-} |
|
| 68 |
- |
|
| 69 |
-// run performs the transactions in the batch and communicates results |
|
| 70 |
-// back to DB.Batch. |
|
| 71 |
-func (b *batch) run() {
|
|
| 72 |
- b.db.batchMu.Lock() |
|
| 73 |
- b.timer.Stop() |
|
| 74 |
- // Make sure no new work is added to this batch, but don't break |
|
| 75 |
- // other batches. |
|
| 76 |
- if b.db.batch == b {
|
|
| 77 |
- b.db.batch = nil |
|
| 78 |
- } |
|
| 79 |
- b.db.batchMu.Unlock() |
|
| 80 |
- |
|
| 81 |
-retry: |
|
| 82 |
- for len(b.calls) > 0 {
|
|
| 83 |
- var failIdx = -1 |
|
| 84 |
- err := b.db.Update(func(tx *Tx) error {
|
|
| 85 |
- for i, c := range b.calls {
|
|
| 86 |
- if err := safelyCall(c.fn, tx); err != nil {
|
|
| 87 |
- failIdx = i |
|
| 88 |
- return err |
|
| 89 |
- } |
|
| 90 |
- } |
|
| 91 |
- return nil |
|
| 92 |
- }) |
|
| 93 |
- |
|
| 94 |
- if failIdx >= 0 {
|
|
| 95 |
- // take the failing transaction out of the batch. it's |
|
| 96 |
- // safe to shorten b.calls here because db.batch no longer |
|
| 97 |
- // points to us, and we hold the mutex anyway. |
|
| 98 |
- c := b.calls[failIdx] |
|
| 99 |
- b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] |
|
| 100 |
- // tell the submitter re-run it solo, continue with the rest of the batch |
|
| 101 |
- c.err <- trySolo |
|
| 102 |
- continue retry |
|
| 103 |
- } |
|
| 104 |
- |
|
| 105 |
- // pass success, or bolt internal errors, to all callers |
|
| 106 |
- for _, c := range b.calls {
|
|
| 107 |
- if c.err != nil {
|
|
| 108 |
- c.err <- err |
|
| 109 |
- } |
|
| 110 |
- } |
|
| 111 |
- break retry |
|
| 112 |
- } |
|
| 113 |
-} |
|
| 114 |
- |
|
| 115 |
-// trySolo is a special sentinel error value used for signaling that a |
|
| 116 |
-// transaction function should be re-run. It should never be seen by |
|
| 117 |
-// callers. |
|
| 118 |
-var trySolo = errors.New("batch function returned an error and should be re-run solo")
|
|
| 119 |
- |
|
| 120 |
-type panicked struct {
|
|
| 121 |
- reason interface{}
|
|
| 122 |
-} |
|
| 123 |
- |
|
| 124 |
-func (p panicked) Error() string {
|
|
| 125 |
- if err, ok := p.reason.(error); ok {
|
|
| 126 |
- return err.Error() |
|
| 127 |
- } |
|
| 128 |
- return fmt.Sprintf("panic: %v", p.reason)
|
|
| 129 |
-} |
|
| 130 |
- |
|
| 131 |
-func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
|
|
| 132 |
- defer func() {
|
|
| 133 |
- if p := recover(); p != nil {
|
|
| 134 |
- err = panicked{p}
|
|
| 135 |
- } |
|
| 136 |
- }() |
|
| 137 |
- return fn(tx) |
|
| 138 |
-} |
| 19 | 17 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,9 @@ |
| 0 |
+// +build ppc |
|
| 1 |
+ |
|
| 2 |
+package bolt |
|
| 3 |
+ |
|
| 4 |
+// maxMapSize represents the largest mmap size supported by Bolt. |
|
| 5 |
+const maxMapSize = 0x7FFFFFFF // 2GB |
|
| 6 |
+ |
|
| 7 |
+// maxAllocSize is the size used when creating array pointers. |
|
| 8 |
+const maxAllocSize = 0xFFFFFFF |
| 0 | 9 |
new file mode 100644 |
| ... | ... |
@@ -0,0 +1,9 @@ |
| 0 |
+// +build ppc64 |
|
| 1 |
+ |
|
| 2 |
+package bolt |
|
| 3 |
+ |
|
| 4 |
+// maxMapSize represents the largest mmap size supported by Bolt. |
|
| 5 |
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB |
|
| 6 |
+ |
|
| 7 |
+// maxAllocSize is the size used when creating array pointers. |
|
| 8 |
+const maxAllocSize = 0x7FFFFFFF |
| ... | ... |
@@ -11,7 +11,7 @@ import ( |
| 11 | 11 |
) |
| 12 | 12 |
|
| 13 | 13 |
// flock acquires an advisory lock on a file descriptor. |
| 14 |
-func flock(f *os.File, exclusive bool, timeout time.Duration) error {
|
|
| 14 |
+func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
|
|
| 15 | 15 |
var t time.Time |
| 16 | 16 |
for {
|
| 17 | 17 |
// If we're beyond our timeout then return an error. |
| ... | ... |
@@ -27,7 +27,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
|
| 27 | 27 |
} |
| 28 | 28 |
|
| 29 | 29 |
// Otherwise attempt to obtain an exclusive lock. |
| 30 |
- err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB) |
|
| 30 |
+ err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB) |
|
| 31 | 31 |
if err == nil {
|
| 32 | 32 |
return nil |
| 33 | 33 |
} else if err != syscall.EWOULDBLOCK {
|
| ... | ... |
@@ -40,25 +40,14 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
|
| 40 | 40 |
} |
| 41 | 41 |
|
| 42 | 42 |
// funlock releases an advisory lock on a file descriptor. |
| 43 |
-func funlock(f *os.File) error {
|
|
| 44 |
- return syscall.Flock(int(f.Fd()), syscall.LOCK_UN) |
|
| 43 |
+func funlock(db *DB) error {
|
|
| 44 |
+ return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN) |
|
| 45 | 45 |
} |
| 46 | 46 |
|
| 47 | 47 |
// mmap memory maps a DB's data file. |
| 48 | 48 |
func mmap(db *DB, sz int) error {
|
| 49 |
- // Truncate and fsync to ensure file size metadata is flushed. |
|
| 50 |
- // https://github.com/boltdb/bolt/issues/284 |
|
| 51 |
- if !db.NoGrowSync && !db.readOnly {
|
|
| 52 |
- if err := db.file.Truncate(int64(sz)); err != nil {
|
|
| 53 |
- return fmt.Errorf("file resize error: %s", err)
|
|
| 54 |
- } |
|
| 55 |
- if err := db.file.Sync(); err != nil {
|
|
| 56 |
- return fmt.Errorf("file sync error: %s", err)
|
|
| 57 |
- } |
|
| 58 |
- } |
|
| 59 |
- |
|
| 60 | 49 |
// Map the data file to memory. |
| 61 |
- b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED) |
|
| 50 |
+ b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) |
|
| 62 | 51 |
if err != nil {
|
| 63 | 52 |
return err |
| 64 | 53 |
} |
| ... | ... |
@@ -1,4 +1,3 @@ |
| 1 |
- |
|
| 2 | 1 |
package bolt |
| 3 | 2 |
|
| 4 | 3 |
import ( |
| ... | ... |
@@ -7,11 +6,12 @@ import ( |
| 7 | 7 |
"syscall" |
| 8 | 8 |
"time" |
| 9 | 9 |
"unsafe" |
| 10 |
+ |
|
| 10 | 11 |
"golang.org/x/sys/unix" |
| 11 | 12 |
) |
| 12 | 13 |
|
| 13 | 14 |
// flock acquires an advisory lock on a file descriptor. |
| 14 |
-func flock(f *os.File, exclusive bool, timeout time.Duration) error {
|
|
| 15 |
+func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
|
|
| 15 | 16 |
var t time.Time |
| 16 | 17 |
for {
|
| 17 | 18 |
// If we're beyond our timeout then return an error. |
| ... | ... |
@@ -32,7 +32,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
|
| 32 | 32 |
} else {
|
| 33 | 33 |
lock.Type = syscall.F_RDLCK |
| 34 | 34 |
} |
| 35 |
- err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock) |
|
| 35 |
+ err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock) |
|
| 36 | 36 |
if err == nil {
|
| 37 | 37 |
return nil |
| 38 | 38 |
} else if err != syscall.EAGAIN {
|
| ... | ... |
@@ -45,30 +45,19 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
|
| 45 | 45 |
} |
| 46 | 46 |
|
| 47 | 47 |
// funlock releases an advisory lock on a file descriptor. |
| 48 |
-func funlock(f *os.File) error {
|
|
| 48 |
+func funlock(db *DB) error {
|
|
| 49 | 49 |
var lock syscall.Flock_t |
| 50 | 50 |
lock.Start = 0 |
| 51 | 51 |
lock.Len = 0 |
| 52 | 52 |
lock.Type = syscall.F_UNLCK |
| 53 | 53 |
lock.Whence = 0 |
| 54 |
- return syscall.FcntlFlock(uintptr(f.Fd()), syscall.F_SETLK, &lock) |
|
| 54 |
+ return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock) |
|
| 55 | 55 |
} |
| 56 | 56 |
|
| 57 | 57 |
// mmap memory maps a DB's data file. |
| 58 | 58 |
func mmap(db *DB, sz int) error {
|
| 59 |
- // Truncate and fsync to ensure file size metadata is flushed. |
|
| 60 |
- // https://github.com/boltdb/bolt/issues/284 |
|
| 61 |
- if !db.NoGrowSync && !db.readOnly {
|
|
| 62 |
- if err := db.file.Truncate(int64(sz)); err != nil {
|
|
| 63 |
- return fmt.Errorf("file resize error: %s", err)
|
|
| 64 |
- } |
|
| 65 |
- if err := db.file.Sync(); err != nil {
|
|
| 66 |
- return fmt.Errorf("file sync error: %s", err)
|
|
| 67 |
- } |
|
| 68 |
- } |
|
| 69 |
- |
|
| 70 | 59 |
// Map the data file to memory. |
| 71 |
- b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED) |
|
| 60 |
+ b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags) |
|
| 72 | 61 |
if err != nil {
|
| 73 | 62 |
return err |
| 74 | 63 |
} |
| ... | ... |
@@ -8,7 +8,39 @@ import ( |
| 8 | 8 |
"unsafe" |
| 9 | 9 |
) |
| 10 | 10 |
|
| 11 |
-var odirect int |
|
| 11 |
+// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1 |
|
| 12 |
+var ( |
|
| 13 |
+ modkernel32 = syscall.NewLazyDLL("kernel32.dll")
|
|
| 14 |
+ procLockFileEx = modkernel32.NewProc("LockFileEx")
|
|
| 15 |
+ procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
|
|
| 16 |
+) |
|
| 17 |
+ |
|
| 18 |
+const ( |
|
| 19 |
+ lockExt = ".lock" |
|
| 20 |
+ |
|
| 21 |
+ // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx |
|
| 22 |
+ flagLockExclusive = 2 |
|
| 23 |
+ flagLockFailImmediately = 1 |
|
| 24 |
+ |
|
| 25 |
+ // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx |
|
| 26 |
+ errLockViolation syscall.Errno = 0x21 |
|
| 27 |
+) |
|
| 28 |
+ |
|
| 29 |
+func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
|
|
| 30 |
+ r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol))) |
|
| 31 |
+ if r == 0 {
|
|
| 32 |
+ return err |
|
| 33 |
+ } |
|
| 34 |
+ return nil |
|
| 35 |
+} |
|
| 36 |
+ |
|
| 37 |
+func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
|
|
| 38 |
+ r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0) |
|
| 39 |
+ if r == 0 {
|
|
| 40 |
+ return err |
|
| 41 |
+ } |
|
| 42 |
+ return nil |
|
| 43 |
+} |
|
| 12 | 44 |
|
| 13 | 45 |
// fdatasync flushes written data to a file descriptor. |
| 14 | 46 |
func fdatasync(db *DB) error {
|
| ... | ... |
@@ -16,13 +48,49 @@ func fdatasync(db *DB) error {
|
| 16 | 16 |
} |
| 17 | 17 |
|
| 18 | 18 |
// flock acquires an advisory lock on a file descriptor. |
| 19 |
-func flock(f *os.File, _ bool, _ time.Duration) error {
|
|
| 20 |
- return nil |
|
| 19 |
+func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
|
|
| 20 |
+ // Create a separate lock file on windows because a process |
|
| 21 |
+ // cannot share an exclusive lock on the same file. This is |
|
| 22 |
+ // needed during Tx.WriteTo(). |
|
| 23 |
+ f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode) |
|
| 24 |
+ if err != nil {
|
|
| 25 |
+ return err |
|
| 26 |
+ } |
|
| 27 |
+ db.lockfile = f |
|
| 28 |
+ |
|
| 29 |
+ var t time.Time |
|
| 30 |
+ for {
|
|
| 31 |
+ // If we're beyond our timeout then return an error. |
|
| 32 |
+ // This can only occur after we've attempted a flock once. |
|
| 33 |
+ if t.IsZero() {
|
|
| 34 |
+ t = time.Now() |
|
| 35 |
+ } else if timeout > 0 && time.Since(t) > timeout {
|
|
| 36 |
+ return ErrTimeout |
|
| 37 |
+ } |
|
| 38 |
+ |
|
| 39 |
+ var flag uint32 = flagLockFailImmediately |
|
| 40 |
+ if exclusive {
|
|
| 41 |
+ flag |= flagLockExclusive |
|
| 42 |
+ } |
|
| 43 |
+ |
|
| 44 |
+ err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{})
|
|
| 45 |
+ if err == nil {
|
|
| 46 |
+ return nil |
|
| 47 |
+ } else if err != errLockViolation {
|
|
| 48 |
+ return err |
|
| 49 |
+ } |
|
| 50 |
+ |
|
| 51 |
+ // Wait for a bit and try again. |
|
| 52 |
+ time.Sleep(50 * time.Millisecond) |
|
| 53 |
+ } |
|
| 21 | 54 |
} |
| 22 | 55 |
|
| 23 | 56 |
// funlock releases an advisory lock on a file descriptor. |
| 24 |
-func funlock(f *os.File) error {
|
|
| 25 |
- return nil |
|
| 57 |
+func funlock(db *DB) error {
|
|
| 58 |
+ err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
|
|
| 59 |
+ db.lockfile.Close() |
|
| 60 |
+ os.Remove(db.path+lockExt) |
|
| 61 |
+ return err |
|
| 26 | 62 |
} |
| 27 | 63 |
|
| 28 | 64 |
// mmap memory maps a DB's data file. |
| ... | ... |
@@ -11,7 +11,7 @@ const ( |
| 11 | 11 |
MaxKeySize = 32768 |
| 12 | 12 |
|
| 13 | 13 |
// MaxValueSize is the maximum length of a value, in bytes. |
| 14 |
- MaxValueSize = 4294967295 |
|
| 14 |
+ MaxValueSize = (1 << 31) - 2 |
|
| 15 | 15 |
) |
| 16 | 16 |
|
| 17 | 17 |
const ( |
| ... | ... |
@@ -273,6 +273,7 @@ func (b *Bucket) Get(key []byte) []byte {
|
| 273 | 273 |
|
| 274 | 274 |
// Put sets the value for a key in the bucket. |
| 275 | 275 |
// If the key exists then its previous value will be overwritten. |
| 276 |
+// Supplied value must remain valid for the life of the transaction. |
|
| 276 | 277 |
// Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large. |
| 277 | 278 |
func (b *Bucket) Put(key []byte, value []byte) error {
|
| 278 | 279 |
if b.tx.db == nil {
|
| ... | ... |
@@ -34,6 +34,13 @@ func (c *Cursor) First() (key []byte, value []byte) {
|
| 34 | 34 |
p, n := c.bucket.pageNode(c.bucket.root) |
| 35 | 35 |
c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
|
| 36 | 36 |
c.first() |
| 37 |
+ |
|
| 38 |
+ // If we land on an empty page then move to the next value. |
|
| 39 |
+ // https://github.com/boltdb/bolt/issues/450 |
|
| 40 |
+ if c.stack[len(c.stack)-1].count() == 0 {
|
|
| 41 |
+ c.next() |
|
| 42 |
+ } |
|
| 43 |
+ |
|
| 37 | 44 |
k, v, flags := c.keyValue() |
| 38 | 45 |
if (flags & uint32(bucketLeafFlag)) != 0 {
|
| 39 | 46 |
return k, nil |
| ... | ... |
@@ -209,28 +216,37 @@ func (c *Cursor) last() {
|
| 209 | 209 |
// next moves to the next leaf element and returns the key and value. |
| 210 | 210 |
// If the cursor is at the last leaf element then it stays there and returns nil. |
| 211 | 211 |
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
|
| 212 |
- // Attempt to move over one element until we're successful. |
|
| 213 |
- // Move up the stack as we hit the end of each page in our stack. |
|
| 214 |
- var i int |
|
| 215 |
- for i = len(c.stack) - 1; i >= 0; i-- {
|
|
| 216 |
- elem := &c.stack[i] |
|
| 217 |
- if elem.index < elem.count()-1 {
|
|
| 218 |
- elem.index++ |
|
| 219 |
- break |
|
| 212 |
+ for {
|
|
| 213 |
+ // Attempt to move over one element until we're successful. |
|
| 214 |
+ // Move up the stack as we hit the end of each page in our stack. |
|
| 215 |
+ var i int |
|
| 216 |
+ for i = len(c.stack) - 1; i >= 0; i-- {
|
|
| 217 |
+ elem := &c.stack[i] |
|
| 218 |
+ if elem.index < elem.count()-1 {
|
|
| 219 |
+ elem.index++ |
|
| 220 |
+ break |
|
| 221 |
+ } |
|
| 220 | 222 |
} |
| 221 |
- } |
|
| 222 | 223 |
|
| 223 |
- // If we've hit the root page then stop and return. This will leave the |
|
| 224 |
- // cursor on the last element of the last page. |
|
| 225 |
- if i == -1 {
|
|
| 226 |
- return nil, nil, 0 |
|
| 227 |
- } |
|
| 224 |
+ // If we've hit the root page then stop and return. This will leave the |
|
| 225 |
+ // cursor on the last element of the last page. |
|
| 226 |
+ if i == -1 {
|
|
| 227 |
+ return nil, nil, 0 |
|
| 228 |
+ } |
|
| 228 | 229 |
|
| 229 |
- // Otherwise start from where we left off in the stack and find the |
|
| 230 |
- // first element of the first leaf page. |
|
| 231 |
- c.stack = c.stack[:i+1] |
|
| 232 |
- c.first() |
|
| 233 |
- return c.keyValue() |
|
| 230 |
+ // Otherwise start from where we left off in the stack and find the |
|
| 231 |
+ // first element of the first leaf page. |
|
| 232 |
+ c.stack = c.stack[:i+1] |
|
| 233 |
+ c.first() |
|
| 234 |
+ |
|
| 235 |
+ // If this is an empty page then restart and move back up the stack. |
|
| 236 |
+ // https://github.com/boltdb/bolt/issues/450 |
|
| 237 |
+ if c.stack[len(c.stack)-1].count() == 0 {
|
|
| 238 |
+ continue |
|
| 239 |
+ } |
|
| 240 |
+ |
|
| 241 |
+ return c.keyValue() |
|
| 242 |
+ } |
|
| 234 | 243 |
} |
| 235 | 244 |
|
| 236 | 245 |
// search recursively performs a binary search against a given page/node until it finds a given key. |
| ... | ... |
@@ -1,8 +1,10 @@ |
| 1 | 1 |
package bolt |
| 2 | 2 |
|
| 3 | 3 |
import ( |
| 4 |
+ "errors" |
|
| 4 | 5 |
"fmt" |
| 5 | 6 |
"hash/fnv" |
| 7 |
+ "log" |
|
| 6 | 8 |
"os" |
| 7 | 9 |
"runtime" |
| 8 | 10 |
"runtime/debug" |
| ... | ... |
@@ -24,13 +26,14 @@ const magic uint32 = 0xED0CDAED |
| 24 | 24 |
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when |
| 25 | 25 |
// syncing changes to a file. This is required as some operating systems, |
| 26 | 26 |
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes |
| 27 |
-// must be synchronzied using the msync(2) syscall. |
|
| 27 |
+// must be synchronized using the msync(2) syscall. |
|
| 28 | 28 |
const IgnoreNoSync = runtime.GOOS == "openbsd" |
| 29 | 29 |
|
| 30 | 30 |
// Default values if not set in a DB instance. |
| 31 | 31 |
const ( |
| 32 | 32 |
DefaultMaxBatchSize int = 1000 |
| 33 | 33 |
DefaultMaxBatchDelay = 10 * time.Millisecond |
| 34 |
+ DefaultAllocSize = 16 * 1024 * 1024 |
|
| 34 | 35 |
) |
| 35 | 36 |
|
| 36 | 37 |
// DB represents a collection of buckets persisted to a file on disk. |
| ... | ... |
@@ -63,6 +66,10 @@ type DB struct {
|
| 63 | 63 |
// https://github.com/boltdb/bolt/issues/284 |
| 64 | 64 |
NoGrowSync bool |
| 65 | 65 |
|
| 66 |
+ // If you want to read the entire database fast, you can set MmapFlags to |
|
| 67 |
+ // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. |
|
| 68 |
+ MmapFlags int |
|
| 69 |
+ |
|
| 66 | 70 |
// MaxBatchSize is the maximum size of a batch. Default value is |
| 67 | 71 |
// copied from DefaultMaxBatchSize in Open. |
| 68 | 72 |
// |
| ... | ... |
@@ -79,11 +86,18 @@ type DB struct {
|
| 79 | 79 |
// Do not change concurrently with calls to Batch. |
| 80 | 80 |
MaxBatchDelay time.Duration |
| 81 | 81 |
|
| 82 |
+ // AllocSize is the amount of space allocated when the database |
|
| 83 |
+ // needs to create new pages. This is done to amortize the cost |
|
| 84 |
+ // of truncate() and fsync() when growing the data file. |
|
| 85 |
+ AllocSize int |
|
| 86 |
+ |
|
| 82 | 87 |
path string |
| 83 | 88 |
file *os.File |
| 89 |
+ lockfile *os.File // windows only |
|
| 84 | 90 |
dataref []byte // mmap'ed readonly, write throws SEGV |
| 85 | 91 |
data *[maxMapSize]byte |
| 86 | 92 |
datasz int |
| 93 |
+ filesz int // current on disk file size |
|
| 87 | 94 |
meta0 *meta |
| 88 | 95 |
meta1 *meta |
| 89 | 96 |
pageSize int |
| ... | ... |
@@ -136,10 +150,12 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
| 136 | 136 |
options = DefaultOptions |
| 137 | 137 |
} |
| 138 | 138 |
db.NoGrowSync = options.NoGrowSync |
| 139 |
+ db.MmapFlags = options.MmapFlags |
|
| 139 | 140 |
|
| 140 | 141 |
// Set default values for later DB operations. |
| 141 | 142 |
db.MaxBatchSize = DefaultMaxBatchSize |
| 142 | 143 |
db.MaxBatchDelay = DefaultMaxBatchDelay |
| 144 |
+ db.AllocSize = DefaultAllocSize |
|
| 143 | 145 |
|
| 144 | 146 |
flag := os.O_RDWR |
| 145 | 147 |
if options.ReadOnly {
|
| ... | ... |
@@ -162,7 +178,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
| 162 | 162 |
// if !options.ReadOnly. |
| 163 | 163 |
// The database file is locked using the shared lock (more than one process may |
| 164 | 164 |
// hold a lock at the same time) otherwise (options.ReadOnly is set). |
| 165 |
- if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
|
|
| 165 |
+ if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
|
|
| 166 | 166 |
_ = db.close() |
| 167 | 167 |
return nil, err |
| 168 | 168 |
} |
| ... | ... |
@@ -172,7 +188,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
| 172 | 172 |
|
| 173 | 173 |
// Initialize the database if it doesn't exist. |
| 174 | 174 |
if info, err := db.file.Stat(); err != nil {
|
| 175 |
- return nil, fmt.Errorf("stat error: %s", err)
|
|
| 175 |
+ return nil, err |
|
| 176 | 176 |
} else if info.Size() == 0 {
|
| 177 | 177 |
// Initialize new files with meta pages. |
| 178 | 178 |
if err := db.init(); err != nil {
|
| ... | ... |
@@ -184,14 +200,14 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
|
| 184 | 184 |
if _, err := db.file.ReadAt(buf[:], 0); err == nil {
|
| 185 | 185 |
m := db.pageInBuffer(buf[:], 0).meta() |
| 186 | 186 |
if err := m.validate(); err != nil {
|
| 187 |
- return nil, fmt.Errorf("meta0 error: %s", err)
|
|
| 187 |
+ return nil, err |
|
| 188 | 188 |
} |
| 189 | 189 |
db.pageSize = int(m.pageSize) |
| 190 | 190 |
} |
| 191 | 191 |
} |
| 192 | 192 |
|
| 193 | 193 |
// Memory map the data file. |
| 194 |
- if err := db.mmap(0); err != nil {
|
|
| 194 |
+ if err := db.mmap(options.InitialMmapSize); err != nil {
|
|
| 195 | 195 |
_ = db.close() |
| 196 | 196 |
return nil, err |
| 197 | 197 |
} |
| ... | ... |
@@ -248,10 +264,10 @@ func (db *DB) mmap(minsz int) error {
|
| 248 | 248 |
|
| 249 | 249 |
// Validate the meta pages. |
| 250 | 250 |
if err := db.meta0.validate(); err != nil {
|
| 251 |
- return fmt.Errorf("meta0 error: %s", err)
|
|
| 251 |
+ return err |
|
| 252 | 252 |
} |
| 253 | 253 |
if err := db.meta1.validate(); err != nil {
|
| 254 |
- return fmt.Errorf("meta1 error: %s", err)
|
|
| 254 |
+ return err |
|
| 255 | 255 |
} |
| 256 | 256 |
|
| 257 | 257 |
return nil |
| ... | ... |
@@ -266,7 +282,7 @@ func (db *DB) munmap() error {
|
| 266 | 266 |
} |
| 267 | 267 |
|
| 268 | 268 |
// mmapSize determines the appropriate size for the mmap given the current size |
| 269 |
-// of the database. The minimum size is 1MB and doubles until it reaches 1GB. |
|
| 269 |
+// of the database. The minimum size is 32KB and doubles until it reaches 1GB. |
|
| 270 | 270 |
// Returns an error if the new mmap size is greater than the max allowed. |
| 271 | 271 |
func (db *DB) mmapSize(size int) (int, error) {
|
| 272 | 272 |
// Double the size from 32KB until 1GB. |
| ... | ... |
@@ -364,6 +380,10 @@ func (db *DB) Close() error {
|
| 364 | 364 |
} |
| 365 | 365 |
|
| 366 | 366 |
func (db *DB) close() error {
|
| 367 |
+ if !db.opened {
|
|
| 368 |
+ return nil |
|
| 369 |
+ } |
|
| 370 |
+ |
|
| 367 | 371 |
db.opened = false |
| 368 | 372 |
|
| 369 | 373 |
db.freelist = nil |
| ... | ... |
@@ -382,7 +402,9 @@ func (db *DB) close() error {
|
| 382 | 382 |
// No need to unlock read-only file. |
| 383 | 383 |
if !db.readOnly {
|
| 384 | 384 |
// Unlock the file. |
| 385 |
- _ = funlock(db.file) |
|
| 385 |
+ if err := funlock(db); err != nil {
|
|
| 386 |
+ log.Printf("bolt.Close(): funlock error: %s", err)
|
|
| 387 |
+ } |
|
| 386 | 388 |
} |
| 387 | 389 |
|
| 388 | 390 |
// Close the file descriptor. |
| ... | ... |
@@ -401,11 +423,15 @@ func (db *DB) close() error {
|
| 401 | 401 |
// will cause the calls to block and be serialized until the current write |
| 402 | 402 |
// transaction finishes. |
| 403 | 403 |
// |
| 404 |
-// Transactions should not be depedent on one another. Opening a read |
|
| 404 |
+// Transactions should not be dependent on one another. Opening a read |
|
| 405 | 405 |
// transaction and a write transaction in the same goroutine can cause the |
| 406 | 406 |
// writer to deadlock because the database periodically needs to re-mmap itself |
| 407 | 407 |
// as it grows and it cannot do that while a read transaction is open. |
| 408 | 408 |
// |
| 409 |
+// If a long running read transaction (for example, a snapshot transaction) is |
|
| 410 |
+// needed, you might want to set Options.InitialMmapSize to a large enough value |
|
| 411 |
+// to avoid potential blocking of write transaction. |
|
| 412 |
+// |
|
| 409 | 413 |
// IMPORTANT: You must close read-only transactions after you are finished or |
| 410 | 414 |
// else the database will not reclaim old pages. |
| 411 | 415 |
func (db *DB) Begin(writable bool) (*Tx, error) {
|
| ... | ... |
@@ -589,6 +615,136 @@ func (db *DB) View(fn func(*Tx) error) error {
|
| 589 | 589 |
return nil |
| 590 | 590 |
} |
| 591 | 591 |
|
| 592 |
+// Batch calls fn as part of a batch. It behaves similar to Update, |
|
| 593 |
+// except: |
|
| 594 |
+// |
|
| 595 |
+// 1. concurrent Batch calls can be combined into a single Bolt |
|
| 596 |
+// transaction. |
|
| 597 |
+// |
|
| 598 |
+// 2. the function passed to Batch may be called multiple times, |
|
| 599 |
+// regardless of whether it returns error or not. |
|
| 600 |
+// |
|
| 601 |
+// This means that Batch function side effects must be idempotent and |
|
| 602 |
+// take permanent effect only after a successful return is seen in |
|
| 603 |
+// caller. |
|
| 604 |
+// |
|
| 605 |
+// The maximum batch size and delay can be adjusted with DB.MaxBatchSize |
|
| 606 |
+// and DB.MaxBatchDelay, respectively. |
|
| 607 |
+// |
|
| 608 |
+// Batch is only useful when there are multiple goroutines calling it. |
|
| 609 |
+func (db *DB) Batch(fn func(*Tx) error) error {
|
|
| 610 |
+ errCh := make(chan error, 1) |
|
| 611 |
+ |
|
| 612 |
+ db.batchMu.Lock() |
|
| 613 |
+ if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
|
|
| 614 |
+ // There is no existing batch, or the existing batch is full; start a new one. |
|
| 615 |
+ db.batch = &batch{
|
|
| 616 |
+ db: db, |
|
| 617 |
+ } |
|
| 618 |
+ db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger) |
|
| 619 |
+ } |
|
| 620 |
+ db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
|
|
| 621 |
+ if len(db.batch.calls) >= db.MaxBatchSize {
|
|
| 622 |
+ // wake up batch, it's ready to run |
|
| 623 |
+ go db.batch.trigger() |
|
| 624 |
+ } |
|
| 625 |
+ db.batchMu.Unlock() |
|
| 626 |
+ |
|
| 627 |
+ err := <-errCh |
|
| 628 |
+ if err == trySolo {
|
|
| 629 |
+ err = db.Update(fn) |
|
| 630 |
+ } |
|
| 631 |
+ return err |
|
| 632 |
+} |
|
| 633 |
+ |
|
| 634 |
+type call struct {
|
|
| 635 |
+ fn func(*Tx) error |
|
| 636 |
+ err chan<- error |
|
| 637 |
+} |
|
| 638 |
+ |
|
| 639 |
+type batch struct {
|
|
| 640 |
+ db *DB |
|
| 641 |
+ timer *time.Timer |
|
| 642 |
+ start sync.Once |
|
| 643 |
+ calls []call |
|
| 644 |
+} |
|
| 645 |
+ |
|
| 646 |
+// trigger runs the batch if it hasn't already been run. |
|
| 647 |
+func (b *batch) trigger() {
|
|
| 648 |
+ b.start.Do(b.run) |
|
| 649 |
+} |
|
| 650 |
+ |
|
| 651 |
+// run performs the transactions in the batch and communicates results |
|
| 652 |
+// back to DB.Batch. |
|
| 653 |
+func (b *batch) run() {
|
|
| 654 |
+ b.db.batchMu.Lock() |
|
| 655 |
+ b.timer.Stop() |
|
| 656 |
+ // Make sure no new work is added to this batch, but don't break |
|
| 657 |
+ // other batches. |
|
| 658 |
+ if b.db.batch == b {
|
|
| 659 |
+ b.db.batch = nil |
|
| 660 |
+ } |
|
| 661 |
+ b.db.batchMu.Unlock() |
|
| 662 |
+ |
|
| 663 |
+retry: |
|
| 664 |
+ for len(b.calls) > 0 {
|
|
| 665 |
+ var failIdx = -1 |
|
| 666 |
+ err := b.db.Update(func(tx *Tx) error {
|
|
| 667 |
+ for i, c := range b.calls {
|
|
| 668 |
+ if err := safelyCall(c.fn, tx); err != nil {
|
|
| 669 |
+ failIdx = i |
|
| 670 |
+ return err |
|
| 671 |
+ } |
|
| 672 |
+ } |
|
| 673 |
+ return nil |
|
| 674 |
+ }) |
|
| 675 |
+ |
|
| 676 |
+ if failIdx >= 0 {
|
|
| 677 |
+ // take the failing transaction out of the batch. it's |
|
| 678 |
+ // safe to shorten b.calls here because db.batch no longer |
|
| 679 |
+ // points to us, so Batch can no longer append to it. |
|
| 680 |
+ c := b.calls[failIdx] |
|
| 681 |
+ b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1] |
|
| 682 |
+ // tell the submitter re-run it solo, continue with the rest of the batch |
|
| 683 |
+ c.err <- trySolo |
|
| 684 |
+ continue retry |
|
| 685 |
+ } |
|
| 686 |
+ |
|
| 687 |
+ // pass success, or bolt internal errors, to all callers |
|
| 688 |
+ for _, c := range b.calls {
|
|
| 689 |
+ if c.err != nil {
|
|
| 690 |
+ c.err <- err |
|
| 691 |
+ } |
|
| 692 |
+ } |
|
| 693 |
+ break retry |
|
| 694 |
+ } |
|
| 695 |
+} |
|
| 696 |
+ |
|
| 697 |
+// trySolo is a special sentinel error value used for signaling that a |
|
| 698 |
+// transaction function should be re-run. It should never be seen by |
|
| 699 |
+// callers. |
|
| 700 |
+var trySolo = errors.New("batch function returned an error and should be re-run solo")
|
|
| 701 |
+ |
|
| 702 |
+type panicked struct {
|
|
| 703 |
+ reason interface{}
|
|
| 704 |
+} |
|
| 705 |
+ |
|
| 706 |
+func (p panicked) Error() string {
|
|
| 707 |
+ if err, ok := p.reason.(error); ok {
|
|
| 708 |
+ return err.Error() |
|
| 709 |
+ } |
|
| 710 |
+ return fmt.Sprintf("panic: %v", p.reason)
|
|
| 711 |
+} |
|
| 712 |
+ |
|
| 713 |
+func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
|
|
| 714 |
+ defer func() {
|
|
| 715 |
+ if p := recover(); p != nil {
|
|
| 716 |
+ err = panicked{p}
|
|
| 717 |
+ } |
|
| 718 |
+ }() |
|
| 719 |
+ return fn(tx) |
|
| 720 |
+} |
|
| 721 |
+ |
|
| 592 | 722 |
// Sync executes fdatasync() against the database file handle. |
| 593 | 723 |
// |
| 594 | 724 |
// This is not necessary under normal operation, however, if you use NoSync |
| ... | ... |
@@ -655,6 +811,38 @@ func (db *DB) allocate(count int) (*page, error) {
|
| 655 | 655 |
return p, nil |
| 656 | 656 |
} |
| 657 | 657 |
|
| 658 |
+// grow grows the size of the database to the given sz. |
|
| 659 |
+func (db *DB) grow(sz int) error {
|
|
| 660 |
+ // Ignore if the new size is less than available file size. |
|
| 661 |
+ if sz <= db.filesz {
|
|
| 662 |
+ return nil |
|
| 663 |
+ } |
|
| 664 |
+ |
|
| 665 |
+ // If the data is smaller than the alloc size then only allocate what's needed. |
|
| 666 |
+ // Once it goes over the allocation size then allocate in chunks. |
|
| 667 |
+ if db.datasz < db.AllocSize {
|
|
| 668 |
+ sz = db.datasz |
|
| 669 |
+ } else {
|
|
| 670 |
+ sz += db.AllocSize |
|
| 671 |
+ } |
|
| 672 |
+ |
|
| 673 |
+ // Truncate and fsync to ensure file size metadata is flushed. |
|
| 674 |
+ // https://github.com/boltdb/bolt/issues/284 |
|
| 675 |
+ if !db.NoGrowSync && !db.readOnly {
|
|
| 676 |
+ if runtime.GOOS != "windows" {
|
|
| 677 |
+ if err := db.file.Truncate(int64(sz)); err != nil {
|
|
| 678 |
+ return fmt.Errorf("file resize error: %s", err)
|
|
| 679 |
+ } |
|
| 680 |
+ } |
|
| 681 |
+ if err := db.file.Sync(); err != nil {
|
|
| 682 |
+ return fmt.Errorf("file sync error: %s", err)
|
|
| 683 |
+ } |
|
| 684 |
+ } |
|
| 685 |
+ |
|
| 686 |
+ db.filesz = sz |
|
| 687 |
+ return nil |
|
| 688 |
+} |
|
| 689 |
+ |
|
| 658 | 690 |
func (db *DB) IsReadOnly() bool {
|
| 659 | 691 |
return db.readOnly |
| 660 | 692 |
} |
| ... | ... |
@@ -672,6 +860,19 @@ type Options struct {
|
| 672 | 672 |
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to |
| 673 | 673 |
// grab a shared lock (UNIX). |
| 674 | 674 |
ReadOnly bool |
| 675 |
+ |
|
| 676 |
+ // Sets the DB.MmapFlags flag before memory mapping the file. |
|
| 677 |
+ MmapFlags int |
|
| 678 |
+ |
|
| 679 |
+ // InitialMmapSize is the initial mmap size of the database |
|
| 680 |
+ // in bytes. Read transactions won't block write transaction |
|
| 681 |
+ // if the InitialMmapSize is large enough to hold database mmap |
|
| 682 |
+ // size. (See DB.Begin for more information) |
|
| 683 |
+ // |
|
| 684 |
+ // If <=0, the initial map size is 0. |
|
| 685 |
+ // If InitialMmapSize is smaller than the previous database size, |
|
| 686 |
+ // it takes no effect. |
|
| 687 |
+ InitialMmapSize int |
|
| 675 | 688 |
} |
| 676 | 689 |
|
| 677 | 690 |
// DefaultOptions represent the options used if nil options are passed into Open(). |
| ... | ... |
@@ -463,43 +463,6 @@ func (n *node) rebalance() {
|
| 463 | 463 |
target = n.prevSibling() |
| 464 | 464 |
} |
| 465 | 465 |
|
| 466 |
- // If target node has extra nodes then just move one over. |
|
| 467 |
- if target.numChildren() > target.minKeys() {
|
|
| 468 |
- if useNextSibling {
|
|
| 469 |
- // Reparent and move node. |
|
| 470 |
- if child, ok := n.bucket.nodes[target.inodes[0].pgid]; ok {
|
|
| 471 |
- child.parent.removeChild(child) |
|
| 472 |
- child.parent = n |
|
| 473 |
- child.parent.children = append(child.parent.children, child) |
|
| 474 |
- } |
|
| 475 |
- n.inodes = append(n.inodes, target.inodes[0]) |
|
| 476 |
- target.inodes = target.inodes[1:] |
|
| 477 |
- |
|
| 478 |
- // Update target key on parent. |
|
| 479 |
- target.parent.put(target.key, target.inodes[0].key, nil, target.pgid, 0) |
|
| 480 |
- target.key = target.inodes[0].key |
|
| 481 |
- _assert(len(target.key) > 0, "rebalance(1): zero-length node key") |
|
| 482 |
- } else {
|
|
| 483 |
- // Reparent and move node. |
|
| 484 |
- if child, ok := n.bucket.nodes[target.inodes[len(target.inodes)-1].pgid]; ok {
|
|
| 485 |
- child.parent.removeChild(child) |
|
| 486 |
- child.parent = n |
|
| 487 |
- child.parent.children = append(child.parent.children, child) |
|
| 488 |
- } |
|
| 489 |
- n.inodes = append(n.inodes, inode{})
|
|
| 490 |
- copy(n.inodes[1:], n.inodes) |
|
| 491 |
- n.inodes[0] = target.inodes[len(target.inodes)-1] |
|
| 492 |
- target.inodes = target.inodes[:len(target.inodes)-1] |
|
| 493 |
- } |
|
| 494 |
- |
|
| 495 |
- // Update parent key for node. |
|
| 496 |
- n.parent.put(n.key, n.inodes[0].key, nil, n.pgid, 0) |
|
| 497 |
- n.key = n.inodes[0].key |
|
| 498 |
- _assert(len(n.key) > 0, "rebalance(2): zero-length node key") |
|
| 499 |
- |
|
| 500 |
- return |
|
| 501 |
- } |
|
| 502 |
- |
|
| 503 | 466 |
// If both this node and the target node are too small then merge them. |
| 504 | 467 |
if useNextSibling {
|
| 505 | 468 |
// Reparent all child nodes being moved. |
| ... | ... |
@@ -5,6 +5,7 @@ import ( |
| 5 | 5 |
"io" |
| 6 | 6 |
"os" |
| 7 | 7 |
"sort" |
| 8 |
+ "strings" |
|
| 8 | 9 |
"time" |
| 9 | 10 |
"unsafe" |
| 10 | 11 |
) |
| ... | ... |
@@ -29,6 +30,14 @@ type Tx struct {
|
| 29 | 29 |
pages map[pgid]*page |
| 30 | 30 |
stats TxStats |
| 31 | 31 |
commitHandlers []func() |
| 32 |
+ |
|
| 33 |
+ // WriteFlag specifies the flag for write-related methods like WriteTo(). |
|
| 34 |
+ // Tx opens the database file with the specified flag to copy the data. |
|
| 35 |
+ // |
|
| 36 |
+ // By default, the flag is unset, which works well for mostly in-memory |
|
| 37 |
+ // workloads. For databases that are much larger than available RAM, |
|
| 38 |
+ // set the flag to syscall.O_DIRECT to avoid trashing the page cache. |
|
| 39 |
+ WriteFlag int |
|
| 32 | 40 |
} |
| 33 | 41 |
|
| 34 | 42 |
// init initializes the transaction. |
| ... | ... |
@@ -160,6 +169,8 @@ func (tx *Tx) Commit() error {
|
| 160 | 160 |
// Free the old root bucket. |
| 161 | 161 |
tx.meta.root.root = tx.root.root |
| 162 | 162 |
|
| 163 |
+ opgid := tx.meta.pgid |
|
| 164 |
+ |
|
| 163 | 165 |
// Free the freelist and allocate new pages for it. This will overestimate |
| 164 | 166 |
// the size of the freelist but not underestimate the size (which would be bad). |
| 165 | 167 |
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist)) |
| ... | ... |
@@ -174,6 +185,14 @@ func (tx *Tx) Commit() error {
|
| 174 | 174 |
} |
| 175 | 175 |
tx.meta.freelist = p.id |
| 176 | 176 |
|
| 177 |
+ // If the high water mark has moved up then attempt to grow the database. |
|
| 178 |
+ if tx.meta.pgid > opgid {
|
|
| 179 |
+ if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
|
|
| 180 |
+ tx.rollback() |
|
| 181 |
+ return err |
|
| 182 |
+ } |
|
| 183 |
+ } |
|
| 184 |
+ |
|
| 177 | 185 |
// Write dirty pages to disk. |
| 178 | 186 |
startTime = time.Now() |
| 179 | 187 |
if err := tx.write(); err != nil {
|
| ... | ... |
@@ -184,8 +203,17 @@ func (tx *Tx) Commit() error {
|
| 184 | 184 |
// If strict mode is enabled then perform a consistency check. |
| 185 | 185 |
// All consistency errors are collected and reported in the panic. |
| 186 | 186 |
if tx.db.StrictMode {
|
| 187 |
- if err, ok := <-tx.Check(); ok {
|
|
| 188 |
- panic("check fail: " + err.Error())
|
|
| 187 |
+ ch := tx.Check() |
|
| 188 |
+ var errs []string |
|
| 189 |
+ for {
|
|
| 190 |
+ err, ok := <-ch |
|
| 191 |
+ if !ok {
|
|
| 192 |
+ break |
|
| 193 |
+ } |
|
| 194 |
+ errs = append(errs, err.Error()) |
|
| 195 |
+ } |
|
| 196 |
+ if len(errs) > 0 {
|
|
| 197 |
+ panic("check fail: " + strings.Join(errs, "\n"))
|
|
| 189 | 198 |
} |
| 190 | 199 |
} |
| 191 | 200 |
|
| ... | ... |
@@ -263,7 +291,7 @@ func (tx *Tx) close() {
|
| 263 | 263 |
} |
| 264 | 264 |
|
| 265 | 265 |
// Copy writes the entire database to a writer. |
| 266 |
-// This function exists for backwards compatibility. Use WriteTo() in |
|
| 266 |
+// This function exists for backwards compatibility. Use WriteTo() instead. |
|
| 267 | 267 |
func (tx *Tx) Copy(w io.Writer) error {
|
| 268 | 268 |
_, err := tx.WriteTo(w) |
| 269 | 269 |
return err |
| ... | ... |
@@ -272,29 +300,47 @@ func (tx *Tx) Copy(w io.Writer) error {
|
| 272 | 272 |
// WriteTo writes the entire database to a writer. |
| 273 | 273 |
// If err == nil then exactly tx.Size() bytes will be written into the writer. |
| 274 | 274 |
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
|
| 275 |
- // Attempt to open reader directly. |
|
| 276 |
- var f *os.File |
|
| 277 |
- if f, err = os.OpenFile(tx.db.path, os.O_RDONLY|odirect, 0); err != nil {
|
|
| 278 |
- // Fallback to a regular open if that doesn't work. |
|
| 279 |
- if f, err = os.OpenFile(tx.db.path, os.O_RDONLY, 0); err != nil {
|
|
| 280 |
- return 0, err |
|
| 281 |
- } |
|
| 275 |
+ // Attempt to open reader with WriteFlag |
|
| 276 |
+ f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0) |
|
| 277 |
+ if err != nil {
|
|
| 278 |
+ return 0, err |
|
| 282 | 279 |
} |
| 280 |
+ defer func() { _ = f.Close() }()
|
|
| 283 | 281 |
|
| 284 |
- // Copy the meta pages. |
|
| 285 |
- tx.db.metalock.Lock() |
|
| 286 |
- n, err = io.CopyN(w, f, int64(tx.db.pageSize*2)) |
|
| 287 |
- tx.db.metalock.Unlock() |
|
| 282 |
+ // Generate a meta page. We use the same page data for both meta pages. |
|
| 283 |
+ buf := make([]byte, tx.db.pageSize) |
|
| 284 |
+ page := (*page)(unsafe.Pointer(&buf[0])) |
|
| 285 |
+ page.flags = metaPageFlag |
|
| 286 |
+ *page.meta() = *tx.meta |
|
| 287 |
+ |
|
| 288 |
+ // Write meta 0. |
|
| 289 |
+ page.id = 0 |
|
| 290 |
+ page.meta().checksum = page.meta().sum64() |
|
| 291 |
+ nn, err := w.Write(buf) |
|
| 292 |
+ n += int64(nn) |
|
| 288 | 293 |
if err != nil {
|
| 289 |
- _ = f.Close() |
|
| 290 |
- return n, fmt.Errorf("meta copy: %s", err)
|
|
| 294 |
+ return n, fmt.Errorf("meta 0 copy: %s", err)
|
|
| 295 |
+ } |
|
| 296 |
+ |
|
| 297 |
+ // Write meta 1 with a lower transaction id. |
|
| 298 |
+ page.id = 1 |
|
| 299 |
+ page.meta().txid -= 1 |
|
| 300 |
+ page.meta().checksum = page.meta().sum64() |
|
| 301 |
+ nn, err = w.Write(buf) |
|
| 302 |
+ n += int64(nn) |
|
| 303 |
+ if err != nil {
|
|
| 304 |
+ return n, fmt.Errorf("meta 1 copy: %s", err)
|
|
| 305 |
+ } |
|
| 306 |
+ |
|
| 307 |
+ // Move past the meta pages in the file. |
|
| 308 |
+ if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
|
|
| 309 |
+ return n, fmt.Errorf("seek: %s", err)
|
|
| 291 | 310 |
} |
| 292 | 311 |
|
| 293 | 312 |
// Copy data pages. |
| 294 | 313 |
wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2)) |
| 295 | 314 |
n += wn |
| 296 | 315 |
if err != nil {
|
| 297 |
- _ = f.Close() |
|
| 298 | 316 |
return n, err |
| 299 | 317 |
} |
| 300 | 318 |
|
| ... | ... |
@@ -501,7 +547,7 @@ func (tx *Tx) writeMeta() error {
|
| 501 | 501 |
} |
| 502 | 502 |
|
| 503 | 503 |
// page returns a reference to the page with a given id. |
| 504 |
-// If page has been written to then a temporary bufferred page is returned. |
|
| 504 |
+// If page has been written to then a temporary buffered page is returned. |
|
| 505 | 505 |
func (tx *Tx) page(id pgid) *page {
|
| 506 | 506 |
// Check the dirty pages first. |
| 507 | 507 |
if tx.pages != nil {
|