Browse code

Vendoring boltdb/bolt v1.2.0

Signed-off-by: Alessandro Boch <aboch@docker.com>

Alessandro Boch authored on 2016/03/29 07:49:41
Showing 18 changed files
... ...
@@ -44,7 +44,7 @@ clone git github.com/coreos/etcd v2.2.0
44 44
 fix_rewritten_imports github.com/coreos/etcd
45 45
 clone git github.com/ugorji/go 5abd4e96a45c386928ed2ca2a7ef63e2533e18ec
46 46
 clone git github.com/hashicorp/consul v0.5.2
47
-clone git github.com/boltdb/bolt v1.1.0
47
+clone git github.com/boltdb/bolt v1.2.0
48 48
 clone git github.com/miekg/dns 75e6e86cc601825c5dbcd4e0c209eab180997cd7
49 49
 
50 50
 # get graph and distribution packages
... ...
@@ -1,54 +1,18 @@
1
-TEST=.
2
-BENCH=.
3
-COVERPROFILE=/tmp/c.out
4 1
 BRANCH=`git rev-parse --abbrev-ref HEAD`
5 2
 COMMIT=`git rev-parse --short HEAD`
6 3
 GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
7 4
 
8 5
 default: build
9 6
 
10
-bench:
11
-	go test -v -test.run=NOTHINCONTAINSTHIS -test.bench=$(BENCH)
12
-
13
-# http://cloc.sourceforge.net/
14
-cloc:
15
-	@cloc --not-match-f='Makefile|_test.go' .
16
-
17
-cover: fmt
18
-	go test -coverprofile=$(COVERPROFILE) -test.run=$(TEST) $(COVERFLAG) .
19
-	go tool cover -html=$(COVERPROFILE)
20
-	rm $(COVERPROFILE)
21
-
22
-cpuprofile: fmt
23
-	@go test -c
24
-	@./bolt.test -test.v -test.run=$(TEST) -test.cpuprofile cpu.prof
7
+race:
8
+	@go test -v -race -test.run="TestSimulate_(100op|1000op)"
25 9
 
26 10
 # go get github.com/kisielk/errcheck
27 11
 errcheck:
28
-	@echo "=== errcheck ==="
29
-	@errcheck github.com/boltdb/bolt
12
+	@errcheck -ignorepkg=bytes -ignore=os:Remove github.com/boltdb/bolt
30 13
 
31
-fmt:
32
-	@go fmt ./...
33
-
34
-get:
35
-	@go get -d ./...
36
-
37
-build: get
38
-	@mkdir -p bin
39
-	@go build -ldflags=$(GOLDFLAGS) -a -o bin/bolt ./cmd/bolt
40
-
41
-test: fmt
42
-	@go get github.com/stretchr/testify/assert
43
-	@echo "=== TESTS ==="
44
-	@go test -v -cover -test.run=$(TEST)
45
-	@echo ""
46
-	@echo ""
47
-	@echo "=== CLI ==="
48
-	@go test -v -test.run=$(TEST) ./cmd/bolt
49
-	@echo ""
50
-	@echo ""
51
-	@echo "=== RACE DETECTOR ==="
52
-	@go test -v -race -test.run="TestSimulate_(100op|1000op)"
14
+test: 
15
+	@go test -v -cover .
16
+	@go test -v ./cmd/bolt
53 17
 
54
-.PHONY: bench cloc cover cpuprofile fmt memprofile test
18
+.PHONY: fmt test
... ...
@@ -1,8 +1,8 @@
1
-Bolt [![Build Status](https://drone.io/github.com/boltdb/bolt/status.png)](https://drone.io/github.com/boltdb/bolt/latest) [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.png?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.png)](https://godoc.org/github.com/boltdb/bolt) ![Version](http://img.shields.io/badge/version-1.0-green.png)
1
+Bolt [![Build Status](https://drone.io/github.com/boltdb/bolt/status.png)](https://drone.io/github.com/boltdb/bolt/latest) [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.svg?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.svg)](https://godoc.org/github.com/boltdb/bolt) ![Version](https://img.shields.io/badge/version-1.0-green.svg)
2 2
 ====
3 3
 
4
-Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas] and
5
-the [LMDB project][lmdb]. The goal of the project is to provide a simple,
4
+Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas]
5
+[LMDB project][lmdb]. The goal of the project is to provide a simple,
6 6
 fast, and reliable database for projects that don't require a full database
7 7
 server such as Postgres or MySQL.
8 8
 
... ...
@@ -13,7 +13,6 @@ and setting values. That's it.
13 13
 [hyc_symas]: https://twitter.com/hyc_symas
14 14
 [lmdb]: http://symas.com/mdb/
15 15
 
16
-
17 16
 ## Project Status
18 17
 
19 18
 Bolt is stable and the API is fixed. Full unit test coverage and randomized
... ...
@@ -22,6 +21,36 @@ Bolt is currently in high-load production environments serving databases as
22 22
 large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed
23 23
 services every day.
24 24
 
25
+## Table of Contents
26
+
27
+- [Getting Started](#getting-started)
28
+  - [Installing](#installing)
29
+  - [Opening a database](#opening-a-database)
30
+  - [Transactions](#transactions)
31
+    - [Read-write transactions](#read-write-transactions)
32
+    - [Read-only transactions](#read-only-transactions)
33
+    - [Batch read-write transactions](#batch-read-write-transactions)
34
+    - [Managing transactions manually](#managing-transactions-manually)
35
+  - [Using buckets](#using-buckets)
36
+  - [Using key/value pairs](#using-keyvalue-pairs)
37
+  - [Autoincrementing integer for the bucket](#autoincrementing-integer-for-the-bucket)
38
+  - [Iterating over keys](#iterating-over-keys)
39
+    - [Prefix scans](#prefix-scans)
40
+    - [Range scans](#range-scans)
41
+    - [ForEach()](#foreach)
42
+  - [Nested buckets](#nested-buckets)
43
+  - [Database backups](#database-backups)
44
+  - [Statistics](#statistics)
45
+  - [Read-Only Mode](#read-only-mode)
46
+  - [Mobile Use (iOS/Android)](#mobile-use-iosandroid)
47
+- [Resources](#resources)
48
+- [Comparison with other databases](#comparison-with-other-databases)
49
+  - [Postgres, MySQL, & other relational databases](#postgres-mysql--other-relational-databases)
50
+  - [LevelDB, RocksDB](#leveldb-rocksdb)
51
+  - [LMDB](#lmdb)
52
+- [Caveats & Limitations](#caveats--limitations)
53
+- [Reading the Source](#reading-the-source)
54
+- [Other Projects Using Bolt](#other-projects-using-bolt)
25 55
 
26 56
 ## Getting Started
27 57
 
... ...
@@ -180,8 +209,8 @@ and then safely close your transaction if an error is returned. This is the
180 180
 recommended way to use Bolt transactions.
181 181
 
182 182
 However, sometimes you may want to manually start and end your transactions.
183
-You can use the `Tx.Begin()` function directly but _please_ be sure to close the
184
-transaction.
183
+You can use the `DB.Begin()` function directly but **please** be sure to close
184
+the transaction.
185 185
 
186 186
 ```go
187 187
 // Start a writable transaction.
... ...
@@ -269,7 +298,7 @@ then you must use `copy()` to copy it to another byte slice.
269 269
 
270 270
 
271 271
 ### Autoincrementing integer for the bucket
272
-By using the NextSequence() function, you can let Bolt determine a sequence
272
+By using the `NextSequence()` function, you can let Bolt determine a sequence
273 273
 which can be used as the unique identifier for your key/value pairs. See the
274 274
 example below.
275 275
 
... ...
@@ -309,7 +338,6 @@ type User struct {
309 309
     ID int
310 310
     ...
311 311
 }
312
-
313 312
 ```
314 313
 
315 314
 ### Iterating over keys
... ...
@@ -320,7 +348,9 @@ iteration over these keys extremely fast. To iterate over keys we'll use a
320 320
 
321 321
 ```go
322 322
 db.View(func(tx *bolt.Tx) error {
323
+	// Assume bucket exists and has keys
323 324
 	b := tx.Bucket([]byte("MyBucket"))
325
+
324 326
 	c := b.Cursor()
325 327
 
326 328
 	for k, v := c.First(); k != nil; k, v = c.Next() {
... ...
@@ -344,10 +374,15 @@ Next()   Move to the next key.
344 344
 Prev()   Move to the previous key.
345 345
 ```
346 346
 
347
-When you have iterated to the end of the cursor then `Next()` will return `nil`.
348
-You must seek to a position using `First()`, `Last()`, or `Seek()` before
349
-calling `Next()` or `Prev()`. If you do not seek to a position then these
350
-functions will return `nil`.
347
+Each of those functions has a return signature of `(key []byte, value []byte)`.
348
+When you have iterated to the end of the cursor then `Next()` will return a
349
+`nil` key.  You must seek to a position using `First()`, `Last()`, or `Seek()`
350
+before calling `Next()` or `Prev()`. If you do not seek to a position then
351
+these functions will return a `nil` key.
352
+
353
+During iteration, if the key is non-`nil` but the value is `nil`, that means
354
+the key refers to a bucket rather than a value.  Use `Bucket.Bucket()` to
355
+access the sub-bucket.
351 356
 
352 357
 
353 358
 #### Prefix scans
... ...
@@ -356,6 +391,7 @@ To iterate over a key prefix, you can combine `Seek()` and `bytes.HasPrefix()`:
356 356
 
357 357
 ```go
358 358
 db.View(func(tx *bolt.Tx) error {
359
+	// Assume bucket exists and has keys
359 360
 	c := tx.Bucket([]byte("MyBucket")).Cursor()
360 361
 
361 362
 	prefix := []byte("1234")
... ...
@@ -375,7 +411,7 @@ date range like this:
375 375
 
376 376
 ```go
377 377
 db.View(func(tx *bolt.Tx) error {
378
-	// Assume our events bucket has RFC3339 encoded time keys.
378
+	// Assume our events bucket exists and has RFC3339 encoded time keys.
379 379
 	c := tx.Bucket([]byte("Events")).Cursor()
380 380
 
381 381
 	// Our time range spans the 90's decade.
... ...
@@ -399,7 +435,9 @@ all the keys in a bucket:
399 399
 
400 400
 ```go
401 401
 db.View(func(tx *bolt.Tx) error {
402
+	// Assume bucket exists and has keys
402 403
 	b := tx.Bucket([]byte("MyBucket"))
404
+	
403 405
 	b.ForEach(func(k, v []byte) error {
404 406
 		fmt.Printf("key=%s, value=%s\n", k, v)
405 407
 		return nil
... ...
@@ -426,8 +464,11 @@ func (*Bucket) DeleteBucket(key []byte) error
426 426
 Bolt is a single file so it's easy to backup. You can use the `Tx.WriteTo()`
427 427
 function to write a consistent view of the database to a writer. If you call
428 428
 this from a read-only transaction, it will perform a hot backup and not block
429
-your other database reads and writes. It will also use `O_DIRECT` when available
430
-to prevent page cache trashing.
429
+your other database reads and writes.
430
+
431
+By default, it will use a regular file handle which will utilize the operating
432
+system's page cache. See the [`Tx`](https://godoc.org/github.com/boltdb/bolt#Tx)
433
+documentation for information about optimizing for larger-than-RAM datasets.
431 434
 
432 435
 One common use case is to backup over HTTP so you can use tools like `cURL` to
433 436
 do database backups:
... ...
@@ -509,6 +550,84 @@ if err != nil {
509 509
 }
510 510
 ```
511 511
 
512
+### Mobile Use (iOS/Android)
513
+
514
+Bolt is able to run on mobile devices by leveraging the binding feature of the
515
+[gomobile](https://github.com/golang/mobile) tool. Create a struct that will
516
+contain your database logic and a reference to a `*bolt.DB` with an initializing
517
+constructor that takes in a filepath where the database file will be stored.
518
+Neither Android nor iOS require extra permissions or cleanup from using this method.
519
+
520
+```go
521
+func NewBoltDB(filepath string) *BoltDB {
522
+	db, err := bolt.Open(filepath+"/demo.db", 0600, nil)
523
+	if err != nil {
524
+		log.Fatal(err)
525
+	}
526
+
527
+	return &BoltDB{db}
528
+}
529
+
530
+type BoltDB struct {
531
+	db *bolt.DB
532
+	...
533
+}
534
+
535
+func (b *BoltDB) Path() string {
536
+	return b.db.Path()
537
+}
538
+
539
+func (b *BoltDB) Close() {
540
+	b.db.Close()
541
+}
542
+```
543
+
544
+Database logic should be defined as methods on this wrapper struct.
545
+
546
+To initialize this struct from the native language, use the snippets below.
547
+Both platforms now sync their local storage to the cloud; these snippets
548
+disable that functionality for the database file:
549
+
550
+#### Android
551
+
552
+```java
553
+String path;
554
+if (android.os.Build.VERSION.SDK_INT >=android.os.Build.VERSION_CODES.LOLLIPOP){
555
+    path = getNoBackupFilesDir().getAbsolutePath();
556
+} else{
557
+    path = getFilesDir().getAbsolutePath();
558
+}
559
+Boltmobiledemo.BoltDB boltDB = Boltmobiledemo.NewBoltDB(path)
560
+```
561
+
562
+#### iOS
563
+
564
+```objc
565
+- (void)demo {
566
+    NSString* path = [NSSearchPathForDirectoriesInDomains(NSLibraryDirectory,
567
+                                                          NSUserDomainMask,
568
+                                                          YES) objectAtIndex:0];
569
+	GoBoltmobiledemoBoltDB * demo = GoBoltmobiledemoNewBoltDB(path);
570
+	[self addSkipBackupAttributeToItemAtPath:demo.path];
571
+	//Some DB Logic would go here
572
+	[demo close];
573
+}
574
+
575
+- (BOOL)addSkipBackupAttributeToItemAtPath:(NSString *) filePathString
576
+{
577
+    NSURL* URL= [NSURL fileURLWithPath: filePathString];
578
+    assert([[NSFileManager defaultManager] fileExistsAtPath: [URL path]]);
579
+    
580
+    NSError *error = nil;
581
+    BOOL success = [URL setResourceValue: [NSNumber numberWithBool: YES]
582
+                                  forKey: NSURLIsExcludedFromBackupKey error: &error];
583
+    if(!success){
584
+        NSLog(@"Error excluding %@ from backup %@", [URL lastPathComponent], error);
585
+    }
586
+    return success;
587
+}
588
+
589
+```
512 590
 
513 591
 ## Resources
514 592
 
... ...
@@ -544,7 +663,7 @@ they are libraries bundled into the application, however, their underlying
544 544
 structure is a log-structured merge-tree (LSM tree). An LSM tree optimizes
545 545
 random writes by using a write ahead log and multi-tiered, sorted files called
546 546
 SSTables. Bolt uses a B+tree internally and only a single file. Both approaches
547
-have trade offs.
547
+have trade-offs.
548 548
 
549 549
 If you require a high random write throughput (>10,000 w/sec) or you need to use
550 550
 spinning disks then LevelDB could be a good choice. If your application is
... ...
@@ -580,9 +699,8 @@ It's important to pick the right tool for the job and Bolt is no exception.
580 580
 Here are a few things to note when evaluating and using Bolt:
581 581
 
582 582
 * Bolt is good for read intensive workloads. Sequential write performance is
583
-  also fast but random writes can be slow. You can add a write-ahead log or
584
-  [transaction coalescer](https://github.com/boltdb/coalescer) in front of Bolt
585
-  to mitigate this issue.
583
+  also fast but random writes can be slow. You can use `DB.Batch()` or add a
584
+  write-ahead log to help mitigate this issue.
586 585
 
587 586
 * Bolt uses a B+tree internally so there can be a lot of random page access.
588 587
   SSDs provide a significant performance boost over spinning disks.
... ...
@@ -618,7 +736,7 @@ Here are a few things to note when evaluating and using Bolt:
618 618
 
619 619
 * The data structures in the Bolt database are memory mapped so the data file
620 620
   will be endian specific. This means that you cannot copy a Bolt file from a
621
-  little endian machine to a big endian machine and have it work. For most 
621
+  little endian machine to a big endian machine and have it work. For most
622 622
   users this is not a concern since most modern CPUs are little endian.
623 623
 
624 624
 * Because of the way pages are laid out on disk, Bolt cannot truncate data files
... ...
@@ -633,6 +751,56 @@ Here are a few things to note when evaluating and using Bolt:
633 633
 [page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638
634 634
 
635 635
 
636
+## Reading the Source
637
+
638
+Bolt is a relatively small code base (<3KLOC) for an embedded, serializable,
639
+transactional key/value database so it can be a good starting point for people
640
+interested in how databases work.
641
+
642
+The best places to start are the main entry points into Bolt:
643
+
644
+- `Open()` - Initializes the reference to the database. It's responsible for
645
+  creating the database if it doesn't exist, obtaining an exclusive lock on the
646
+  file, reading the meta pages, & memory-mapping the file.
647
+
648
+- `DB.Begin()` - Starts a read-only or read-write transaction depending on the
649
+  value of the `writable` argument. This requires briefly obtaining the "meta"
650
+  lock to keep track of open transactions. Only one read-write transaction can
651
+  exist at a time so the "rwlock" is acquired during the life of a read-write
652
+  transaction.
653
+
654
+- `Bucket.Put()` - Writes a key/value pair into a bucket. After validating the
655
+  arguments, a cursor is used to traverse the B+tree to the page and position
656
+  where the key & value will be written. Once the position is found, the bucket
657
+  materializes the underlying page and the page's parent pages into memory as
658
+  "nodes". These nodes are where mutations occur during read-write transactions.
659
+  These changes get flushed to disk during commit.
660
+
661
+- `Bucket.Get()` - Retrieves a key/value pair from a bucket. This uses a cursor
662
+  to move to the page & position of a key/value pair. During a read-only
663
+  transaction, the key and value data is returned as a direct reference to the
664
+  underlying mmap file so there's no allocation overhead. For read-write
665
+  transactions, this data may reference the mmap file or one of the in-memory
666
+  node values.
667
+
668
+- `Cursor` - This object is simply for traversing the B+tree of on-disk pages
669
+  or in-memory nodes. It can seek to a specific key, move to the first or last
670
+  value, or it can move forward or backward. The cursor handles the movement up
671
+  and down the B+tree transparently to the end user.
672
+
673
+- `Tx.Commit()` - Converts the in-memory dirty nodes and the list of free pages
674
+  into pages to be written to disk. Writing to disk then occurs in two phases.
675
+  First, the dirty pages are written to disk and an `fsync()` occurs. Second, a
676
+  new meta page with an incremented transaction ID is written and another
677
+  `fsync()` occurs. This two phase write ensures that partially written data
678
+  pages are ignored in the event of a crash since the meta page pointing to them
679
+  is never written. Partially written meta pages are invalidated because they
680
+  are written with a checksum.
681
+
682
+If you have additional notes that could be helpful for others, please submit
683
+them via pull request.
684
+
685
+
636 686
 ## Other Projects Using Bolt
637 687
 
638 688
 Below is a list of public, open source projects that use Bolt:
... ...
@@ -643,21 +811,21 @@ Below is a list of public, open source projects that use Bolt:
643 643
 * [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics.
644 644
 * [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects.
645 645
 * [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday.
646
-* [ChainStore](https://github.com/nulayer/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations.
646
+* [ChainStore](https://github.com/pressly/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations.
647 647
 * [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite.
648 648
 * [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin".
649 649
 * [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka.
650 650
 * [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed.
651 651
 * [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt.
652
-* [photosite/session](http://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site.
652
+* [photosite/session](https://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site.
653 653
 * [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage.
654 654
 * [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters.
655 655
 * [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend.
656 656
 * [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend.
657 657
 * [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server.
658 658
 * [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database.
659
-* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read.
660
-* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics.
659
+* [Seaweed File System](https://github.com/chrislusf/seaweedfs) - Highly scalable distributed key~file system with O(1) disk read.
660
+* [InfluxDB](https://influxdata.com) - Scalable datastore for metrics, events, and real-time analytics.
661 661
 * [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
662 662
 * [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
663 663
 * [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
... ...
@@ -667,5 +835,10 @@ Below is a list of public, open source projects that use Bolt:
667 667
   backed by boltdb.
668 668
 * [buckets](https://github.com/joyrexus/buckets) - a bolt wrapper streamlining
669 669
   simple tx and key scans.
670
+* [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets.
671
+* [Request Baskets](https://github.com/darklynx/request-baskets) - A web service to collect arbitrary HTTP requests and inspect them via REST API or simple web UI, similar to [RequestBin](http://requestb.in/) service.
672
+* [Go Report Card](https://goreportcard.com/) - Go code quality report cards as a (free and open source) service.
673
+* [Boltdb Boilerplate](https://github.com/bobintornado/boltdb-boilerplate) - Boilerplate wrapper around bolt aiming to make simple calls one-liners.
674
+* [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores.
670 675
 
671 676
 If you are using Bolt in a project please send a pull request to add it to the list.
672 677
new file mode 100644
... ...
@@ -0,0 +1,18 @@
0
+version: "{build}"
1
+
2
+os: Windows Server 2012 R2
3
+
4
+clone_folder: c:\gopath\src\github.com\boltdb\bolt
5
+
6
+environment:
7
+  GOPATH: c:\gopath
8
+
9
+install:
10
+  - echo %PATH%
11
+  - echo %GOPATH%
12
+  - go version
13
+  - go env
14
+  - go get -v -t ./...
15
+
16
+build_script:
17
+  - go test -v ./...
0 18
deleted file mode 100644
... ...
@@ -1,138 +0,0 @@
1
-package bolt
2
-
3
-import (
4
-	"errors"
5
-	"fmt"
6
-	"sync"
7
-	"time"
8
-)
9
-
10
-// Batch calls fn as part of a batch. It behaves similar to Update,
11
-// except:
12
-//
13
-// 1. concurrent Batch calls can be combined into a single Bolt
14
-// transaction.
15
-//
16
-// 2. the function passed to Batch may be called multiple times,
17
-// regardless of whether it returns error or not.
18
-//
19
-// This means that Batch function side effects must be idempotent and
20
-// take permanent effect only after a successful return is seen in
21
-// caller.
22
-//
23
-// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
24
-// and DB.MaxBatchDelay, respectively.
25
-//
26
-// Batch is only useful when there are multiple goroutines calling it.
27
-func (db *DB) Batch(fn func(*Tx) error) error {
28
-	errCh := make(chan error, 1)
29
-
30
-	db.batchMu.Lock()
31
-	if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
32
-		// There is no existing batch, or the existing batch is full; start a new one.
33
-		db.batch = &batch{
34
-			db: db,
35
-		}
36
-		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
37
-	}
38
-	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
39
-	if len(db.batch.calls) >= db.MaxBatchSize {
40
-		// wake up batch, it's ready to run
41
-		go db.batch.trigger()
42
-	}
43
-	db.batchMu.Unlock()
44
-
45
-	err := <-errCh
46
-	if err == trySolo {
47
-		err = db.Update(fn)
48
-	}
49
-	return err
50
-}
51
-
52
-type call struct {
53
-	fn  func(*Tx) error
54
-	err chan<- error
55
-}
56
-
57
-type batch struct {
58
-	db    *DB
59
-	timer *time.Timer
60
-	start sync.Once
61
-	calls []call
62
-}
63
-
64
-// trigger runs the batch if it hasn't already been run.
65
-func (b *batch) trigger() {
66
-	b.start.Do(b.run)
67
-}
68
-
69
-// run performs the transactions in the batch and communicates results
70
-// back to DB.Batch.
71
-func (b *batch) run() {
72
-	b.db.batchMu.Lock()
73
-	b.timer.Stop()
74
-	// Make sure no new work is added to this batch, but don't break
75
-	// other batches.
76
-	if b.db.batch == b {
77
-		b.db.batch = nil
78
-	}
79
-	b.db.batchMu.Unlock()
80
-
81
-retry:
82
-	for len(b.calls) > 0 {
83
-		var failIdx = -1
84
-		err := b.db.Update(func(tx *Tx) error {
85
-			for i, c := range b.calls {
86
-				if err := safelyCall(c.fn, tx); err != nil {
87
-					failIdx = i
88
-					return err
89
-				}
90
-			}
91
-			return nil
92
-		})
93
-
94
-		if failIdx >= 0 {
95
-			// take the failing transaction out of the batch. it's
96
-			// safe to shorten b.calls here because db.batch no longer
97
-			// points to us, and we hold the mutex anyway.
98
-			c := b.calls[failIdx]
99
-			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
100
-			// tell the submitter re-run it solo, continue with the rest of the batch
101
-			c.err <- trySolo
102
-			continue retry
103
-		}
104
-
105
-		// pass success, or bolt internal errors, to all callers
106
-		for _, c := range b.calls {
107
-			if c.err != nil {
108
-				c.err <- err
109
-			}
110
-		}
111
-		break retry
112
-	}
113
-}
114
-
115
-// trySolo is a special sentinel error value used for signaling that a
116
-// transaction function should be re-run. It should never be seen by
117
-// callers.
118
-var trySolo = errors.New("batch function returned an error and should be re-run solo")
119
-
120
-type panicked struct {
121
-	reason interface{}
122
-}
123
-
124
-func (p panicked) Error() string {
125
-	if err, ok := p.reason.(error); ok {
126
-		return err.Error()
127
-	}
128
-	return fmt.Sprintf("panic: %v", p.reason)
129
-}
130
-
131
-func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
132
-	defer func() {
133
-		if p := recover(); p != nil {
134
-			err = panicked{p}
135
-		}
136
-	}()
137
-	return fn(tx)
138
-}
... ...
@@ -4,8 +4,6 @@ import (
4 4
 	"syscall"
5 5
 )
6 6
 
7
-var odirect = syscall.O_DIRECT
8
-
9 7
 // fdatasync flushes written data to a file descriptor.
10 8
 func fdatasync(db *DB) error {
11 9
 	return syscall.Fdatasync(int(db.file.Fd()))
... ...
@@ -11,8 +11,6 @@ const (
11 11
 	msInvalidate             // invalidate cached data
12 12
 )
13 13
 
14
-var odirect int
15
-
16 14
 func msync(db *DB) error {
17 15
 	_, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate)
18 16
 	if errno != 0 {
19 17
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+// +build ppc
1
+
2
+package bolt
3
+
4
+// maxMapSize represents the largest mmap size supported by Bolt.
5
+const maxMapSize = 0x7FFFFFFF // 2GB
6
+
7
+// maxAllocSize is the size used when creating array pointers.
8
+const maxAllocSize = 0xFFFFFFF
0 9
new file mode 100644
... ...
@@ -0,0 +1,9 @@
0
+// +build ppc64
1
+
2
+package bolt
3
+
4
+// maxMapSize represents the largest mmap size supported by Bolt.
5
+const maxMapSize = 0xFFFFFFFFFFFF // 256TB
6
+
7
+// maxAllocSize is the size used when creating array pointers.
8
+const maxAllocSize = 0x7FFFFFFF
... ...
@@ -11,7 +11,7 @@ import (
11 11
 )
12 12
 
13 13
 // flock acquires an advisory lock on a file descriptor.
14
-func flock(f *os.File, exclusive bool, timeout time.Duration) error {
14
+func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
15 15
 	var t time.Time
16 16
 	for {
17 17
 		// If we're beyond our timeout then return an error.
... ...
@@ -27,7 +27,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
27 27
 		}
28 28
 
29 29
 		// Otherwise attempt to obtain an exclusive lock.
30
-		err := syscall.Flock(int(f.Fd()), flag|syscall.LOCK_NB)
30
+		err := syscall.Flock(int(db.file.Fd()), flag|syscall.LOCK_NB)
31 31
 		if err == nil {
32 32
 			return nil
33 33
 		} else if err != syscall.EWOULDBLOCK {
... ...
@@ -40,25 +40,14 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
40 40
 }
41 41
 
42 42
 // funlock releases an advisory lock on a file descriptor.
43
-func funlock(f *os.File) error {
44
-	return syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
43
+func funlock(db *DB) error {
44
+	return syscall.Flock(int(db.file.Fd()), syscall.LOCK_UN)
45 45
 }
46 46
 
47 47
 // mmap memory maps a DB's data file.
48 48
 func mmap(db *DB, sz int) error {
49
-	// Truncate and fsync to ensure file size metadata is flushed.
50
-	// https://github.com/boltdb/bolt/issues/284
51
-	if !db.NoGrowSync && !db.readOnly {
52
-		if err := db.file.Truncate(int64(sz)); err != nil {
53
-			return fmt.Errorf("file resize error: %s", err)
54
-		}
55
-		if err := db.file.Sync(); err != nil {
56
-			return fmt.Errorf("file sync error: %s", err)
57
-		}
58
-	}
59
-
60 49
 	// Map the data file to memory.
61
-	b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
50
+	b, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
62 51
 	if err != nil {
63 52
 		return err
64 53
 	}
... ...
@@ -1,4 +1,3 @@
1
-
2 1
 package bolt
3 2
 
4 3
 import (
... ...
@@ -7,11 +6,12 @@ import (
7 7
 	"syscall"
8 8
 	"time"
9 9
 	"unsafe"
10
+
10 11
 	"golang.org/x/sys/unix"
11 12
 )
12 13
 
13 14
 // flock acquires an advisory lock on a file descriptor.
14
-func flock(f *os.File, exclusive bool, timeout time.Duration) error {
15
+func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
15 16
 	var t time.Time
16 17
 	for {
17 18
 		// If we're beyond our timeout then return an error.
... ...
@@ -32,7 +32,7 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
32 32
 		} else {
33 33
 			lock.Type = syscall.F_RDLCK
34 34
 		}
35
-		err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock)
35
+		err := syscall.FcntlFlock(db.file.Fd(), syscall.F_SETLK, &lock)
36 36
 		if err == nil {
37 37
 			return nil
38 38
 		} else if err != syscall.EAGAIN {
... ...
@@ -45,30 +45,19 @@ func flock(f *os.File, exclusive bool, timeout time.Duration) error {
45 45
 }
46 46
 
47 47
 // funlock releases an advisory lock on a file descriptor.
48
-func funlock(f *os.File) error {
48
+func funlock(db *DB) error {
49 49
 	var lock syscall.Flock_t
50 50
 	lock.Start = 0
51 51
 	lock.Len = 0
52 52
 	lock.Type = syscall.F_UNLCK
53 53
 	lock.Whence = 0
54
-	return syscall.FcntlFlock(uintptr(f.Fd()), syscall.F_SETLK, &lock)
54
+	return syscall.FcntlFlock(uintptr(db.file.Fd()), syscall.F_SETLK, &lock)
55 55
 }
56 56
 
57 57
 // mmap memory maps a DB's data file.
58 58
 func mmap(db *DB, sz int) error {
59
-	// Truncate and fsync to ensure file size metadata is flushed.
60
-	// https://github.com/boltdb/bolt/issues/284
61
-	if !db.NoGrowSync && !db.readOnly {
62
-		if err := db.file.Truncate(int64(sz)); err != nil {
63
-			return fmt.Errorf("file resize error: %s", err)
64
-		}
65
-		if err := db.file.Sync(); err != nil {
66
-			return fmt.Errorf("file sync error: %s", err)
67
-		}
68
-	}
69
-
70 59
 	// Map the data file to memory.
71
-	b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
60
+	b, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED|db.MmapFlags)
72 61
 	if err != nil {
73 62
 		return err
74 63
 	}
... ...
@@ -8,7 +8,39 @@ import (
8 8
 	"unsafe"
9 9
 )
10 10
 
11
-var odirect int
11
+// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1
12
+var (
13
+	modkernel32      = syscall.NewLazyDLL("kernel32.dll")
14
+	procLockFileEx   = modkernel32.NewProc("LockFileEx")
15
+	procUnlockFileEx = modkernel32.NewProc("UnlockFileEx")
16
+)
17
+
18
+const (
19
+	lockExt = ".lock"
20
+
21
+	// see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx
22
+	flagLockExclusive       = 2
23
+	flagLockFailImmediately = 1
24
+
25
+	// see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx
26
+	errLockViolation syscall.Errno = 0x21
27
+)
28
+
29
+func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
30
+	r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)))
31
+	if r == 0 {
32
+		return err
33
+	}
34
+	return nil
35
+}
36
+
37
+func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) {
38
+	r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0)
39
+	if r == 0 {
40
+		return err
41
+	}
42
+	return nil
43
+}
12 44
 
13 45
 // fdatasync flushes written data to a file descriptor.
14 46
 func fdatasync(db *DB) error {
... ...
@@ -16,13 +48,49 @@ func fdatasync(db *DB) error {
16 16
 }
17 17
 
18 18
 // flock acquires an advisory lock on a file descriptor.
19
-func flock(f *os.File, _ bool, _ time.Duration) error {
20
-	return nil
19
+func flock(db *DB, mode os.FileMode, exclusive bool, timeout time.Duration) error {
20
+	// Create a separate lock file on windows because a process
21
+	// cannot share an exclusive lock on the same file. This is
22
+	// needed during Tx.WriteTo().
23
+	f, err := os.OpenFile(db.path+lockExt, os.O_CREATE, mode)
24
+	if err != nil {
25
+		return err
26
+	}
27
+	db.lockfile = f
28
+
29
+	var t time.Time
30
+	for {
31
+		// If we're beyond our timeout then return an error.
32
+		// This can only occur after we've attempted a flock once.
33
+		if t.IsZero() {
34
+			t = time.Now()
35
+		} else if timeout > 0 && time.Since(t) > timeout {
36
+			return ErrTimeout
37
+		}
38
+
39
+		var flag uint32 = flagLockFailImmediately
40
+		if exclusive {
41
+			flag |= flagLockExclusive
42
+		}
43
+
44
+		err := lockFileEx(syscall.Handle(db.lockfile.Fd()), flag, 0, 1, 0, &syscall.Overlapped{})
45
+		if err == nil {
46
+			return nil
47
+		} else if err != errLockViolation {
48
+			return err
49
+		}
50
+
51
+		// Wait for a bit and try again.
52
+		time.Sleep(50 * time.Millisecond)
53
+	}
21 54
 }
22 55
 
23 56
 // funlock releases an advisory lock on a file descriptor.
24
-func funlock(f *os.File) error {
25
-	return nil
57
+func funlock(db *DB) error {
58
+	err := unlockFileEx(syscall.Handle(db.lockfile.Fd()), 0, 1, 0, &syscall.Overlapped{})
59
+	db.lockfile.Close()
60
+	os.Remove(db.path+lockExt)
61
+	return err
26 62
 }
27 63
 
28 64
 // mmap memory maps a DB's data file.
... ...
@@ -2,8 +2,6 @@
2 2
 
3 3
 package bolt
4 4
 
5
-var odirect int
6
-
7 5
 // fdatasync flushes written data to a file descriptor.
8 6
 func fdatasync(db *DB) error {
9 7
 	return db.file.Sync()
... ...
@@ -11,7 +11,7 @@ const (
11 11
 	MaxKeySize = 32768
12 12
 
13 13
 	// MaxValueSize is the maximum length of a value, in bytes.
14
-	MaxValueSize = 4294967295
14
+	MaxValueSize = (1 << 31) - 2
15 15
 )
16 16
 
17 17
 const (
... ...
@@ -273,6 +273,7 @@ func (b *Bucket) Get(key []byte) []byte {
273 273
 
274 274
 // Put sets the value for a key in the bucket.
275 275
 // If the key exists then its previous value will be overwritten.
276
+// Supplied value must remain valid for the life of the transaction.
276 277
 // Returns an error if the bucket was created from a read-only transaction, if the key is blank, if the key is too large, or if the value is too large.
277 278
 func (b *Bucket) Put(key []byte, value []byte) error {
278 279
 	if b.tx.db == nil {
... ...
@@ -34,6 +34,13 @@ func (c *Cursor) First() (key []byte, value []byte) {
34 34
 	p, n := c.bucket.pageNode(c.bucket.root)
35 35
 	c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
36 36
 	c.first()
37
+
38
+	// If we land on an empty page then move to the next value.
39
+	// https://github.com/boltdb/bolt/issues/450
40
+	if c.stack[len(c.stack)-1].count() == 0 {
41
+		c.next()
42
+	}
43
+
37 44
 	k, v, flags := c.keyValue()
38 45
 	if (flags & uint32(bucketLeafFlag)) != 0 {
39 46
 		return k, nil
... ...
@@ -209,28 +216,37 @@ func (c *Cursor) last() {
209 209
 // next moves to the next leaf element and returns the key and value.
210 210
 // If the cursor is at the last leaf element then it stays there and returns nil.
211 211
 func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
212
-	// Attempt to move over one element until we're successful.
213
-	// Move up the stack as we hit the end of each page in our stack.
214
-	var i int
215
-	for i = len(c.stack) - 1; i >= 0; i-- {
216
-		elem := &c.stack[i]
217
-		if elem.index < elem.count()-1 {
218
-			elem.index++
219
-			break
212
+	for {
213
+		// Attempt to move over one element until we're successful.
214
+		// Move up the stack as we hit the end of each page in our stack.
215
+		var i int
216
+		for i = len(c.stack) - 1; i >= 0; i-- {
217
+			elem := &c.stack[i]
218
+			if elem.index < elem.count()-1 {
219
+				elem.index++
220
+				break
221
+			}
220 222
 		}
221
-	}
222 223
 
223
-	// If we've hit the root page then stop and return. This will leave the
224
-	// cursor on the last element of the last page.
225
-	if i == -1 {
226
-		return nil, nil, 0
227
-	}
224
+		// If we've hit the root page then stop and return. This will leave the
225
+		// cursor on the last element of the last page.
226
+		if i == -1 {
227
+			return nil, nil, 0
228
+		}
228 229
 
229
-	// Otherwise start from where we left off in the stack and find the
230
-	// first element of the first leaf page.
231
-	c.stack = c.stack[:i+1]
232
-	c.first()
233
-	return c.keyValue()
230
+		// Otherwise start from where we left off in the stack and find the
231
+		// first element of the first leaf page.
232
+		c.stack = c.stack[:i+1]
233
+		c.first()
234
+
235
+		// If this is an empty page then restart and move back up the stack.
236
+		// https://github.com/boltdb/bolt/issues/450
237
+		if c.stack[len(c.stack)-1].count() == 0 {
238
+			continue
239
+		}
240
+
241
+		return c.keyValue()
242
+	}
234 243
 }
235 244
 
236 245
 // search recursively performs a binary search against a given page/node until it finds a given key.
... ...
@@ -1,8 +1,10 @@
1 1
 package bolt
2 2
 
3 3
 import (
4
+	"errors"
4 5
 	"fmt"
5 6
 	"hash/fnv"
7
+	"log"
6 8
 	"os"
7 9
 	"runtime"
8 10
 	"runtime/debug"
... ...
@@ -24,13 +26,14 @@ const magic uint32 = 0xED0CDAED
24 24
 // IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
25 25
 // syncing changes to a file.  This is required as some operating systems,
26 26
 // such as OpenBSD, do not have a unified buffer cache (UBC) and writes
27
-// must be synchronzied using the msync(2) syscall.
27
+// must be synchronized using the msync(2) syscall.
28 28
 const IgnoreNoSync = runtime.GOOS == "openbsd"
29 29
 
30 30
 // Default values if not set in a DB instance.
31 31
 const (
32 32
 	DefaultMaxBatchSize  int = 1000
33 33
 	DefaultMaxBatchDelay     = 10 * time.Millisecond
34
+	DefaultAllocSize         = 16 * 1024 * 1024
34 35
 )
35 36
 
36 37
 // DB represents a collection of buckets persisted to a file on disk.
... ...
@@ -63,6 +66,10 @@ type DB struct {
63 63
 	// https://github.com/boltdb/bolt/issues/284
64 64
 	NoGrowSync bool
65 65
 
66
+	// If you want to read the entire database fast, you can set MmapFlags to
67
+	// syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead.
68
+	MmapFlags int
69
+
66 70
 	// MaxBatchSize is the maximum size of a batch. Default value is
67 71
 	// copied from DefaultMaxBatchSize in Open.
68 72
 	//
... ...
@@ -79,11 +86,18 @@ type DB struct {
79 79
 	// Do not change concurrently with calls to Batch.
80 80
 	MaxBatchDelay time.Duration
81 81
 
82
+	// AllocSize is the amount of space allocated when the database
83
+	// needs to create new pages. This is done to amortize the cost
84
+	// of truncate() and fsync() when growing the data file.
85
+	AllocSize int
86
+
82 87
 	path     string
83 88
 	file     *os.File
89
+	lockfile *os.File // windows only
84 90
 	dataref  []byte // mmap'ed readonly, write throws SEGV
85 91
 	data     *[maxMapSize]byte
86 92
 	datasz   int
93
+	filesz   int // current on disk file size
87 94
 	meta0    *meta
88 95
 	meta1    *meta
89 96
 	pageSize int
... ...
@@ -136,10 +150,12 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
136 136
 		options = DefaultOptions
137 137
 	}
138 138
 	db.NoGrowSync = options.NoGrowSync
139
+	db.MmapFlags = options.MmapFlags
139 140
 
140 141
 	// Set default values for later DB operations.
141 142
 	db.MaxBatchSize = DefaultMaxBatchSize
142 143
 	db.MaxBatchDelay = DefaultMaxBatchDelay
144
+	db.AllocSize = DefaultAllocSize
143 145
 
144 146
 	flag := os.O_RDWR
145 147
 	if options.ReadOnly {
... ...
@@ -162,7 +178,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
162 162
 	// if !options.ReadOnly.
163 163
 	// The database file is locked using the shared lock (more than one process may
164 164
 	// hold a lock at the same time) otherwise (options.ReadOnly is set).
165
-	if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
165
+	if err := flock(db, mode, !db.readOnly, options.Timeout); err != nil {
166 166
 		_ = db.close()
167 167
 		return nil, err
168 168
 	}
... ...
@@ -172,7 +188,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
172 172
 
173 173
 	// Initialize the database if it doesn't exist.
174 174
 	if info, err := db.file.Stat(); err != nil {
175
-		return nil, fmt.Errorf("stat error: %s", err)
175
+		return nil, err
176 176
 	} else if info.Size() == 0 {
177 177
 		// Initialize new files with meta pages.
178 178
 		if err := db.init(); err != nil {
... ...
@@ -184,14 +200,14 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
184 184
 		if _, err := db.file.ReadAt(buf[:], 0); err == nil {
185 185
 			m := db.pageInBuffer(buf[:], 0).meta()
186 186
 			if err := m.validate(); err != nil {
187
-				return nil, fmt.Errorf("meta0 error: %s", err)
187
+				return nil, err
188 188
 			}
189 189
 			db.pageSize = int(m.pageSize)
190 190
 		}
191 191
 	}
192 192
 
193 193
 	// Memory map the data file.
194
-	if err := db.mmap(0); err != nil {
194
+	if err := db.mmap(options.InitialMmapSize); err != nil {
195 195
 		_ = db.close()
196 196
 		return nil, err
197 197
 	}
... ...
@@ -248,10 +264,10 @@ func (db *DB) mmap(minsz int) error {
248 248
 
249 249
 	// Validate the meta pages.
250 250
 	if err := db.meta0.validate(); err != nil {
251
-		return fmt.Errorf("meta0 error: %s", err)
251
+		return err
252 252
 	}
253 253
 	if err := db.meta1.validate(); err != nil {
254
-		return fmt.Errorf("meta1 error: %s", err)
254
+		return err
255 255
 	}
256 256
 
257 257
 	return nil
... ...
@@ -266,7 +282,7 @@ func (db *DB) munmap() error {
266 266
 }
267 267
 
268 268
 // mmapSize determines the appropriate size for the mmap given the current size
269
-// of the database. The minimum size is 1MB and doubles until it reaches 1GB.
269
+// of the database. The minimum size is 32KB and doubles until it reaches 1GB.
270 270
 // Returns an error if the new mmap size is greater than the max allowed.
271 271
 func (db *DB) mmapSize(size int) (int, error) {
272 272
 	// Double the size from 32KB until 1GB.
... ...
@@ -364,6 +380,10 @@ func (db *DB) Close() error {
364 364
 }
365 365
 
366 366
 func (db *DB) close() error {
367
+	if !db.opened {
368
+		return nil
369
+	}
370
+	
367 371
 	db.opened = false
368 372
 
369 373
 	db.freelist = nil
... ...
@@ -382,7 +402,9 @@ func (db *DB) close() error {
382 382
 		// No need to unlock read-only file.
383 383
 		if !db.readOnly {
384 384
 			// Unlock the file.
385
-			_ = funlock(db.file)
385
+			if err := funlock(db); err != nil {
386
+				log.Printf("bolt.Close(): funlock error: %s", err)
387
+			}
386 388
 		}
387 389
 
388 390
 		// Close the file descriptor.
... ...
@@ -401,11 +423,15 @@ func (db *DB) close() error {
401 401
 // will cause the calls to block and be serialized until the current write
402 402
 // transaction finishes.
403 403
 //
404
-// Transactions should not be depedent on one another. Opening a read
404
+// Transactions should not be dependent on one another. Opening a read
405 405
 // transaction and a write transaction in the same goroutine can cause the
406 406
 // writer to deadlock because the database periodically needs to re-mmap itself
407 407
 // as it grows and it cannot do that while a read transaction is open.
408 408
 //
409
+// If a long running read transaction (for example, a snapshot transaction) is
410
+// needed, you might want to set Options.InitialMmapSize to a large enough value
411
+// to avoid potential blocking of write transaction.
412
+//
409 413
 // IMPORTANT: You must close read-only transactions after you are finished or
410 414
 // else the database will not reclaim old pages.
411 415
 func (db *DB) Begin(writable bool) (*Tx, error) {
... ...
@@ -589,6 +615,136 @@ func (db *DB) View(fn func(*Tx) error) error {
589 589
 	return nil
590 590
 }
591 591
 
592
+// Batch calls fn as part of a batch. It behaves similar to Update,
593
+// except:
594
+//
595
+// 1. concurrent Batch calls can be combined into a single Bolt
596
+// transaction.
597
+//
598
+// 2. the function passed to Batch may be called multiple times,
599
+// regardless of whether it returns error or not.
600
+//
601
+// This means that Batch function side effects must be idempotent and
602
+// take permanent effect only after a successful return is seen in
603
+// caller.
604
+//
605
+// The maximum batch size and delay can be adjusted with DB.MaxBatchSize
606
+// and DB.MaxBatchDelay, respectively.
607
+//
608
+// Batch is only useful when there are multiple goroutines calling it.
609
+func (db *DB) Batch(fn func(*Tx) error) error {
610
+	errCh := make(chan error, 1)
611
+
612
+	db.batchMu.Lock()
613
+	if (db.batch == nil) || (db.batch != nil && len(db.batch.calls) >= db.MaxBatchSize) {
614
+		// There is no existing batch, or the existing batch is full; start a new one.
615
+		db.batch = &batch{
616
+			db: db,
617
+		}
618
+		db.batch.timer = time.AfterFunc(db.MaxBatchDelay, db.batch.trigger)
619
+	}
620
+	db.batch.calls = append(db.batch.calls, call{fn: fn, err: errCh})
621
+	if len(db.batch.calls) >= db.MaxBatchSize {
622
+		// wake up batch, it's ready to run
623
+		go db.batch.trigger()
624
+	}
625
+	db.batchMu.Unlock()
626
+
627
+	err := <-errCh
628
+	if err == trySolo {
629
+		err = db.Update(fn)
630
+	}
631
+	return err
632
+}
633
+
634
+type call struct {
635
+	fn  func(*Tx) error
636
+	err chan<- error
637
+}
638
+
639
+type batch struct {
640
+	db    *DB
641
+	timer *time.Timer
642
+	start sync.Once
643
+	calls []call
644
+}
645
+
646
+// trigger runs the batch if it hasn't already been run.
647
+func (b *batch) trigger() {
648
+	b.start.Do(b.run)
649
+}
650
+
651
+// run performs the transactions in the batch and communicates results
652
+// back to DB.Batch.
653
+func (b *batch) run() {
654
+	b.db.batchMu.Lock()
655
+	b.timer.Stop()
656
+	// Make sure no new work is added to this batch, but don't break
657
+	// other batches.
658
+	if b.db.batch == b {
659
+		b.db.batch = nil
660
+	}
661
+	b.db.batchMu.Unlock()
662
+
663
+retry:
664
+	for len(b.calls) > 0 {
665
+		var failIdx = -1
666
+		err := b.db.Update(func(tx *Tx) error {
667
+			for i, c := range b.calls {
668
+				if err := safelyCall(c.fn, tx); err != nil {
669
+					failIdx = i
670
+					return err
671
+				}
672
+			}
673
+			return nil
674
+		})
675
+
676
+		if failIdx >= 0 {
677
+			// take the failing transaction out of the batch. it's
678
+			// safe to shorten b.calls here because db.batch no longer
679
+			// points to us, and we hold the mutex anyway.
680
+			c := b.calls[failIdx]
681
+			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
682
+			// tell the submitter re-run it solo, continue with the rest of the batch
683
+			c.err <- trySolo
684
+			continue retry
685
+		}
686
+
687
+		// pass success, or bolt internal errors, to all callers
688
+		for _, c := range b.calls {
689
+			if c.err != nil {
690
+				c.err <- err
691
+			}
692
+		}
693
+		break retry
694
+	}
695
+}
696
+
697
+// trySolo is a special sentinel error value used for signaling that a
698
+// transaction function should be re-run. It should never be seen by
699
+// callers.
700
+var trySolo = errors.New("batch function returned an error and should be re-run solo")
701
+
702
// panicked is an error that wraps the value recovered from a panic.
type panicked struct {
	// reason is the recovered panic value.
	reason interface{}
}

// Error returns the message of the wrapped value when that value is itself
// an error; otherwise it formats the value as "panic: <value>".
func (p panicked) Error() string {
	switch r := p.reason.(type) {
	case error:
		return r.Error()
	default:
		return fmt.Sprintf("panic: %v", r)
	}
}
712
+
713
+func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
714
+	defer func() {
715
+		if p := recover(); p != nil {
716
+			err = panicked{p}
717
+		}
718
+	}()
719
+	return fn(tx)
720
+}
721
+
592 722
 // Sync executes fdatasync() against the database file handle.
593 723
 //
594 724
 // This is not necessary under normal operation, however, if you use NoSync
... ...
@@ -655,6 +811,38 @@ func (db *DB) allocate(count int) (*page, error) {
655 655
 	return p, nil
656 656
 }
657 657
 
658
+// grow grows the size of the database to the given sz.
659
+func (db *DB) grow(sz int) error {
660
+	// Ignore if the new size is less than available file size.
661
+	if sz <= db.filesz {
662
+		return nil
663
+	}
664
+
665
+	// If the data is smaller than the alloc size then only allocate what's needed.
666
+	// Once it goes over the allocation size then allocate in chunks.
667
+	if db.datasz < db.AllocSize {
668
+		sz = db.datasz
669
+	} else {
670
+		sz += db.AllocSize
671
+	}
672
+
673
+	// Truncate and fsync to ensure file size metadata is flushed.
674
+	// https://github.com/boltdb/bolt/issues/284
675
+	if !db.NoGrowSync && !db.readOnly {
676
+		if runtime.GOOS != "windows" {
677
+			if err := db.file.Truncate(int64(sz)); err != nil {
678
+				return fmt.Errorf("file resize error: %s", err)
679
+			}
680
+		}
681
+		if err := db.file.Sync(); err != nil {
682
+			return fmt.Errorf("file sync error: %s", err)
683
+		}
684
+	}
685
+
686
+	db.filesz = sz
687
+	return nil
688
+}
689
+
658 690
 // IsReadOnly reports whether the database's readOnly flag is set.
 func (db *DB) IsReadOnly() bool {
659 691
 	return db.readOnly
660 692
 }
... ...
@@ -672,6 +860,19 @@ type Options struct {
672 672
 	// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
673 673
 	// grab a shared lock (UNIX).
674 674
 	ReadOnly bool
675
+
676
+	// Sets the DB.MmapFlags flag before memory mapping the file.
677
+	MmapFlags int
678
+
679
+	// InitialMmapSize is the initial mmap size of the database
680
+	// in bytes. Read transactions won't block write transaction
681
+	// if the InitialMmapSize is large enough to hold database mmap
682
+	// size. (See DB.Begin for more information)
683
+	//
684
+	// If <=0, the initial map size is 0.
685
+	// If InitialMmapSize is smaller than the previous database size,
686
+	// it takes no effect.
687
+	InitialMmapSize int
675 688
 }
676 689
 
677 690
 // DefaultOptions represent the options used if nil options are passed into Open().
... ...
@@ -463,43 +463,6 @@ func (n *node) rebalance() {
463 463
 		target = n.prevSibling()
464 464
 	}
465 465
 
466
-	// If target node has extra nodes then just move one over.
467
-	if target.numChildren() > target.minKeys() {
468
-		if useNextSibling {
469
-			// Reparent and move node.
470
-			if child, ok := n.bucket.nodes[target.inodes[0].pgid]; ok {
471
-				child.parent.removeChild(child)
472
-				child.parent = n
473
-				child.parent.children = append(child.parent.children, child)
474
-			}
475
-			n.inodes = append(n.inodes, target.inodes[0])
476
-			target.inodes = target.inodes[1:]
477
-
478
-			// Update target key on parent.
479
-			target.parent.put(target.key, target.inodes[0].key, nil, target.pgid, 0)
480
-			target.key = target.inodes[0].key
481
-			_assert(len(target.key) > 0, "rebalance(1): zero-length node key")
482
-		} else {
483
-			// Reparent and move node.
484
-			if child, ok := n.bucket.nodes[target.inodes[len(target.inodes)-1].pgid]; ok {
485
-				child.parent.removeChild(child)
486
-				child.parent = n
487
-				child.parent.children = append(child.parent.children, child)
488
-			}
489
-			n.inodes = append(n.inodes, inode{})
490
-			copy(n.inodes[1:], n.inodes)
491
-			n.inodes[0] = target.inodes[len(target.inodes)-1]
492
-			target.inodes = target.inodes[:len(target.inodes)-1]
493
-		}
494
-
495
-		// Update parent key for node.
496
-		n.parent.put(n.key, n.inodes[0].key, nil, n.pgid, 0)
497
-		n.key = n.inodes[0].key
498
-		_assert(len(n.key) > 0, "rebalance(2): zero-length node key")
499
-
500
-		return
501
-	}
502
-
503 466
 	// If both this node and the target node are too small then merge them.
504 467
 	if useNextSibling {
505 468
 		// Reparent all child nodes being moved.
... ...
@@ -5,6 +5,7 @@ import (
5 5
 	"io"
6 6
 	"os"
7 7
 	"sort"
8
+	"strings"
8 9
 	"time"
9 10
 	"unsafe"
10 11
 )
... ...
@@ -29,6 +30,14 @@ type Tx struct {
29 29
 	pages          map[pgid]*page
30 30
 	stats          TxStats
31 31
 	commitHandlers []func()
32
+
33
+	// WriteFlag specifies the flag for write-related methods like WriteTo().
34
+	// Tx opens the database file with the specified flag to copy the data.
35
+	//
36
+	// By default, the flag is unset, which works well for mostly in-memory
37
+	// workloads. For databases that are much larger than available RAM,
38
+	// set the flag to syscall.O_DIRECT to avoid thrashing the page cache.
39
+	WriteFlag int
32 40
 }
33 41
 
34 42
 // init initializes the transaction.
... ...
@@ -160,6 +169,8 @@ func (tx *Tx) Commit() error {
160 160
 	// Free the old root bucket.
161 161
 	tx.meta.root.root = tx.root.root
162 162
 
163
+	opgid := tx.meta.pgid
164
+
163 165
 	// Free the freelist and allocate new pages for it. This will overestimate
164 166
 	// the size of the freelist but not underestimate the size (which would be bad).
165 167
 	tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
... ...
@@ -174,6 +185,14 @@ func (tx *Tx) Commit() error {
174 174
 	}
175 175
 	tx.meta.freelist = p.id
176 176
 
177
+	// If the high water mark has moved up then attempt to grow the database.
178
+	if tx.meta.pgid > opgid {
179
+		if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil {
180
+			tx.rollback()
181
+			return err
182
+		}
183
+	}
184
+
177 185
 	// Write dirty pages to disk.
178 186
 	startTime = time.Now()
179 187
 	if err := tx.write(); err != nil {
... ...
@@ -184,8 +203,17 @@ func (tx *Tx) Commit() error {
184 184
 	// If strict mode is enabled then perform a consistency check.
185 185
 	// All consistency errors are collected and reported in the panic.
186 186
 	if tx.db.StrictMode {
187
-		if err, ok := <-tx.Check(); ok {
188
-			panic("check fail: " + err.Error())
187
+		ch := tx.Check()
188
+		var errs []string
189
+		for {
190
+			err, ok := <-ch
191
+			if !ok {
192
+				break
193
+			}
194
+			errs = append(errs, err.Error())
195
+		}
196
+		if len(errs) > 0 {
197
+			panic("check fail: " + strings.Join(errs, "\n"))
189 198
 		}
190 199
 	}
191 200
 
... ...
@@ -263,7 +291,7 @@ func (tx *Tx) close() {
263 263
 }
264 264
 
265 265
 // Copy writes the entire database to a writer.
266
-// This function exists for backwards compatibility. Use WriteTo() in
266
+// This function exists for backwards compatibility. Use WriteTo() instead.
267 267
 func (tx *Tx) Copy(w io.Writer) error {
268 268
 	_, err := tx.WriteTo(w)
269 269
 	return err
... ...
@@ -272,29 +300,47 @@ func (tx *Tx) Copy(w io.Writer) error {
272 272
 // WriteTo writes the entire database to a writer.
273 273
 // If err == nil then exactly tx.Size() bytes will be written into the writer.
274 274
 func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
275
-	// Attempt to open reader directly.
276
-	var f *os.File
277
-	if f, err = os.OpenFile(tx.db.path, os.O_RDONLY|odirect, 0); err != nil {
278
-		// Fallback to a regular open if that doesn't work.
279
-		if f, err = os.OpenFile(tx.db.path, os.O_RDONLY, 0); err != nil {
280
-			return 0, err
281
-		}
275
+	// Attempt to open reader with WriteFlag
276
+	f, err := os.OpenFile(tx.db.path, os.O_RDONLY|tx.WriteFlag, 0)
277
+	if err != nil {
278
+		return 0, err
282 279
 	}
280
+	defer func() { _ = f.Close() }()
283 281
 
284
-	// Copy the meta pages.
285
-	tx.db.metalock.Lock()
286
-	n, err = io.CopyN(w, f, int64(tx.db.pageSize*2))
287
-	tx.db.metalock.Unlock()
282
+	// Generate a meta page. We use the same page data for both meta pages.
283
+	buf := make([]byte, tx.db.pageSize)
284
+	page := (*page)(unsafe.Pointer(&buf[0]))
285
+	page.flags = metaPageFlag
286
+	*page.meta() = *tx.meta
287
+
288
+	// Write meta 0.
289
+	page.id = 0
290
+	page.meta().checksum = page.meta().sum64()
291
+	nn, err := w.Write(buf)
292
+	n += int64(nn)
288 293
 	if err != nil {
289
-		_ = f.Close()
290
-		return n, fmt.Errorf("meta copy: %s", err)
294
+		return n, fmt.Errorf("meta 0 copy: %s", err)
295
+	}
296
+
297
+	// Write meta 1 with a lower transaction id.
298
+	page.id = 1
299
+	page.meta().txid -= 1
300
+	page.meta().checksum = page.meta().sum64()
301
+	nn, err = w.Write(buf)
302
+	n += int64(nn)
303
+	if err != nil {
304
+		return n, fmt.Errorf("meta 1 copy: %s", err)
305
+	}
306
+
307
+	// Move past the meta pages in the file.
308
+	if _, err := f.Seek(int64(tx.db.pageSize*2), os.SEEK_SET); err != nil {
309
+		return n, fmt.Errorf("seek: %s", err)
291 310
 	}
292 311
 
293 312
 	// Copy data pages.
294 313
 	wn, err := io.CopyN(w, f, tx.Size()-int64(tx.db.pageSize*2))
295 314
 	n += wn
296 315
 	if err != nil {
297
-		_ = f.Close()
298 316
 		return n, err
299 317
 	}
300 318
 
... ...
@@ -501,7 +547,7 @@ func (tx *Tx) writeMeta() error {
501 501
 }
502 502
 
503 503
 // page returns a reference to the page with a given id.
504
-// If page has been written to then a temporary bufferred page is returned.
504
+// If page has been written to then a temporary buffered page is returned.
505 505
 func (tx *Tx) page(id pgid) *page {
506 506
 	// Check the dirty pages first.
507 507
 	if tx.pages != nil {