1 Star 0 Fork 0

shan123 / etcd

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
kvstore.go 13.41 KB
一键复制 编辑 原始数据 按行查看 历史
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package storage
import (
"errors"
"log"
"math"
"math/rand"
"sync"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/pkg/schedule"
"github.com/coreos/etcd/storage/backend"
"github.com/coreos/etcd/storage/storagepb"
)
var (
keyBucketName = []byte("key")
metaBucketName = []byte("meta")
// markedRevBytesLen is the byte length of marked revision.
// The first `revBytesLen` bytes represents a normal revision. The last
// one byte is the mark.
markedRevBytesLen = revBytesLen + 1
markBytePosition = markedRevBytesLen - 1
markTombstone byte = 't'
scheduledCompactKeyName = []byte("scheduledCompactRev")
finishedCompactKeyName = []byte("finishedCompactRev")
ErrTxnIDMismatch = errors.New("storage: txn id mismatch")
ErrCompacted = errors.New("storage: required revision has been compacted")
ErrFutureRev = errors.New("storage: required revision is a future revision")
ErrCanceled = errors.New("storage: watcher is canceled")
)
type store struct {
mu sync.Mutex // guards the following
b backend.Backend
kvindex index
le lease.Lessor
currentRev revision
// the main revision of the last compaction
compactMainRev int64
tx backend.BatchTx
txnID int64 // tracks the current txnID to verify txn operations
changes []storagepb.KeyValue
fifoSched schedule.Scheduler
stopc chan struct{}
}
// NewStore returns a new store. It is useful to create a store inside
// storage pkg. It should only be used for testing externally.
func NewStore(b backend.Backend, le lease.Lessor) *store {
s := &store{
b: b,
kvindex: newTreeIndex(),
le: le,
currentRev: revision{main: 1},
compactMainRev: -1,
fifoSched: schedule.NewFIFOScheduler(),
stopc: make(chan struct{}),
}
if s.le != nil {
s.le.SetRangeDeleter(s)
}
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafeCreateBucket(keyBucketName)
tx.UnsafeCreateBucket(metaBucketName)
tx.Unlock()
s.b.ForceCommit()
if err := s.restore(); err != nil {
// TODO: return the error instead of panic here?
panic("failed to recover store from backend")
}
return s
}
func (s *store) Rev() int64 {
s.mu.Lock()
defer s.mu.Unlock()
return s.currentRev.main
}
func (s *store) FirstRev() int64 {
s.mu.Lock()
defer s.mu.Unlock()
return s.compactMainRev
}
func (s *store) Put(key, value []byte, lease lease.LeaseID) int64 {
id := s.TxnBegin()
s.put(key, value, lease)
s.txnEnd(id)
putCounter.Inc()
return int64(s.currentRev.main)
}
func (s *store) Range(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error) {
id := s.TxnBegin()
kvs, rev, err = s.rangeKeys(key, end, limit, rangeRev)
s.txnEnd(id)
rangeCounter.Inc()
return kvs, rev, err
}
func (s *store) DeleteRange(key, end []byte) (n, rev int64) {
id := s.TxnBegin()
n = s.deleteRange(key, end)
s.txnEnd(id)
deleteCounter.Inc()
return n, int64(s.currentRev.main)
}
func (s *store) TxnBegin() int64 {
s.mu.Lock()
s.currentRev.sub = 0
s.tx = s.b.BatchTx()
s.tx.Lock()
s.txnID = rand.Int63()
return s.txnID
}
func (s *store) TxnEnd(txnID int64) error {
err := s.txnEnd(txnID)
if err != nil {
return err
}
txnCounter.Inc()
return nil
}
// txnEnd is used for unlocking an internal txn. It does
// not increase the txnCounter.
func (s *store) txnEnd(txnID int64) error {
if txnID != s.txnID {
return ErrTxnIDMismatch
}
s.tx.Unlock()
if s.currentRev.sub != 0 {
s.currentRev.main += 1
}
s.currentRev.sub = 0
dbTotalSize.Set(float64(s.b.Size()))
s.mu.Unlock()
return nil
}
func (s *store) TxnRange(txnID int64, key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, rev int64, err error) {
if txnID != s.txnID {
return nil, 0, ErrTxnIDMismatch
}
return s.rangeKeys(key, end, limit, rangeRev)
}
func (s *store) TxnPut(txnID int64, key, value []byte, lease lease.LeaseID) (rev int64, err error) {
if txnID != s.txnID {
return 0, ErrTxnIDMismatch
}
s.put(key, value, lease)
return int64(s.currentRev.main + 1), nil
}
func (s *store) TxnDeleteRange(txnID int64, key, end []byte) (n, rev int64, err error) {
if txnID != s.txnID {
return 0, 0, ErrTxnIDMismatch
}
n = s.deleteRange(key, end)
if n != 0 || s.currentRev.sub != 0 {
rev = int64(s.currentRev.main + 1)
} else {
rev = int64(s.currentRev.main)
}
return n, rev, nil
}
func (s *store) Compact(rev int64) error {
s.mu.Lock()
defer s.mu.Unlock()
if rev <= s.compactMainRev {
return ErrCompacted
}
if rev > s.currentRev.main {
return ErrFutureRev
}
start := time.Now()
s.compactMainRev = rev
rbytes := newRevBytes()
revToBytes(revision{main: rev}, rbytes)
tx := s.b.BatchTx()
tx.Lock()
tx.UnsafePut(metaBucketName, scheduledCompactKeyName, rbytes)
tx.Unlock()
// ensure that desired compaction is persisted
s.b.ForceCommit()
keep := s.kvindex.Compact(rev)
var j = func(ctx context.Context) {
select {
case <-ctx.Done():
return
default:
}
s.scheduleCompaction(rev, keep)
}
s.fifoSched.Schedule(j)
indexCompactionPauseDurations.Observe(float64(time.Now().Sub(start) / time.Millisecond))
return nil
}
func (s *store) Hash() (uint32, error) {
s.b.ForceCommit()
return s.b.Hash()
}
func (s *store) Commit() { s.b.ForceCommit() }
func (s *store) Restore(b backend.Backend) error {
s.mu.Lock()
defer s.mu.Unlock()
close(s.stopc)
s.fifoSched.Stop()
s.b = b
s.kvindex = newTreeIndex()
s.currentRev = revision{main: 1}
s.compactMainRev = -1
s.tx = b.BatchTx()
s.txnID = -1
s.fifoSched = schedule.NewFIFOScheduler()
s.stopc = make(chan struct{})
return s.restore()
}
func (s *store) restore() error {
min, max := newRevBytes(), newRevBytes()
revToBytes(revision{main: 1}, min)
revToBytes(revision{main: math.MaxInt64, sub: math.MaxInt64}, max)
// restore index
tx := s.b.BatchTx()
tx.Lock()
_, finishedCompactBytes := tx.UnsafeRange(metaBucketName, finishedCompactKeyName, nil, 0)
if len(finishedCompactBytes) != 0 {
s.compactMainRev = bytesToRev(finishedCompactBytes[0]).main
log.Printf("storage: restore compact to %d", s.compactMainRev)
}
// TODO: limit N to reduce max memory usage
keys, vals := tx.UnsafeRange(keyBucketName, min, max, 0)
for i, key := range keys {
var kv storagepb.KeyValue
if err := kv.Unmarshal(vals[i]); err != nil {
log.Fatalf("storage: cannot unmarshal event: %v", err)
}
rev := bytesToRev(key[:revBytesLen])
// restore index
switch {
case isTombstone(key):
s.kvindex.Tombstone(kv.Key, rev)
if lease.LeaseID(kv.Lease) != lease.NoLease {
err := s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
if err != nil && err != lease.ErrLeaseNotFound {
log.Fatalf("storage: unexpected Detach error %v", err)
}
}
default:
s.kvindex.Restore(kv.Key, revision{kv.CreateRevision, 0}, rev, kv.Version)
if lease.LeaseID(kv.Lease) != lease.NoLease {
if s.le == nil {
panic("no lessor to attach lease")
}
err := s.le.Attach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
// We are walking through the kv history here. It is possible that we attached a key to
// the lease and the lease was revoked later.
// Thus attaching an old version of key to a none existing lease is possible here, and
// we should just ignore the error.
if err != nil && err != lease.ErrLeaseNotFound {
panic("unexpected Attach error")
}
}
}
// update revision
s.currentRev = rev
}
_, scheduledCompactBytes := tx.UnsafeRange(metaBucketName, scheduledCompactKeyName, nil, 0)
if len(scheduledCompactBytes) != 0 {
scheduledCompact := bytesToRev(scheduledCompactBytes[0]).main
if scheduledCompact > s.compactMainRev {
log.Printf("storage: resume scheduled compaction at %d", scheduledCompact)
go s.Compact(scheduledCompact)
}
}
tx.Unlock()
return nil
}
func (s *store) Close() error {
close(s.stopc)
s.fifoSched.Stop()
return nil
}
func (a *store) Equal(b *store) bool {
if a.currentRev != b.currentRev {
return false
}
if a.compactMainRev != b.compactMainRev {
return false
}
return a.kvindex.Equal(b.kvindex)
}
// range is a keyword in Go, add Keys suffix.
func (s *store) rangeKeys(key, end []byte, limit, rangeRev int64) (kvs []storagepb.KeyValue, curRev int64, err error) {
curRev = int64(s.currentRev.main)
if s.currentRev.sub > 0 {
curRev += 1
}
if rangeRev > curRev {
return nil, s.currentRev.main, ErrFutureRev
}
var rev int64
if rangeRev <= 0 {
rev = curRev
} else {
rev = rangeRev
}
if rev <= s.compactMainRev {
return nil, 0, ErrCompacted
}
_, revpairs := s.kvindex.Range(key, end, int64(rev))
if len(revpairs) == 0 {
return nil, curRev, nil
}
for _, revpair := range revpairs {
start, end := revBytesRange(revpair)
_, vs := s.tx.UnsafeRange(keyBucketName, start, end, 0)
if len(vs) != 1 {
log.Fatalf("storage: range cannot find rev (%d,%d)", revpair.main, revpair.sub)
}
var kv storagepb.KeyValue
if err := kv.Unmarshal(vs[0]); err != nil {
log.Fatalf("storage: cannot unmarshal event: %v", err)
}
kvs = append(kvs, kv)
if limit > 0 && len(kvs) >= int(limit) {
break
}
}
return kvs, curRev, nil
}
func (s *store) put(key, value []byte, leaseID lease.LeaseID) {
rev := s.currentRev.main + 1
c := rev
oldLease := lease.NoLease
// if the key exists before, use its previous created and
// get its previous leaseID
grev, created, ver, err := s.kvindex.Get(key, rev)
if err == nil {
c = created.main
ibytes := newRevBytes()
revToBytes(grev, ibytes)
_, vs := s.tx.UnsafeRange(keyBucketName, ibytes, nil, 0)
var kv storagepb.KeyValue
if err = kv.Unmarshal(vs[0]); err != nil {
log.Fatalf("storage: cannot unmarshal value: %v", err)
}
oldLease = lease.LeaseID(kv.Lease)
}
ibytes := newRevBytes()
revToBytes(revision{main: rev, sub: s.currentRev.sub}, ibytes)
ver = ver + 1
kv := storagepb.KeyValue{
Key: key,
Value: value,
CreateRevision: c,
ModRevision: rev,
Version: ver,
Lease: int64(leaseID),
}
d, err := kv.Marshal()
if err != nil {
log.Fatalf("storage: cannot marshal event: %v", err)
}
s.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
s.kvindex.Put(key, revision{main: rev, sub: s.currentRev.sub})
s.changes = append(s.changes, kv)
s.currentRev.sub += 1
if oldLease != lease.NoLease {
if s.le == nil {
panic("no lessor to detach lease")
}
err = s.le.Detach(oldLease, []lease.LeaseItem{{Key: string(key)}})
if err != nil {
panic("unexpected error from lease detach")
}
}
if leaseID != lease.NoLease {
if s.le == nil {
panic("no lessor to attach lease")
}
err = s.le.Attach(leaseID, []lease.LeaseItem{{Key: string(key)}})
if err != nil {
panic("unexpected error from lease Attach")
}
}
}
func (s *store) deleteRange(key, end []byte) int64 {
rrev := s.currentRev.main
if s.currentRev.sub > 0 {
rrev += 1
}
keys, revs := s.kvindex.Range(key, end, rrev)
if len(keys) == 0 {
return 0
}
for i, key := range keys {
s.delete(key, revs[i])
}
return int64(len(keys))
}
func (s *store) delete(key []byte, rev revision) {
mainrev := s.currentRev.main + 1
ibytes := newRevBytes()
revToBytes(revision{main: mainrev, sub: s.currentRev.sub}, ibytes)
ibytes = appendMarkTombstone(ibytes)
kv := storagepb.KeyValue{
Key: key,
}
d, err := kv.Marshal()
if err != nil {
log.Fatalf("storage: cannot marshal event: %v", err)
}
s.tx.UnsafeSeqPut(keyBucketName, ibytes, d)
err = s.kvindex.Tombstone(key, revision{main: mainrev, sub: s.currentRev.sub})
if err != nil {
log.Fatalf("storage: cannot tombstone an existing key (%s): %v", string(key), err)
}
s.changes = append(s.changes, kv)
s.currentRev.sub += 1
ibytes = newRevBytes()
revToBytes(rev, ibytes)
_, vs := s.tx.UnsafeRange(keyBucketName, ibytes, nil, 0)
kv.Reset()
if err = kv.Unmarshal(vs[0]); err != nil {
log.Fatalf("storage: cannot unmarshal value: %v", err)
}
if lease.LeaseID(kv.Lease) != lease.NoLease {
err = s.le.Detach(lease.LeaseID(kv.Lease), []lease.LeaseItem{{Key: string(kv.Key)}})
if err != nil {
log.Fatalf("storage: cannot detach %v", err)
}
}
}
func (s *store) getChanges() []storagepb.KeyValue {
changes := s.changes
s.changes = make([]storagepb.KeyValue, 0, 128)
return changes
}
// appendMarkTombstone appends tombstone mark to normal revision bytes.
func appendMarkTombstone(b []byte) []byte {
if len(b) != revBytesLen {
log.Panicf("cannot append mark to non normal revision bytes")
}
return append(b, markTombstone)
}
// isTombstone checks whether the revision bytes is a tombstone.
func isTombstone(b []byte) bool {
return len(b) == markedRevBytesLen && b[markBytePosition] == markTombstone
}
// revBytesRange returns the range of revision bytes at
// the given revision.
func revBytesRange(rev revision) (start, end []byte) {
start = newRevBytes()
revToBytes(rev, start)
end = newRevBytes()
endRev := revision{main: rev.main, sub: rev.sub + 1}
revToBytes(endRev, end)
return start, end
}
1
https://gitee.com/shanmenghao/etcd.git
git@gitee.com:shanmenghao/etcd.git
shanmenghao
etcd
etcd
v2.3.7

搜索帮助