2 Star 2 Fork 1

cockroachdb/cockroach

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
flow.go 14.27 KB
一键复制 编辑 原始数据 按行查看 历史
// Copyright 2016 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
package distsqlrun
import (
"sync"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"golang.org/x/net/context"
"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/rpc"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/sql/jobs"
"github.com/cockroachdb/cockroach/pkg/sql/mon"
"github.com/cockroachdb/cockroach/pkg/sql/parser"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/storage/engine"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
)
// StreamID identifies a stream; it may be local to a flow or it may cross
// machine boundaries. The identifier can only be used in the context of a
// specific flow.
type StreamID int
// FlowID identifies a flow. It is most importantly used when setting up streams
// between nodes.
type FlowID struct {
uuid.UUID
}
// FlowCtx encompasses the contexts needed for various flow components.
type FlowCtx struct {
log.AmbientContext
Settings *cluster.Settings
stopper *stop.Stopper
// id is a unique identifier for a flow.
id FlowID
// EvalCtx is used by all the processors in the flow to evaluate expressions.
EvalCtx parser.EvalContext
// rpcCtx is used by the Outboxes that may be present in the flow for
// connecting to other nodes.
rpcCtx *rpc.Context
// The transaction in which kv operations performed by processors in the flow
// must be performed. Processors in the Flow will use this txn concurrently.
txn *client.Txn
// clientDB is a handle to the cluster. Used for performing requests outside
// of the transaction in which the flow's query is running.
clientDB *client.DB
// nodeID is the ID of the node on which the processors using this FlowCtx
// run.
nodeID roachpb.NodeID
testingKnobs TestingKnobs
// TempStorage is used by some DistSQL processors to store Rows when the
// working set is larger than can be stored in memory.
TempStorage engine.Engine
// diskMonitor is used to monitor temporary storage disk usage.
diskMonitor *mon.BytesMonitor
// JobRegistry is used during backfill to load jobs which keep state.
JobRegistry *jobs.Registry
}
type flowStatus int
// Flow status indicators.
const (
FlowNotStarted flowStatus = iota
FlowRunning
FlowFinished
)
type startable interface {
start(ctx context.Context, wg *sync.WaitGroup, ctxCancel context.CancelFunc)
}
// Flow represents a flow which consists of processors and streams.
type Flow struct {
FlowCtx
flowRegistry *flowRegistry
processors []Processor
// startables are entities that must be started when the flow starts;
// currently these are outboxes and routers.
startables []startable
// syncFlowConsumer is a special outbox which instead of sending rows to
// another host, returns them directly (as a result to a SetupSyncFlow RPC,
// or to the local host).
syncFlowConsumer RowReceiver
localStreams map[StreamID]RowReceiver
// inboundStreams are streams that receive data from other hosts; this map
// is to be passed to flowRegistry.RegisterFlow.
inboundStreams map[StreamID]*inboundStreamInfo
// waitGroup is used to wait for async components of the flow:
// - processors
// - inbound streams
// - outboxes
waitGroup sync.WaitGroup
doneFn func()
status flowStatus
// Context used for all execution within the flow.
// Created in Start(), cancelled in Cleanup().
ctx context.Context
// Cancel function for ctx. Call this to cancel the flow (safe to be called
// multiple times).
ctxCancel context.CancelFunc
// spec is the request that produced this flow. Only used for debugging.
spec *FlowSpec
}
func newFlow(flowCtx FlowCtx, flowReg *flowRegistry, syncFlowConsumer RowReceiver) *Flow {
f := &Flow{
FlowCtx: flowCtx,
flowRegistry: flowReg,
syncFlowConsumer: syncFlowConsumer,
}
f.status = FlowNotStarted
return f
}
// setupInboundStream adds a stream to the stream map (inboundStreams or
// localStreams).
func (f *Flow) setupInboundStream(
ctx context.Context, spec StreamEndpointSpec, receiver RowReceiver,
) error {
if spec.TargetAddr != "" {
return errors.Errorf("inbound stream has target address set: %s", spec.TargetAddr)
}
sid := spec.StreamID
switch spec.Type {
case StreamEndpointSpec_SYNC_RESPONSE:
return errors.Errorf("inbound stream of type SYNC_RESPONSE")
case StreamEndpointSpec_REMOTE:
if _, found := f.inboundStreams[sid]; found {
return errors.Errorf("inbound stream %d has multiple consumers", sid)
}
if f.inboundStreams == nil {
f.inboundStreams = make(map[StreamID]*inboundStreamInfo)
}
if log.V(2) {
log.Infof(ctx, "set up inbound stream %d", sid)
}
f.inboundStreams[sid] = &inboundStreamInfo{receiver: receiver, waitGroup: &f.waitGroup}
case StreamEndpointSpec_LOCAL:
if _, found := f.localStreams[sid]; found {
return errors.Errorf("local stream %d has multiple consumers", sid)
}
if f.localStreams == nil {
f.localStreams = make(map[StreamID]RowReceiver)
}
f.localStreams[sid] = receiver
default:
return errors.Errorf("invalid stream type %d", spec.Type)
}
return nil
}
// setupOutboundStream sets up an output stream; if the stream is local, the
// RowChannel is looked up in the localStreams map; otherwise an outgoing
// mailbox is created.
func (f *Flow) setupOutboundStream(spec StreamEndpointSpec) (RowReceiver, error) {
sid := spec.StreamID
switch spec.Type {
case StreamEndpointSpec_SYNC_RESPONSE:
return f.syncFlowConsumer, nil
case StreamEndpointSpec_REMOTE:
outbox := newOutbox(&f.FlowCtx, spec.TargetAddr, f.id, sid)
f.startables = append(f.startables, outbox)
return outbox, nil
case StreamEndpointSpec_LOCAL:
rowChan, found := f.localStreams[sid]
if !found {
return nil, errors.Errorf("unconnected inbound stream %d", sid)
}
// Once we "connect" a stream, we set the value in the map to nil.
if rowChan == nil {
return nil, errors.Errorf("stream %d has multiple connections", sid)
}
f.localStreams[sid] = nil
return rowChan, nil
default:
return nil, errors.Errorf("invalid stream type %d", spec.Type)
}
}
// setupRouter initializes a router and the outbound streams.
//
// Pass-through routers are not supported; they should be handled separately.
func (f *Flow) setupRouter(spec *OutputRouterSpec) (router, error) {
streams := make([]RowReceiver, len(spec.Streams))
for i := range spec.Streams {
var err error
streams[i], err = f.setupOutboundStream(spec.Streams[i])
if err != nil {
return nil, err
}
}
return makeRouter(spec, streams)
}
func checkNumInOut(inputs []RowSource, outputs []RowReceiver, numIn, numOut int) error {
if len(inputs) != numIn {
return errors.Errorf("expected %d input(s), got %d", numIn, len(inputs))
}
if len(outputs) != numOut {
return errors.Errorf("expected %d output(s), got %d", numOut, len(outputs))
}
return nil
}
func (f *Flow) makeProcessor(ps *ProcessorSpec, inputs []RowSource) (Processor, error) {
if len(ps.Output) != 1 {
return nil, errors.Errorf("only single-output processors supported")
}
outputs := make([]RowReceiver, len(ps.Output))
for i := range ps.Output {
spec := &ps.Output[i]
if spec.Type == OutputRouterSpec_PASS_THROUGH {
// There is no entity that corresponds to a pass-through router - we just
// use its output stream directly.
if len(spec.Streams) != 1 {
return nil, errors.Errorf("expected one stream for passthrough router")
}
var err error
outputs[i], err = f.setupOutboundStream(spec.Streams[0])
if err != nil {
return nil, err
}
continue
}
r, err := f.setupRouter(spec)
if err != nil {
return nil, err
}
outputs[i] = r
f.startables = append(f.startables, r)
}
proc, err := newProcessor(&f.FlowCtx, &ps.Core, &ps.Post, inputs, outputs)
if err != nil {
return nil, err
}
// Initialize any routers (the setupRouter case above).
types := proc.OutputTypes()
for _, o := range outputs {
if r, ok := o.(router); ok {
r.init(&f.FlowCtx, types)
}
}
return proc, nil
}
func (f *Flow) setup(ctx context.Context, spec *FlowSpec) error {
f.spec = spec
// First step: setup the input synchronizers for all processors.
inputSyncs := make([][]RowSource, len(spec.Processors))
for pIdx, ps := range spec.Processors {
for _, is := range ps.Input {
if len(is.Streams) == 0 {
return errors.Errorf("input sync with no streams")
}
var sync RowSource
switch is.Type {
case InputSyncSpec_UNORDERED:
if len(is.Streams) == 1 {
rowChan := &RowChannel{}
rowChan.Init(is.ColumnTypes)
if err := f.setupInboundStream(ctx, is.Streams[0], rowChan); err != nil {
return err
}
sync = rowChan
} else {
mrc := &MultiplexedRowChannel{}
mrc.Init(len(is.Streams), is.ColumnTypes)
for _, s := range is.Streams {
if err := f.setupInboundStream(ctx, s, mrc); err != nil {
return err
}
}
sync = mrc
}
case InputSyncSpec_ORDERED:
// Ordered synchronizer: create a RowChannel for each input.
streams := make([]RowSource, len(is.Streams))
for i, s := range is.Streams {
rowChan := &RowChannel{}
rowChan.Init(is.ColumnTypes)
if err := f.setupInboundStream(ctx, s, rowChan); err != nil {
return err
}
streams[i] = rowChan
}
var err error
sync, err = makeOrderedSync(convertToColumnOrdering(is.Ordering), &f.EvalCtx, streams)
if err != nil {
return err
}
default:
return errors.Errorf("unsupported input sync type %s", is.Type)
}
inputSyncs[pIdx] = append(inputSyncs[pIdx], sync)
}
}
f.processors = make([]Processor, len(spec.Processors))
for i := range spec.Processors {
var err error
f.processors[i], err = f.makeProcessor(&spec.Processors[i], inputSyncs[i])
if err != nil {
return err
}
}
return nil
}
// Start starts the flow (each processor runs in their own goroutine).
//
// Generally if errors are encountered during the setup part, they're returned.
// But if the flow is a synchronous one, then no error is returned; instead the
// setup error is pushed to the syncFlowConsumer. In this case, a subsequent
// call to f.Wait() will not block.
func (f *Flow) Start(ctx context.Context, doneFn func()) error {
f.doneFn = doneFn
log.VEventf(
ctx, 1, "starting (%d processors, %d startables)", len(f.processors), len(f.startables),
)
f.status = FlowRunning
f.ctx, f.ctxCancel = context.WithCancel(ctx)
// Once we call RegisterFlow, the inbound streams become accessible; we must
// set up the WaitGroup counter before.
// The counter will be further incremented below to account for the
// processors.
f.waitGroup.Add(len(f.inboundStreams))
if err := f.flowRegistry.RegisterFlow(
f.ctx, f.id, f, f.inboundStreams, flowStreamDefaultTimeout,
); err != nil {
if f.syncFlowConsumer != nil {
// For sync flows, the error goes to the consumer.
f.syncFlowConsumer.Push(nil /* row */, ProducerMetadata{Err: err})
f.syncFlowConsumer.ProducerDone()
return nil
}
return err
}
if log.V(1) {
log.Infof(f.ctx, "registered flow %s", f.id.Short())
}
for _, s := range f.startables {
s.start(f.ctx, &f.waitGroup, f.ctxCancel)
}
f.waitGroup.Add(len(f.processors))
for _, p := range f.processors {
go p.Run(f.ctx, &f.waitGroup)
}
return nil
}
// Wait waits for all the goroutines for this flow to exit. If the context gets
// cancelled before all goroutines exit, it calls f.cancel().
func (f *Flow) Wait() {
waitChan := make(chan struct{})
go func() {
f.waitGroup.Wait()
close(waitChan)
}()
select {
case <-f.ctx.Done():
f.cancel()
<-waitChan
case <-waitChan:
// Exit normally
}
}
// Cleanup should be called when the flow completes (after all processors and
// mailboxes exited).
func (f *Flow) Cleanup(ctx context.Context) {
if f.status == FlowFinished {
panic("flow cleanup called twice")
}
// This closes the account and monitor opened in ServerImpl.setupFlow.
f.EvalCtx.ActiveMemAcc.Close(ctx)
f.EvalCtx.Stop(ctx)
if log.V(1) {
log.Infof(ctx, "cleaning up")
}
sp := opentracing.SpanFromContext(ctx)
sp.Finish()
if f.status != FlowNotStarted {
f.flowRegistry.UnregisterFlow(f.id)
}
f.status = FlowFinished
f.ctxCancel()
f.doneFn()
f.doneFn = nil
}
// RunSync runs the processors in the flow in order (serially), in the same
// context (no goroutines are spawned).
func (f *Flow) RunSync(ctx context.Context) {
for _, p := range f.processors {
p.Run(ctx, nil)
}
f.Cleanup(ctx)
}
// cancel iterates through all unconnected streams of this flow and marks them cancelled.
// If the syncFlowConsumer is of type CancellableRowReceiver, mark it as cancelled.
// This function is called in Wait() after the associated context has been cancelled.
// In order to cancel a flow, call f.ctxCancel() instead of this function.
//
// For a detailed description of the distsql query cancellation mechanism,
// read docs/RFCS/query_cancellation.md.
func (f *Flow) cancel() {
f.flowRegistry.Lock()
defer f.flowRegistry.Unlock()
entry := f.flowRegistry.flows[f.id]
for streamID, is := range entry.inboundStreams {
// Connected, non-finished inbound streams will get an error
// returned in ProcessInboundStream(). Non-connected streams
// are handled below.
if !is.connected && !is.finished {
is.cancelled = true
// Stream has yet to be started; send an error to its
// receiver and prevent it from being connected.
is.receiver.Push(
nil, /* row */
ProducerMetadata{Err: sqlbase.NewQueryCanceledError()})
is.receiver.ProducerDone()
f.flowRegistry.finishInboundStreamLocked(f.id, streamID)
}
}
if f.syncFlowConsumer != nil {
if recv, ok := f.syncFlowConsumer.(CancellableRowReceiver); ok {
recv.SetCancelled()
}
}
}
var _ = (*Flow).RunSync
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/mirrors_cockroachdb/cockroach.git
git@gitee.com:mirrors_cockroachdb/cockroach.git
mirrors_cockroachdb
cockroach
cockroach
v1.1.6

搜索帮助