Skip to content

Commit c5022ef

Browse files
authored
storage/raft: Advertise the configured cluster address (#9008) (#9031)
* storage/raft: Advertise the configured cluster address * Don't allow raft to start with unspecified IP * Fix concurrent map write panic * Add test file * changelog++ * changelog++ * changelog++ * Update tcp_layer.go * Update tcp_layer.go * Only set the adverise addr if set
1 parent 1d47186 commit c5022ef

File tree

8 files changed

+142
-16
lines changed

8 files changed

+142
-16
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ BUG FIXES:
1414
* serviceregistration: Fix a regression for Consul service registration that ignored using the listener address as
1515
the redirect address unless api_addr was provided. It now properly uses the same redirect address as the one
1616
used by Vault's Core object. [[GH-8976](https://github.com/hashicorp/vault/pull/8976)]
17+
* storage/raft: Advertise the configured cluster address to the rest of the nodes in the raft cluster. This fixes
18+
an issue where a node advertising 0.0.0.0 is not using a unique hostname. [[GH-9008](https://github.com/hashicorp/vault/pull/9008)]
19+
* storage/raft: Fix panic when multiple nodes attempt to join the cluster at once. [[GH-9008](https://github.com/hashicorp/vault/pull/9008)]
1720
* sys: The path provided in `sys/internal/ui/mounts/:path` is now namespace-aware. This fixes an issue
1821
with `vault kv` subcommands that had namespaces provided in the path returning permission denied all the time.
1922
[[GH-8962](https://github.com/hashicorp/vault/pull/8962)]

physical/raft/streamlayer.go

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"math/big"
1616
mathrand "math/rand"
1717
"net"
18+
"net/url"
1819
"sync"
1920
"time"
2021

@@ -135,13 +136,15 @@ func GenerateTLSKey(reader io.Reader) (*TLSKey, error) {
135136
}, nil
136137
}
137138

138-
// Make sure raftLayer satisfies the raft.StreamLayer interface
139-
var _ raft.StreamLayer = (*raftLayer)(nil)
139+
var (
140+
// Make sure raftLayer satisfies the raft.StreamLayer interface
141+
_ raft.StreamLayer = (*raftLayer)(nil)
140142

141-
// Make sure raftLayer satisfies the cluster.Handler and cluster.Client
142-
// interfaces
143-
var _ cluster.Handler = (*raftLayer)(nil)
144-
var _ cluster.Client = (*raftLayer)(nil)
143+
// Make sure raftLayer satisfies the cluster.Handler and cluster.Client
144+
// interfaces
145+
_ cluster.Handler = (*raftLayer)(nil)
146+
_ cluster.Client = (*raftLayer)(nil)
147+
)
145148

146149
// RaftLayer implements the raft.StreamLayer interface,
147150
// so that we can use a single RPC layer for Raft and Vault
@@ -170,12 +173,21 @@ type raftLayer struct {
170173
// from the network config.
171174
func NewRaftLayer(logger log.Logger, raftTLSKeyring *TLSKeyring, clusterListener cluster.ClusterHook) (*raftLayer, error) {
172175
clusterAddr := clusterListener.Addr()
173-
switch {
174-
case clusterAddr == nil:
175-
// Clustering disabled on the server, don't try to look for params
176+
if clusterAddr == nil {
176177
return nil, errors.New("no raft addr found")
177178
}
178179

180+
{
181+
// Test the advertised address to make sure it's not an unspecified IP
182+
u := url.URL{
183+
Host: clusterAddr.String(),
184+
}
185+
ip := net.ParseIP(u.Hostname())
186+
if ip != nil && ip.IsUnspecified() {
187+
return nil, fmt.Errorf("cannot use unspecified IP with raft storage: %s", clusterAddr.String())
188+
}
189+
}
190+
179191
layer := &raftLayer{
180192
addr: clusterAddr,
181193
connCh: make(chan net.Conn),

physical/raft/streamlayer_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package raft
2+
3+
import (
4+
"context"
5+
"crypto/rand"
6+
"crypto/tls"
7+
"net"
8+
"testing"
9+
"time"
10+
11+
"github.com/hashicorp/vault/vault/cluster"
12+
)
13+
14+
type mockClusterHook struct {
15+
address net.Addr
16+
}
17+
18+
func (*mockClusterHook) AddClient(alpn string, client cluster.Client) {}
19+
func (*mockClusterHook) RemoveClient(alpn string) {}
20+
func (*mockClusterHook) AddHandler(alpn string, handler cluster.Handler) {}
21+
func (*mockClusterHook) StopHandler(alpn string) {}
22+
func (*mockClusterHook) TLSConfig(ctx context.Context) (*tls.Config, error) { return nil, nil }
23+
func (m *mockClusterHook) Addr() net.Addr { return m.address }
24+
func (*mockClusterHook) GetDialerFunc(ctx context.Context, alpnProto string) func(string, time.Duration) (net.Conn, error) {
25+
return func(string, time.Duration) (net.Conn, error) {
26+
return nil, nil
27+
}
28+
}
29+
30+
func TestStreamLayer_UnspecifiedIP(t *testing.T) {
31+
m := &mockClusterHook{
32+
address: &cluster.NetAddr{
33+
Host: "0.0.0.0:8200",
34+
},
35+
}
36+
37+
raftTLSKey, err := GenerateTLSKey(rand.Reader)
38+
if err != nil {
39+
t.Fatal(err)
40+
}
41+
42+
raftTLS := &TLSKeyring{
43+
Keys: []*TLSKey{raftTLSKey},
44+
ActiveKeyID: raftTLSKey.ID,
45+
}
46+
47+
layer, err := NewRaftLayer(nil, raftTLS, m)
48+
if err == nil {
49+
t.Fatal("expected error")
50+
}
51+
52+
if err.Error() != "cannot use unspecified IP with raft storage: 0.0.0.0:8200" {
53+
t.Fatalf("unexpected error: %s", err.Error())
54+
}
55+
56+
if layer != nil {
57+
t.Fatal("expected nil layer")
58+
}
59+
60+
m.address.(*cluster.NetAddr).Host = "10.0.0.1:8200"
61+
62+
layer, err = NewRaftLayer(nil, raftTLS, m)
63+
if err != nil {
64+
t.Fatal(err)
65+
}
66+
67+
if layer == nil {
68+
t.Fatal("nil layer")
69+
}
70+
}

vault/cluster.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,13 @@ func (c *Core) startClusterListener(ctx context.Context) error {
317317
// If we listened on port 0, record the port the OS gave us.
318318
c.clusterAddr.Store(fmt.Sprintf("https://%s", c.getClusterListener().Addr()))
319319
}
320+
321+
if len(c.ClusterAddr()) != 0 {
322+
if err := c.getClusterListener().SetAdvertiseAddr(c.ClusterAddr()); err != nil {
323+
return err
324+
}
325+
}
326+
320327
return nil
321328
}
322329

vault/cluster/cluster.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,12 @@ import (
77
"errors"
88
"fmt"
99
"net"
10+
"net/url"
1011
"sync"
1112
"sync/atomic"
1213
"time"
1314

15+
"github.com/hashicorp/errwrap"
1416
log "github.com/hashicorp/go-hclog"
1517
"github.com/hashicorp/vault/sdk/helper/consts"
1618
"golang.org/x/net/http2"
@@ -66,6 +68,7 @@ type Listener struct {
6668

6769
networkLayer NetworkLayer
6870
cipherSuites []uint16
71+
advertise net.Addr
6972
logger log.Logger
7073
l sync.RWMutex
7174
}
@@ -94,7 +97,23 @@ func NewListener(networkLayer NetworkLayer, cipherSuites []uint16, logger log.Lo
9497
}
9598
}
9699

100+
func (cl *Listener) SetAdvertiseAddr(addr string) error {
101+
u, err := url.ParseRequestURI(addr)
102+
if err != nil {
103+
return errwrap.Wrapf("failed to parse advertise address: {{err}}", err)
104+
}
105+
cl.advertise = &NetAddr{
106+
Host: u.Host,
107+
}
108+
109+
return nil
110+
}
111+
97112
func (cl *Listener) Addr() net.Addr {
113+
if cl.advertise != nil {
114+
return cl.advertise
115+
}
116+
98117
addrs := cl.Addrs()
99118
if len(addrs) == 0 {
100119
return nil
@@ -422,3 +441,15 @@ type NetworkLayer interface {
422441
type NetworkLayerSet interface {
423442
Layers() []NetworkLayer
424443
}
444+
445+
type NetAddr struct {
446+
Host string
447+
}
448+
449+
func (c *NetAddr) String() string {
450+
return c.Host
451+
}
452+
453+
func (*NetAddr) Network() string {
454+
return "tcp"
455+
}

vault/core.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ type Core struct {
498498
// Stop channel for raft TLS rotations
499499
raftTLSRotationStopCh chan struct{}
500500
// Stores the pending peers we are waiting to give answers
501-
pendingRaftPeers map[string][]byte
501+
pendingRaftPeers *sync.Map
502502

503503
// rawConfig stores the config as-is from the provided server configuration.
504504
rawConfig *atomic.Value

vault/logical_system_raft.go

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,17 @@ func (b *SystemBackend) handleRaftBootstrapChallengeWrite() framework.OperationF
181181
return logical.ErrorResponse("no server id provided"), logical.ErrInvalidRequest
182182
}
183183

184-
answer, ok := b.Core.pendingRaftPeers[serverID]
184+
var answer []byte
185+
answerRaw, ok := b.Core.pendingRaftPeers.Load(serverID)
185186
if !ok {
186187
var err error
187188
answer, err = uuid.GenerateRandomBytes(16)
188189
if err != nil {
189190
return nil, err
190191
}
191-
b.Core.pendingRaftPeers[serverID] = answer
192+
b.Core.pendingRaftPeers.Store(serverID, answer)
193+
} else {
194+
answer = answerRaw.([]byte)
192195
}
193196

194197
sealAccess := b.Core.seal.GetAccess()
@@ -243,14 +246,14 @@ func (b *SystemBackend) handleRaftBootstrapAnswerWrite() framework.OperationFunc
243246
return logical.ErrorResponse("could not base64 decode answer"), logical.ErrInvalidRequest
244247
}
245248

246-
expectedAnswer, ok := b.Core.pendingRaftPeers[serverID]
249+
expectedAnswerRaw, ok := b.Core.pendingRaftPeers.Load(serverID)
247250
if !ok {
248251
return logical.ErrorResponse("no expected answer for the server id provided"), logical.ErrInvalidRequest
249252
}
250253

251-
delete(b.Core.pendingRaftPeers, serverID)
254+
b.Core.pendingRaftPeers.Delete(serverID)
252255

253-
if subtle.ConstantTimeCompare(answer, expectedAnswer) == 0 {
256+
if subtle.ConstantTimeCompare(answer, expectedAnswerRaw.([]byte)) == 0 {
254257
return logical.ErrorResponse("invalid answer given"), logical.ErrInvalidRequest
255258
}
256259

vault/raft.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ func (c *Core) startRaftStorage(ctx context.Context) (retErr error) {
171171
}
172172

173173
func (c *Core) setupRaftActiveNode(ctx context.Context) error {
174-
c.pendingRaftPeers = make(map[string][]byte)
174+
c.pendingRaftPeers = &sync.Map{}
175175
return c.startPeriodicRaftTLSRotate(ctx)
176176
}
177177

0 commit comments

Comments
 (0)