-
Notifications
You must be signed in to change notification settings - Fork 5
feat: auto-recover from pruned node errors during extraction #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
7400539
d922277
b071283
e46ac82
afb60f3
2b1bea4
a315f56
82dcc30
055a681
c40b2d0
71d12fd
a4e85a6
2b6350b
75c9480
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,17 @@ | ||
| package utils | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "regexp" | ||
| "strconv" | ||
| "strings" | ||
|
|
||
| "github.com/manifest-network/yaci/internal/client" | ||
| "github.com/pkg/errors" | ||
| ) | ||
|
|
||
// Fully-qualified gRPC method names used by the helpers in this package.
const (
	// statusMethod is the node status query of the Cosmos SDK node service.
	statusMethod = "cosmos.base.node.v1beta1.Service.Status"

	// getBlockByHeightMethod fetches a single block by height from the
	// Cosmos SDK tendermint service.
	getBlockByHeightMethod = "cosmos.base.tendermint.v1beta1.Service.GetBlockByHeight"
)
|
|
||
| // GetLatestBlockHeightWithRetry retrieves the latest block height from the gRPC server with retry logic. | ||
| func GetLatestBlockHeightWithRetry(gRPCClient *client.GRPCClient, maxRetries uint) (uint64, error) { | ||
|
|
@@ -25,3 +29,46 @@ func GetLatestBlockHeightWithRetry(gRPCClient *client.GRPCClient, maxRetries uin | |
| }, | ||
| ) | ||
| } | ||
|
|
||
| // GetEarliestBlockHeight determines the earliest available block on a node. | ||
| // It probes block 1 to check if the node is an archive node or pruned. | ||
| // For archive nodes, returns 1. For pruned nodes, parses the error message | ||
| // to extract the lowest available height. | ||
| func GetEarliestBlockHeight(gRPCClient *client.GRPCClient, maxRetries uint) (uint64, error) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make this function a little more robust like suggested in the first review? I saw cases there the lowest height from error wasn't working because the query hit another node and it didn't have that height. I.e., the other node has a lowest height higher than previously reported.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah I didn't consider the case of load balancers using nodes with varying heights.. is this common?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I encountered this issue multiple times while building this project, primarily with Osmosis and the Hub. I'm not sure if it's common, but I believe it's common enough to address. I'm surprised you didn't encounter this issue during your tests.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I typically use my own nodes for dev/testing (mainly because of issues like this with public ones).
This comment was marked as outdated.
Sorry, something went wrong. |
||
| inputParams := []byte(`{"height":"1"}`) | ||
|
|
||
| // Fast path: single attempt to check if block 1 exists | ||
| _, err := GetGRPCResponse(gRPCClient, getBlockByHeightMethod, 1, inputParams) | ||
| if err == nil { | ||
| return 1, nil // Archive node with full history | ||
| } | ||
|
|
||
| // Check if error reveals the pruning boundary | ||
| if lowestHeight := parseLowestHeightFromError(err.Error()); lowestHeight > 0 { | ||
| return lowestHeight, nil | ||
| } | ||
|
|
||
| // Error was neither "block exists" nor "pruned" - retry in case of transient failure | ||
| _, err = GetGRPCResponse(gRPCClient, getBlockByHeightMethod, maxRetries, inputParams) | ||
| if err == nil { | ||
| return 1, nil | ||
| } | ||
|
|
||
| return 0, fmt.Errorf("failed to determine earliest block height: %w", err) | ||
| } | ||
|
|
||
// lowestHeightPattern captures the pruning boundary from a pruned-node error
// message. It is compiled once at package initialization instead of on every
// call. The pattern is lowercase because the message is lowercased before
// matching.
var lowestHeightPattern = regexp.MustCompile(`lowest height is (\d+)`)

// parseLowestHeightFromError extracts the lowest available height from a
// pruned-node error message.
// CosmosSDK nodes return errors like "height 1 is not available, lowest height is 28566001".
//
// Matching is case-insensitive. It returns 0 when errMsg does not contain the
// expected phrase or when the captured digits do not fit in a uint64.
func parseLowestHeightFromError(errMsg string) uint64 {
	matches := lowestHeightPattern.FindStringSubmatch(strings.ToLower(errMsg))
	if len(matches) < 2 {
		return 0
	}

	height, err := strconv.ParseUint(matches[1], 10, 64)
	if err != nil {
		// Only reachable when the digit run overflows uint64; treated as "not found".
		return 0
	}

	return height
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| package utils | ||
|
|
||
| import "testing" | ||
|
|
||
| func TestParseLowestHeightFromError(t *testing.T) { | ||
| tests := []struct { | ||
| name string | ||
| errMsg string | ||
| want uint64 | ||
| }{ | ||
| { | ||
| name: "standard pruned node error", | ||
| errMsg: "height 1 is not available, lowest height is 28566001", | ||
| want: 28566001, | ||
| }, | ||
| { | ||
| name: "wrapped error", | ||
| errMsg: "rpc error: code = Unknown desc = height 1 is not available, lowest height is 12345", | ||
| want: 12345, | ||
| }, | ||
| { | ||
| name: "unrelated error", | ||
| errMsg: "connection refused", | ||
| want: 0, | ||
| }, | ||
| { | ||
| name: "empty string", | ||
| errMsg: "", | ||
| want: 0, | ||
| }, | ||
| } | ||
|
|
||
| for _, tt := range tests { | ||
| t.Run(tt.name, func(t *testing.T) { | ||
| got := parseLowestHeightFromError(tt.errMsg) | ||
| if got != tt.want { | ||
| t.Errorf("parseLowestHeightFromError() = %d, want %d", got, tt.want) | ||
| } | ||
| }) | ||
| } | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.