-
Notifications
You must be signed in to change notification settings - Fork 691
Warm reboot: Add support for orchagent pre-shutdown warm-restart state check #562
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
qiluo-msft
merged 8 commits into
sonic-net:master
from
jipanyang:warm_reboot_collab_7_pre_warm_restart_check
Sep 15, 2018
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
3366c61
Add orchagent pre-warm-restart check mechanism
jipanyang d53130b
Add orchagent_restart_check options: --noFreeze & --skipPendingTaskC…
jipanyang 038b38e
Add waitTime option for response from orchagent
jipanyang 34db540
Fix build issue with latest master
jipanyang 53d8b25
adapt to new dvs.runcmd() signature
jipanyang c8d46b3
Merge remote-tracking branch 'upstream/master' into warm_reboot_colla…
jipanyang c627391
Move standard header before local headers
jipanyang fa4d389
Merge remote-tracking branch 'upstream/master' into warm_reboot_colla…
jipanyang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,145 @@ | ||
| #include <iostream> | ||
| #include <sstream> | ||
|
|
||
| #include <unistd.h> | ||
| #include <getopt.h> | ||
|
|
||
| #include "notificationproducer.h" | ||
| #include "notificationconsumer.h" | ||
| #include "select.h" | ||
| #include "logger.h" | ||
|
|
||
|
|
||
| void printUsage() | ||
| { | ||
| SWSS_LOG_ENTER(); | ||
|
|
||
| std::cout << "Usage: orchagent_restart_check [-s] " << std::endl; | ||
| std::cout << " -n --noFreeze" << std::endl; | ||
| std::cout << " Don't freeze orchagent even if check succeeded" << std::endl; | ||
| std::cout << " -s --skipPendingTaskCheck" << std::endl; | ||
| std::cout << " Skip pending task dependency check for orchagent" << std::endl; | ||
| std::cout << " -w --waitTime" << std::endl; | ||
| std::cout << " Wait time for response from orchagent, in milliseconds. Default value: 1000" << std::endl; | ||
| std::cout << " -h --help:" << std::endl; | ||
| std::cout << " Print out this message" << std::endl; | ||
| } | ||
|
|
||
|
|
||
| /* | ||
| * Before stopping orchagent for warm restart, basic state check is preferred to | ||
| * ensure orchagent is not in transient state, so a deterministic state may be restored after restart. | ||
| * | ||
| * Here is to implement orchagent_restart_check binary which may talk to orchagent and | ||
| * ask it to do self-check, return "READY " signal and freeze if everything is ok, | ||
| * otherwise "NOT_READY" signal should be returned. | ||
| * | ||
| * Optionally: | ||
| * if --noFreeze option is provided, orchagent won't freeze. | ||
| * if --skipPendingTaskCheck option is provided, orchagent won't use | ||
| * whether there is pending task existing as state check criterion. | ||
| */ | ||
| int main(int argc, char **argv) | ||
| { | ||
| swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_INFO); | ||
| SWSS_LOG_ENTER(); | ||
|
|
||
| std::string skipPendingTaskCheck = "fasle"; | ||
| std::string noFreeze = "fasle"; | ||
| /* Default wait time is 1000 millisecond */ | ||
| int waitTime = 1000; | ||
|
|
||
| const char* const optstring = "nsw:"; | ||
| while(true) | ||
| { | ||
| static struct option long_options[] = | ||
| { | ||
| { "noFreeze", no_argument, 0, 'n' }, | ||
| { "skipPendingTaskCheck", no_argument, 0, 's' }, | ||
| { "waitTime", required_argument, 0, 'w' } | ||
| }; | ||
|
|
||
| int option_index = 0; | ||
|
|
||
| int c = getopt_long(argc, argv, optstring, long_options, &option_index); | ||
|
|
||
| if (c == -1) | ||
| { | ||
| break; | ||
| } | ||
|
|
||
| switch (c) | ||
| { | ||
| case 'n': | ||
| SWSS_LOG_NOTICE("Won't freeze orchagent even if check succeeded"); | ||
| noFreeze = "true"; | ||
| break; | ||
| case 's': | ||
| SWSS_LOG_NOTICE("Skipping pending task check for orchagent"); | ||
| skipPendingTaskCheck = "true"; | ||
| break; | ||
| case 'w': | ||
| SWSS_LOG_NOTICE("Wait time for response from orchagent set to %s milliseconds", optarg); | ||
| waitTime = atoi(optarg); | ||
| break; | ||
| case 'h': | ||
| printUsage(); | ||
| exit(EXIT_SUCCESS); | ||
|
|
||
| case '?': | ||
| SWSS_LOG_WARN("unknown option %c", optopt); | ||
| printUsage(); | ||
| exit(EXIT_FAILURE); | ||
|
|
||
| default: | ||
| SWSS_LOG_ERROR("getopt_long failure"); | ||
| exit(EXIT_FAILURE); | ||
| } | ||
| } | ||
|
|
||
| swss::DBConnector db(APPL_DB, swss::DBConnector::DEFAULT_UNIXSOCKET, 0); | ||
| // Send warm restart query via "RESTARTCHECK" notification channel | ||
| swss::NotificationProducer restartQuery(&db, "RESTARTCHECK"); | ||
| // Will listen for the reply on "RESTARTCHECKREPLY" channel | ||
| swss::NotificationConsumer restartQueryReply(&db, "RESTARTCHECKREPLY"); | ||
|
|
||
| std::vector<swss::FieldValueTuple> values; | ||
| values.emplace_back("NoFreeze", noFreeze); | ||
| values.emplace_back("SkipPendingTaskCheck", skipPendingTaskCheck); | ||
| std::string op = "orchagent"; | ||
| SWSS_LOG_NOTICE("requested %s to do warm restart state check", op.c_str()); | ||
| restartQuery.send(op, op, values); | ||
|
|
||
|
|
||
| swss::Select s; | ||
| s.addSelectable(&restartQueryReply); | ||
| swss::Selectable *sel; | ||
| std::string op_ret, data; | ||
| values.clear(); | ||
| int result = s.select(&sel, waitTime); | ||
| if (result == swss::Select::OBJECT) | ||
| { | ||
| restartQueryReply.pop(op_ret, data, values); | ||
| if (data == "READY") | ||
| { | ||
| SWSS_LOG_NOTICE("RESTARTCHECK success, %s is frozen and ready for warm restart", op_ret.c_str()); | ||
| std::cout << "RESTARTCHECK succeeded" << std::endl; | ||
| return EXIT_SUCCESS; | ||
| } | ||
| else | ||
| { | ||
| SWSS_LOG_NOTICE("RESTARTCHECK failed, %s is not ready for warm restart with status %s", | ||
| op_ret.c_str(), data.c_str()); | ||
| } | ||
| } | ||
| else if (result == swss::Select::TIMEOUT) | ||
| { | ||
| SWSS_LOG_NOTICE("RESTARTCHECK for %s timed out", op_ret.c_str()); | ||
| } | ||
| else | ||
| { | ||
| SWSS_LOG_NOTICE("RESTARTCHECK for %s error", op_ret.c_str()); | ||
| } | ||
| std::cout << "RESTARTCHECK failed" << std::endl; | ||
| return EXIT_FAILURE; | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please fix the typo? @jipanyang
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK. Fortunately it is not causing a problem, because only the value "true" is checked:
https://github.com/Azure/sonic-swss/blob/master/orchagent/switchorch.cpp#L179