diff --git a/README.md b/README.md index 660ce97..ddd5534 100644 --- a/README.md +++ b/README.md @@ -22,20 +22,21 @@ The library is designed to be used in a seamless way, with minimal code modifica There are currently *twelve* ways to setup notifications: -| Platform | External Contributors | -| :-----------------------------------: | :---------------------------------------------------------------------------------------: | -| [email](#email) | - | -| [Slack](#slack) | - | -| [Telegram](#telegram) | - | -| [Microsoft Teams](#microsoft-teams) | [@noklam](https://github.com/noklam) | -| [Text Message](<#text-message-(sms)>) | [@abhishekkrthakur](https://github.com/abhishekkrthakur) | -| [Discord](#discord) | [@watkinsm](https://github.com/watkinsm) | +| Platform | External Contributors | +| :-----------------------------------: | :----------------------------------------------------------: | +| [email](#email) | - | +| [Slack](#slack) | - | +| [Telegram](#telegram) | - | +| [Microsoft Teams](#microsoft-teams) | [@noklam](https://github.com/noklam) | +| [Text Message](<#text-message-(sms)>) | [@abhishekkrthakur](https://github.com/abhishekkrthakur) | +| [Discord](#discord) | [@watkinsm](https://github.com/watkinsm) | | [Desktop](#desktop-notification) | [@atakanyenel](https://github.com/atakanyenel) [@eyalmazuz](https://github.com/eyalmazuz) | -| [Matrix](#matrix) | [@jcklie](https://github.com/jcklie) | -| [Amazon Chime](#amazon-chime) | [@prabhakar267](https://github.com/prabhakar267) | -| [DingTalk](#dingtalk) | [@wuutiing](https://github.com/wuutiing) | -| [RocketChat](#rocketchat) | [@radao](https://github.com/radao) | -| [WeChat Work](#wechat-work) | [@jcyk](https://github.com/jcyk) | +| [Matrix](#matrix) | [@jcklie](https://github.com/jcklie) | +| [Amazon Chime](#amazon-chime) | [@prabhakar267](https://github.com/prabhakar267) | +| [DingTalk](#dingtalk) | [@wuutiing](https://github.com/wuutiing) | +| [RocketChat](#rocketchat) | 
[@radao](https://github.com/radao) | +| [WeChat Work](#wechat-work) | [@jcyk](https://github.com/jcyk) | +| [Mattermost](#mattermost) | [@kinoute](https://github.com/kinoute) | ### Email @@ -398,8 +399,42 @@ knockknock wechat \ You can also specify an optional argument to tag specific people: `user-mentions=[""]` and/or `user-mentions-mobile=[""]`. + +### Mattermost + +You can also use Mattermost to get notifications, just as you would for Slack (added by [@kinoute](https://github.com/kinoute)). You'll have to get your Mattermost room [webhook URL](https://docs.mattermost.com/developer/webhooks-incoming.html#simple-incoming-webhook). + +#### Python + +```python +from knockknock import mattermost_sender + +webhook_url = "<webhook_url_to_your_mattermost_room>" +@mattermost_sender(webhook_url=webhook_url, channel="<your_mattermost_channel>") +def train_your_nicest_model(your_nicest_parameters): + import time + time.sleep(10000) + return {'loss': 0.9} # Optional return value +``` + +You can also specify an optional argument to tag specific people: `user_mentions=[<your_mattermost_id>, <another_mattermost_id>]`. + +#### Command-line + +```bash +knockknock mattermost \ + --webhook-url <webhook_url_to_your_mattermost_room> \ + --channel <your_mattermost_channel> \ + sleep 10 +``` + +You can also specify an optional argument to tag specific people: `--user-mentions <your_mattermost_id>,<another_mattermost_id>`. + + + + ## Note on distributed training When using distributed training, a GPU is bound to its process using the local rank variable. Since knockknock works at the process level, if you are using 8 GPUs, you would get 8 notifications at the beginning and 8 notifications at the end... To circumvent that, except for errors, only the master process is allowed to send notifications so that you receive only one notification at the beginning and one notification at the end. -**Note:** _In PyTorch, the launch of `torch.distributed.launch` sets up a RANK environment variable for each process (see [here](https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py#L211)). This is used to detect the master process, and for now, the only simple way I came up with. 
Unfortunately, this is not intended to be general for all platforms but I would happily discuss smarter/better ways to handle distributed training in an issue/PR._ +**Note:** _In PyTorch, the launch of `torch.distributed.launch` sets up a RANK environment variable for each process (see [here](https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py#L211)). This is used to detect the master process, and for now, the only simple way I came up with. Unfortunately, this is not intended to be general for all platforms but I would happily discuss smarter/better ways to handle distributed training in an issue/PR._ \ No newline at end of file diff --git a/knockknock/__init__.py b/knockknock/__init__.py index a5c7f51..8ac040f 100644 --- a/knockknock/__init__.py +++ b/knockknock/__init__.py @@ -10,3 +10,4 @@ from knockknock.dingtalk_sender import dingtalk_sender from knockknock.wechat_sender import wechat_sender from knockknock.rocketchat_sender import rocketchat_sender +from knockknock.mattermost_sender import mattermost_sender diff --git a/knockknock/__main__.py b/knockknock/__main__.py index 215939d..e9454da 100644 --- a/knockknock/__main__.py +++ b/knockknock/__main__.py @@ -7,6 +7,7 @@ discord_sender, email_sender, matrix_sender, + mattermost_sender, rocketchat_sender, slack_sender, sms_sender, @@ -151,6 +152,20 @@ def main(): help="The alias of the room to which messages will be send by the BOT.") matrix_parser.set_defaults(sender_func=matrix_sender) + # Mattermost + mattermost_parser = subparsers.add_parser( + name="mattermost", description="Send a Mattermost message before and after function " + + "execution, with start and end status (sucessfully or crashed).") + mattermost_parser.add_argument( + "--webhook-url", type=str, required=True, + help="The webhook URL to access your mattermost channel.") + mattermost_parser.add_argument( + "--channel", type=str, required=True, help="The mattermost room to log.") + mattermost_parser.add_argument( + 
"--user-mentions", type=lambda s: s.split(","), required=False, default=[], + help="Optional user ids to notify, as comma seperated list.") + mattermost_parser.set_defaults(sender_func=mattermost_sender) + # RocketChat rocketchat_parser = subparsers.add_parser( name="rocketchat", description="Send a RocketChat message before and after function " + diff --git a/knockknock/mattermost_sender.py b/knockknock/mattermost_sender.py new file mode 100644 index 0000000..463483c --- /dev/null +++ b/knockknock/mattermost_sender.py @@ -0,0 +1,119 @@ +from typing import List +import os +import datetime +import traceback +import functools +import json +import socket +import requests + +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" + + +def mattermost_sender(webhook_url: str, channel: str, user_mentions: List[str] = []): + """ + Mattermost sender wrapper: execute func, send a Mattermost notification with the end status + (sucessfully finished or crashed) at the end. Also send a Mattermost notification before + executing func. + + `webhook_url`: str + The webhook URL to access your Mattermost room. + Visit https://docs.mattermost.com/developer/webhooks-incoming.html#simple-incoming-webhook for more details. + `channel`: str + The mattermost room to log. + `user_mentions`: List[str] (default=[]) + Optional users to notify. + """ + + dump = { + "username": "Knock Knock", + "channel": channel, + "icon_emoji": ":clapper:", + } + + def decorator_sender(func): + @functools.wraps(func) + def wrapper_sender(*args, **kwargs): + + start_time = datetime.datetime.now() + host_name = socket.gethostname() + func_name = func.__name__ + + # Handling distributed training edge case. + # In PyTorch, the launch of `torch.distributed.launch` sets up a RANK environment variable for each process. + # This can be used to detect the master process. + # See https://github.com/pytorch/pytorch/blob/master/torch/distributed/launch.py#L211 + # Except for errors, only the master process will send notifications. 
+ if "RANK" in os.environ: + master_process = int(os.environ["RANK"]) == 0 + host_name += " - RANK: %s" % os.environ["RANK"] + else: + master_process = True + + if master_process: + contents = [ + "Your training has started 🎬", + "Machine name: %s" % host_name, + "Main call: %s" % func_name, + "Starting date: %s" % start_time.strftime(DATE_FORMAT), + ] + contents.append(" ".join(user_mentions)) + dump["text"] = "\n".join(contents) + dump["icon_emoji"] = ":clapper:" + requests.post(webhook_url, json.dumps(dump)) + + try: + value = func(*args, **kwargs) + + if master_process: + end_time = datetime.datetime.now() + elapsed_time = end_time - start_time + contents = [ + "Your training is complete 🎉", + "Machine name: %s" % host_name, + "Main call: %s" % func_name, + "Starting date: %s" % start_time.strftime(DATE_FORMAT), + "End date: %s" % end_time.strftime(DATE_FORMAT), + "Training duration: %s" % str(elapsed_time), + ] + + try: + str_value = str(value) + contents.append("Main call returned value: %s" % str_value) + except: + contents.append( + "Main call returned value: %s" + % "ERROR - Couldn't str the returned value." 
+ ) + + contents.append(" ".join(user_mentions)) + dump["text"] = "\n".join(contents) + dump["icon_emoji"] = ":tada:" + requests.post(webhook_url, json.dumps(dump)) + + return value + + except Exception as ex: + end_time = datetime.datetime.now() + elapsed_time = end_time - start_time + contents = [ + "Your training has crashed ☠️", + "Machine name: %s" % host_name, + "Main call: %s" % func_name, + "Starting date: %s" % start_time.strftime(DATE_FORMAT), + "Crash date: %s" % end_time.strftime(DATE_FORMAT), + "Crashed training duration: %s\n\n" % str(elapsed_time), + "Here's the error:", + "%s\n\n" % ex, + "Traceback:", + "%s" % traceback.format_exc(), + ] + contents.append(" ".join(user_mentions)) + dump["text"] = "\n".join(contents) + dump["icon_emoji"] = ":skull_and_crossbones:" + requests.post(webhook_url, json.dumps(dump)) + raise ex + + return wrapper_sender + + return decorator_sender