diff --git a/email_reply_parser/__init__.py b/email_reply_parser/__init__.py index ba50683..7171242 100644 --- a/email_reply_parser/__init__.py +++ b/email_reply_parser/__init__.py @@ -76,31 +76,32 @@ def warnings(self): dot = '\u200b' single_space = f'[ {dot}\xA0\t]' space = f'[,()]?{single_space}{{0,3}}[\n\r]?{single_space}{{0,3}}[,()]?' + sentence_start = f'(?:[\n\r.!?]|^){single_space}{{0,3}}' confidential_variations = f'(privileged|confidential|private|sensitive|{space}(/|and|or|and{space}/{space}or|,){space}){{1,3}}' - message_variations = f'(electronic|e[\-]?mail|message|communication|transmission|{space}){{1,3}}' + message_variations = f'(electronic{space}|e[\-]?mail{space}|message{space}|communication{space}|transmission{space}){{1,3}}' self.WARNING_REGEX = re.compile( f'(CAUTION:|NOTICE:|Disclaimer:|Warning:|{confidential_variations}{space}Notice:|Please{space}do{space}not{space}reply' f'|{confidential_variations}{space}information' - f'|(The|This){space}information{space}(provided|transmitted|contained)?{space}(with)?in{space}this{space}{message_variations}' - f'|(The|This){space}information{space}(may also be|is){space}legally' - f'|(The|This){space}content[s]?{space}of{space}this{space}{message_variations}' - f'|(The|This){space}{message_variations}{space}' + f'|{sentence_start}(The|This){space}information{space}(provided|transmitted|contained)?{space}(with)?in{space}this{space}{message_variations}' + f'|{sentence_start}(The|This){space}information{space}(may also be|is){space}legally' + f'|{sentence_start}(The|This){space}content[s]?{space}of{space}this{space}{message_variations}' + f'|{sentence_start}(The|This){space}{message_variations}{space}' f'(may{space}contain|(and|or|and{space}/{space}or)?{space}(any|all)?{space}(files{space}transmitted|the{space}information{space}(contained|it{space}contains)|attach|associated)' f'|[(]?including{space}(any|all)?{space}attachments[)]?|(is|are|contains){space}{confidential_variations}' f'|is{space}for{space}the{space}recipients|is{space}intended{space}only|is{space}for{space}the{space}sole{space}user|has{space}been{space}scanned|with{space}its{space}contents' - f')|(The|This){space}publication,{space}copying' - f'|(The|This){space}sender{space}(cannot{space}guarantee|believes{space}that{space}this{space}{message_variations})' - f'|If{space}you{space}have{space}received{space}this{space}{message_variations}{space}in{space}error' - f'|The{space}contents{space}are{space}{confidential_variations}' - f'|(Under|According to){space}(the)?{space}(General{space}Data{space}Protection{space}Regulation|GDPR)' - f'|Click{space}here{space}to' - f'|Copyright{space}' - f'|Was{space}this{space}email{space}helpful\?' - f'|For{space}Your{space}Information:' - f'|Emails{space}are{space}not{space}secure' - f'|To make{space}sure{space}you{space}continue{space}to{space}receive' - f'|Please{space}choose{space}one{space}of{space}the{space}options{space}below' - f'|Please{space}consider{space}the{space}environment{space}before{space}printing{space}this{space}{message_variations}' + f')|{sentence_start}(The|This){space}publication,{space}copying' + f'|{sentence_start}(The|This){space}sender{space}(cannot{space}guarantee|believes{space}that{space}this{space}{message_variations})' + f'|{sentence_start}If{space}you{space}have{space}received{space}this{space}{message_variations}{space}in{space}error' + f'|{sentence_start}The{space}contents{space}are{space}{confidential_variations}' + f'|{sentence_start}(Under|According to){space}(the)?{space}(General{space}Data{space}Protection{space}Regulation|GDPR)' + f'|{sentence_start}Click{space}here{space}to' + f'|{sentence_start}Copyright{space}' + f'|{sentence_start}Was{space}this{space}email{space}helpful\?' + f'|{sentence_start}For{space}Your{space}Information:' + f'|{sentence_start}Emails{space}are{space}not{space}secure' + f'|{sentence_start}To make{space}sure{space}you{space}continue{space}to{space}receive' + f'|{sentence_start}Please{space}choose{space}one{space}of{space}the{space}options{space}below' + f'|{sentence_start}Please{space}consider{space}the{space}environment{space}before{space}printing{space}this{space}{message_variations}' f')[a-zA-Z0-9:;.,?!<>()@&/\'\"\“\” {dot}\xA0\t\-]*', re.IGNORECASE )