4242 Package = "packages" , ExtractedLicensingInfo = "extracted_licensing_info" )
# Element kind name -> the tag that is expected to start an element of that kind.
ELEMENT_EXPECTED_START_TAG = {
    "File": "FileName",
    "Annotation": "Annotator",
    "Relationship": "Relationship",
    "Snippet": "SnippetSPDXID",
    "Package": "PackageName",
    "ExtractedLicensingInfo": "LicenseID",
}
45- EXPECTED_START_TAG_ELEMENT = {"FileName" : File , "PackageName" : Package , "Annotator" : Annotation ,
46- "Relationship" : Relationship , "SnippetSPDXID" : Snippet ,
47- "LicenseID" : ExtractedLicensingInfo }
4845
4946
5047class Parser (object ):
@@ -135,8 +132,8 @@ def p_attrib(self, p):
135132 "annotation_comment : ANNOTATION_COMMENT error\n annotation_type : ANNOTATION_TYPE error\n "
136133 "annotation_spdx_id : ANNOTATION_SPDX_ID error\n relationship : RELATIONSHIP error" )
def p_current_element_error(self, p):
    """Log a grammar mismatch for the tag in ``p[1]`` on the current element.

    When the tag is one of the expected start tags, a new current element is
    initialized first, so the message is appended to that new element's logger.
    """
    tag = p[1]
    if tag in ELEMENT_EXPECTED_START_TAG.values():
        element_class = TAG_DATA_MODEL_FIELD[tag][0]
        self.initialize_new_current_element(element_class)
    logger = self.current_element["logger"]
    logger.append(
        f"Error while parsing {tag}: Token did not match specified grammar rule. Line: {p.lineno(1)}")
142139
@@ -167,8 +164,8 @@ def p_current_element_error(self, p):
167164 "annotation_spdx_id : ANNOTATION_SPDX_ID LINE\n "
168165 "annotation_comment : ANNOTATION_COMMENT text_or_line" )
def p_generic_value(self, p):
    """Store a simple tag value on the current element.

    A tag that is one of the expected start tags first initializes a new
    current element. The value is only stored via ``set_value`` when
    ``check_that_current_element_matches_class_for_value`` accepts it.
    """
    tag = p[1]
    if tag in ELEMENT_EXPECTED_START_TAG.values():
        self.initialize_new_current_element(TAG_DATA_MODEL_FIELD[tag][0])

    expected_class = TAG_DATA_MODEL_FIELD[tag][0]
    element_matches = self.check_that_current_element_matches_class_for_value(expected_class, p.lineno(1))
    if not element_matches:
        return
    set_value(p, self.current_element)
174171
@@ -232,11 +229,22 @@ def p_generic_value_creation_info(self, p):
def p_license_list_version(self, p):
    """Hand the production to ``set_value`` for the creation info.

    ``Version.from_string`` is passed as ``method_to_apply``.
    """
    creation_info = self.creation_info
    set_value(p, creation_info, method_to_apply=Version.from_string)
234231
235- @grammar_rule ("ext_doc_ref : EXT_DOC_REF EXT_DOC_REF_ID EXT_DOC_URI EXT_DOC_REF_CHECKSUM " )
232+ @grammar_rule ("ext_doc_ref : EXT_DOC_REF LINE " )
def p_external_document_ref(self, p):
    """Parse the value of an ExternalDocumentRef line.

    ``p[2]`` is expected to have the form
    ``<document_ref_id> <document_uri> SHA1: <40 lowercase hex digits>``.

    On success an ``ExternalDocumentRef`` is appended to
    ``self.creation_info["external_document_refs"]``. If the checksum part
    cannot be matched, or the leading part does not consist of exactly two
    whitespace-separated fields, a message is appended to
    ``self.creation_info["logger"]`` and nothing is stored.
    """
    external_doc_ref_regex = re.compile(r"(.*)(\s*SHA1:\s*[a-f0-9]{40})")
    external_doc_ref_match = external_doc_ref_regex.match(p[2])
    if not external_doc_ref_match:
        self.creation_info["logger"].append(
            f"Error while parsing ExternalDocumentRef: Couldn't match Checksum. Line: {p.lineno(1)}")
        return
    try:
        # split() without an argument collapses runs of whitespace, so values
        # whose id and URI are separated by several spaces or a tab are still
        # accepted instead of being rejected because of empty fields.
        document_ref_id, document_uri = external_doc_ref_match.group(1).split()
    except ValueError:
        self.creation_info["logger"].append(
            f"Error while parsing ExternalDocumentRef: Couldn't split the first part of the value into "
            f"document_ref_id and document_uri. Line: {p.lineno(1)}")
        return
    checksum = parse_checksum(external_doc_ref_match.group(2).strip())
    external_document_ref = ExternalDocumentRef(document_ref_id, document_uri, checksum)
    self.creation_info.setdefault("external_document_refs", []).append(external_document_ref)
242250
@@ -415,6 +423,7 @@ def p_snippet_range(self, p):
415423 if argument_name in self .current_element :
416424 self .current_element ["logger" ].append (
417425 f"Multiple values for { p [1 ]} found. Line: { p .lineno (1 )} " )
426+ return
418427 range_re = re .compile (r"^(\d+):(\d+)$" , re .UNICODE )
419428 if not range_re .match (p [2 ].strip ()):
420429 self .current_element ["logger" ].append (f"Value for { p [1 ]} doesn't match valid range pattern. "
@@ -443,8 +452,8 @@ def p_annotation_type(self, p):
443452
444453 # parsing methods for relationship
445454
446- @grammar_rule ("relationship : RELATIONSHIP relationship_value RELATIONSHIP_COMMENT text_or_line\n "
447- "| RELATIONSHIP relationship_value " )
455+ @grammar_rule ("relationship : RELATIONSHIP LINE RELATIONSHIP_COMMENT text_or_line\n "
456+ "| RELATIONSHIP LINE " )
448457 def p_relationship (self , p ):
449458 self .initialize_new_current_element (Relationship )
450459 try :
@@ -467,16 +476,6 @@ def p_relationship(self, p):
467476 if len (p ) == 5 :
468477 self .current_element ["comment" ] = p [4 ]
469478
470- @grammar_rule ("relationship_value : EXT_DOC_REF_ID LINE" )
471- def p_relationship_value_with_doc_ref (self , p ):
472-
473- p [0 ] = p [1 ] + ":" + p [2 ]
474-
475- @grammar_rule ("relationship_value : LINE" )
476- def p_relationship_value_without_doc_ref (self , p ):
477-
478- p [0 ] = p [1 ]
479-
def p_error(self, p):
    """Do nothing.

    NOTE(review): presumably the parser generator's syntax-error hook, left
    empty on purpose because errors are logged by the dedicated error rules
    - confirm.
    """
    return None
482481
0 commit comments