@@ -558,6 +558,10 @@ protected void map(LongWritable key, WARCWritable value, Context output)
558558 byte [] rowKey ;
559559 try {
560560 rowKey = rowKeyFromTargetURI (warcHeader .getTargetURI ());
561+ } catch (IllegalArgumentException e ) {
562+ LOG .debug ("URI for record " + warcHeader .getRecordID () +
563+ " did not parse with a host component" );
564+ return ;
561565 } catch (URISyntaxException e ) {
562566 LOG .warn ("Could not parse URI \" " + warcHeader .getTargetURI () + "\" for record " +
563567 warcHeader .getRecordID ());
@@ -612,17 +616,22 @@ protected void map(LongWritable key, WARCWritable value, Context output)
612616 }
613617 }
614618
615- private byte [] rowKeyFromTargetURI (String targetURI ) throws URISyntaxException {
619+ private byte [] rowKeyFromTargetURI (String targetURI )
620+ throws URISyntaxException , IllegalArgumentException {
616621 URI uri = new URI (targetURI );
617622 StringBuffer sb = new StringBuffer ();
618623 // Ignore the scheme
619624 // Reverse the components of the hostname
620- String [] hostComponents = uri .getHost ().split ("\\ ." );
621- for (int i = hostComponents .length - 1 ; i >= 0 ; i --) {
622- sb .append (hostComponents [i ]);
623- if (i != 0 ) {
624- sb .append ('.' );
625+ if (uri .getHost () != null ) {
626+ String [] hostComponents = uri .getHost ().split ("\\ ." );
627+ for (int i = hostComponents .length - 1 ; i >= 0 ; i --) {
628+ sb .append (hostComponents [i ]);
629+ if (i != 0 ) {
630+ sb .append ('.' );
631+ }
625632 }
633+ } else {
634+ throw new IllegalArgumentException ("URI is missing host component" );
626635 }
627636 // Port
628637 if (uri .getPort () != -1 ) {
0 commit comments