|
#!/usr/bin/env ruby

# Entry point preamble: validates the command line argument and loads the
# third-party libraries the crawler depends on.
#
# The single argument must be a URL whose path has the shape
# /features/<digits>-<digits>/rspec-<name> (optionally with a trailing slash).
# Anything else prints the usage message and exits via `abort`.
url = ARGV[0]

usage_message = <<-USAGE
This tool is designed to check links on cucumber documentation, usage:
#{__FILE__} http://<url>:port/features/<Version>/<Library>
USAGE

abort usage_message if url.nil? || url == ""

require 'uri'

# A malformed argument (for example one containing spaces) makes URI.parse
# raise; show the usage text instead of a stack trace.
begin
  uri = URI.parse(url)
rescue URI::InvalidURIError
  abort usage_message
end

# \A and \z anchor the whole string; ^ and $ would only anchor a single line.
abort usage_message unless uri.path =~ %r[\A/features/\d+-\d+/rspec-\w+/?\z]

# The gems are loaded only after the cheap argument checks so that a usage
# error is reported even when the bundle is not installed.
begin
  require 'bundler/setup'
  require 'nokogiri'
  require 'httparty'
rescue LoadError
  abort "Could not load one of bundler / nokogiri / httparty, check your bundle and try again."
end
| 24 | + |
# Overwrites the current terminal line with a progress message.
# The 200 spaces blank out whatever a previous, longer message left behind.
def check_progress(message)
  $stdout.write "\r#{' ' * 200}\r#{message}"
end

# Performs a GET request for +target+ and builds the state entry for it.
#
# Returns a two element array: the entry hash and the response. When the
# request raises, the entry carries :error instead of :code and the response
# is nil, so one unreachable link does not end the whole run.
def check_request(target, source, type)
  response = HTTParty.get(target)
  [{code: response.code, source: source, type: type}, response]
rescue StandardError => error
  # StandardError rather than Exception, so Ctrl-C and exit still work.
  [{error: error, source: source, type: type}, nil]
end

# Checks every link on +page+ and recursively crawls the linked pages that
# live under /features/<version>/<library> on +host+.
#
# @param library [String] library path segment that limits the crawl
# @param version [String] version path segment that limits the crawl
# @param host [String] host treated as internal; other hosts are external
# @param port [Integer] kept for interface compatibility; relative links now
#   inherit scheme, host and port from +source+ when they are resolved
# @param source [URI] address of +page+, used as the base for relative links
# @param page [#css] parsed document whose <a> elements are checked
# @param state [Hash] results keyed by URL string; mutated in place
# @return [Hash] the same +state+ object
#
# Each entry has :type (:external, :internal_non_crawled, :internal_crawled
# or :invalid), :source, and either :code or :error. Crawled pages that
# answered 200 also get :links, the number of <a> elements found on them.
def check(library, version, host, port, source, page, state)
  next_pages = {}
  # Escaped so a version or library name is always matched literally.
  crawl_scope = %r[/features/#{Regexp.escape(version.to_s)}/#{Regexp.escape(library.to_s)}]

  page.css('a').each do |a|
    href = a.attr('href').to_s.strip
    next if href.empty?

    begin
      # URI#merge resolves "./x", "../x", "x", "/x", "#x" and "//host/x"
      # against the page address.
      to_parse = source.merge(href)
    rescue URI::Error => error
      # A malformed href is a finding to report, not a reason to stop.
      state[href] ||= {error: error, source: source, type: :invalid}
      next
    end

    # mailto:, javascript:, tel: and similar links cannot be fetched.
    next unless %w[http https].include?(to_parse.scheme)

    # The fragment is never sent to the server, so URLs that differ only by
    # fragment are the same request.
    to_parse.fragment = nil
    key = to_parse.to_s

    next if state[key]

    check_progress "Handling link... #{to_parse}"

    if to_parse.host != host
      next if ENV['SKIP_EXTERNAL']
      check_progress "Requesting external link... #{to_parse}"
      entry, _response = check_request(to_parse, source, :external)
      state[key] = entry
      next
    end

    unless to_parse.path =~ crawl_scope
      check_progress "Requesting internal link... #{to_parse}"
      entry, _response = check_request(to_parse, source, :internal_non_crawled)
      state[key] = entry
      next
    end

    entry, response = check_request(to_parse, source, :internal_crawled)
    state[key] = entry

    next unless response && response.code == 200

    check_progress "Parsing link... #{to_parse}"
    result = Nokogiri.parse(response.body)
    entry[:links] = result.css('a').length
    next_pages[to_parse] = result
  end

  # Recurse only after the whole page is processed, so every link of this
  # page is already in +state+ when its children are crawled.
  next_pages.each do |next_source, next_page|
    check(library, version, host, port, next_source, next_page, state)
  end
  state
end
| 114 | + |
# Main script body: fetches the root page, crawls it with `check`, then
# prints one warning per link that did not answer 200.

# The path starts with "/", so the first element of the split is empty.
_slash, features, version, library = uri.path.split("/")

abort <<-WARNING unless features == "features"
Something went wrong parsing the url #{uri.path} as #{features.inspect} was not "features" and so we cant detect library: #{library.inspect}
WARNING

$stdout.write "Checking..."
$stdout.write "\r#{' ' * 200}\rParsing... #{uri}"

# A refused connection or DNS failure on the root page should end with a
# readable message rather than a stack trace.
begin
  response = HTTParty.get(uri)
rescue StandardError => error
  abort "The source page #{uri} could not be loaded: #{error.class}: #{error.message}"
end

abort <<-ROOT_ERROR unless response.code == 200
The source page #{uri} could not be loaded succesfully, got #{response.code}.
ROOT_ERROR

urls_checked = {}
page = Nokogiri.parse(response.body)

check(library, version, uri.host, uri.port, uri, page, urls_checked)

$stdout.write "\r#{' ' * 200}\rDone!\n"

# Set DEBUG to dump the raw entry for failures; VERBOSE dumps every entry.
urls_checked.each do |checked_url, result|
  if result[:code] != 200
    # Entries for requests that raised have :error and no :code; show the
    # error so the warning is not printed with a blank status.
    status = result[:code] || result[:error].inspect
    puts "Warning! #{status} from #{checked_url} via #{result[:source].to_s} please manually check."
    puts "debug: #{result.inspect}" if ENV['DEBUG'] || ENV['VERBOSE']
  else
    puts "verbose: #{result.inspect}" if ENV['VERBOSE']
  end
end
0 commit comments