Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 5 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,10 @@ gem "bourbon", "4.2.2"
gem "neat", "1.7.2"
gem "slim"
gem "builder", "~> 3.0" # For feed.xml.builder
gem "redcarpet"

gem "rack-contrib" # For try_static

# For testing purposes
gem "nokogiri"
gem "httparty"
14 changes: 14 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ GEM
concurrent-ruby (~> 1.0)
hashie (3.6.0)
http_parser.rb (0.8.0)
httparty (0.21.0)
mini_mime (>= 1.0.0)
multi_xml (>= 0.5.2)
i18n (1.6.0)
concurrent-ruby (~> 1.0)
kramdown (2.4.0)
Expand Down Expand Up @@ -95,10 +98,16 @@ GEM
middleman-syntax (3.2.0)
middleman-core (>= 3.2)
rouge (~> 3.2)
mini_mime (1.1.2)
mini_portile2 (2.8.1)
minitest (5.18.0)
multi_xml (0.6.0)
neat (1.7.2)
bourbon (>= 4.0)
sass (>= 3.3)
nokogiri (1.14.2)
mini_portile2 (~> 2.8.0)
racc (~> 1.4)
padrino-helpers (0.15.1)
i18n (>= 0.6.7, < 2)
padrino-support (= 0.15.1)
Expand All @@ -107,6 +116,7 @@ GEM
parallel (1.22.1)
parslet (2.0.0)
public_suffix (4.0.7)
racc (1.6.2)
rack (2.2.6.4)
rack-contrib (2.3.0)
rack (~> 2.0)
Expand All @@ -115,6 +125,7 @@ GEM
rb-fsevent (0.11.1)
rb-inotify (0.10.1)
ffi (~> 1.0)
redcarpet (3.6.0)
rexml (3.2.5)
rouge (3.28.0)
sass (3.7.4)
Expand Down Expand Up @@ -146,13 +157,16 @@ PLATFORMS
DEPENDENCIES
bourbon (= 4.2.2)
builder (~> 3.0)
httparty
middleman (~> 4.3)
middleman-blog
middleman-favicon-maker
middleman-livereload
middleman-syntax
neat (= 1.7.2)
nokogiri
rack-contrib
redcarpet
slim
webrick

Expand Down
144 changes: 144 additions & 0 deletions bin/link_checker
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#!/usr/bin/env ruby

# The only argument is the URL of the documentation root to start crawling at.
url = ARGV[0]

usage_message = <<-USAGE
This tool is designed to check links on cucumber documentation, usage:
#{__FILE__} http://<url>:port/features/<Version>/<Library>
USAGE

# No URL given: explain how to call the tool instead of failing later.
abort usage_message if url.nil? || url == ""

require 'uri'

# A malformed URL makes URI.parse raise; treat that like any other bad
# argument and show the usage message rather than a stack trace.
uri =
  begin
    URI.parse(url)
  rescue URI::InvalidURIError
    abort usage_message
  end

# Only /features/<major>-<minor>/rspec-<name> (optionally with a trailing
# slash) is accepted as a starting point.
abort usage_message unless uri.path =~ %r[^/features/\d+-\d+/rspec-\w+/?$]

# The gems are loaded only after the arguments are known to be usable, so the
# usage message works even when the bundle is broken.
begin
  require 'bundler/setup'
  require 'nokogiri'
  require 'httparty'
rescue LoadError
  abort "Could not load one of bundler / nokogiri / httparty, check your bundle and try again."
end

# Overwrites the current terminal line with +message+ so crawl progress stays
# on a single line.
def report_progress(message)
  $stdout.write "\r#{' ' * 200}\r#{message}"
end

# Returns the path segments of +source+ with its last segment removed (and one
# more if that removed segment was empty); relative links are appended to it.
def source_parent_segments(source)
  root = source.path.split("/")
  root.pop if root.pop == ""
  root
end

# Rewrites +link+ (a parsed, non-opaque href) in place into an absolute URL.
# Missing scheme/host/port are filled in from the arguments and relative paths
# are resolved against the path of +source+, the page the link was found on.
# Aborts the program when a relative path cannot be resolved.
def absolutize_link(link, source, host, port)
  link.scheme = "http" unless link.scheme
  link.port = port unless link.host
  link.host = host unless link.host

  path = link.path.to_s

  if path =~ /^\.\//
    sections = path.split("/")

    abort <<~ERROR unless "." == sections.shift
      Tried to shift a '.' off #{link.path} but failed
    ERROR

    link.path = (source_parent_segments(source) + sections).join("/")
  elsif !path.start_with?("/") && path =~ /\.\./
    # Only relative paths are rewritten here; an absolute path that merely
    # contains ".." is already a complete path and is requested as written.
    sections = path.split("/")
    climbs = sections.take_while { |section| section == ".." }.length
    sections = sections.drop(climbs)

    abort <<~EMPTY if sections.empty?
      Unable to hydrate url #{link.path} on #{source.to_s}
    EMPTY

    # One segment is dropped for the source page itself plus one per "..".
    root = source.path.split("/")
    link.path = "#{root[0..(-2 - climbs)].join("/")}/#{sections.join("/")}"
  elsif !path.empty? && !path.start_with?("/")
    # A bare relative reference ("foo/bar") means the same as "./foo/bar".
    link.path = (source_parent_segments(source) + path.split("/")).join("/")
  end

  link
end

# Requests +link+ and returns [response, state_entry]. A request that raises
# yields a nil response and an entry carrying the error instead of a code, so
# one unreachable link cannot stop the crawl.
def fetch_link(link, source, type)
  response = HTTParty.get(link)
  [response, {code: response.code, source: source, type: type}]
rescue StandardError => error
  # StandardError only: Interrupt / SystemExit must still stop the program.
  [nil, {error: error, source: source, type: type}]
end

# Checks every <a href> on +page+ and recursively crawls the pages that live
# under /features/<version>/<library> on +host+.
#
# library, version - path components identifying the section to crawl.
# host, port       - host and port of the site being checked; links to any
#                    other host are external (skipped when SKIP_EXTERNAL is set).
# source           - URI of +page+.
# page             - parsed document responding to css('a').
# state            - Hash of URL string => result Hash, updated in place. Each
#                    result has :source and :type plus either :code or :error;
#                    crawled pages also get :links.
#
# Returns +state+.
def check(library, version, host, port, source, page, state)
  next_pages = {}
  crawl_scope = %r[/features/#{Regexp.escape(version.to_s)}/#{Regexp.escape(library.to_s)}]

  page.css('a').each do |a|
    href = a.attr('href')
    next if href.nil? || href == ""

    begin
      to_parse = URI.parse(href)
    rescue URI::InvalidURIError => error
      # Record the broken href for the report instead of crashing the crawl.
      state[href] ||= {error: error, source: source, type: :unparseable}
      next
    end

    # mailto:, tel:, javascript: and similar links have no host or path to
    # request, and assigning a host/port to them raises.
    next if to_parse.opaque

    absolutize_link(to_parse, source, host, port)
    key = to_parse.to_s

    next if state[key]

    report_progress "Handling link... #{to_parse}"

    if to_parse.host != host
      next if ENV['SKIP_EXTERNAL']
      report_progress "Requesting external link... #{to_parse}"
      _response, state[key] = fetch_link(to_parse, source, :external)
      next
    end

    unless to_parse.path =~ crawl_scope
      report_progress "Requesting internal link... #{to_parse}"
      _response, state[key] = fetch_link(to_parse, source, :internal_non_crawled)
      next
    end

    response, state[key] = fetch_link(to_parse, source, :internal_crawled)

    if response && response.code == 200
      report_progress "Parsing link... #{to_parse}"
      result = Nokogiri.parse(response.body)
      state[key][:links] = result.css('a').length
      next_pages[to_parse] = result
    end
  end

  # Pages are crawled only after every link on this page has been recorded.
  next_pages.each do |next_source, next_page|
    check(library, version, host, port, next_source, next_page, state)
  end

  state
end

# uri.path looks like "/features/<version>/<library>", so the first element of
# the split is the empty string before the leading slash.
_slash, features, version, library = uri.path.split("/")

abort <<-WARNING unless features == "features"
Something went wrong parsing the url #{uri.path} as #{features.inspect} was not "features" and so we cant detect library: #{library.inspect}
WARNING

$stdout.write "Checking..."
$stdout.write "\r#{' ' * 200}\rParsing... #{uri}"

response = HTTParty.get(uri)

# Without a readable starting page there is nothing to crawl.
abort <<-ROOT_ERROR unless response.code == 200
The source page #{uri} could not be loaded succesfully, got #{response.code}.
ROOT_ERROR

# Filled in by check: URL string => result hash for every link visited.
urls_checked = {}
page = Nokogiri.parse(response.body)

check(library, version, uri.host, uri.port, uri, page, urls_checked)

$stdout.write "\r#{' ' * 200}\rDone!\n"

# Report every link that did not come back with a 200. DEBUG adds the raw
# result for failures; VERBOSE adds it for every link.
urls_checked.each do |url, result|
  if result[:code] != 200
    # Requests that raised have no :code; show the recorded error so the
    # warning says what went wrong instead of printing an empty status.
    outcome = result[:code] || (result[:error] && result[:error].inspect)
    puts "Warning! #{outcome} from #{url} via #{result[:source].to_s} please manually check."
    puts "debug: #{result.inspect}" if ENV['DEBUG'] || ENV['VERBOSE']
  else
    puts "verbose: #{result.inspect}" if ENV['VERBOSE']
  end
end
5 changes: 4 additions & 1 deletion config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,12 @@ def configure_blog(blog)

# for build
activate :syntax
set :markdown_engine, :kramdown
set :markdown_engine, :redcarpet
set :markdown, fenced_code_blocks: true, tables: true, smartypants: true
activate :directory_indexes

page "features/**/*", directory_index: true, layout: "features"

page "documentation/**/*.html", directory_index: false
config[:ignored_sitemap_matchers][:partials] = ->(source_file, _) do
# Only files with 1 (but not two) underscores at the start
Expand Down
Loading