Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
22 changes: 12 additions & 10 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,36 @@
--------------------
All notable changes to this project will be documented in this file.

## 1.3.0 | Future
## 1.3.0 | Present

### Changed
* Moderate code improvements

* Major code improvements
* Updated README.md
* Updated dependencies
* Modularized and documented Golang library
* Using Golang library instead of Python library for getting links
* Refactored TorBot

### Added
* Unit tests for Golang Library

* Visualizer Module
* Download option to save Tree into different formats.
* DB module
* Installation shell script to create torBot binary
* Testing documentation for Golang test suite.
* Test for getting links that uses a mock object, so tests run reproducibly without touching actual servers.
* Script for getting Golang dependencies
* Script for building Golang shared object
* Installs Golang dependencies when install.sh is executed
* BFS algorithm for crawling


## 1.2.0 | Present (Stable)
## 1.2.0 | Nov 16, 2017 - Oct 19, 2018

### Changed

* Major code improvements
* Pep 8 Standard
* Tests
* Library changes

### Added

* Documentation
* Save to JSON
* Test case for Save to JSON
Expand Down
4 changes: 0 additions & 4 deletions install.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
#!/bin/bash

# Get Golang Dependencies
go get github.com/mgutz/ansi
go get golang.org/x/net/html

# Makes directory for dependencies and executable to be installed
mkdir -p tmp_build
mkdir -p tmp_dist
Expand Down
53 changes: 18 additions & 35 deletions modules/analyzer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
"""
Module is used for analyzing link relationships
"""
import requests
from requests.exceptions import HTTPError

from bs4 import BeautifulSoup
from ete3 import Tree, TreeStyle, TextFace, add_face_to_node
from .getweblinks import get_urls_from_page
from .pagereader import read
from .link import LinkNode

class LinkTree:
"""
Expand All @@ -20,14 +17,14 @@ class LinkTree:
tld (bool): Decides whether or not to use additional top-level-domains besides .tor
stop_depth (int): Depth of which to stop searching for links
"""
def __init__(self, root, tld=False, stop_depth=1):
self._tree = build_tree(root, tld=tld, stop=stop_depth)
def __init__(self, root_node, *, tld=False, stop_depth=1):
self._tree = build_tree(root_node, tld=tld, stop=stop_depth)

def __len__(self):
    """Return the number of nodes in the underlying ete3 tree."""
    tree = self._tree
    return len(tree)

def __contains__(self, link):
return link in self._tree
return self._tree.search_nodes(name=link)

def save(self, file_name):
"""
Expand Down Expand Up @@ -57,25 +54,8 @@ def my_layout(node):
style.layout_fn = my_layout
self._tree.show(tree_style=style)

def get_node_children(link, tld):
"""
Returns children for link node

Args:
link (str): link node to get children for
tld (bool): Additional top-level-domains
Returns:
children (list): A list of children from linknode
"""
try:
resp = requests.get(link)
soup = BeautifulSoup(resp.text, 'html.parser')
children = get_urls_from_page(soup, tld)
except (HTTPError, ConnectionError):
children = []
return children

def initialize_tree(link, tld):
def initialize_tree(root_node):
"""
Creates root of tree
Args:
Expand All @@ -85,13 +65,11 @@ def initialize_tree(link, tld):
root (ete3.Tree): root node of tree
to_visit (list): Children of root node
"""
root = Tree(name=link)
html_content = read(link)
soup = BeautifulSoup(html_content, 'html.parser')
to_visit = get_urls_from_page(soup, extension=tld)
return root, to_visit
root = Tree(name=root_node.name)
children = root_node.get_children()
return root, children

def build_tree(link, tld, stop=1, *, rec=0, to_visit=None, tree=None):
def build_tree(link, *, tld, stop=1, rec=0, to_visit=None, tree=None):
"""
Builds tree using Breadth First Search. You can specify stop depth.
Rec & tree arguments are used for recursion.
Expand All @@ -111,7 +89,7 @@ def build_tree(link, tld, stop=1, *, rec=0, to_visit=None, tree=None):
tree (ete3.Tree): built tree
"""
if rec == 0:
tree, to_visit = initialize_tree(link, tld)
tree, to_visit = initialize_tree(link)

sub_tree = Tree(name=tree.name)

Expand All @@ -121,8 +99,13 @@ def build_tree(link, tld, stop=1, *, rec=0, to_visit=None, tree=None):

children_to_visit = list()
for link_name in to_visit:
link_node = sub_tree.add_child(name=link_name)
link_children = get_node_children(link_name, tld)
try:
node = LinkNode(link_name, tld=tld)
except (ValueError, ConnectionError, HTTPError):
continue

link_node = sub_tree.add_child(name=node.name)
link_children = node.get_children()
# No need to find children if we aren't going to visit them
if stop != rec + 1:
for child in link_children:
Expand All @@ -135,4 +118,4 @@ def build_tree(link, tld, stop=1, *, rec=0, to_visit=None, tree=None):
return sub_tree

new_tree = tree.add_child(sub_tree)
return build_tree(to_visit, tld, stop, rec=rec, tree=new_tree)
return build_tree(to_visit, tld=tld, stop=stop, rec=rec, tree=new_tree)
6 changes: 6 additions & 0 deletions modules/color.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,9 @@ def __init__(self, message, selected):

def __str__(self):
return self._color + self._msg + COLORS['end']

def __add__(self, other):
    """Support `colored + plain` by concatenating the rendered string."""
    rendered = str(self)
    return rendered + other

def __radd__(self, other):
    """Support `plain + colored` by concatenating onto the rendered string."""
    rendered = str(self)
    return other + rendered
34 changes: 0 additions & 34 deletions modules/getemails.py

This file was deleted.

109 changes: 0 additions & 109 deletions modules/getweblinks.py

This file was deleted.

21 changes: 0 additions & 21 deletions modules/go_linker.py

This file was deleted.

4 changes: 2 additions & 2 deletions modules/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from requests.exceptions import HTTPError
import requests

from .pagereader import read
from .link_io import LinkIO


def execute_all(link, *, display_status=False):
page, response = read(link, response=True, show_msg=display_status)
page, response = LinkIO.read(link, response=True, show_msg=display_status)
soup = BeautifulSoup(page, 'html.parser')
validation_functions = [get_robots_txt, get_dot_git, get_dot_svn, get_dot_git]
for validate_func in validation_functions:
Expand Down
2 changes: 0 additions & 2 deletions modules/lib/build.sh

This file was deleted.

3 changes: 0 additions & 3 deletions modules/lib/go_dep.sh

This file was deleted.

Loading