from urllib.parse import urlsplit

import re
import requests
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError
from termcolor import cprint

from .link_io import LinkIO


def execute_all(link, *, display_status=False):
    keys = set()  # high entropy strings, probably secret keys
    files = set()  # pdf, css, png etc.
    intel = set()  # emails, website accounts, aws buckets etc.
    robots = set()  # entries of robots.txt
    custom = set()  # strings extracted by custom regex pattern
    failed = set()  # urls that photon failed to crawl
    scripts = set()  # javascript files
    external = set()  # urls that don't belong to the target i.e. out-of-scope
    fuzzable = set()  # urls that have get params in them e.g. example.com/page.php?id=2
    endpoints = set()  # urls found from javascript files
    processed = set()  # urls that have been crawled
    everything = []
    bad_intel = set()  # unclean intel urls
    bad_scripts = set()  # unclean javascript file urls
    datasets = [files, intel, robots, custom, failed, scripts, external, fuzzable, endpoints, keys]
    dataset_names = ['files', 'intel', 'robots', 'custom', 'failed', 'scripts', 'external', 'fuzzable', 'endpoints', 'keys']

    page, response = LinkIO.read(link, response=True, show_msg=display_status)
    # Re-fetch the raw body as text so the regex-based checks below operate on a string.
    response = requests.get(link, verify=False).text
    soup = BeautifulSoup(page, 'html.parser')

    validation_functions = [get_robots_txt, get_dot_git, get_dot_svn, get_dot_htaccess, get_intel, get_bitcoin_address]
    for validate_func in validation_functions:
        try:
            validate_func(link, response)
        except (requests.exceptions.ConnectionError, HTTPError):
            cprint('Error in ' + validate_func.__name__, 'red')

    display_webpage_description(soup)
    #display_headers(response)


def display_headers(response):
    print('''
RESPONSE HEADERS
__________________
''')
    for key, val in response.headers.items():
        print('*', key, ':', val)


def get_robots_txt(target, response):
    cprint("[*]Checking for Robots.txt", 'yellow')
    url = target
    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
    robots_response = requests.get(target + "robots.txt")
    print(target + "robots.txt")
    robots = set()  # entries collected from robots.txt
    matches = re.findall(r'Allow: (.*)|Disallow: (.*)', robots_response.text)
    if matches:
        for match in matches:
            match = ''.join(match)
            if '*' not in match:
                robots.add(target + match.strip().lstrip('/'))
        cprint("Robots.txt found", 'blue')
        print(robots)


def get_intel(link, response):
    intel = set()
    matches = re.findall(r'''([\w\.-]+s[\w\.-]+\.amazonaws\.com)|([\w\.-]+@[\w\.-]+\.[\.\w]+)''', response)
    print("Intel\n--------\n\n")
    if matches:
        for match in matches:
            # findall returns a tuple per alternation group; join it into one string
            intel.add(''.join(match))
        print(intel)


def get_dot_git(target, response):
    cprint("[*]Checking for .git folder", 'yellow')
    url = target
    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
    req = requests.get(target + ".git/")
    status = req.status_code
    if status == 200:
        cprint("Alert!", 'red')
        cprint(".git folder exposed publicly", 'red')
    else:
        cprint("NO .git folder found", 'blue')


def get_bitcoin_address(target, response):
    # Word boundaries instead of ^/$ anchors: the anchors would only match if the
    # whole response were a single bitcoin address.
    bitcoins = re.findall(r'\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b', response)
    print("BTC FOUND: ", len(bitcoins))
    for bitcoin in bitcoins:
        print("BTC: ", bitcoin)


def get_dot_svn(target, response):
    cprint("[*]Checking for .svn folder", 'yellow')
    url = target
    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
    req = requests.get(target + ".svn/entries")
    status = req.status_code
    if status == 200:
        cprint("Alert!", 'red')
        cprint(".SVN folder exposed publicly", 'red')
    else:
        cprint("NO .SVN folder found", 'blue')


def get_dot_htaccess(target, response):
    cprint("[*]Checking for .htaccess", 'yellow')
    url = target
    target = "{0.scheme}://{0.netloc}/".format(urlsplit(url))
    req = requests.get(target + ".htaccess")
    statcode = req.status_code
    if statcode == 403:
        cprint("403 Forbidden", 'blue')
    elif statcode == 200:
        cprint("Alert!!", 'red')
        cprint(".htaccess file found!", 'red')
    else:
        cprint("Status code: " + str(statcode), 'blue')


def display_webpage_description(soup):
    cprint("[*]Checking for meta tag", 'yellow')
    metatags = soup.find_all('meta')
    for meta in metatags:
        print("Meta : ", meta)


def writer(datasets, dataset_names, output_dir):
    for dataset, dataset_name in zip(datasets, dataset_names):
        if dataset:
            filepath = output_dir + '/' + dataset_name + '.txt'
            # Write each collected entry on its own line (Python 3, UTF-8).
            with open(filepath, 'w+', encoding='utf8') as f:
                f.write('\n'.join(dataset))
                f.write('\n')
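

# A minimal usage sketch, not part of the original module. Assumptions: this file
# lives in a package next to link_io.py (because of the relative import above), and
# "https://example.com" is only a placeholder target.
#
#     from <package>.info import execute_all
#     execute_all("https://example.com", display_status=True)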