Skip to content

Commit 7a79d22

Browse files
authored
merge-request for v0.4.0
Good news everyone! There's a new (and long overdue) version of hackingBuddyGPT out! To summarize the big changes: - @Neverbolt did extensive work on the configuration and logging system: - Rework of the configuration system - Added a visual and live web based log viewer, which can be started with `wintermute Viewer` - Updated the configuration system. The new configuration system now allows loading parameters from a .json file as well as choosing which logging backend should be used - @lloydchang with @pardaz-banu, @halifrieri, @toluwalopeoolagbegi and @tushcmd added support for dev containers - @jamfish added support for key-based SSH access (to the target system) - @Qsan1 added a new use-case, focusing on enabling linux priv-esc with small-language models, to quote: - Added an extended linux-privesc usecase. It is based on 'privesc', but extends it with multiple components that can be freely switched on or off: - Analyze: After each iteration the LLM is asked to analyze the output of that round. - Retrieval Augmented Generation (RAG): After each iteration the LLM is prompted and asked to generate a search query for a vector store. The search query is then used to retrieve relevant documents from the vector store and the information is included in the prompt for the Analyze component (Only works if Analyze is enabled). - Chain of thought (CoT): Instead of simply asking the LLM for the next command, we use CoT to generate the next action. - History Compression: Instead of including all commands and their respective output in the prompt, it removes all outputs except the most recent one. - Structure via Prompt: Include an initial set of command recommendations in `query_next_command` I thank all our contributors (and hopefully haven't forgotten too many). Enjoy!
2 parents d5435b6 + 9f3d4d3 commit 7a79d22

File tree

111 files changed

+4469
-1577
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+4469
-1577
lines changed

.env.example

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,26 @@ conn.port=2222
88

99
# exchange with the user for your target VM
1010
conn.username='bob'
11+
#To use key-based auth only, use '' with no space for conn.password
12+
#Otherwise, insert the password for instance here
1113
conn.password='secret'
14+
#To use username and password auth only, use '' with no space for conn.keyfilename
15+
#Otherwise, insert the filepath for the keyfile here (for example, '/home/bob/.ssh/sshkey.rsa')
16+
conn.keyfilename=''
1217

1318
# which LLM model to use (can be anything openai supports, or if you use a custom llm.api_url, anything your api provides for the model parameter)
1419
llm.model='gpt-3.5-turbo'
1520
llm.context_size=16385
1621

1722
# how many rounds should this thing go?
18-
max_turns = 20
23+
max_turns = 20
24+
25+
# The following four parameters are only relevant for the usecase rag
26+
# rag_database_folder_name: Name of the folder where the vector store will be saved.
27+
# rag_embedding: The name of the embedding model used. Currently only OpenAI api supported.
28+
# openai_api_key: API key that is used for the embedding model.
29+
# rag_return_token_limit: The upper bound for the RAG output.
30+
rag_database_folder_name = "vectorDB"
31+
rag_embedding = "text-embedding-3-small"
32+
openai_api_key = 'your-openai-key'
33+
rag_return_token_limit = 1000

.env.example.aws

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
llm.api_key='your-openai-key'
2+
log_db.connection_string='log_db.sqlite3'
3+
4+
# exchange with the IP of your target VM
5+
conn.host='enter the public IP of AWS Instance'
6+
conn.hostname='DNS of AWS Instance '
7+
conn.port=22
8+
9+
# user of target AWS Instance
10+
conn.username='bob'
11+
#To use key-based auth only, use '' with no space for conn.password
12+
#Otherwise, insert the password for instance here
13+
conn.password=''
14+
#To use username and password auth only, use '' with no space for conn.keyfilename
15+
#Otherwise, insert the filepath for the keyfile here (for example, '/home/bob/.ssh/awskey.pem')
16+
conn.keyfilename='/home/bob/.ssh/awskey.pem'
17+
18+
# which LLM model to use (can be anything openai supports, or if you use a custom llm.api_url, anything your api provides for the model parameter)
19+
llm.model='gpt-3.5-turbo'
20+
llm.context_size=16385
21+
22+
# how many rounds should this thing go?
23+
max_turns = 20

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
.env
22
venv/
3+
.venv/
34
__pycache__/
45
*.swp
56
*.log

README.md

Lines changed: 101 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -85,38 +85,38 @@ template_next_cmd = Template(filename=str(template_dir / "next_cmd.txt"))
8585

8686

8787
class MinimalLinuxPrivesc(Agent):
88-
8988
conn: SSHConnection = None
89+
9090
_sliding_history: SlidingCliHistory = None
91+
_max_history_size: int = 0
9192

9293
def init(self):
9394
super().init()
95+
9496
self._sliding_history = SlidingCliHistory(self.llm)
97+
self._max_history_size = self.llm.context_size - llm_util.SAFETY_MARGIN - self.llm.count_tokens(template_next_cmd.source)
98+
9599
self.add_capability(SSHRunCommand(conn=self.conn), default=True)
96100
self.add_capability(SSHTestCredential(conn=self.conn))
97-
self._template_size = self.llm.count_tokens(template_next_cmd.source)
98-
99-
def perform_round(self, turn: int) -> bool:
100-
got_root: bool = False
101101

102-
with self._log.console.status("[bold green]Asking LLM for a new command..."):
103-
# get as much history as fits into the target context size
104-
history = self._sliding_history.get_history(self.llm.context_size - llm_util.SAFETY_MARGIN - self._template_size)
102+
@log_conversation("Asking LLM for a new command...")
103+
def perform_round(self, turn: int, log: Logger) -> bool:
104+
# get as much history as fits into the target context size
105+
history = self._sliding_history.get_history(self._max_history_size)
105106

106-
# get the next command from the LLM
107-
answer = self.llm.get_response(template_next_cmd, capabilities=self.get_capability_block(), history=history, conn=self.conn)
108-
cmd = llm_util.cmd_output_fixer(answer.result)
107+
# get the next command from the LLM
108+
answer = self.llm.get_response(template_next_cmd, capabilities=self.get_capability_block(), history=history, conn=self.conn)
109+
message_id = log.call_response(answer)
109110

110-
with self._log.console.status("[bold green]Executing that command..."):
111-
self._log.console.print(Panel(answer.result, title="[bold cyan]Got command from LLM:"))
112-
result, got_root = self.get_capability(cmd.split(" ", 1)[0])(cmd)
111+
# clean the command, load and execute it
112+
cmd = llm_util.cmd_output_fixer(answer.result)
113+
capability, arguments = cmd.split(" ", 1)
114+
result, got_root = self.run_capability(message_id, "0", capability, arguments, calling_mode=CapabilityCallingMode.Direct, log=log)
113115

114-
# log and output the command and its result
115-
self._log.log_db.add_log_query(self._log.run_id, turn, cmd, result, answer)
116+
# store the results in our local history
116117
self._sliding_history.add_command(cmd, result)
117-
self._log.console.print(Panel(result, title=f"[bold cyan]{cmd}"))
118118

119-
# if we got root, we can stop the loop
119+
# signal if we were successful in our task
120120
return got_root
121121

122122

@@ -169,21 +169,65 @@ $ source ./venv/bin/activate
169169
# install python requirements
170170
$ pip install -e .
171171

172-
# copy default .env.example
172+
# copy default .env.example
173173
$ cp .env.example .env
174174

175+
# NOTE: if you are trying to use this with AWS or ssh-key only authentication, copy .env.example.aws
176+
$ cp .env.example.aws .env
177+
175178
# IMPORTANT: setup your OpenAI API key, the VM's IP and credentials within .env
176179
$ vi .env
177180

178181
# if you start wintermute without parameters, it will list all available use cases
179182
$ python src/hackingBuddyGPT/cli/wintermute.py
180-
usage: wintermute.py [-h]
181-
{LinuxPrivesc,WindowsPrivesc,ExPrivEscLinux,ExPrivEscLinuxTemplated,ExPrivEscLinuxHintFile,ExPrivEscLinuxLSE,MinimalWebTesting,WebTestingWithExplanation,SimpleWebAPITesting,SimpleWebAPIDocumentation}
182-
...
183-
wintermute.py: error: the following arguments are required: {LinuxPrivesc,WindowsPrivesc,ExPrivEscLinux,ExPrivEscLinuxTemplated,ExPrivEscLinuxHintFile,ExPrivEscLinuxLSE,MinimalWebTesting,WebTestingWithExplanation,SimpleWebAPITesting,SimpleWebAPIDocumentation}
183+
No command provided
184+
usage: src/hackingBuddyGPT/cli/wintermute.py <command> [--help] [--config config.json] [options...]
185+
186+
commands:
187+
ExPrivEscLinux Showcase Minimal Linux Priv-Escalation
188+
ExPrivEscLinuxTemplated Showcase Minimal Linux Priv-Escalation
189+
LinuxPrivesc Linux Privilege Escalation
190+
WindowsPrivesc Windows Privilege Escalation
191+
ExPrivEscLinuxHintFile Linux Privilege Escalation using hints from a hint file initial guidance
192+
ExPrivEscLinuxLSE Linux Privilege Escalation using lse.sh for initial guidance
193+
WebTestingWithExplanation Minimal implementation of a web testing use case while allowing the llm to 'talk'
194+
SimpleWebAPIDocumentation Minimal implementation of a web API testing use case
195+
SimpleWebAPITesting Minimal implementation of a web API testing use case
196+
Viewer Webserver for (live) log viewing
197+
Replayer Tool to replay the .jsonl logs generated by the Viewer (not well tested)
198+
ThesisLinuxPrivescPrototype Thesis Linux Privilege Escalation Prototype
199+
200+
# to get more information about how to configure a use case you can call it with --help
201+
$ python src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc --help
202+
usage: src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc [--help] [--config config.json] [options...]
203+
204+
--log.log_server_address='localhost:4444' address:port of the log server to be used (default from builtin)
205+
--log.tag='' Tag for your current run (default from builtin)
206+
--log='local_logger' choice of logging backend (default from builtin)
207+
--log_db.connection_string='wintermute.sqlite3' sqlite3 database connection string for logs (default from builtin)
208+
--max_turns='30' (default from .env file, alternatives: 10 from builtin)
209+
--llm.api_key=<secret> OpenAI API Key (default from .env file)
210+
--llm.model OpenAI model name
211+
--llm.context_size='100000' Maximum context size for the model, only used internally for things like trimming to the context size (default from .env file)
212+
--llm.api_url='https://api.openai.com' URL of the OpenAI API (default from builtin)
213+
--llm.api_path='/v1/chat/completions' Path to the OpenAI API (default from builtin)
214+
--llm.api_timeout=240 Timeout for the API request (default from builtin)
215+
--llm.api_backoff=60 Backoff time in seconds when running into rate-limits (default from builtin)
216+
--llm.api_retries=3 Number of retries when running into rate-limits (default from builtin)
217+
--system='linux' (default from builtin)
218+
--enable_explanation=False (default from builtin)
219+
--enable_update_state=False (default from builtin)
220+
--disable_history=False (default from builtin)
221+
--hint='' (default from builtin)
222+
--conn.host
223+
--conn.hostname
224+
--conn.username
225+
--conn.password
226+
--conn.keyfilename
227+
--conn.port='2222' (default from .env file, alternatives: 22 from builtin)
184228
```
185229
186-
## Provide a Target Machine over SSH
230+
### Provide a Target Machine over SSH
187231
188232
The next important part is having a machine that we can run our agent against. In our case, the target machine will be situated at `192.168.122.151`.
189233
@@ -193,6 +237,23 @@ We are using vulnerable Linux systems running in Virtual Machines for this. Neve
193237
>
194238
> We are using virtual machines from our [Linux Privilege-Escalation Benchmark](https://github.com/ipa-lab/benchmark-privesc-linux) project. Feel free to use them for your own research!
195239
240+
## Using the web based viewer and replayer
241+
242+
If you want to have a better representation of the agent's output, you can use the web-based viewer. You can start it using `wintermute Viewer`, which will run the server on `http://127.0.0.1:4444` for the default `wintermute.sqlite3` database. You can change these options using the `--log_server_address` and `--log_db.connection_string` parameters.
243+
244+
Navigating to the log server address will show you an overview of all runs and clicking on a run will show you the details of that run. The viewer updates live using a websocket connection, and if you enable `Follow new runs` it will automatically switch to the new run when one is started.
245+
246+
Keep in mind that there is no additional protection for this webserver, other than how it can be reached (per default binding to `127.0.0.1` means it can only be reached from your local machine). If you make it accessible to the internet, everybody will be able to see all of your runs and also be able to inject arbitrary data into the database.
247+
248+
Therefore **DO NOT** make it accessible to the internet if you're not super sure about what you're doing!
249+
250+
There is also the experimental replay functionality, which can replay a run live from a capture file, including timing information. This is great for showcases and presentations, because it looks like everything is happening live and for real, but you know exactly what the results will be.
251+
252+
To use this, the run needs to be captured by a Viewer server by setting `--save_playback_dir` to a directory where the viewer can write the capture files.
253+
254+
With the Viewer server still running, you can then start `wintermute Replayer --replay_file <path_to_capture_file>` to replay the captured run (this will create a new run in the database).
255+
You can configure it to `--pause_on_message` and `--pause_on_tool_calls`, which will interrupt the replay at the respective points until enter is pressed in the shell where you run the Replayer in. You can also configure the `--playback_speed` to control the speed of the replay.
256+
196257
## Use Cases
197258
198259
GitHub Codespaces:
@@ -222,6 +283,22 @@ $ python src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc --llm.api_key=sk...C
222283
$ pip install '.[testing]'
223284
```
224285
286+
## Beta Features
287+
288+
### Viewer
289+
290+
The viewer is a simple web-based tool to view the results of hackingBuddyGPT runs. It is currently in beta and can be started with:
291+
292+
```bash
293+
$ hackingBuddyGPT Viewer
294+
```
295+
296+
This will start a webserver on `http://localhost:4444` that can be accessed with a web browser.
297+
298+
To log to this central viewer, you currently need to change the `GlobalLogger` definition in [./src/hackingBuddyGPT/utils/logging.py](src/hackingBuddyGPT/utils/logging.py) to `GlobalRemoteLogger`.
299+
300+
This feature is not fully tested yet and therefore is not recommended to be exposed to the internet!
301+
225302
## Publications about hackingBuddyGPT
226303
227304
Given our background in academia, we have authored papers that lay the groundwork and report on our efforts:

publish_notes.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# how to publish to pypi
2+
3+
## start with testing if the project builds and tag the version
4+
5+
```bash
6+
python -m venv venv
7+
source venv/bin/activate
8+
pip install -e .
9+
pytest
10+
git tag v0.3.0
11+
git push origin v0.3.0
12+
```
13+
14+
## build a new package
15+
16+
(according to https://packaging.python.org/en/latest/tutorials/packaging-projects/)
17+
18+
```bash
19+
pip install build twine
20+
python3 -m build
21+
vi ~/.pypirc
22+
twine check dist/*
23+
```
24+
25+
Now (or for next time): test-install the package in a new vanilla environment, then:
26+
27+
```bash
28+
twine upload dist/*
29+
```
30+
31+
## repo todos
32+
33+
- rebase development upon main
34+
- bump the pyproject version number to a new `-dev`

pyproject.toml

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,20 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "hackingBuddyGPT"
7+
# original author was Andreas Happe, for an up-to-date list see
8+
# https://github.com/ipa-lab/hackingBuddyGPT/graphs/contributors
79
authors = [
8-
{ name = "Andreas Happe", email = "[email protected]" }
10+
{ name = "HackingBuddyGPT maintainers", email = "[email protected]" }
911
]
1012
maintainers = [
1113
{ name = "Andreas Happe", email = "[email protected]" },
12-
{ name = "Juergen Cito", email = "juergen.cito@tuwiena.c.at" }
14+
{ name = "Juergen Cito", email = "juergen.cito@tuwien.ac.at" }
1315
]
1416
description = "Helping Ethical Hackers use LLMs in 50 lines of code"
1517
readme = "README.md"
1618
keywords = ["hacking", "pen-testing", "LLM", "AI", "agent"]
1719
requires-python = ">=3.10"
18-
version = "0.3.1"
20+
version = "0.4.0"
1921
license = { file = "LICENSE" }
2022
classifiers = [
2123
"Programming Language :: Python :: 3",
@@ -24,19 +26,30 @@ classifiers = [
2426
"Development Status :: 4 - Beta",
2527
]
2628
dependencies = [
27-
'fabric == 3.2.2',
28-
'Mako == 1.3.2',
29-
'requests == 2.32.3',
30-
'rich == 13.7.1',
31-
'tiktoken == 0.8.0',
32-
'instructor == 1.7.2',
33-
'PyYAML == 6.0.1',
34-
'python-dotenv == 1.0.1',
35-
'pypsexec == 0.3.0',
36-
'pydantic == 2.8.2',
37-
'openai == 1.65.2',
38-
'BeautifulSoup4',
39-
'nltk'
29+
'fabric == 3.2.2',
30+
'Mako == 1.3.2',
31+
'requests == 2.32.3',
32+
'rich == 13.7.1',
33+
'tiktoken == 0.8.0',
34+
'instructor == 1.7.2',
35+
'PyYAML == 6.0.1',
36+
'python-dotenv == 1.0.1',
37+
'pypsexec == 0.3.0',
38+
'pydantic == 2.8.2',
39+
'openai == 1.65.2',
40+
'BeautifulSoup4',
41+
'nltk',
42+
'fastapi == 0.114.0',
43+
'fastapi-utils == 0.7.0',
44+
'jinja2 == 3.1.4',
45+
'uvicorn[standard] == 0.30.6',
46+
'dataclasses_json == 0.6.7',
47+
'websockets == 13.1',
48+
'langchain-community',
49+
'langchain-openai',
50+
'markdown',
51+
'chromadb',
52+
'langchain-chroma',
4053
]
4154

4255
[project.urls]
@@ -54,15 +67,27 @@ where = ["src"]
5467

5568
[tool.pytest.ini_options]
5669
pythonpath = "src"
57-
addopts = [
58-
"--import-mode=importlib",
59-
]
70+
addopts = ["--import-mode=importlib"]
6071
[project.optional-dependencies]
61-
testing = [
62-
'pytest',
63-
'pytest-mock'
72+
testing = ['pytest', 'pytest-mock']
73+
dev = [
74+
'ruff',
75+
]
76+
rag-usecase = [
77+
'langchain-community',
78+
'langchain-openai',
79+
'markdown',
80+
'chromadb',
81+
'langchain-chroma',
6482
]
6583

6684
[project.scripts]
6785
wintermute = "hackingBuddyGPT.cli.wintermute:main"
6886
hackingBuddyGPT = "hackingBuddyGPT.cli.wintermute:main"
87+
88+
[tool.ruff]
89+
line-length = 120
90+
91+
[tool.ruff.lint]
92+
select = ["E", "F", "B", "I"]
93+
ignore = ["E501", "F401", "F403"]
Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
from .capability import Capability
2-
from .psexec_test_credential import PSExecTestCredential
32
from .psexec_run_command import PSExecRunCommand
3+
from .psexec_test_credential import PSExecTestCredential
44
from .ssh_run_command import SSHRunCommand
5-
from .ssh_test_credential import SSHTestCredential
5+
from .ssh_test_credential import SSHTestCredential
6+
7+
__all__ = [
8+
"Capability",
9+
"PSExecRunCommand",
10+
"PSExecTestCredential",
11+
"SSHRunCommand",
12+
"SSHTestCredential",
13+
]

0 commit comments

Comments
 (0)