Skip to content

Commit 7a79d22

Browse files
authored
merge-request for v0.4.0
Good news everyone! There's a new (and long overdue) version of hackingBuddyGPT out! To summarize the big changes: - @Neverbolt did extensive work on the configuration and logging system: - Rework of the configuration system - Added a visual and live web based log viewer, which can be started with `wintermute Viewer` - Updated the configuration system. The new configuration system now allows loading parameters from a .json file as well as choosing which logging backend should be used - @lloydchang with @pardaz-banu, @halifrieri, @toluwalopeoolagbegi and @tushcmd added support for dev containers - @jamfish added support for key-based SSH access (to the target system) - @Qsan1 added a new use-case, focusing on enabling linux priv-esc with small-language models, to quote: - Added an extended linux-privesc usecase. It is based on 'privesc', but extends it with multiple components that can be freely switched on or off: - Analyze: After each iteration the LLM is asked to analyze the output of that round. - Retrieval Augmented Generation (RAG): After each iteration the LLM is prompted and asked to generate a search query for a vector store. The search query is then used to retrieve relevant documents from the vector store and the information is included in the prompt for the Analyze component (Only works if Analyze is enabled). - Chain of thought (CoT): Instead of simply asking the LLM for the next command, we use CoT to generate the next action. - History Compression: Instead of including all commands and their respective output in the prompt, it removes all outputs except the most recent one. - Structure via Prompt: Include an initial set of command recommendations in `query_next_command` I thank all our contributors (and hopefully haven't forgotten too many). Enjoy!
2 parents d5435b6 + 9f3d4d3 commit 7a79d22

File tree

111 files changed

+4469
-1577
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+4469
-1577
lines changed

.env.example

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,26 @@ conn.port=2222
88

99
# exchange with the user for your target VM
1010
conn.username='bob'
11+
#To use key-based auth only, use '' with no space for conn.password
12+
#Otherwise, insert the password for instance here
1113
conn.password='secret'
14+
#To use username and password auth only, use '' with no space for conn.keyfilename
15+
#Otherwise, insert the filepath for the keyfile here (for example, '/home/bob/.ssh/sshkey.rsa')
16+
conn.keyfilename=''
1217

1318
# which LLM model to use (can be anything openai supports, or if you use a custom llm.api_url, anything your api provides for the model parameter)
1419
llm.model='gpt-3.5-turbo'
1520
llm.context_size=16385
1621

1722
# how many rounds should this thing go?
18-
max_turns = 20
23+
max_turns = 20
24+
25+
# The following four parameters are only relevant for the usecase rag
26+
# rag_database_folder_name: Name of the folder where the vector store will be saved.
27+
# rag_embedding: The name of the embedding model used. Currently only OpenAI api supported.
28+
# openai_api_key: API key that is used for the embedding model.
29+
# rag_return_token_limit: The upper bound for the RAG output.
30+
rag_database_folder_name = "vectorDB"
31+
rag_embedding = "text-embedding-3-small"
32+
openai_api_key = 'your-openai-key'
33+
rag_return_token_limit = 1000

.env.example.aws

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
llm.api_key='your-openai-key'
2+
log_db.connection_string='log_db.sqlite3'
3+
4+
# exchange with the IP of your target VM
5+
conn.host='enter the public IP of AWS Instance'
6+
conn.hostname='DNS of AWS Instance '
7+
conn.port=22
8+
9+
# user of target AWS Instance
10+
conn.username='bob'
11+
#To use key-based auth only, use '' with no space for conn.password
12+
#Otherwise, insert the password for instance here
13+
conn.password=''
14+
#To use username and password auth only, use '' with no space for conn.keyfilename
15+
#Otherwise, insert the filepath for the keyfile here (for example, '/home/bob/.ssh/awskey.pem')
16+
conn.keyfilename='/home/bob/.ssh/awskey.pem'
17+
18+
# which LLM model to use (can be anything openai supports, or if you use a custom llm.api_url, anything your api provides for the model parameter)
19+
llm.model='gpt-3.5-turbo'
20+
llm.context_size=16385
21+
22+
# how many rounds should this thing go?
23+
max_turns = 20

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
.env
22
venv/
3+
.venv/
34
__pycache__/
45
*.swp
56
*.log

README.md

Lines changed: 101 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -85,38 +85,38 @@ template_next_cmd = Template(filename=str(template_dir / "next_cmd.txt"))
8585

8686

8787
class MinimalLinuxPrivesc(Agent):
88-
8988
conn: SSHConnection = None
89+
9090
_sliding_history: SlidingCliHistory = None
91+
_max_history_size: int = 0
9192

9293
def init(self):
9394
super().init()
95+
9496
self._sliding_history = SlidingCliHistory(self.llm)
97+
self._max_history_size = self.llm.context_size - llm_util.SAFETY_MARGIN - self.llm.count_tokens(template_next_cmd.source)
98+
9599
self.add_capability(SSHRunCommand(conn=self.conn), default=True)
96100
self.add_capability(SSHTestCredential(conn=self.conn))
97-
self._template_size = self.llm.count_tokens(template_next_cmd.source)
98-
99-
def perform_round(self, turn: int) -> bool:
100-
got_root: bool = False
101101

102-
with self._log.console.status("[bold green]Asking LLM for a new command..."):
103-
# get as much history as fits into the target context size
104-
history = self._sliding_history.get_history(self.llm.context_size - llm_util.SAFETY_MARGIN - self._template_size)
102+
@log_conversation("Asking LLM for a new command...")
103+
def perform_round(self, turn: int, log: Logger) -> bool:
104+
# get as much history as fits into the target context size
105+
history = self._sliding_history.get_history(self._max_history_size)
105106

106-
# get the next command from the LLM
107-
answer = self.llm.get_response(template_next_cmd, capabilities=self.get_capability_block(), history=history, conn=self.conn)
108-
cmd = llm_util.cmd_output_fixer(answer.result)
107+
# get the next command from the LLM
108+
answer = self.llm.get_response(template_next_cmd, capabilities=self.get_capability_block(), history=history, conn=self.conn)
109+
message_id = log.call_response(answer)
109110

110-
with self._log.console.status("[bold green]Executing that command..."):
111-
self._log.console.print(Panel(answer.result, title="[bold cyan]Got command from LLM:"))
112-
result, got_root = self.get_capability(cmd.split(" ", 1)[0])(cmd)
111+
# clean the command, load and execute it
112+
cmd = llm_util.cmd_output_fixer(answer.result)
113+
capability, arguments = cmd.split(" ", 1)
114+
result, got_root = self.run_capability(message_id, "0", capability, arguments, calling_mode=CapabilityCallingMode.Direct, log=log)
113115

114-
# log and output the command and its result
115-
self._log.log_db.add_log_query(self._log.run_id, turn, cmd, result, answer)
116+
# store the results in our local history
116117
self._sliding_history.add_command(cmd, result)
117-
self._log.console.print(Panel(result, title=f"[bold cyan]{cmd}"))
118118

119-
# if we got root, we can stop the loop
119+
# signal if we were successful in our task
120120
return got_root
121121

122122

@@ -169,21 +169,65 @@ $ source ./venv/bin/activate
169169
# install python requirements
170170
$ pip install -e .
171171

172-
# copy default .env.example
172+
# copy default .env.example
173173
$ cp .env.example .env
174174

175+
# NOTE: if you are trying to use this with AWS or ssh-key only authentication, copy .env.example.aws
176+
$ cp .env.example.aws .env
177+
175178
# IMPORTANT: setup your OpenAI API key, the VM's IP and credentials within .env
176179
$ vi .env
177180

178181
# if you start wintermute without parameters, it will list all available use cases
179182
$ python src/hackingBuddyGPT/cli/wintermute.py
180-
usage: wintermute.py [-h]
181-
{LinuxPrivesc,WindowsPrivesc,ExPrivEscLinux,ExPrivEscLinuxTemplated,ExPrivEscLinuxHintFile,ExPrivEscLinuxLSE,MinimalWebTesting,WebTestingWithExplanation,SimpleWebAPITesting,SimpleWebAPIDocumentation}
182-
...
183-
wintermute.py: error: the following arguments are required: {LinuxPrivesc,WindowsPrivesc,ExPrivEscLinux,ExPrivEscLinuxTemplated,ExPrivEscLinuxHintFile,ExPrivEscLinuxLSE,MinimalWebTesting,WebTestingWithExplanation,SimpleWebAPITesting,SimpleWebAPIDocumentation}
183+
No command provided
184+
usage: src/hackingBuddyGPT/cli/wintermute.py <command> [--help] [--config config.json] [options...]
185+
186+
commands:
187+
ExPrivEscLinux Showcase Minimal Linux Priv-Escalation
188+
ExPrivEscLinuxTemplated Showcase Minimal Linux Priv-Escalation
189+
LinuxPrivesc Linux Privilege Escalation
190+
WindowsPrivesc Windows Privilege Escalation
191+
ExPrivEscLinuxHintFile Linux Privilege Escalation using hints from a hint file initial guidance
192+
ExPrivEscLinuxLSE Linux Privilege Escalation using lse.sh for initial guidance
193+
WebTestingWithExplanation Minimal implementation of a web testing use case while allowing the llm to 'talk'
194+
SimpleWebAPIDocumentation Minimal implementation of a web API testing use case
195+
SimpleWebAPITesting Minimal implementation of a web API testing use case
196+
Viewer Webserver for (live) log viewing
197+
Replayer Tool to replay the .jsonl logs generated by the Viewer (not well tested)
198+
ThesisLinuxPrivescPrototype Thesis Linux Privilege Escalation Prototype
199+
200+
# to get more information about how to configure a use case you can call it with --help
201+
$ python src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc --help
202+
usage: src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc [--help] [--config config.json] [options...]
203+
204+
--log.log_server_address='localhost:4444' address:port of the log server to be used (default from builtin)
205+
--log.tag='' Tag for your current run (default from builtin)
206+
--log='local_logger' choice of logging backend (default from builtin)
207+
--log_db.connection_string='wintermute.sqlite3' sqlite3 database connection string for logs (default from builtin)
208+
--max_turns='30' (default from .env file, alternatives: 10 from builtin)
209+
--llm.api_key=<secret> OpenAI API Key (default from .env file)
210+
--llm.model OpenAI model name
211+
--llm.context_size='100000' Maximum context size for the model, only used internally for things like trimming to the context size (default from .env file)
212+
--llm.api_url='https://api.openai.com' URL of the OpenAI API (default from builtin)
213+
--llm.api_path='/v1/chat/completions' Path to the OpenAI API (default from builtin)
214+
--llm.api_timeout=240 Timeout for the API request (default from builtin)
215+
--llm.api_backoff=60 Backoff time in seconds when running into rate-limits (default from builtin)
216+
--llm.api_retries=3 Number of retries when running into rate-limits (default from builtin)
217+
--system='linux' (default from builtin)
218+
--enable_explanation=False (default from builtin)
219+
--enable_update_state=False (default from builtin)
220+
--disable_history=False (default from builtin)
221+
--hint='' (default from builtin)
222+
--conn.host
223+
--conn.hostname
224+
--conn.username
225+
--conn.password
226+
--conn.keyfilename
227+
--conn.port='2222' (default from .env file, alternatives: 22 from builtin)
184228
```
185229
186-
## Provide a Target Machine over SSH
230+
### Provide a Target Machine over SSH
187231
188232
The next important part is having a machine that we can run our agent against. In our case, the target machine will be situated at `192.168.122.151`.
189233
@@ -193,6 +237,23 @@ We are using vulnerable Linux systems running in Virtual Machines for this. Neve
193237
>
194238
> We are using virtual machines from our [Linux Privilege-Escalation Benchmark](https://github.com/ipa-lab/benchmark-privesc-linux) project. Feel free to use them for your own research!
195239
240+
## Using the web based viewer and replayer
241+
242+
If you want to have a better representation of the agent's output, you can use the web-based viewer. You can start it using `wintermute Viewer`, which will run the server on `http://127.0.0.1:4444` for the default `wintermute.sqlite3` database. You can change these options using the `--log_server_address` and `--log_db.connection_string` parameters.
243+
244+
Navigating to the log server address will show you an overview of all runs and clicking on a run will show you the details of that run. The viewer updates live using a websocket connection, and if you enable `Follow new runs` it will automatically switch to the new run when one is started.
245+
246+
Keep in mind that there is no additional protection for this webserver, other than how it can be reached (per default binding to `127.0.0.1` means it can only be reached from your local machine). If you make it accessible to the internet, everybody will be able to see all of your runs and also be able to inject arbitrary data into the database.
247+
248+
Therefore **DO NOT** make it accessible to the internet if you're not super sure about what you're doing!
249+
250+
There is also the experimental replay functionality, which can replay a run live from a capture file, including timing information. This is great for showcases and presentations, because it looks like everything is happening live and for real, but you know exactly what the results will be.
251+
252+
To use this, the run needs to be captured by a Viewer server by setting `--save_playback_dir` to a directory where the viewer can write the capture files.
253+
254+
With the Viewer server still running, you can then start `wintermute Replayer --replay_file <path_to_capture_file>` to replay the captured run (this will create a new run in the database).
255+
You can configure it to `--pause_on_message` and `--pause_on_tool_calls`, which will interrupt the replay at the respective points until enter is pressed in the shell where you run the Replayer in. You can also configure the `--playback_speed` to control the speed of the replay.
256+
196257
## Use Cases
197258
198259
GitHub Codespaces:
@@ -222,6 +283,22 @@ $ python src/hackingBuddyGPT/cli/wintermute.py LinuxPrivesc --llm.api_key=sk...C
222283
$ pip install '.[testing]'
223284
```
224285
286+
## Beta Features
287+
288+
### Viewer
289+
290+
The viewer is a simple web-based tool to view the results of hackingBuddyGPT runs. It is currently in beta and can be started with:
291+
292+
```bash
293+
$ hackingBuddyGPT Viewer
294+
```
295+
296+
This will start a webserver on `http://localhost:4444` that can be accessed with a web browser.
297+
298+
To log to this central viewer, you currently need to change the `GlobalLogger` definition in [./src/hackingBuddyGPT/utils/logging.py](src/hackingBuddyGPT/utils/logging.py) to `GlobalRemoteLogger`.
299+
300+
This feature is not fully tested yet and therefore is not recommended to be exposed to the internet!
301+
225302
## Publications about hackingBuddyGPT
226303
227304
Given our background in academia, we have authored papers that lay the groundwork and report on our efforts:

publish_notes.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# how to publish to pypi
2+
3+
## start with testing if the project builds and tag the version
4+
5+
```bash
6+
python -m venv venv
7+
source venv/bin/activate
8+
pip install -e .
9+
pytest
10+
git tag v0.3.0
11+
git push origin v0.3.0
12+
```
13+
14+
## build a new package
15+
16+
(according to https://packaging.python.org/en/latest/tutorials/packaging-projects/)
17+
18+
```bash
19+
pip install build twine
20+
python3 -m build
21+
vi ~/.pypirc
22+
twine check dist/*
23+
```
24+
25+
Now (or for next time): test-install the package in a new vanilla environment, then:
26+
27+
```bash
28+
twine upload dist/*
29+
```
30+
31+
## repo todos
32+
33+
- rebase development upon main
34+
- bump the pyproject version number to a new `-dev`

pyproject.toml

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,20 @@ build-backend = "setuptools.build_meta"
44

55
[project]
66
name = "hackingBuddyGPT"
7+
# original author was Andreas Happe, for an up-to-date list see
8+
# https://github.com/ipa-lab/hackingBuddyGPT/graphs/contributors
79
authors = [
8-
{ name = "Andreas Happe", email = "[email protected]" }
10+
{ name = "HackingBuddyGPT maintainers", email = "[email protected]" }
911
]
1012
maintainers = [
1113
{ name = "Andreas Happe", email = "[email protected]" },
12-
{ name = "Juergen Cito", email = "juergen.cito@tuwiena.c.at" }
14+
{ name = "Juergen Cito", email = "juergen.cito@tuwien.ac.at" }
1315
]
1416
description = "Helping Ethical Hackers use LLMs in 50 lines of code"
1517
readme = "README.md"
1618
keywords = ["hacking", "pen-testing", "LLM", "AI", "agent"]
1719
requires-python = ">=3.10"
18-
version = "0.3.1"
20+
version = "0.4.0"
1921
license = { file = "LICENSE" }
2022
classifiers = [
2123
"Programming Language :: Python :: 3",
@@ -24,19 +26,30 @@ classifiers = [
2426
"Development Status :: 4 - Beta",
2527
]
2628
dependencies = [
27-
'fabric == 3.2.2',
28-
'Mako == 1.3.2',
29-
'requests == 2.32.3',
30-
'rich == 13.7.1',
31-
'tiktoken == 0.8.0',
32-
'instructor == 1.7.2',
33-
'PyYAML == 6.0.1',
34-
'python-dotenv == 1.0.1',
35-
'pypsexec == 0.3.0',
36-
'pydantic == 2.8.2',
37-
'openai == 1.65.2',
38-
'BeautifulSoup4',
39-
'nltk'
29+
'fabric == 3.2.2',
30+
'Mako == 1.3.2',
31+
'requests == 2.32.3',
32+
'rich == 13.7.1',
33+
'tiktoken == 0.8.0',
34+
'instructor == 1.7.2',
35+
'PyYAML == 6.0.1',
36+
'python-dotenv == 1.0.1',
37+
'pypsexec == 0.3.0',
38+
'pydantic == 2.8.2',
39+
'openai == 1.65.2',
40+
'BeautifulSoup4',
41+
'nltk',
42+
'fastapi == 0.114.0',
43+
'fastapi-utils == 0.7.0',
44+
'jinja2 == 3.1.4',
45+
'uvicorn[standard] == 0.30.6',
46+
'dataclasses_json == 0.6.7',
47+
'websockets == 13.1',
48+
'langchain-community',
49+
'langchain-openai',
50+
'markdown',
51+
'chromadb',
52+
'langchain-chroma',
4053
]
4154

4255
[project.urls]
@@ -54,15 +67,27 @@ where = ["src"]
5467

5568
[tool.pytest.ini_options]
5669
pythonpath = "src"
57-
addopts = [
58-
"--import-mode=importlib",
59-
]
70+
addopts = ["--import-mode=importlib"]
6071
[project.optional-dependencies]
61-
testing = [
62-
'pytest',
63-
'pytest-mock'
72+
testing = ['pytest', 'pytest-mock']
73+
dev = [
74+
'ruff',
75+
]
76+
rag-usecase = [
77+
'langchain-community',
78+
'langchain-openai',
79+
'markdown',
80+
'chromadb',
81+
'langchain-chroma',
6482
]
6583

6684
[project.scripts]
6785
wintermute = "hackingBuddyGPT.cli.wintermute:main"
6886
hackingBuddyGPT = "hackingBuddyGPT.cli.wintermute:main"
87+
88+
[tool.ruff]
89+
line-length = 120
90+
91+
[tool.ruff.lint]
92+
select = ["E", "F", "B", "I"]
93+
ignore = ["E501", "F401", "F403"]
Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
from .capability import Capability
2-
from .psexec_test_credential import PSExecTestCredential
32
from .psexec_run_command import PSExecRunCommand
3+
from .psexec_test_credential import PSExecTestCredential
44
from .ssh_run_command import SSHRunCommand
5-
from .ssh_test_credential import SSHTestCredential
5+
from .ssh_test_credential import SSHTestCredential
6+
7+
__all__ = [
8+
"Capability",
9+
"PSExecRunCommand",
10+
"PSExecTestCredential",
11+
"SSHRunCommand",
12+
"SSHTestCredential",
13+
]

0 commit comments

Comments
 (0)