Skip to content

Commit 28d2b56

Browse files
authored
Merge pull request #671 from projectdiscovery/path-encodings
path encoding / decoding helpers
2 parents 519d7fb + bd7f284 commit 28d2b56

File tree

3 files changed

+351
-18
lines changed

3 files changed

+351
-18
lines changed

url/encoding.go

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1 @@
11
package urlutil
2-
3-
import (
4-
"github.com/projectdiscovery/utils/env"
5-
)
6-
7-
// SpaceEncoding determines how spaces are encoded in URLs via external environment variable:
8-
// - When empty (""), spaces are encoded as "+"
9-
// - When set to "percent", spaces are encoded as "%20"
10-
var SpaceEncoding = env.GetEnvOrDefault("SPACE_ENCODING", "")

url/rawparam.go

Lines changed: 84 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,34 @@ func (p Params) Decode(raw string) {
153153
}
154154
}
155155

156+
// PathMustEscapeCharSet lists the ASCII characters that are always
// percent-encoded when they appear in a URL path.
// Unlike query parameters, '&' and '=' are not part of this set, and neither
// is '/', so path separators are left untouched.
var PathMustEscapeCharSet = []rune{'?', '#', '@', ';', ',', '[', ']', '^'}
159+
156160
// ParamEncode encodes Key characters only. key characters include
157161
// whitespaces + non printable chars + non-ascii
158162
// also this does not double encode encoded characters
159163
func ParamEncode(data string) string {
160164
return URLEncodeWithEscapes(data)
161165
}
162166

167+
// PathEncode encodes path segments with path-specific rules
168+
// Key differences from ParamEncode:
169+
// 1. Always uses %20 for spaces (never +)
170+
// 2. & and = don't need escaping in paths
171+
// 3. / # ? must be escaped as they have special meaning in paths
172+
func PathEncode(data string) string {
173+
return pathEncodeWithEscapes(data)
174+
}
175+
176+
// PathDecode decodes path segments with path-specific rules
177+
// Key differences from param decoding:
178+
// 1. + is treated as literal + character (not space)
179+
// 2. Only %20 decodes to space
180+
func PathDecode(data string) (string, error) {
181+
return pathDecode(data)
182+
}
183+
163184
// URLEncodeWithEscapes URL encodes data with given special characters escaped (similar to burpsuite intruder)
164185
// Note `MustEscapeCharSet` is not included
165186
func URLEncodeWithEscapes(data string, charset ...rune) string {
@@ -175,15 +196,8 @@ func URLEncodeWithEscapes(data string, charset ...rune) string {
175196
buff.WriteRune('%')
176197
buff.WriteString(getasciihex(r)) // 2 digit hex
177198
case r == ' ':
178-
// use configuration to determine space encoding
179-
switch SpaceEncoding {
180-
case "percent":
181-
buff.WriteRune('%')
182-
buff.WriteRune('2')
183-
buff.WriteRune('0')
184-
default:
185-
buff.WriteRune('+')
186-
}
199+
// Query parameters always use + for spaces
200+
buff.WriteRune('+')
187201
case r < rune(127):
188202
if _, ok := mustescape[r]; ok {
189203
// reserved char must escape
@@ -271,3 +285,64 @@ func getasciihex(r rune) string {
271285
}
272286
return strings.ToUpper(val)
273287
}
288+
289+
// pathEncodeWithEscapes encodes path segments with path-specific rules
290+
func pathEncodeWithEscapes(data string) string {
291+
mustescape := getrunemap(PathMustEscapeCharSet)
292+
var buff bytes.Buffer
293+
buff.Grow(len(data))
294+
295+
for _, r := range data {
296+
switch {
297+
case r < rune(20):
298+
// control character
299+
buff.WriteRune('%')
300+
buff.WriteString(getasciihex(r))
301+
case r == ' ':
302+
// Always use %20 for spaces in paths (never +)
303+
buff.WriteRune('%')
304+
buff.WriteRune('2')
305+
buff.WriteRune('0')
306+
case r < rune(127):
307+
if _, ok := mustescape[r]; ok {
308+
// reserved char must escape
309+
buff.WriteRune('%')
310+
buff.WriteString(getasciihex(r))
311+
} else {
312+
// do not percent encode
313+
buff.WriteRune(r)
314+
}
315+
case r == rune(127):
316+
// [DEL] char should be encoded
317+
buff.WriteRune('%')
318+
buff.WriteString(getasciihex(r))
319+
case r > rune(128):
320+
// non-ascii characters
321+
buff.WriteRune('%')
322+
buff.WriteString(getutf8hex(r))
323+
}
324+
}
325+
return buff.String()
326+
}
327+
328+
// pathDecode percent-decodes a URL path.
// Every well-formed %XX sequence is replaced by the byte it encodes. Any other
// byte, including '+' and malformed or truncated escapes, is copied verbatim.
// The returned error is always nil.
func pathDecode(data string) (string, error) {
	var out bytes.Buffer
	out.Grow(len(data))

	for pos := 0; pos < len(data); pos++ {
		c := data[pos]
		// an escape needs the '%' itself plus two more bytes
		if c == '%' && len(data)-pos >= 3 {
			b, err := hex.DecodeString(data[pos+1 : pos+3])
			if err == nil && len(b) == 1 {
				out.WriteByte(b[0])
				pos += 2 // the loop increment consumes the third byte
				continue
			}
		}
		// + is treated as literal in paths (unlike query params)
		out.WriteByte(c)
	}
	return out.String(), nil
}

url/rawparam_test.go

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,270 @@ func TestURLDecode(t *testing.T) {
120120
require.Equalf(t, v.Expected, parsed.Query().Encode(), "failed to decode params in url %v expected %v got %v", v.url, v.Expected, parsed.Query())
121121
}
122122
}
123+
124+
func TestPathEncode(t *testing.T) {
125+
testcases := []struct {
126+
Input string
127+
Expected string
128+
Desc string
129+
}{
130+
// Space encoding - always %20 in paths
131+
{"hello world", "hello%20world", "spaces encoded as %20"},
132+
{"test+value", "test+value", "+ preserved as literal"},
133+
134+
// Special characters that need escaping in paths
135+
{"path?query", "path%3Fquery", "? must be escaped in paths"},
136+
{"path#fragment", "path%23fragment", "# must be escaped in paths"},
137+
{"user@domain", "user%40domain", "@ must be escaped"},
138+
139+
// Characters that don't need escaping in paths (unlike query params)
140+
{"key=value", "key=value", "= is literal in paths"},
141+
{"param&other", "param&other", "& is literal in paths"},
142+
143+
// Control characters
144+
{"test\nline", "test%0Aline", "newline encoded"},
145+
{"test\tline", "test%09line", "tab encoded"},
146+
147+
// Non-ASCII characters
148+
{"café", "caf%c3%a9", "unicode encoded"},
149+
150+
// Edge cases
151+
{"", "", "empty string"},
152+
{"/", "/", "forward slash preserved"},
153+
{"../../../etc/passwd", "../../../etc/passwd", "path traversal sequences preserved"},
154+
}
155+
156+
for _, v := range testcases {
157+
result := PathEncode(v.Input)
158+
require.Equalf(t, v.Expected, result, "%s: expected %q but got %q", v.Desc, v.Expected, result)
159+
}
160+
}
161+
162+
func TestPathDecode(t *testing.T) {
163+
testcases := []struct {
164+
Input string
165+
Expected string
166+
Desc string
167+
}{
168+
// Space decoding - only %20 becomes space
169+
{"hello%20world", "hello world", "%20 decoded to space"},
170+
{"test+value", "test+value", "+ preserved as literal (not decoded to space)"},
171+
172+
// Hex decoding
173+
{"path%3Fquery", "path?query", "? decoded"},
174+
{"path%23fragment", "path#fragment", "# decoded"},
175+
{"user%40domain", "user@domain", "@ decoded"},
176+
177+
// Characters that don't need decoding
178+
{"key=value", "key=value", "= preserved"},
179+
{"param&other", "param&other", "& preserved"},
180+
181+
// Control characters
182+
{"test%0Aline", "test\nline", "newline decoded"},
183+
{"test%09line", "test\tline", "tab decoded"},
184+
185+
// Non-ASCII
186+
{"caf%C3%A9", "café", "unicode decoded"},
187+
188+
// Invalid sequences should be preserved
189+
{"test%GG", "test%GG", "invalid hex preserved"},
190+
{"test%2", "test%2", "incomplete hex preserved"},
191+
192+
// Edge cases
193+
{"", "", "empty string"},
194+
{"/", "/", "forward slash preserved"},
195+
{"../../../etc/passwd", "../../../etc/passwd", "path traversal preserved"},
196+
}
197+
198+
for _, v := range testcases {
199+
result, err := PathDecode(v.Input)
200+
require.Nilf(t, err, "%s: unexpected error: %v", v.Desc, err)
201+
require.Equalf(t, v.Expected, result, "%s: expected %q but got %q", v.Desc, v.Expected, result)
202+
}
203+
}
204+
205+
func TestPathEncodeDecodeRoundtrip(t *testing.T) {
206+
testcases := []string{
207+
"hello world",
208+
"path?query#fragment",
209+
210+
"key=value&param=other",
211+
"test\nwith\tcontrol\rchars",
212+
"café with unicode",
213+
"../../../etc/passwd",
214+
"test+literal+plus",
215+
}
216+
217+
for _, input := range testcases {
218+
encoded := PathEncode(input)
219+
decoded, err := PathDecode(encoded)
220+
require.Nilf(t, err, "decode error for input %q", input)
221+
require.Equalf(t, input, decoded, "roundtrip failed for %q: encoded=%q decoded=%q", input, encoded, decoded)
222+
}
223+
}
224+
225+
func TestPathVsParamEncodingDifferences(t *testing.T) {
226+
testcases := []struct {
227+
Input string
228+
ExpectedPath string
229+
ExpectedParam string
230+
Desc string
231+
}{
232+
// Key difference: space encoding
233+
{"hello world", "hello%20world", "hello+world", "space encoding difference"},
234+
235+
// + character handling
236+
{"test+plus", "test+plus", "test+plus", "+ preserved in both"},
237+
238+
// & and = handling
239+
{"key=val&other=test", "key=val&other=test", "key=val&other=test", "& and = preserved in both by default"},
240+
241+
// ? and # handling
242+
{"query?test#frag", "query%3Ftest%23frag", "query?test#frag", "? and # encoded only in paths"},
243+
}
244+
245+
for _, v := range testcases {
246+
pathResult := PathEncode(v.Input)
247+
paramResult := ParamEncode(v.Input)
248+
249+
require.Equalf(t, v.ExpectedPath, pathResult, "%s: path encoding mismatch", v.Desc)
250+
require.Equalf(t, v.ExpectedParam, paramResult, "%s: param encoding mismatch", v.Desc)
251+
}
252+
}
253+
254+
func TestSQLInjectionPathEncoding(t *testing.T) {
255+
testcases := []struct {
256+
Name string
257+
Input string
258+
ExpectedEncoded string
259+
ExpectedDecoded string
260+
Description string
261+
}{
262+
{
263+
Name: "SQL injection in path with mixed encoding",
264+
Input: "/admin/1' OR 1=1 ?key=y'+1=1&key2=value2",
265+
ExpectedEncoded: "/admin/1'%20OR%201=1%20%3Fkey=y'+1=1&key2=value2",
266+
ExpectedDecoded: "/admin/1' OR 1=1 ?key=y'+1=1&key2=value2",
267+
Description: "SQL injection path with spaces, quotes, and query-like syntax",
268+
},
269+
{
270+
Name: "Path with SQL payload and question mark",
271+
Input: "/user/1' OR 1=1?admin=true",
272+
ExpectedEncoded: "/user/1'%20OR%201=1%3Fadmin=true",
273+
ExpectedDecoded: "/user/1' OR 1=1?admin=true",
274+
Description: "SQL injection with question mark that needs encoding in paths",
275+
},
276+
{
277+
Name: "Complex SQL injection with multiple special chars",
278+
Input: "/api/user/1' UNION SELECT * FROM users WHERE admin=1#comment",
279+
ExpectedEncoded: "/api/user/1'%20UNION%20SELECT%20*%20FROM%20users%20WHERE%20admin=1%23comment",
280+
ExpectedDecoded: "/api/user/1' UNION SELECT * FROM users WHERE admin=1#comment",
281+
Description: "Complex SQL injection with spaces and hash that need encoding",
282+
},
283+
{
284+
Name: "Path traversal with SQL injection",
285+
Input: "/../../../etc/passwd' OR '1'='1",
286+
ExpectedEncoded: "/../../../etc/passwd'%20OR%20'1'='1",
287+
ExpectedDecoded: "/../../../etc/passwd' OR '1'='1",
288+
Description: "Path traversal combined with SQL injection",
289+
},
290+
{
291+
Name: "Already encoded SQL injection",
292+
Input: "/admin/1' OR 1=1 --",
293+
ExpectedEncoded: "/admin/1'%20OR%201=1%20--",
294+
ExpectedDecoded: "/admin/1' OR 1=1 --",
295+
Description: "SQL injection should be properly encoded",
296+
},
297+
}
298+
299+
for _, tc := range testcases {
300+
t.Run(tc.Name, func(t *testing.T) {
301+
// Test encoding
302+
encoded := PathEncode(tc.Input)
303+
require.Equalf(t, tc.ExpectedEncoded, encoded,
304+
"%s - Encoding mismatch:\nInput: %q\nExpected: %q\nGot: %q",
305+
tc.Description, tc.Input, tc.ExpectedEncoded, encoded)
306+
307+
// Test decoding
308+
decoded, err := PathDecode(tc.Input)
309+
require.Nilf(t, err, "%s - Decode error: %v", tc.Description, err)
310+
require.Equalf(t, tc.ExpectedDecoded, decoded,
311+
"%s - Decoding mismatch:\nInput: %q\nExpected: %q\nGot: %q",
312+
tc.Description, tc.Input, tc.ExpectedDecoded, decoded)
313+
314+
// Test roundtrip: encode then decode
315+
roundtrip, err := PathDecode(encoded)
316+
require.Nilf(t, err, "%s - Roundtrip decode error: %v", tc.Description, err)
317+
require.Equalf(t, tc.Input, roundtrip,
318+
"%s - Roundtrip failed:\nOriginal: %q\nEncoded: %q\nDecoded: %q",
319+
tc.Description, tc.Input, encoded, roundtrip)
320+
})
321+
}
322+
}
323+
324+
func TestPathEncodingSecurityImplications(t *testing.T) {
325+
// Test the key security difference: + vs %20 in SQL injection contexts
326+
sqlPayload := "1 OR 1=1"
327+
328+
// Path encoding (always %20)
329+
pathEncoded := PathEncode(sqlPayload)
330+
require.Equal(t, "1%20OR%201=1", pathEncoded, "Path should encode spaces as %20")
331+
332+
// Param encoding (always +)
333+
paramEncoded := ParamEncode(sqlPayload)
334+
require.Equal(t, "1+OR+1=1", paramEncoded, "Params should encode spaces as +")
335+
336+
// Decoding behavior difference
337+
pathDecoded, err := PathDecode("test+plus")
338+
require.Nil(t, err)
339+
require.Equal(t, "test+plus", pathDecoded, "Path decode should preserve + as literal")
340+
341+
pathDecodedSpace, err := PathDecode("test%20space")
342+
require.Nil(t, err)
343+
require.Equal(t, "test space", pathDecodedSpace, "Path decode should convert %20 to space")
344+
345+
t.Log("✓ Path encoding uses %20 for spaces (correct for path context)")
346+
t.Log("✓ Param encoding uses + for spaces (correct for query context)")
347+
t.Log("✓ Path decode treats + as literal (preventing confusion)")
348+
t.Log("✓ Path decode converts %20 to space (standard percent decoding)")
349+
}
350+
351+
func TestSpecificSQLInjectionPath(t *testing.T) {
352+
// Test the specific path you mentioned
353+
originalPath := "/admin/1'%20OR%201=1%20?key=y'+1=1&key2=value2"
354+
355+
// Test decoding - this should convert %20 to spaces
356+
decoded, err := PathDecode(originalPath)
357+
require.Nil(t, err, "Failed to decode path")
358+
expectedDecoded := "/admin/1' OR 1=1 ?key=y'+1=1&key2=value2"
359+
require.Equal(t, expectedDecoded, decoded,
360+
"Decoded path mismatch:\nInput: %q\nExpected: %q\nGot: %q",
361+
originalPath, expectedDecoded, decoded)
362+
363+
// Test encoding the decoded version - should re-encode spaces and ?
364+
encoded := PathEncode(decoded)
365+
expectedEncoded := "/admin/1'%20OR%201=1%20%3Fkey=y'+1=1&key2=value2"
366+
require.Equal(t, expectedEncoded, encoded,
367+
"Encoded path mismatch:\nInput: %q\nExpected: %q\nGot: %q",
368+
decoded, expectedEncoded, encoded)
369+
370+
// Verify that the + signs are preserved as literals in both operations
371+
require.Contains(t, decoded, "+1=1", "Plus signs should be preserved as literals during decode")
372+
require.Contains(t, encoded, "+1=1", "Plus signs should be preserved as literals during encode")
373+
374+
// Verify that spaces are properly encoded as %20 (not +)
375+
require.Contains(t, encoded, "%20OR%20", "Spaces should be encoded as %20 in paths")
376+
require.NotContains(t, encoded, "+OR+", "Spaces should NOT be encoded as + in paths")
377+
378+
// Verify that ? is encoded in paths (it has special meaning)
379+
require.Contains(t, encoded, "%3F", "Question mark should be encoded in paths")
380+
381+
// Log the transformation for clarity
382+
t.Logf("Original (mixed encoding): %s", originalPath)
383+
t.Logf("Decoded (human readable): %s", decoded)
384+
t.Logf("Re-encoded (consistent): %s", encoded)
385+
t.Log("✓ Percent-20 properly decoded to spaces")
386+
t.Log("✓ + preserved as literal characters")
387+
t.Log("✓ Spaces re-encoded as percent-20 (not +)")
388+
t.Log("✓ ? encoded as percent-3F (has special meaning in paths)")
389+
}

0 commit comments

Comments
 (0)