Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ type Options struct {
OutputMatchContentLength string
OutputFilterStatusCode string
OutputFilterErrorPage bool
FilterOutDuplicates bool
OutputFilterContentLength string
InputRawRequest string
rawRequest string
Expand Down Expand Up @@ -409,6 +410,7 @@ func ParseOptions() *Options {
flagSet.CreateGroup("filters", "Filters",
flagSet.StringVarP(&options.OutputFilterStatusCode, "filter-code", "fc", "", "filter response with specified status code (-fc 403,401)"),
flagSet.BoolVarP(&options.OutputFilterErrorPage, "filter-error-page", "fep", false, "filter response with ML based error page detection"),
flagSet.BoolVarP(&options.FilterOutDuplicates, "filter-duplicates", "fd", false, "filter out near-duplicate responses (only first response is retained)"),
flagSet.StringVarP(&options.OutputFilterContentLength, "filter-length", "fl", "", "filter response with specified content length (-fl 23,33)"),
flagSet.StringVarP(&options.OutputFilterLinesCount, "filter-line-count", "flc", "", "filter response body with specified line count (-flc 423,532)"),
flagSet.StringVarP(&options.OutputFilterWordsCount, "filter-word-count", "fwc", "", "filter response body with specified word count (-fwc 423,532)"),
Expand Down
44 changes: 37 additions & 7 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/PuerkitoBio/goquery"
"github.com/corona10/goimagehash"
"github.com/mfonda/simhash"
asnmap "github.com/projectdiscovery/asnmap/libs"
"github.com/projectdiscovery/fastdialer/fastdialer"
"github.com/projectdiscovery/httpx/common/customextract"
Expand Down Expand Up @@ -65,6 +66,7 @@ import (
"github.com/projectdiscovery/httpx/common/stringz"
"github.com/projectdiscovery/mapcidr"
"github.com/projectdiscovery/rawhttp"
converstionutil "github.com/projectdiscovery/utils/conversion"
fileutil "github.com/projectdiscovery/utils/file"
pdhttputil "github.com/projectdiscovery/utils/http"
iputil "github.com/projectdiscovery/utils/ip"
Expand All @@ -86,6 +88,7 @@ type Runner struct {
browser *Browser
errorPageClassifier *errorpageclassifier.ErrorPageClassifier
pHashClusters []pHashCluster
simHashes gcache.Cache[uint64, struct{}]
httpApiEndpoint *Server
}

Expand Down Expand Up @@ -359,6 +362,7 @@ func New(options *Options) (*Runner, error) {
}

runner.errorPageClassifier = errorpageclassifier.New()
runner.simHashes = gcache.New[uint64, struct{}](1000).ARC().Build()

if options.HttpApiEndpoint != "" {
apiServer := NewServer(options.HttpApiEndpoint, options)
Expand Down Expand Up @@ -438,7 +442,7 @@ func (r *Runner) prepareInput() {
// check if input target host(s) have been provided
if len(r.options.InputTargetHost) > 0 {
for _, target := range r.options.InputTargetHost {
expandedTarget := r.countTargetFromRawTarget(target)
expandedTarget, _ := r.countTargetFromRawTarget(target)
if expandedTarget > 0 {
numHosts += expandedTarget
r.hm.Set(target, nil) //nolint
Expand Down Expand Up @@ -514,6 +518,24 @@ func (r *Runner) seen(k string) bool {
return ok
}

// duplicate reports whether the raw response carried by result has already
// been seen, either exactly or approximately, based on its simhash.
//
// The simhash is computed over the word features of result.Raw and checked
// against the hashes stored in r.simHashes. When no stored hash matches, the
// new hash is recorded and false is returned, so the first response of a
// group of look-alikes is the one that is kept.
func (r *Runner) duplicate(result *Result) bool {
	// Largest simhash.Compare value still treated as a near-duplicate;
	// a lower threshold gives increased precision.
	const maxDistance = 3

	features := simhash.NewWordFeatureSet(converstionutil.Bytes(result.Raw))
	fingerprint := simhash.Simhash(features)

	// Fast path: the very same hash is already stored.
	if r.simHashes.Has(fingerprint) {
		gologger.Debug().Msgf("Skipping duplicate response with simhash %d for URL %s\n", respSimHashArg(fingerprint), result.URL)
		return true
	}

	// Slow path: compare against every stored hash.
	for known := range r.simHashes.GetALL(false) {
		if simhash.Compare(known, fingerprint) > maxDistance {
			continue
		}
		gologger.Debug().Msgf("Skipping near-duplicate response with simhash %d for URL %s\n", respSimHashArg(fingerprint), result.URL)
		return true
	}

	// First time this response shape is seen: remember it. The Set error is
	// deliberately discarded, as in the original best-effort behaviour.
	_ = r.simHashes.Set(fingerprint, struct{}{})
	return false
}

// respSimHashArg returns the hash unchanged; it exists only to name the value
// passed to the debug log format verbs.
func respSimHashArg(h uint64) uint64 { return h }

func (r *Runner) testAndSet(k string) bool {
// skip empty lines
k = strings.TrimSpace(k)
Expand Down Expand Up @@ -581,7 +603,7 @@ func (r *Runner) loadAndCloseFile(finput *os.File) (numTargets int, err error) {
for scanner.Scan() {
target := strings.TrimSpace(scanner.Text())
// Used just to get the exact number of targets
expandedTarget := r.countTargetFromRawTarget(target)
expandedTarget, _ := r.countTargetFromRawTarget(target)
if expandedTarget > 0 {
numTargets += expandedTarget
r.hm.Set(target, nil) //nolint
Expand All @@ -591,12 +613,12 @@ func (r *Runner) loadAndCloseFile(finput *os.File) (numTargets int, err error) {
return numTargets, err
}

func (r *Runner) countTargetFromRawTarget(rawTarget string) (numTargets int) {
func (r *Runner) countTargetFromRawTarget(rawTarget string) (numTargets int, err error) {
if rawTarget == "" {
return 0
return 0, nil
}
if _, ok := r.hm.Get(rawTarget); ok {
return 0
return 0, nil
}

expandedTarget := 0
Expand All @@ -606,14 +628,17 @@ func (r *Runner) countTargetFromRawTarget(rawTarget string) (numTargets int) {
expandedTarget = int(ipsCount)
}
case asn.IsASN(rawTarget):
cidrs, _ := asn.GetCIDRsForASNNum(rawTarget)
cidrs, err := asn.GetCIDRsForASNNum(rawTarget)
if err != nil {
return 0, err
}
for _, cidr := range cidrs {
expandedTarget += int(mapcidr.AddressCountIpnet(cidr))
}
default:
expandedTarget = 1
}
return expandedTarget
return expandedTarget, nil
}

var (
Expand Down Expand Up @@ -884,6 +909,11 @@ func (r *Runner) RunEnumeration() {
logFilteredErrorPage(r.options.OutputFilterErrorPagePath, resp.URL)
continue
}

if r.options.FilterOutDuplicates && r.duplicate(&resp) {
continue
}

if len(r.options.filterStatusCode) > 0 && sliceutil.Contains(r.options.filterStatusCode, resp.StatusCode) {
continue
}
Expand Down
37 changes: 27 additions & 10 deletions runner/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (

_ "github.com/projectdiscovery/fdmax/autofdmax"
"github.com/projectdiscovery/httpx/common/httpx"
"github.com/projectdiscovery/mapcidr/asn"
stringsutil "github.com/projectdiscovery/utils/strings"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -106,11 +108,17 @@ func TestRunner_asn_targets(t *testing.T) {
for _, ip := range ips {
expected = append(expected, httpx.Target{Host: ip})
}

if _, err := asn.GetIPAddressesAsStream(input); err != nil && stringsutil.ContainsAnyI(err.Error(), "unauthorized: 401") {
t.Skip("skipping asn test due to missing/invalid api key")
return
}

got := []httpx.Target{}
for target := range r.targets(r.hp, input) {
got = append(got, target)
}
require.ElementsMatch(t, expected, got, "could not exepcted output")
require.ElementsMatch(t, expected, got, "could not get expected output")
}

func TestRunner_countTargetFromRawTarget(t *testing.T) {
Expand All @@ -120,32 +128,41 @@ func TestRunner_countTargetFromRawTarget(t *testing.T) {

input := "example.com"
expected := 1
got := r.countTargetFromRawTarget(input)
got, err := r.countTargetFromRawTarget(input)
require.Nil(t, err, "could not count targets")
require.Equal(t, expected, got, "got wrong output")

input = "example.com"
expected = 0
err = r.hm.Set(input, nil)
require.Nil(t, err, "could not set value to hm")
got = r.countTargetFromRawTarget(input)
got, err = r.countTargetFromRawTarget(input)
require.Nil(t, err, "could not count targets")
require.Equal(t, expected, got, "got wrong output")

input = "173.0.84.0/24"
expected = 256
got, err = r.countTargetFromRawTarget(input)
require.Nil(t, err, "could not count targets")
require.Equal(t, expected, got, "got wrong output")

input = ""
expected = 0
got = r.countTargetFromRawTarget(input)
got, err = r.countTargetFromRawTarget(input)
require.Nil(t, err, "could not count targets")
require.Equal(t, expected, got, "got wrong output")

if os.Getenv("PDCP_API_KEY") != "" {
input = "AS14421"
expected = 256
got = r.countTargetFromRawTarget(input)
got, err = r.countTargetFromRawTarget(input)
if err != nil && stringsutil.ContainsAnyI(err.Error(), "unauthorized: 401") {
t.Skip("skipping asn test due to missing/invalid api key")
return
}
require.Nil(t, err, "could not count targets")
require.Equal(t, expected, got, "got wrong output")
}

input = "173.0.84.0/24"
expected = 256
got = r.countTargetFromRawTarget(input)
require.Equal(t, expected, got, "got wrong output")
}

func TestRunner_urlWithComma_targets(t *testing.T) {
Expand Down