Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ type Options struct {
OutputMatchContentLength string
OutputFilterStatusCode string
OutputFilterErrorPage bool
FilterOutDuplicates bool
OutputFilterContentLength string
InputRawRequest string
rawRequest string
Expand Down Expand Up @@ -409,6 +410,7 @@ func ParseOptions() *Options {
flagSet.CreateGroup("filters", "Filters",
flagSet.StringVarP(&options.OutputFilterStatusCode, "filter-code", "fc", "", "filter response with specified status code (-fc 403,401)"),
flagSet.BoolVarP(&options.OutputFilterErrorPage, "filter-error-page", "fep", false, "filter response with ML based error page detection"),
flagSet.BoolVarP(&options.FilterOutDuplicates, "filter-duplicates", "fd", false, "filter out near-duplicate responses"),
flagSet.StringVarP(&options.OutputFilterContentLength, "filter-length", "fl", "", "filter response with specified content length (-fl 23,33)"),
flagSet.StringVarP(&options.OutputFilterLinesCount, "filter-line-count", "flc", "", "filter response body with specified line count (-flc 423,532)"),
flagSet.StringVarP(&options.OutputFilterWordsCount, "filter-word-count", "fwc", "", "filter response body with specified word count (-fwc 423,532)"),
Expand Down
25 changes: 25 additions & 0 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/PuerkitoBio/goquery"
"github.com/corona10/goimagehash"
"github.com/mfonda/simhash"
asnmap "github.com/projectdiscovery/asnmap/libs"
"github.com/projectdiscovery/fastdialer/fastdialer"
"github.com/projectdiscovery/httpx/common/customextract"
Expand Down Expand Up @@ -86,6 +87,7 @@ type Runner struct {
browser *Browser
errorPageClassifier *errorpageclassifier.ErrorPageClassifier
pHashClusters []pHashCluster
simHashes map[uint64]struct{}
httpApiEndpoint *Server
}

Expand Down Expand Up @@ -359,6 +361,7 @@ func New(options *Options) (*Runner, error) {
}

runner.errorPageClassifier = errorpageclassifier.New()
runner.simHashes = make(map[uint64]struct{})

if options.HttpApiEndpoint != "" {
apiServer := NewServer(options.HttpApiEndpoint, options)
Expand Down Expand Up @@ -514,6 +517,23 @@ func (r *Runner) seen(k string) bool {
return ok
}

// duplicate reports whether resp should be treated as a repeat of a response
// that was already recorded in r.simHashes.
//
// A simhash fingerprint is computed over the word features of resp. The
// response counts as a duplicate when that exact fingerprint is already
// stored, or when simhash.Compare between it and any stored fingerprint
// yields a value of at most 3. In both cases a warning is logged and true is
// returned without modifying the set. Otherwise the fingerprint is added to
// r.simHashes and false is returned.
//
// NOTE(review): r.simHashes is a plain map and no locking happens here —
// confirm this is only ever called from a single goroutine.
func (r *Runner) duplicate(resp []byte) bool {
	// Kept deliberately low so that only very similar responses are dropped
	// (favours precision over recall).
	const maxDistance = 3

	fingerprint := simhash.Simhash(simhash.NewWordFeatureSet(resp))

	// Fast path: identical fingerprint already recorded.
	if _, known := r.simHashes[fingerprint]; known {
		gologger.Warning().Msgf("Skipping duplicate response with simhash %d\n", fingerprint)
		return true
	}

	// Slow path: scan every recorded fingerprint for a close-enough match.
	for recorded := range r.simHashes {
		if simhash.Compare(recorded, fingerprint) > maxDistance {
			continue
		}
		gologger.Warning().Msgf("Skipping near-duplicate response with simhash %d\n", fingerprint)
		return true
	}

	// First time a response like this is seen; remember it for later calls.
	r.simHashes[fingerprint] = struct{}{}
	return false
}

func (r *Runner) testAndSet(k string) bool {
// skip empty lines
k = strings.TrimSpace(k)
Expand Down Expand Up @@ -884,6 +904,11 @@ func (r *Runner) RunEnumeration() {
logFilteredErrorPage(r.options.OutputFilterErrorPagePath, resp.URL)
continue
}

if r.options.FilterOutDuplicates && r.duplicate(resp.Response.Data) {
continue
}

if len(r.options.filterStatusCode) > 0 && sliceutil.Contains(r.options.filterStatusCode, resp.StatusCode) {
continue
}
Expand Down