From ab0d57c1945271d99c2d324e0d382dc92c004e94 Mon Sep 17 00:00:00 2001 From: Emil Guliyev Date: Mon, 13 Aug 2018 17:49:29 -0700 Subject: [PATCH 1/3] Add benchmarks --- publicsuffix/publicsuffix_test.go | 35 +++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/publicsuffix/publicsuffix_test.go b/publicsuffix/publicsuffix_test.go index b85212a..5dd3d9b 100644 --- a/publicsuffix/publicsuffix_test.go +++ b/publicsuffix/publicsuffix_test.go @@ -3,6 +3,8 @@ package publicsuffix import ( "reflect" "testing" + + xlib "golang.org/x/net/publicsuffix" ) func TestNewListFromString(t *testing.T) { @@ -439,3 +441,36 @@ func TestCookieJarList(t *testing.T) { } } } + +var benchmarkTestCases = map[string]string{ + "example.com": "example.com", + "example.id.au": "example.id.au", + "www.ck": "www.ck", + "foo.bar.xn--55qx5d.cn": "bar.xn--55qx5d.cn", + "a.b.c.minami.fukuoka.jp": "c.minami.fukuoka.jp", + "posts-and-telecommunications.museum": "", + "www.example.pvt.k12.ma.us": "example.pvt.k12.ma.us", + "many.lol": "many.lol", + "the.russian.for.moscow.is.xn--80adxhks": "is.xn--80adxhks", + "blah.blah.s3-us-west-1.amazonaws.com": "blah.s3-us-west-1.amazonaws.com", + "thing.dyndns.org": "thing.dyndns.org", + "nosuchtld": "", +} + +func benchmarkDomain(b *testing.B, domainFunc func(string) (string, error)) { + var got string + for i := 0; i < b.N; i++ { + for input := range benchmarkTestCases { + got, _ = domainFunc(input) + } + } + _ = got +} + +func BenchmarkDomain(b *testing.B) { + benchmarkDomain(b, Domain) +} + +func BenchmarkXNet(b *testing.B) { + benchmarkDomain(b, xlib.EffectiveTLDPlusOne) +} From ceaaabb703e762cfa54cead77c3619b938204efd Mon Sep 17 00:00:00 2001 From: Emil Guliyev Date: Mon, 13 Aug 2018 17:49:44 -0700 Subject: [PATCH 2/3] Move to a hash implementation --- cmd/gen/gen.go | 8 +++--- publicsuffix/publicsuffix.go | 44 +++++++++---------------------- publicsuffix/publicsuffix_test.go | 14 +++++----- publicsuffix/rules.go | 4 ++- 4 files 
changed, 29 insertions(+), 41 deletions(-) diff --git a/cmd/gen/gen.go b/cmd/gen/gen.go index 06da611..a51558e 100644 --- a/cmd/gen/gen.go +++ b/cmd/gen/gen.go @@ -11,6 +11,8 @@ import ( "context" "fmt" "go/format" + "io/ioutil" + "log" "net/http" "strings" "text/template" @@ -18,8 +20,6 @@ import ( "github.com/google/go-github/github" "github.com/weppos/publicsuffix-go/publicsuffix" - "io/ioutil" - "log" ) const ( @@ -39,7 +39,9 @@ func init() { { {{$r.Type}}, "{{$r.Value}}", {{$r.Length}}, {{$r.Private}} }, {{end}} } - DefaultList.rules = r[:] + for i := range r { + DefaultList.AddRule(&r[i]) + } } ` diff --git a/publicsuffix/publicsuffix.go b/publicsuffix/publicsuffix.go index 1efa48b..bcc47eb 100644 --- a/publicsuffix/publicsuffix.go +++ b/publicsuffix/publicsuffix.go @@ -81,12 +81,14 @@ type FindOptions struct { type List struct { // rules is kept private because you should not access rules directly // for lookup optimization the list will not be guaranteed to be a simple slice forever - rules []Rule + rules map[string]*Rule } // NewList creates a new empty list. func NewList() *List { - return &List{} + return &List{ + rules: map[string]*Rule{}, + } } // NewListFromString parses a string that represents a Public Suffix source @@ -132,7 +134,7 @@ func (l *List) LoadFile(path string, options *ParserOption) ([]Rule, error) { // The list may be optimized internally for lookups, therefore the algorithm // will decide the best position for the new rule. func (l *List) AddRule(r *Rule) error { - l.rules = append(l.rules, *r) + l.rules[r.Value] = r return nil } @@ -195,45 +197,25 @@ Scanning: // Find and returns the most appropriate rule for the domain name. 
func (l *List) Find(name string, options *FindOptions) *Rule { - var bestRule *Rule - if options == nil { options = DefaultFindOptions } - for _, r := range l.selectRules(name, options) { - if r.Type == ExceptionType { - return &r + for { + rule, ok := l.rules[name] + if ok && (!options.IgnorePrivate || !rule.Private) { + return rule } - if bestRule == nil || bestRule.Length < r.Length { - bestRule = &r + i := strings.IndexRune(name, '.') + if i < 0 { + break } - } - - if bestRule != nil { - return bestRule + name = name[i+1:] } return options.DefaultRule } -func (l *List) selectRules(name string, options *FindOptions) []Rule { - var found []Rule - - // In this phase the search is a simple sequential scan - for _, rule := range l.rules { - if !rule.Match(name) { - continue - } - if options.IgnorePrivate && rule.Private { - continue - } - found = append(found, rule) - } - - return found -} - // NewRule parses the rule content, creates and returns a Rule. // // The content of the rule MUST be encoded in ASCII (A-labels). 
diff --git a/publicsuffix/publicsuffix_test.go b/publicsuffix/publicsuffix_test.go index 5dd3d9b..1683cae 100644 --- a/publicsuffix/publicsuffix_test.go +++ b/publicsuffix/publicsuffix_test.go @@ -44,7 +44,7 @@ blogspot.com testRules = []Rule{} for _, rule := range rules { if rule.Private == false { - testRules = append(testRules, rule) + testRules = append(testRules, *rule) } } if want, got := 2, len(testRules); want != got { @@ -55,7 +55,7 @@ blogspot.com testRules = []Rule{} for _, rule := range rules { if rule.Private == true { - testRules = append(testRules, rule) + testRules = append(testRules, *rule) } } if want, got := 1, len(testRules); want != got { @@ -143,7 +143,7 @@ func TestNewListFromFile(t *testing.T) { testRules = []Rule{} for _, rule := range rules { if rule.Private == false { - testRules = append(testRules, rule) + testRules = append(testRules, *rule) } } if want, got := 2, len(testRules); want != got { @@ -154,7 +154,7 @@ func TestNewListFromFile(t *testing.T) { testRules = []Rule{} for _, rule := range rules { if rule.Private == true { - testRules = append(testRules, rule) + testRules = append(testRules, *rule) } } if want, got := 1, len(testRules); want != got { @@ -175,8 +175,10 @@ func TestListAddRule(t *testing.T) { if list.Size() != 1 { t.Fatalf("List should have 1 rule, got %v", list.Size()) } - if got := &list.rules[0]; !reflect.DeepEqual(rule, got) { - t.Fatalf("List[0] expected to be %v, got %v", rule, got) + for _, got := range list.rules { + if !reflect.DeepEqual(rule, got) { + t.Fatalf("List[0] expected to be %v, got %v", rule, got) + } } } diff --git a/publicsuffix/rules.go b/publicsuffix/rules.go index 43d0726..c24c91b 100644 --- a/publicsuffix/rules.go +++ b/publicsuffix/rules.go @@ -8626,5 +8626,7 @@ func init() { {1, "now.sh", 2, true}, {1, "zone.id", 2, true}, } - DefaultList.rules = r[:] + for i := range r { + DefaultList.AddRule(&r[i]) + } } From fec11fc7d0dc616a381fb5992330c62d9287f5ba Mon Sep 17 00:00:00 2001 From: Emil 
Guliyev Date: Tue, 14 Aug 2018 01:51:29 -0700 Subject: [PATCH 3/3] Optimize Decompose --- publicsuffix/publicsuffix.go | 107 ++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 53 deletions(-) diff --git a/publicsuffix/publicsuffix.go b/publicsuffix/publicsuffix.go index bcc47eb..a98c904 100644 --- a/publicsuffix/publicsuffix.go +++ b/publicsuffix/publicsuffix.go @@ -11,7 +11,6 @@ import ( "io" "net/http/cookiejar" "os" - "regexp" "strings" "golang.org/x/net/idna" @@ -80,7 +79,6 @@ type FindOptions struct { // List represents a Public Suffix List. type List struct { // rules is kept private because you should not access rules directly - // for lookup optimization the list will not be guaranteed to be a simple slice forever rules map[string]*Rule } @@ -208,12 +206,12 @@ func (l *List) Find(name string, options *FindOptions) *Rule { } i := strings.IndexRune(name, '.') if i < 0 { - break + return options.DefaultRule } name = name[i+1:] } - return options.DefaultRule + return nil } // NewRule parses the rule content, creates and returns a Rule. @@ -291,36 +289,46 @@ func (r *Rule) Match(name string) bool { // Decompose takes a name as input and decomposes it into a tuple of <TRD+SLD, TLD>, // according to the rule definition and type. -func (r *Rule) Decompose(name string) [2]string { - var parts []string - +func (r *Rule) Decompose(name string) (result [2]string) { + if r == DefaultRule { + i := strings.LastIndex(name, ".") + if i < 0 { + return + } + result[0], result[1] = name[:i], name[i+1:] + return + } switch r.Type { + case NormalType: + name = strings.TrimSuffix(name, r.Value) + if len(name) == 0 { + return + } + result[0], result[1] = name[:len(name)-1], r.Value case WildcardType: - parts = append([]string{`.*?`}, r.parts()...)
- default: - parts = r.parts() - } - - suffix := strings.Join(parts, `\.`) - re := regexp.MustCompile(fmt.Sprintf(`^(.+)\.(%s)$`, suffix)) - - matches := re.FindStringSubmatch(name) - if len(matches) < 3 { - return [2]string{"", ""} - } - - return [2]string{matches[1], matches[2]} -} - -func (r *Rule) parts() []string { - labels := Labels(r.Value) - if r.Type == ExceptionType { - return labels[1:] - } - if r.Type == WildcardType && r.Value == "" { - return []string{} + name := strings.TrimSuffix(name, r.Value) + if len(name) == 0 { + return + } + name = name[:len(name)-1] + i := strings.LastIndex(name, ".") + if i < 0 { + return + } + result[0], result[1] = name[:i], name[i+1:]+"."+r.Value + case ExceptionType: + i := strings.IndexRune(r.Value, '.') + if i < 0 { + return + } + suffix := r.Value[i+1:] + name = strings.TrimSuffix(name, suffix) + if len(name) == 0 { + return + } + result[0], result[1] = name[:len(name)-1], suffix } - return labels + return } // Labels decomposes given domain name into labels, @@ -414,7 +422,6 @@ func DomainFromListWithOptions(l *List, name string, options *FindOptions) (stri if err != nil { return "", err } - return dn.SLD + "." + dn.TLD, nil } @@ -440,12 +447,22 @@ func ParseFromListWithOptions(l *List, name string, options *FindOptions) (*Doma } r := l.Find(n, options) - if tld := r.Decompose(n)[1]; tld == "" { + parts := r.Decompose(n) + left, tld := parts[0], parts[1] + if tld == "" { return nil, fmt.Errorf("%s is a suffix", n) } - dn := &DomainName{Rule: r} - dn.TLD, dn.SLD, dn.TRD = decompose(r, n) + dn := &DomainName{ + Rule: r, + TLD: tld, + } + if i := strings.LastIndex(left, "."); i < 0 { + dn.SLD = left + } else { + dn.TRD = left[:i] + dn.SLD = left[i+1:] + } return dn, nil } @@ -453,31 +470,15 @@ func normalize(name string) (string, error) { ret := strings.ToLower(name) if ret == "" { - return "", fmt.Errorf("Name is blank") + return "", fmt.Errorf("name is blank") } if ret[0] == '.' 
{ - return "", fmt.Errorf("Name %s starts with a dot", ret) + return "", fmt.Errorf("name %s starts with a dot", ret) } return ret, nil } -func decompose(r *Rule, name string) (tld, sld, trd string) { - parts := r.Decompose(name) - left, tld := parts[0], parts[1] - - dot := strings.LastIndex(left, ".") - if dot == -1 { - sld = left - trd = "" - } else { - sld = left[dot+1:] - trd = left[0:dot] - } - - return -} - // ToASCII is a wrapper for idna.ToASCII. // // This wrapper exists because idna.ToASCII backward-compatibility was broken twice in few months