Skip to content

Commit 13bec55

Browse files
author
Chris Stockton
committed
fix: tighten email validation rules
The goal here is to limit the conditions which resolver implementations can affect the determinism of our DNS checks, without allowing transient DNS failures to block signups: * Reject single label email domains (`a@a`, `a@gmail`) * Use absolute FQDN for DNS lookups to avoid implicit search behavior * Preserves the RFC 5321 fallback, but narrows when it is called * Add a whitelist for major email providers to lower latency * Reject mutated display name address that the mail parser might accept * Add test coverage for some corner cases
1 parent c553b10 commit 13bec55

File tree

2 files changed

+130
-9
lines changed

2 files changed

+130
-9
lines changed

internal/mailer/validateclient/validateclient.go

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,53 @@ var invalidHostMap = map[string]bool{
5656

5757
// Hundreds of typos per day for this.
5858
"gamil.com": true,
59+
"gamai.com": true,
5960

6061
// These are not email providers, but people often use them.
6162
"anonymous.com": true,
6263
"email.com": true,
6364
}
6465

66+
// We skip checking hosts for some of the biggest well known public email
67+
// providers.
68+
//
69+
// Generated via:
70+
//
71+
// test -f "gmass.html" \
72+
// || wget -O "gmass.html" https://www.gmass.co/domains
73+
// cat "gmass.html" \
74+
// | pup 'div#status-details a json{}' \
75+
// | jq -r 'map([(.text | split(" "))[1], .children[0].text])
76+
// | map("`" + .[0] + ".` : true, // " + .[1]) | join("\n")' \
77+
// | sed 's| emails sent||g' \
78+
// | head -20
79+
//
80+
// Note:
81+
// This only affects the validateHost code, if we have an exact match we don't
82+
// bother to make a dns request.
83+
var hostWhiteList = map[string]bool{
84+
`gmail.com.`: true, // 563,185,814
85+
`yahoo.com.`: true, // 107,413,999
86+
`hotmail.com.`: true, // 98,895,904
87+
`aol.com.`: true, // 31,839,178
88+
`outlook.com.`: true, // 11,826,511
89+
`comcast.net.`: true, // 9,663,112
90+
`icloud.com.`: true, // 9,274,437
91+
`msn.com.`: true, // 7,101,124
92+
`hotmail.co.uk.`: true, // 5,456,609
93+
`sbcglobal.net.`: true, // 5,167,305
94+
`live.com.`: true, // 5,140,589
95+
`yahoo.co.in.`: true, // 4,091,798
96+
`me.com.`: true, // 3,920,969
97+
`att.net.`: true, // 3,688,388
98+
`mail.ru.`: true, // 3,583,276
99+
`bellsouth.net.`: true, // 3,455,683
100+
`rediffmail.com`: true, // 3,400,300
101+
`cox.net.`: true, // 3,254,227
102+
`yahoo.co.uk.`: true, // 3,218,049
103+
`verizon.net.`: true, // 3,046,288
104+
}
105+
65106
const (
66107
validateEmailTimeout = 3 * time.Second
67108
)
@@ -222,6 +263,12 @@ func (ev *emailValidator) validateStatic(email string) (string, error) {
222263
return "", ErrInvalidEmailFormat
223264
}
224265

266+
// The mail package supports RFC 5322 addresses which are not valid for
267+
// signup users (e.g. Chris Stockton <chris.stockton@host...>).
268+
if ea.Address != email {
269+
return "", ErrInvalidEmailFormat
270+
}
271+
225272
i := strings.LastIndex(ea.Address, "@")
226273
if i == -1 {
227274
return "", ErrInvalidEmailFormat
@@ -291,13 +338,16 @@ func (ev *emailValidator) validateService(ctx context.Context, email string) err
291338
return nil
292339
}
293340

341+
// 32 bytes is plenty for the payload: {"valid": true|false}
294342
dec := json.NewDecoder(io.LimitReader(res.Body, 1<<5))
295343
if err := dec.Decode(&resObject); err != nil {
296344
return nil
297345
}
298346

299-
// If the object did not contain a valid key we consider the check as
300-
// failed. We _must_ get a valid JSON response with a "valid" field.
347+
// If the resObject contained no "valid" key we ignore the service and
348+
// return a nil error. If the Valid key is present AND set to true we
349+
// will return a nil error, otherwise the valid key was present & false
350+
// so we fall through to ErrInvalidEmailAddress.
301351
if resObject.Valid == nil || *resObject.Valid {
302352
return nil
303353
}
@@ -320,7 +370,29 @@ func (ev *emailValidator) validateProviders(name, host string) error {
320370
return nil
321371
}
322372

373+
// NOTE(cstockton): We could consider using[1] in the future for an additional
374+
// data point.
375+
//
376+
// [1] https://pkg.go.dev/golang.org/x/net/publicsuffix
323377
func (ev *emailValidator) validateHost(ctx context.Context, host string) error {
378+
379+
// As far as I know there is no such thing as valid single label hosts for
380+
// email. This will block anything like: email@a, email@mycompanygltd and
381+
// so on.
382+
if !strings.Contains(host, ".") {
383+
return ErrInvalidEmailDNS
384+
}
385+
386+
// Require a FQDN to remove possible implict search behavior.
387+
if !strings.HasSuffix(host, ".") {
388+
host = host + "."
389+
}
390+
391+
// If the host is whitelisted skip mx check all together.
392+
if hostWhiteList[host] {
393+
return nil
394+
}
395+
324396
mxs, err := validateEmailResolver.LookupMX(ctx, host)
325397
if !isHostNotFound(err) {
326398
return ev.validateMXRecords(mxs, nil)

internal/mailer/validateclient/validateclient_test.go

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package validateclient
33
import (
44
"context"
55
"fmt"
6+
"net"
67
"net/http"
78
"net/http/httptest"
89
"sync/atomic"
@@ -201,7 +202,10 @@ func TestValidateEmailExtended(t *testing.T) {
201202
// valid (has mx record)
202203
{email: "[email protected]"},
203204
{email: "[email protected]"},
204-
{email: "[email protected]"},
205+
{email: "[email protected]"},
206+
207+
// valid (RFC 5321 fallback, supabase.co has no mx, but valid A)
208+
{email: "[email protected]"},
205209

206210
// bad format
207211
{email: "", err: "invalid_email_format"},
@@ -210,6 +214,25 @@ func TestValidateEmailExtended(t *testing.T) {
210214
{email: "@supabase.io", err: "invalid_email_format"},
211215
{email: "[email protected]", err: "invalid_email_format"},
212216

217+
// invalid providers check doesn't allow short gmails
218+
{email: "[email protected]", err: "invalid_email_address"},
219+
{email: "[email protected]"}, // allow other providers
220+
221+
// ensure the mail parser does not result in mutated addr
222+
{email: "Chris Stockton <[email protected]>",
223+
err: "invalid_email_format"},
224+
225+
// Check dot suffixes are invalid (mutations)
226+
{email: "[email protected].", err: "invalid_email_format"},
227+
{email: "a@", err: "invalid_email_format"},
228+
{email: "a@.", err: "invalid_email_format"},
229+
{email: "a@a.", err: "invalid_email_format"},
230+
{email: "[email protected].", err: "invalid_email_format"},
231+
{email: "[email protected]..", err: "invalid_email_format"},
232+
{email: "[email protected]", err: "invalid_email_format"},
233+
{email: "[email protected].", err: "invalid_email_format"},
234+
{email: "[email protected]", err: "invalid_email_format"},
235+
213236
// invalid: valid mx records, but invalid and often typed
214237
// (invalidEmailMap)
215238
{email: "[email protected]", err: "invalid_email_address"},
@@ -235,23 +258,23 @@ func TestValidateEmailExtended(t *testing.T) {
235258
// valid but not actually valid and typed a lot
236259
{email: "a@invalid", err: "invalid_email_dns"},
237260
{email: "[email protected]", err: "invalid_email_dns"},
238-
{email: "test@invalid", err: "invalid_email_dns"},
239261

240262
// various invalid emails
241263
{email: "[email protected]", err: "invalid_email_dns"},
242264
{email: "[email protected]", err: "invalid_email_dns"},
243265
{email: "[email protected]", err: "invalid_email_dns"},
244-
245-
// test blocked mx records
246-
{email: "[email protected]", err: "invalid_email_mx"},
247-
248266
// this low timeout should simulate a dns timeout, which should
249267
// not be treated as an invalid email.
250268
{email: "[email protected]",
251269
timeout: time.Millisecond},
252270

253271
// likewise for a valid email
254-
{email: "[email protected]", timeout: time.Millisecond},
272+
{email: "[email protected]", timeout: time.Millisecond},
273+
274+
// invalid dns
275+
{email: "a@a", err: "invalid_email_dns"},
276+
{email: "[email protected]", err: "invalid_email_dns"},
277+
{email: "a@abcd", err: "invalid_email_dns"},
255278
}
256279

257280
cfg := conf.MailerConfiguration{
@@ -268,6 +291,7 @@ func TestValidateEmailExtended(t *testing.T) {
268291

269292
ev := newEmailValidator(cfg)
270293

294+
seen := make(map[string]bool)
271295
for idx, tc := range cases {
272296
func(timeout time.Duration) {
273297
if timeout == 0 {
@@ -277,6 +301,11 @@ func TestValidateEmailExtended(t *testing.T) {
277301
ctx, cancel := context.WithTimeout(ctx, timeout)
278302
defer cancel()
279303

304+
if seen[tc.email] {
305+
t.Fatalf("duplicate email %v in test cases", tc.email)
306+
}
307+
seen[tc.email] = true
308+
280309
now := time.Now()
281310
err := ev.Validate(ctx, tc.email)
282311
dur := time.Since(now)
@@ -294,4 +323,24 @@ func TestValidateEmailExtended(t *testing.T) {
294323

295324
}(tc.timeout)
296325
}
326+
327+
// test blocked mx list
328+
{
329+
for _, v := range []string{
330+
"hotmail-com.olc.protection.outlook.com",
331+
"hotmail-com.olc.protection.outlook.com.",
332+
} {
333+
{
334+
err := ev.validateMXRecords([]*net.MX{{Host: v}}, nil)
335+
require.Error(t, err)
336+
require.Contains(t, err.Error(), ErrInvalidEmailMX.Error())
337+
}
338+
339+
{
340+
err := ev.validateMXRecords(nil, []string{v})
341+
require.Error(t, err)
342+
require.Contains(t, err.Error(), ErrInvalidEmailMX.Error())
343+
}
344+
}
345+
}
297346
}

0 commit comments

Comments
 (0)