Skip to content

Commit 72befdc

Browse files
committed
Also unfold and split lazily
This has little effect on the specs, but on my header reading example it makes about a 10x performance difference, finally bringing it within one order of magnitude of the "fast hacky solution" at https://gist.github.com/5901bbd810c08ed3d0b1
1 parent 2bd6dac commit 72befdc

File tree

4 files changed

+24
-23
lines changed

4 files changed

+24
-23
lines changed

lib/mail/field.rb

Lines changed: 19 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,8 @@ def initialize(name, value = nil, charset = 'utf-8')
115115
case
116116
when name =~ /:/ # Field.new("field-name: field data")
117117
@charset = value.blank? ? charset : value
118-
@name, @value = split(name)
118+
@name = name[FIELD_PREFIX]
119+
@raw_value = name
119120
when name !~ /:/ && value.blank? # Field.new("field-name")
120121
@name = name
121122
@value = nil
@@ -125,19 +126,20 @@ def initialize(name, value = nil, charset = 'utf-8')
125126
@value = value
126127
@charset = charset
127128
end
128-
return self
129+
@name = FIELD_NAME_MAP[@name.to_s.downcase] || @name
129130
end
130131

131132
def field=(value)
132133
@field = value
133134
end
134135

135136
def field
137+
_, @value = split(@raw_value) if @raw_value && !@value
136138
@field ||= create_field(@name, @value, @charset)
137139
end
138140

139141
def name
140-
FIELD_NAME_MAP[@name.to_s.downcase] || @name
142+
@name
141143
end
142144

143145
def value
@@ -198,7 +200,21 @@ def split(raw_field)
198200
STDERR.puts "WARNING: Could not parse (and so ignoring) '#{raw_field}'"
199201
end
200202

203+
# 2.2.3. Long Header Fields
204+
#
205+
# The process of moving from this folded multiple-line representation
206+
# of a header field to its single line representation is called
207+
# "unfolding". Unfolding is accomplished by simply removing any CRLF
208+
# that is immediately followed by WSP. Each header field should be
209+
# treated in its unfolded form for further syntactic and semantic
210+
# evaluation.
211+
def unfold(string)
212+
string.gsub(/[\r\n \t]+/m, ' ')
213+
end
214+
201215
def create_field(name, value, charset)
216+
value = unfold(value) if value.is_a?(String) || value.is_a?(Mail::Multibyte::Chars)
217+
202218
begin
203219
new_field(name, value, charset)
204220
rescue Mail::Field::ParseError => e

lib/mail/header.rb

Lines changed: 1 addition & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -244,27 +244,10 @@ def raw_source=(val)
244244
@raw_source = val
245245
end
246246

247-
# 2.2.3. Long Header Fields
248-
#
249-
# The process of moving from this folded multiple-line representation
250-
# of a header field to its single line representation is called
251-
# "unfolding". Unfolding is accomplished by simply removing any CRLF
252-
# that is immediately followed by WSP. Each header field should be
253-
# treated in its unfolded form for further syntactic and semantic
254-
# evaluation.
255-
def unfold(string)
256-
string.gsub(/#{CRLF}#{WSP}+/, ' ').gsub(/#{WSP}+/, ' ')
257-
end
258-
259-
# Returns the header with all the folds removed
260-
def unfolded_header
261-
@unfolded_header ||= unfold(raw_source)
262-
end
263-
264247
# Splits an unfolded and line break cleaned header into individual field
265248
# strings.
266249
def split_header
267-
self.fields = unfolded_header.split(CRLF)
250+
self.fields = raw_source.split(HEADER_SPLIT)
268251
end
269252

270253
def select_field_for(name)

lib/mail/patterns.rb

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,12 @@ module Patterns
2020
FWS = /#{CRLF}#{WSP}*/
2121
TEXT = /[#{text}]/ # + obs-text
2222
FIELD_NAME = /[#{field_name}]+/
23-
FIELD_BODY = /.+/
23+
FIELD_PREFIX = /\A(#{FIELD_NAME})/
24+
FIELD_BODY = /.+/m
2425
FIELD_LINE = /^[#{field_name}]+:\s*.+$/
2526
FIELD_SPLIT = /^(#{FIELD_NAME})\s*:\s*(#{FIELD_BODY})?$/
2627
HEADER_LINE = /^([#{field_name}]+:\s*.+)$/
28+
HEADER_SPLIT = /#{CRLF}(?!#{WSP})/
2729

2830
QP_UNSAFE = /[^#{qp_safe}]/
2931
QP_SAFE = /[#{qp_safe}]/

spec/mail/message_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ def basic_email
109109

110110
it "should raise a warning (and keep parsing) on having an incorrectly formatted header" do
111111
STDERR.should_receive(:puts).with("WARNING: Could not parse (and so ignoring) 'quite Delivered-To: [email protected]'")
112-
Mail.read(fixture('emails', 'plain_emails', 'raw_email_incorrect_header.eml'))
112+
Mail.read(fixture('emails', 'plain_emails', 'raw_email_incorrect_header.eml')).to_s
113113
end
114114

115115
it "should read in an email message and basically parse it" do

0 commit comments

Comments
 (0)