@@ -18,7 +18,7 @@ public class Selectable : AbstractSelectable
1818 /// <param name="removeOutboundLinks">是否去除外链</param>
1919 public Selectable ( string html , string url , bool removeOutboundLinks = true )
2020 {
21- HtmlDocument document = new HtmlDocument { OptionAutoCloseOnEnd = true } ;
21+ HtmlDocument document = new HtmlDocument { OptionAutoCloseOnEnd = true } ;
2222 document . LoadHtml ( html ) ;
2323
2424 if ( ! string . IsNullOrWhiteSpace ( url ) )
@@ -32,7 +32,8 @@ public Selectable(string html, string url, bool removeOutboundLinks = true)
3232 RemoveOutboundLinks ( document , domain ) ;
3333 }
3434 }
35- Elements = new List < dynamic > { document . DocumentNode . OuterHtml } ;
35+
36+ Elements = new List < dynamic > { document . DocumentNode . OuterHtml } ;
3637 }
3738
3839 /// <summary>
@@ -41,7 +42,7 @@ public Selectable(string html, string url, bool removeOutboundLinks = true)
4142 /// <param name="json">Json</param>
public Selectable(string json)
{
    // Wrap the supplied JSON text as the single element held by this selectable.
    var elements = new List<dynamic>();
    elements.Add(json);
    Elements = elements;
}
4647
4748 /// <summary>
@@ -86,29 +87,29 @@ public override dynamic Environment(string field)
8687 switch ( key )
8788 {
8889 case "now" :
89- {
90- return DateTime . Now . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
91- }
90+ {
91+ return DateTime . Now . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
92+ }
9293 case "monday" :
93- {
94- var now = DateTime . Now ;
95- int i = now . DayOfWeek - DayOfWeek . Monday == - 1 ? 6 : - 1 ;
96- TimeSpan ts = new TimeSpan ( i , 0 , 0 , 0 ) ;
97- return now . Subtract ( ts ) . Date . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
98- }
94+ {
95+ var now = DateTime . Now ;
96+ int i = now . DayOfWeek - DayOfWeek . Monday == - 1 ? 6 : - 1 ;
97+ TimeSpan ts = new TimeSpan ( i , 0 , 0 , 0 ) ;
98+ return now . Subtract ( ts ) . Date . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
99+ }
99100 case "today" :
100- {
101- return DateTime . Now . Date . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
102- }
101+ {
102+ return DateTime . Now . Date . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
103+ }
103104 case "monthly" :
104- {
105- var now = DateTime . Now ;
106- return now . AddDays ( now . Day * - 1 + 1 ) . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
107- }
105+ {
106+ var now = DateTime . Now ;
107+ return now . AddDays ( now . Day * - 1 + 1 ) . ToString ( "yyyy/MM/dd hh:mm:ss" ) ;
108+ }
108109 default :
109- {
110- return Properties . ContainsKey ( field ) ? Properties [ field ] : null ;
111- }
110+ {
111+ return Properties . ContainsKey ( field ) ? Properties [ field ] : null ;
112+ }
112113 }
113114 }
114115
@@ -128,13 +129,15 @@ public override ISelectable Links()
128129 results . Add ( link ) ;
129130 }
130131 }
132+
131133 foreach ( var link in sourceLinks )
132134 {
133135 if ( Uri . TryCreate ( link , UriKind . RelativeOrAbsolute , out _ ) )
134136 {
135137 results . Add ( link ) ;
136138 }
137139 }
140+
138141 return new Selectable ( results . ToList ( ) ) ;
139142 }
140143
@@ -166,8 +169,10 @@ public override ISelectable Select(ISelector selector)
166169 results . Add ( result ) ;
167170 }
168171 }
172+
169173 return new Selectable ( results ) ;
170174 }
175+
171176 throw new ExtractionException ( $ "{ nameof ( selector ) } is null.") ;
172177 }
173178
@@ -189,6 +194,7 @@ public override ISelectable SelectList(ISelector selector)
189194 results . AddRange ( result ) ;
190195 }
191196 }
197+
192198 return new Selectable ( results ) ;
193199 }
194200
@@ -204,8 +210,9 @@ public override IEnumerable<ISelectable> Nodes()
204210 List < ISelectable > result = new List < ISelectable > ( ) ;
205211 foreach ( var element in Elements )
206212 {
207- result . Add ( new Selectable ( new List < dynamic > ( ) { element } ) ) ;
213+ result . Add ( new Selectable ( new List < dynamic > ( ) { element } ) ) ;
208214 }
215+
209216 return result ;
210217 }
211218
@@ -242,26 +249,28 @@ public static string CanonicalizeUrl(string url, string refer)
242249
243250 private void FixAllRelativeHref ( HtmlDocument document , string url )
244251 {
245- var nodes = document . DocumentNode . SelectNodes ( "//a[not(starts-with( @href,'http') or starts-with(@href,'https'))] " ) ;
246- if ( nodes != null )
252+ var hrefNodes = document . DocumentNode . SelectNodes ( ".// @href" ) ;
253+ if ( hrefNodes != null )
247254 {
248- foreach ( var node in nodes )
255+ foreach ( var node in hrefNodes )
249256 {
250- if ( node . Attributes [ "href" ] != null )
257+ var href = node . Attributes [ "href" ] . Value ;
258+ if ( ! string . IsNullOrWhiteSpace ( href ) && ! href . Contains ( "http" ) && ! href . Contains ( "https" ) )
251259 {
252- node . Attributes [ "href" ] . Value = CanonicalizeUrl ( node . Attributes [ " href" ] . Value , url ) ;
260+ node . Attributes [ "href" ] . Value = CanonicalizeUrl ( href , url ) ;
253261 }
254262 }
255263 }
256264
257- var images = document . DocumentNode . SelectNodes ( ".//img " ) ;
258- if ( images != null )
265+ var srcNodes = document . DocumentNode . SelectNodes ( ".//@src " ) ;
266+ if ( srcNodes != null )
259267 {
260- foreach ( var image in images )
268+ foreach ( var node in srcNodes )
261269 {
262- if ( image . Attributes [ "src" ] != null )
270+ var src = node . Attributes [ "src" ] . Value ;
271+ if ( ! string . IsNullOrWhiteSpace ( src ) && ! src . Contains ( "http" ) && ! src . Contains ( "https" ) )
263272 {
264- image . Attributes [ "src" ] . Value = CanonicalizeUrl ( image . Attributes [ " src" ] . Value , url ) ;
273+ node . Attributes [ "src" ] . Value = CanonicalizeUrl ( src , url ) ;
265274 }
266275 }
267276 }
@@ -279,17 +288,20 @@ private void RemoveOutboundLinks(HtmlDocument document, params string[] domains)
279288 foreach ( var domain in domains )
280289 {
281290 var href = node . Attributes [ "href" ] ? . Value ;
282- if ( ! string . IsNullOrWhiteSpace ( href ) && System . Text . RegularExpressions . Regex . IsMatch ( href , domain ) )
291+ if ( ! string . IsNullOrWhiteSpace ( href ) &&
292+ System . Text . RegularExpressions . Regex . IsMatch ( href , domain ) )
283293 {
284294 isMatch = true ;
285295 break ;
286296 }
287297 }
298+
288299 if ( ! isMatch )
289300 {
290301 deleteNodes . Add ( node ) ;
291302 }
292303 }
304+
293305 foreach ( var node in deleteNodes )
294306 {
295307 node . Remove ( ) ;
0 commit comments