Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,72 @@ private async Task<IList<RealEstateAdPost>> GetAdsDirectlyAsync()
throw new RealEstateAdsPortalException($"({Name}): Error getting latest ads: {ex.Message}", ex);
}
}

/// <summary>
/// Reads a price from the inner text of the element selected by <paramref name="xpath"/>.
/// Everything matched by <c>RegexMatchers.AllNonNumberValues()</c> is removed from the text
/// before the remainder is parsed as a decimal.
/// </summary>
/// <param name="node">The HTML node that serves as the context for the XPath query</param>
/// <param name="xpath">XPath selecting the element whose inner text holds the price</param>
/// <returns>
/// The parsed price, or <c>decimal.Zero</c> when no element is selected
/// or the cleaned text cannot be parsed
/// </returns>
protected static decimal ParsePriceFromNode(HtmlNode node, string xpath)
{
    var priceNode = node.SelectSingleNode(xpath);
    var rawText = priceNode?.InnerText;
    if (rawText is null)
    {
        // Nothing selected (or no text available) - report "no price".
        return decimal.Zero;
    }

    var cleanedText = RegexMatchers.AllNonNumberValues().Replace(rawText, string.Empty);
    if (!decimal.TryParse(cleanedText, out var parsedPrice))
    {
        return decimal.Zero;
    }

    return parsedPrice;
}

/// <summary>
/// Parses layout information from text using regex pattern matching.
/// </summary>
/// <param name="text">The text to parse for layout information</param>
/// <returns>
/// The parsed Layout enum value, or Layout.NotSpecified when the pattern does not match
/// or the match contains no successful capture group
/// </returns>
protected static Layout ParseLayoutFromText(string text)
{
    var match = RegexMatchers.Layout().Match(text);
    if (!match.Success)
        return Layout.NotSpecified;

    // Group 0 is the whole match, so it is skipped; the first successful capture group is used.
    // FirstOrDefault (instead of First) keeps the documented "NotSpecified" fallback
    // even if the pattern matched without any capture group succeeding.
    var capture = match.Groups.Skip<Group>(1).FirstOrDefault(group => group.Success);
    if (capture is null)
        return Layout.NotSpecified;

    // Strip whatever RegexMatchers.AllWhitespaceCharacters() matches before converting the value.
    var layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(capture.Value, string.Empty);

    return LayoutExtensions.ToLayout(layoutValue);
}

/// <summary>
/// Parses floor area from text using regex pattern matching.
/// </summary>
/// <remarks>
/// The captured value is parsed independently of the current thread culture:
/// whitespace is removed and both <c>,</c> and <c>.</c> are treated as the decimal separator,
/// so the same ad text yields the same number on every machine.
/// </remarks>
/// <param name="text">The text to parse for floor area information</param>
/// <returns>
/// The parsed decimal floor area, or decimal.Zero when the pattern does not match,
/// no capture group succeeded, or the captured value is not a valid number
/// </returns>
protected static decimal ParseFloorAreaFromText(string text)
{
    var match = RegexMatchers.FloorArea().Match(text);
    if (!match.Success)
        return decimal.Zero;

    // Group 0 is the whole match, so it is skipped; the first successful capture group is used.
    // FirstOrDefault (instead of First) avoids an InvalidOperationException
    // if the pattern matched without any capture group succeeding.
    var capture = match.Groups.Skip<Group>(1).FirstOrDefault(group => group.Success);
    if (capture is null)
        return decimal.Zero;

    // Normalise the number format: drop all whitespace, unify the decimal separator to '.',
    // and trim a dangling separator at either end (e.g. "54," -> "54").
    var normalizedValue = string.Concat(capture.Value.Where(character => !char.IsWhiteSpace(character)))
        .Replace(',', '.')
        .Trim('.');

    // Thousands grouping is intentionally not allowed - otherwise "54,5" could be read as 545.
    const System.Globalization.NumberStyles styles =
        System.Globalization.NumberStyles.AllowLeadingSign | System.Globalization.NumberStyles.AllowDecimalPoint;

    return decimal.TryParse(normalizedValue, styles, System.Globalization.CultureInfo.InvariantCulture, out var floorArea)
        ? floorArea
        : decimal.Zero;
}

/// <summary>
/// Provides the textual price comment for ads whose numeric price is zero.
/// </summary>
/// <param name="price">The already parsed price; the comment is only looked up when it equals zero</param>
/// <param name="node">The HTML node that serves as the context for the XPath query</param>
/// <param name="xpath">XPath selecting the element whose inner text holds the price comment</param>
/// <returns>
/// The trimmed inner text of the selected element when <paramref name="price"/> is zero
/// (null if no element is selected); null for any non-zero price
/// </returns>
protected static string? GetPriceCommentWhenZero(decimal price, HtmlNode node, string xpath)
{
    if (price != decimal.Zero)
    {
        // A non-zero price needs no comment; the node is not queried at all.
        return null;
    }

    var commentNode = node.SelectSingleNode(xpath);
    return commentNode?.InnerText?.Trim();
}
}
77 changes: 40 additions & 37 deletions Portals/RealEstatesWatcher.AdsPortals.BazosCz/BazosCzAdsPortal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,60 +77,63 @@ private static Uri ParseWebUrl(HtmlNode node, string rootHost)

private static decimal ParsePrice(HtmlNode node)
{
var value = node.SelectSingleNode(@"./div[@class=""inzeratycena""]")?.InnerText;
if (value is null)
return decimal.Zero;

value = RegexMatchers.AllNonNumberValues().Replace(value, string.Empty);

return decimal.TryParse(value, out var price)
? price
: decimal.Zero;
return ParsePriceFromNode(node, @"./div[@class=""inzeratycena""]");
}

private static decimal ParseFloorArea(HtmlNode node)
{
var value = ParseTitle(node);

var result = RegexMatchers.FloorArea().Match(value);
if (!result.Success)
var floorArea = ParseFloorAreaFromText(value);
if (floorArea != decimal.Zero)
{
value = ParseText(node);
result = RegexMatchers.FloorArea().Match(value);
if (!result.Success)
return decimal.Zero;
// Handle special number format with dots and commas
var result = RegexMatchers.FloorArea().Match(value);
if (result.Success)
{
var floorAreaValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
floorAreaValue = floorAreaValue.Replace(".", ",")
.Replace(" ", string.Empty)
.Trim(',');

return decimal.TryParse(floorAreaValue, NumberStyles.AllowDecimalPoint, new NumberFormatInfo{ NumberDecimalSeparator = ","}, out var parsedFloorArea)
? parsedFloorArea
: decimal.Zero;
}
return floorArea;
}

var floorAreaValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
floorAreaValue = floorAreaValue.Replace(".", ",")
.Replace(" ", string.Empty)
.Trim(',');
value = ParseText(node);
var result2 = RegexMatchers.FloorArea().Match(value);
if (result2.Success)
{
var floorAreaValue = result2.Groups.Skip<Group>(1).First(group => group.Success).Value;
floorAreaValue = floorAreaValue.Replace(".", ",")
.Replace(" ", string.Empty)
.Trim(',');

return decimal.TryParse(floorAreaValue, NumberStyles.AllowDecimalPoint, new NumberFormatInfo{ NumberDecimalSeparator = ","}, out var parsedFloorArea)
? parsedFloorArea
: decimal.Zero;
}

return decimal.TryParse(floorAreaValue, NumberStyles.AllowDecimalPoint, new NumberFormatInfo{ NumberDecimalSeparator = ","}, out var floorArea)
? floorArea
: decimal.Zero;
return decimal.Zero;
}

private static string? ParsePriceComment(HtmlNode node) => ParsePrice(node) is decimal.Zero
? node.SelectSingleNode("./div[@class=\"inzeratycena\"]")?.InnerText?.Trim()
: null;
private static string? ParsePriceComment(HtmlNode node)
{
return GetPriceCommentWhenZero(ParsePrice(node), node, "./div[@class=\"inzeratycena\"]");
}

private static Layout ParseLayout(HtmlNode node)
{
var value = ParseTitle(node);

var result = RegexMatchers.Layout().Match(value);
if (!result.Success)
{
value = ParseText(node);
result = RegexMatchers.Layout().Match(value);
if (!result.Success)
return Layout.NotSpecified;
}

var layoutValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(layoutValue, string.Empty);
var layout = ParseLayoutFromText(value);
if (layout != Layout.NotSpecified)
return layout;

return LayoutExtensions.ToLayout(layoutValue);
value = ParseText(node);
return ParseLayoutFromText(value);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,15 +50,7 @@ private static string ParseTitle(HtmlNode node)

private static decimal ParsePrice(HtmlNode node)
{
var value = node.SelectSingleNode(".//span[contains(@class,'propertyPriceAmount')]")?.InnerText;
if (value is null)
return decimal.Zero;

value = RegexMatchers.AllNonNumberValues().Replace(value, string.Empty);

return decimal.TryParse(value, out var price)
? price
: decimal.Zero;
return ParsePriceFromNode(node, ".//span[contains(@class,'propertyPriceAmount')]");
}

private static decimal ParseAdditionalFees(HtmlNode node)
Expand All @@ -84,15 +76,7 @@ private static Layout ParseLayout(HtmlNode node)
return Layout.NotSpecified;

var value = HttpUtility.HtmlDecode(values[0].InnerText);

var result = RegexMatchers.Layout().Match(value);
if (!result.Success)
return Layout.NotSpecified;

var layoutValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(layoutValue, string.Empty);

return LayoutExtensions.ToLayout(layoutValue);
return ParseLayoutFromText(value);
}

private static decimal ParseFloorArea(HtmlNode node)
Expand All @@ -102,16 +86,7 @@ private static decimal ParseFloorArea(HtmlNode node)
return decimal.Zero;

var value = HttpUtility.HtmlDecode(values[^1].InnerText);

var result = RegexMatchers.FloorArea().Match(value);
if (!result.Success)
return decimal.Zero;

var floorAreaValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;

return decimal.TryParse(floorAreaValue, out var floorArea)
? floorArea
: decimal.Zero;
return ParseFloorAreaFromText(value);
}

private static Uri? ParseImageUrl(HtmlNode node)
Expand Down
21 changes: 2 additions & 19 deletions Portals/RealEstatesWatcher.AdsPortals.BidliCz/BidliCzAdsPortal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,7 @@ private static decimal ParsePrice(HtmlNode node)
private static Layout ParseLayout(HtmlNode node)
{
var value = ParseTitle(node);

var result = RegexMatchers.Layout().Match(value);
if (!result.Success)
return Layout.NotSpecified;

var layoutValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(layoutValue, string.Empty);

return LayoutExtensions.ToLayout(layoutValue);
return ParseLayoutFromText(value);
}

private static string ParseAddress(HtmlNode node) => node.SelectSingleNode(".//span[@class=\"adresa\"]").InnerText;
Expand All @@ -98,16 +90,7 @@ private static Uri ParseWebUrl(HtmlNode node, string rootHost)
private static decimal ParseFloorArea(HtmlNode node)
{
var value = ParseTitle(node);

var result = RegexMatchers.FloorArea().Match(value);
if (!result.Success)
return decimal.Zero;

var floorAreaValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;

return decimal.TryParse(floorAreaValue, out var floorArea)
? floorArea
: decimal.Zero;
return ParseFloorAreaFromText(value);
}

private static Uri? ParseImageUrl(HtmlNode node, string rootHost)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,7 @@ public class BravisCzAdsPortal(string watchedUrl,

private static decimal ParsePrice(HtmlNode node)
{
var value = node.SelectSingleNode(".//strong[@class='price']")?.FirstChild?.InnerText;
if (value == null)
return decimal.Zero;

value = RegexMatchers.AllNonNumberValues().Replace(value, string.Empty);

return decimal.TryParse(value, out var price)
? price
: decimal.Zero;
return ParsePriceFromNode(node, ".//strong[@class='price']");
}

private static decimal ParseAdditionalFees(HtmlNode node)
Expand Down Expand Up @@ -74,15 +66,7 @@ private static decimal ParseAdditionalFees(HtmlNode node)
private static Layout ParseLayout(HtmlNode node)
{
var value = node.SelectSingleNode(".//ul[@class='params']/li[contains(text(),\"Typ\")]").InnerText;

var result = RegexMatchers.Layout().Match(value);
if (!result.Success)
return Layout.NotSpecified;

var layoutValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(layoutValue, string.Empty);

return LayoutExtensions.ToLayout(layoutValue);
return ParseLayoutFromText(value);
}

private static string ParseAddress(HtmlNode node) => node.SelectSingleNode(".//em[@class=\"location\"]").InnerText;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,38 +38,24 @@ public partial class CeskeRealityCzAdsPortal(string watchedUrl,

private static decimal ParsePrice(HtmlNode node)
{
var value = node.SelectSingleNode(".//h3[@class='i-estate__footer-price-value']")?.InnerText;
if (value is null)
return decimal.Zero;

value = RegexMatchers.AllNonNumberValues().Replace(value, string.Empty);

return decimal.TryParse(value, out var price)
? price
: decimal.Zero;
return ParsePriceFromNode(node, ".//h3[@class='i-estate__footer-price-value']");
}

private static string? ParsePriceComment(HtmlNode node) => ParsePrice(node) is decimal.Zero
? node.SelectSingleNode(".//h3[@class='i-estate__footer-price-value']")?.InnerText?.Trim()
: null;
private static string? ParsePriceComment(HtmlNode node)
{
return GetPriceCommentWhenZero(ParsePrice(node), node, ".//h3[@class='i-estate__footer-price-value']");
}

private static Layout ParseLayout(HtmlNode node)
{
var value = ParseTitle(node);

var result = RegexMatchers.Layout().Match(value);
if (!result.Success)
{
value = ParseText(node);
result = RegexMatchers.Layout().Match(value);
if (!result.Success)
return Layout.NotSpecified;
}
var layout = ParseLayoutFromText(value);
if (layout != Layout.NotSpecified)
return layout;

var layoutValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(layoutValue, string.Empty);

return LayoutExtensions.ToLayout(layoutValue);
value = ParseText(node);
return ParseLayoutFromText(value);
}

private static string ParseAddress(HtmlNode node)
Expand All @@ -95,16 +81,7 @@ private static Uri ParseWebUrl(HtmlNode node, string rootUri)
private static decimal ParseFloorArea(HtmlNode node)
{
var value = ParseTitle(node);

var result = RegexMatchers.FloorArea().Match(value);
if (!result.Success)
return decimal.Zero;

var floorAreaValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;

return decimal.TryParse(floorAreaValue, out var floorArea)
? floorArea
: decimal.Zero;
return ParseFloorAreaFromText(value);
}

private static Uri? ParseImageUrl(HtmlNode node)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,31 +38,15 @@ public partial class MmRealityCzAdsPortal(string watchedUrl,

private static decimal ParsePrice(HtmlNode node)
{
var value = node.SelectSingleNode(".//div[@class='rds-content']//div[contains(@class, 'price')]")?.InnerText;
if (value is null)
return decimal.Zero;

value = RegexMatchers.AllNonNumberValues().Replace(value, string.Empty);

return decimal.TryParse(value, out var price)
? price
: decimal.Zero;
return ParsePriceFromNode(node, ".//div[@class='rds-content']//div[contains(@class, 'price')]");
}

private static string? ParsePriceComment(HtmlNode node) => node.SelectSingleNode(".//div[@class='rds-content']//div[contains(@class, 'price')]")?.InnerText;

private static Layout ParseLayout(HtmlNode node)
{
var title = ParseTitle(node);

var result = RegexMatchers.Layout().Match(title);
if (!result.Success)
return Layout.NotSpecified;

var layoutValue = result.Groups.Skip<Group>(1).First(group => group.Success).Value;
layoutValue = RegexMatchers.AllWhitespaceCharacters().Replace(layoutValue, string.Empty);

return LayoutExtensions.ToLayout(layoutValue);
return ParseLayoutFromText(title);
}

private static string ParseAddress(HtmlNode node)
Expand Down
Loading