Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ Scrapers available for:
- `https://altonbrown.com/ <https://altonbrown.com/>`_
- `https://amazingribs.com/ <https://amazingribs.com/>`_
- `https://ambitiouskitchen.com/ <https://ambitiouskitchen.com>`_
- `https://app.samsungfood.com/ <https://app.samsungfood.com>`_ (*)
- `https://americastestkitchen.com/ <https://www.americastestkitchen.com>`_ (*)
- `https://archanaskitchen.com/ <https://archanaskitchen.com/>`_
- `https://www.argiro.gr/ <https://www.argiro.gr/>`_
Expand Down
2 changes: 2 additions & 0 deletions recipe_scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@
from .sallysbakingaddiction import SallysBakingAddiction
from .sallysblog import SallysBlog
from .saltpepperskillet import SaltPepperSkillet
from .samsungfood import SamsungFood
from .sandwhichtribunal import SandwhichTribunal
from .saveur import Saveur
from .savorynothings import SavoryNothings
Expand Down Expand Up @@ -691,6 +692,7 @@
SallysBakingAddiction.host(): SallysBakingAddiction,
SallysBlog.host(): SallysBlog,
SaltPepperSkillet.host(): SaltPepperSkillet,
SamsungFood.host(): SamsungFood,
Saveur.host(): Saveur,
SeriousEats.host(): SeriousEats,
SimpleVeganista.host(): SimpleVeganista,
Expand Down
209 changes: 209 additions & 0 deletions recipe_scrapers/samsungfood.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
import re

from ._abstract import AbstractScraper
from ._exceptions import ElementNotFoundInHtml
from ._grouping_utils import IngredientGroup
from ._utils import get_minutes, normalize_string


class SamsungFood(AbstractScraper):
    """Scraper for recipe pages served from app.samsungfood.com.

    Every value is read from the parsed HTML in ``self.soup``. Elements are
    located through ``data-testid`` / ``data-scroll`` attributes and through
    visible label text ("Ingredients", "Step 1", "Cook: ", ...), then reached
    by walking siblings and children relative to those anchors.
    """

    @classmethod
    def host(cls):
        """Return the host name this scraper handles."""
        return "app.samsungfood.com"

    def author(self):
        """Return the author name from the byline preceding the ``<h1>``.

        The byline text is taken from the first ``<span>`` inside the element
        directly before the title.
        """
        byline = self.soup.h1.previous_sibling.find("span").get_text()
        # Strip only a leading "By "; a plain str.replace() would also remove
        # a "By " that happens to occur inside the author's name.
        return normalize_string(re.sub(r"^\s*By ", "", byline))

    def title(self):
        """Return the normalized text of the page's first ``<h1>``."""
        return normalize_string(self.soup.h1.get_text())

    def image(self):
        """Return the ``src`` of the recipe image.

        The image is the first ``<img>`` whose ``src`` contains
        "user-recipes", searched below the second element following the
        ``#recipe-wrapper`` node.

        Raises:
            ElementNotFoundInHtml: if no such ``<img>`` exists or its ``src``
                is empty.
        """
        container = self.soup.find(id="recipe-wrapper").next_element.next_element
        img_tag = container.find("img", src=re.compile("user-recipes"))
        # Check for a missing tag *before* subscripting it; otherwise a page
        # without an image fails with TypeError instead of the intended error.
        src = img_tag["src"] if img_tag is not None else None
        if not src:
            # NOTE(review): raised without arguments, as everywhere in this
            # class -- confirm the exception's constructor allows that.
            raise ElementNotFoundInHtml
        return src

    def description(self):
        """Return the normalized recipe description, or ``None`` if absent."""
        block = self.soup.find(attrs={"data-testid": "saved-recipe-description"})
        if block is None:
            return None
        return normalize_string(block.get_text())

    def yields(self):
        """Return the raw text of the servings block (e.g. "4 servings")."""
        return self.soup.find(attrs={"data-testid": "servings-block"}).get_text()

    # Helper method:
    def _ingredient_list_elements(self):
        """Return the siblings following the "Ingredients" header section.

        The header is the ``<div>`` whose string is exactly "Ingredients";
        the siblings are taken relative to its grandparent.
        """
        ing_header = self.soup.find("div", string="Ingredients")
        return ing_header.parent.parent.find_next_siblings()

    # Helper method:
    def _ingredient_text(self, tag):
        """Return the normalized display text of one ingredient element.

        A nested ``<div>``, when present, is replaced in place by
        ", <its text>" so it reads as a comma-separated suffix
        ("1 egg, large"). This edits ``self.soup``; a later call on the same
        tag finds no ``<div>`` left and returns the already-joined text.
        """
        note = tag.div
        if note is not None:
            # get_text() rather than .string: .string is None when the div
            # has more than one child, which would break the concatenation.
            note.replace_with(", " + note.get_text())
        return normalize_string(tag.get_text())

    def ingredients(self):
        """Return all ingredients as a flat list of strings."""
        container = self._ingredient_list_elements()[1]
        tags = container.find_all(attrs={"data-testid": "recipe-ingredient"})
        return [self._ingredient_text(tag) for tag in tags]

    def ingredient_groups(self):
        """Return the ingredients as a list of ``IngredientGroup`` objects.

        Each child of the ingredient container is one group. Within a group,
        a child that has a ``<div>`` descendant marks the presence of
        ingredients; a child without one is taken as the group heading
        (``purpose``). ``purpose`` stays ``None`` when there is no heading.
        """
        groups = []
        container = self._ingredient_list_elements()[1]
        for section in container.children:
            purpose = None
            has_ingredients = False
            for child in section.children:
                if child.div:
                    # has div descendants => holds ingredients
                    has_ingredients = True
                else:
                    # no div descendants => group heading
                    purpose = normalize_string(child.get_text())
            ingredients = []
            if has_ingredients:
                # Collect the section's ingredients exactly once, however
                # many of its children contain divs, so none are duplicated.
                ingredients = [
                    self._ingredient_text(tag)
                    for tag in section.find_all(
                        attrs={"data-testid": "recipe-ingredient"}
                    )
                ]
            groups.append(IngredientGroup(ingredients=ingredients, purpose=purpose))
        return groups

    # Helper method:
    def _steps(self):
        """Return the "Step N" label ``<div>`` elements, or ``None``.

        ``None`` is returned when the page has no instructions block; an
        empty list when the block exists but contains no step labels.
        """
        block = self.soup.find(attrs={"data-scroll": "instructions"})
        if block is None:
            return None
        return block.find_all("div", string=re.compile(r"^Step \d+$"))

    def instructions(self):
        """Return the step texts joined by newlines.

        The text of a step is the element right after its "Step N" label.

        Raises:
            ElementNotFoundInHtml: if the page has no steps.
        """
        steps = self._steps()  # one tree search instead of two
        if not steps:
            raise ElementNotFoundInHtml
        return "\n".join(
            normalize_string(step.next_sibling.get_text()) for step in steps
        )

    def equipment(self):
        """Return the distinct equipment names in order of first appearance.

        Equipment is read from the second sibling after each "Step N" label.
        Returns ``None`` when the page has no steps.
        """
        steps = self._steps()
        if not steps:
            return None
        equipment = []
        seen = set()
        for step in steps:
            try:
                holder = step.next_sibling.next_sibling
                # only direct children
                for entry in holder.find_all("div", recursive=False):
                    name = normalize_string(str(entry.div.span.contents[0]))
                    # De-duplicate on the value that is actually returned.
                    if name not in seen:
                        seen.add(name)
                        equipment.append(name)
            except (AttributeError, IndexError):
                # Step without (well-formed) equipment markup: skip it.
                continue
        return equipment

    def keywords(self):
        """Return the tag texts as a list, or ``None`` if there is no tag block."""
        tag_block = self.soup.find(attrs={"data-testid": "tags"})
        if tag_block is None:
            return None
        return [normalize_string(key.get_text()) for key in tag_block.children]

    # Helper method:
    def _minutes_after_label(self, label_pattern):
        """Return ``get_minutes`` of the node following a label, or ``None``.

        ``label_pattern`` is a regular expression matched against text nodes;
        ``None`` is returned when the label or the node after it is missing.
        """
        try:
            return get_minutes(
                self.soup.find(string=re.compile(label_pattern)).next_element.string
            )
        except AttributeError:
            return None

    def cook_time(self):
        """Return the value shown after the "Cook: " label, or ``None``."""
        return self._minutes_after_label(r"^Cook: $")

    def prep_time(self):
        """Return the value shown after the "Prep: " label, or ``None``."""
        return self._minutes_after_label(r"^Prep: $")

    def total_time(self):
        """Return prep time plus cook time, using whichever is available.

        Raises:
            ElementNotFoundInHtml: if neither time is present (or both are 0).
        """
        total = (self.prep_time() or 0) + (self.cook_time() or 0)
        if not total:
            raise ElementNotFoundInHtml
        return total

    # Helper method:
    def _vote_count(self, pattern, suffix):
        """Return the integer in a "<n><suffix>" text node, or 0 if absent."""
        try:
            return int(self.soup.find(string=re.compile(pattern)).replace(suffix, ""))
        except AttributeError:
            return 0

    # Helper method:
    def _likes(self):
        """Return the number from the "<n> liked" text, or 0 if absent."""
        return self._vote_count(r"^\d+ liked$", " liked")

    # Helper method:
    def _dislikes(self):
        """Return the number from the "<n> disliked" text, or 0 if absent."""
        return self._vote_count(r"^\d+ disliked$", " disliked")

    def ratings(self):
        """Return the share of positive votes scaled to 0-5, or ``None``.

        ``None`` is returned when there are no votes at all.
        """
        # Look each counter up once; every lookup is a regex search of the tree.
        likes = self._likes()
        total = likes + self._dislikes()
        if total == 0:
            return None
        return likes / total * 5

    def ratings_count(self):
        """Return the total number of votes (likes plus dislikes)."""
        return self._likes() + self._dislikes()

    def nutrients(self):
        """Return the per-serving nutrition values as a dict, or ``None``.

        Only the labels listed in ``parameters`` are reported; each value is
        the first whitespace-separated token of the entry followed by a fixed
        unit text. ``None`` is returned when the "Nutrition per serving"
        section is missing.
        """
        parameters = {
            "Calories": {"unit": "calories", "key": "calories"},
            "Total Fat": {"unit": "grams fat", "key": "fatContent"},
            "Carbs": {"unit": "grams carbohydrates", "key": "carbohydrateContent"},
            "Sugars": {"unit": "grams sugar", "key": "sugarContent"},
            "Protein": {"unit": "grams protein", "key": "proteinContent"},
        }

        try:
            entries = self.soup.find(
                "h3", string="Nutrition per serving"
            ).next_element.next_element.next_element.children
        except AttributeError:
            return None

        nutrients = {}
        for entry in entries:
            try:
                label = normalize_string(entry.strong.get_text())
                amount = normalize_string(entry.span.get_text().split()[0])
            except (AttributeError, IndexError):
                # Not a nutrient row, or a row with an empty value: skip it.
                continue
            field = parameters.get(label)
            if field is None:
                # Label this scraper does not report.
                continue
            nutrients[field["key"]] = amount + " " + field["unit"]
        return nutrients
55 changes: 55 additions & 0 deletions tests/test_data/app.samsungfood.com/samsungfood_1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
{
"author": "test source",
"canonical_url": "https://app.samsungfood.com/recipes/1070191967dda557270adeb97ab8d4ca1c6",
"host": "app.samsungfood.com",
"language": "en",
"title": "Test Recipe 1",
"ingredients": [
"1 egg, large",
"100g butter",
"10g cheddar, finely chopped"
],
"ingredient_groups": [
{
"ingredients": [
"1 egg, large",
"100g butter"
]
},
{
"ingredients": [
"10g cheddar, finely chopped"
],
"purpose": "To serve"
}
],
"instructions_list": [
"Finely chop the egg.",
"Bake until golden brown. Meanwhile make the sauce.",
"Serve immediately."
],
"yields": "4 servings",
"description": "Description goes here.",
"total_time": 75,
"cook_time": 70,
"prep_time": 5,
"ratings": 5.0,
"equipment": [
"Oven",
"Baking sheet"
],
"nutrients": {
"calories": "207.2 calories",
"fatContent": "22.3 grams fat",
"carbohydrateContent": "0.2 grams carbohydrates",
"sugarContent": "0.1 grams sugar",
"proteinContent": "2.4 grams protein"
},
"image": "https://bunny-image-cdn.whisk.com/image/upload/v1724806979/v3/user-recipes/a6df80b07fa2dcefc95e7e58393687ef.jpg?height=1120&width=1002&aspect_ratio=1002:1120",
"keywords": [
"Fresh",
"Go-to",
"Kid-friendly",
"Sweet"
]
}
106 changes: 106 additions & 0 deletions tests/test_data/app.samsungfood.com/samsungfood_1.testhtml

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions tests/test_data/app.samsungfood.com/samsungfood_2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"author": "tomtjes",
"host": "app.samsungfood.com",
"language": "en",
"title": "test recipe 2",
"canonical_url": "https://app.samsungfood.com/recipes/1070191a10606157ff9b73867be9099670f"
}
96 changes: 96 additions & 0 deletions tests/test_data/app.samsungfood.com/samsungfood_2.testhtml

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions tests/test_data/app.samsungfood.com/samsungfood_3.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"author": "tomtjes",
"host": "app.samsungfood.com",
"language": "en",
"title": "Test 3",
"canonical_url": "https://app.samsungfood.com/recipes/1070191a56ce4467d7e80c68f1902512270",
"ingredients": [
"peas",
"mint"
],
"ingredient_groups": [
{
"ingredients": [
"peas",
"mint"
]
}
],
"instructions_list": [
"make mush"
],
"yields": "1 serving"
}
164 changes: 164 additions & 0 deletions tests/test_data/app.samsungfood.com/samsungfood_3.testhtml

Large diffs are not rendered by default.