Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions src/PaperBee/papers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,45 +43,47 @@ def process_articles(self) -> None:
def filter_columns(self) -> None:
    """Restrict ``self.articles`` to the columns used by later pipeline steps.

    Keeps only ``databases``, ``publication_date``, ``title``, ``keywords``
    and ``url``. When the DataFrame is empty (e.g. a search returned no
    results), it is replaced by an empty frame that still carries the
    expected columns, so subsequent steps can reference them safely.
    """
    columns = ["databases", "publication_date", "title", "keywords", "url"]
    if self.articles.empty:
        # ``.loc[:, columns]`` on an empty frame lacking these columns would
        # raise KeyError; rebuild an empty frame with the expected schema.
        self.articles = pd.DataFrame(columns=columns)
    else:
        self.articles = self.articles.loc[:, columns]

def extract_doi(self) -> None:
    """Derive a ``DOI`` column from each article's URL.

    The DOI is taken as the substring of the URL starting at the first
    occurrence of ``"10."`` (every DOI begins with the ``10.`` prefix).
    Skipped entirely on an empty DataFrame, where applying over the
    ``url`` column is not possible.

    NOTE(review): if a URL contains no ``"10."``, ``str.find`` returns -1
    and the slice yields the URL's last three characters — assumes every
    URL embeds a DOI; confirm against the upstream search sources.
    """
    if not self.articles.empty:
        self.articles["DOI"] = self.articles["url"].apply(lambda x: x[x.find("10.") :])

def set_dates(self) -> None:
    """Record the processing date and the publication date for each article.

    ``Date`` is set to today's date string (``self.today_str``) on every
    row; ``PostedDate`` mirrors the source ``publication_date`` column.
    No-op on an empty DataFrame so its schema is left unchanged.
    """
    if not self.articles.empty:
        self.articles["Date"] = self.today_str
        self.articles["PostedDate"] = self.articles["publication_date"]

def determine_preprint_status(self) -> None:
    """Flag each article as a preprint based on its source databases.

    An article whose ``databases`` entry contains ``"PubMed"`` is treated
    as published (``"FALSE"``); anything else is a preprint (``"TRUE"``).
    The flags are the strings ``"FALSE"``/``"TRUE"`` (spreadsheet-friendly),
    not Python booleans. No-op on an empty DataFrame.
    """
    if not self.articles.empty:
        self.articles["IsPreprint"] = self.articles["databases"].apply(
            lambda dbs: "FALSE" if "PubMed" in dbs else "TRUE"
        )

def rename_and_process_columns(self) -> None:
    """Copy lowercase source columns to their capitalized output names.

    ``Title`` and ``URL`` are straight copies of ``title`` and ``url``.
    ``Keywords`` joins each article's keyword list into a comma-separated
    string, dropping the first two characters of every keyword (presumably
    a marker prefix — confirm against the keyword format produced
    upstream). No-op on an empty DataFrame.
    """
    if not self.articles.empty:
        self.articles["Title"] = self.articles["title"]
        self.articles["Keywords"] = self.articles["keywords"].apply(
            lambda kws: ", ".join(kw[2:] for kw in kws)
        )
        self.articles["URL"] = self.articles["url"]

def select_last_columns(self) -> None:
    """Reduce the DataFrame to the final, ordered set of output columns.

    Adds a (currently always-``None``) ``Preprint`` column and selects the
    eight output columns in their fixed order. An empty DataFrame is
    rebuilt as an empty frame carrying the full output schema, so consumers
    can rely on the columns being present.
    """
    expected_columns = ["DOI", "Date", "PostedDate", "IsPreprint", "Title", "Keywords", "Preprint", "URL"]
    if self.articles.empty:
        # Rebuild with the full output schema; selecting columns from a
        # schema-less empty frame would raise KeyError.
        self.articles = pd.DataFrame(columns=expected_columns)
    else:
        self.articles["Preprint"] = None  # TODO add search for preprint of published articles
        self.articles = self.articles[expected_columns]

class PubMedClient:
"""
Expand Down
7 changes: 7 additions & 0 deletions tests/test_telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,10 @@ async def test_publish_many_papers(publisher, papers):
message = await publisher.publish_papers(papers, preprints, today=None, spreadsheet_id=None)

assert message.message_id is not None

@pytest.mark.asyncio
async def test_publish_empty_list(publisher, papers):
    # Edge case: publishing with an empty paper list should still return a
    # message object with an id (asserted below), not raise.
    # NOTE(review): the `papers` fixture is requested but immediately
    # shadowed by the rebinding on the next line — confirm whether the
    # fixture's setup side effects are needed; otherwise the parameter
    # could be dropped.
    papers, preprints = publisher.format_papers([])
    message = await publisher.publish_papers(papers, preprints, today=None, spreadsheet_id=None)

    assert message.message_id is not None
Loading