-
Notifications
You must be signed in to change notification settings - Fork 239
More precise newsflash coordinates (using Waze data) #1552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 4 commits
02e0f38
08ddb7a
55a4bee
570db17
17ac527
dd2b36a
6d74a58
be81f6e
f7b383f
c9ef932
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| from datetime import datetime, timedelta | ||
| import logging | ||
| import re | ||
|
|
||
|
|
@@ -6,10 +7,13 @@ | |
| import numpy as np | ||
| from geographiclib.geodesic import Geodesic | ||
|
|
||
| from anyway.models import NewsFlash | ||
| from anyway.parsers import resolution_dict | ||
| from anyway.models import NewsFlash, WazeAlert | ||
| from anyway.parsers import resolution_dict, short_distance_resolutions, long_distance_resolutions | ||
| from anyway.parsers.utils import get_bounding_box_polygon | ||
| from anyway import secrets | ||
|
|
||
# How many hours before a newsflash's date a Waze alert may have been created
# and still be considered as possibly related to that newsflash.
WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS = 3
|
|
||
|
|
||
| def extract_road_number(location): | ||
| """ | ||
|
|
@@ -297,15 +301,60 @@ def extract_location_text(text): | |
| return text | ||
|
|
||
|
|
||
def get_related_waze_accident_alert(db, geo_location, newsflash):
    """
    Look up a Waze ACCIDENT alert that plausibly describes the same event as a newsflash.

    :param db: database handle exposing ``db.session``
    :param geo_location: mapping with ``"lat"`` and ``"lon"`` entries for the newsflash location
    :param newsflash: newsflash whose ``resolution`` and ``date`` drive the search
    :return: the first matching ``WazeAlert`` row, or ``None`` when nothing matches
             or when the newsflash resolution is not a known one
    """
    # The search distance (in kilometers) depends on how precise the newsflash location is.
    resolution = newsflash.resolution
    if resolution in short_distance_resolutions:
        search_distance_km = 0.3
    elif resolution in long_distance_resolutions:
        search_distance_km = 5
    else:
        # Unknown resolution - do not attempt to match a Waze alert.
        return None

    # Bounding box around the newsflash coordinate, sized by the search distance.
    search_area = get_bounding_box_polygon(
        geo_location["lat"], geo_location["lon"], search_distance_km
    )

    accident_alerts = db.session.query(WazeAlert).filter(WazeAlert.alert_type == "ACCIDENT")

    # NOTE(review): a PR reviewer suggested making WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS a
    # function-local constant, since it is only used here.
    # NOTE(review): the window ends at the current time rather than at a point relative
    # to newsflash.date - confirm this is intended for newsflashes processed late.
    window_start = newsflash.date - timedelta(hours=WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS)
    window_end = datetime.now()
    created_in_window = WazeAlert.created_at.between(window_start, window_end)

    inside_search_area = WazeAlert.geom.intersects(search_area)

    # No ordering is applied, so "first" is whichever row the database returns first.
    return accident_alerts.filter(created_in_window).filter(inside_search_area).first()
|
|
||
|
|
||
| def extract_geo_features(db, newsflash: NewsFlash) -> None: | ||
| newsflash.location = extract_location_text(newsflash.description) or extract_location_text( | ||
| newsflash.title | ||
| ) | ||
| geo_location = geocode_extract(newsflash.location) | ||
| if geo_location is not None: | ||
| newsflash.resolution = set_accident_resolution(geo_location) | ||
|
|
||
| newsflash.lat = geo_location["geom"]["lat"] | ||
| newsflash.lon = geo_location["geom"]["lng"] | ||
| newsflash.resolution = set_accident_resolution(geo_location) | ||
|
|
||
| # improve location using waze | ||
| related_waze_accident = get_related_waze_accident_alert(db, geo_location, newsflash) | ||
| if related_waze_accident: | ||
| newsflash.waze_alert = related_waze_accident.id | ||
|
|
||
| # TODO: uncomment this after testing the related waze accidents mechanism is working properly on real data | ||
| # newsflash.lat = related_waze_accident.latitude | ||
| # newsflash.lon = related_waze_accident.longitude | ||
|
|
||
| location_from_db = get_db_matching_location( | ||
| db, | ||
| newsflash.lat, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,12 +3,13 @@ | |
|
|
||
| import pytest | ||
|
|
||
| from anyway.app_and_db import db | ||
| from anyway.parsers import rss_sites, twitter, location_extraction | ||
| from anyway.parsers.news_flash_classifiers import classify_tweets, classify_rss | ||
| from anyway import secrets | ||
| from anyway.parsers.news_flash_db_adapter import init_db | ||
| from anyway.models import NewsFlash | ||
| from anyway.parsers import timezones | ||
| from anyway.models import NewsFlash, WazeAlert | ||
| from anyway.parsers import timezones, short_distance_resolutions, long_distance_resolutions | ||
| from anyway.parsers.infographics_data_cache_updater import is_cache_eligible, is_in_cache | ||
|
|
||
|
|
||
|
|
@@ -232,6 +233,57 @@ def test_extract_location_text(): | |
| assert expected_location_text == actual_location_text | ||
|
|
||
|
|
||
def test_waze_alert():
    """
    Check that newsflashes are matched to a nearby Waze accident alert.

    A nearby location must match for every short-distance resolution; a farther
    location must match only for the long-distance resolutions.
    """
    # The alert is written to the database, so it is always removed in ``finally``.
    waze_alert = _create_waze_accident_alert()

    try:
        newsflash = NewsFlash(date=datetime.datetime.now())

        def find_related_alert(resolution, location):
            # Run the lookup as if the newsflash had the given resolution.
            newsflash.resolution = resolution
            return location_extraction.get_related_waze_accident_alert(db, location, newsflash)

        # A location close to the Waze accident alert.
        nearby_location = {
            "lon": waze_alert.longitude + 0.001,
            "lat": waze_alert.latitude + 0.0001,
        }

        # Short-distance resolutions find the alert when the location is close.
        for resolution in short_distance_resolutions:
            related_waze_accident_alert = find_related_alert(resolution, nearby_location)
            assert waze_alert == related_waze_accident_alert

        # A location farther away from the Waze accident alert.
        farther_location = {
            "lon": waze_alert.longitude + 0.01,
            "lat": waze_alert.latitude + 0.0001,
        }

        # Short-distance resolutions must *not* find the alert from the farther location.
        for resolution in short_distance_resolutions:
            related_waze_accident_alert = find_related_alert(resolution, farther_location)
            assert related_waze_accident_alert is None

        # Long-distance resolutions still find it from the farther location.
        for resolution in long_distance_resolutions:
            related_waze_accident_alert = find_related_alert(resolution, farther_location)
            assert waze_alert == related_waze_accident_alert

    finally:
        _delete_waze_alert(waze_alert.id)
|
|
||
|
|
||
| def test_timeparse(): | ||
| twitter = timezones.parse_creation_datetime("Sun May 31 08:26:18 +0000 2020") | ||
| ynet = timezones.parse_creation_datetime("Sun, 31 May 2020 11:26:18 +0300") | ||
|
|
@@ -268,3 +320,40 @@ def test_classification_statistics_ynet(): | |
| assert precision > BEST_PRECISION_YNET | ||
| assert recall > BEST_RECALL_YNET | ||
| assert f1 > BEST_F1_YNET | ||
|
|
||
|
|
||
|
elazarg marked this conversation as resolved.
Outdated
|
||
def _create_waze_accident_alert():
    """
    Insert a synthetic Waze ACCIDENT alert into the database and return it.

    The row is added and committed through ``db.session``; callers are
    responsible for deleting it afterwards.

    :return: the committed ``WazeAlert`` instance
    """
    # NOTE(review): a PR reviewer suggested inlining this helper and its delete
    # counterpart once the test uses a context manager.

    # Next id is derived from the current row count.
    # NOTE(review): this presumably assumes ids are contiguous starting at 1 - a gap
    # left by a deleted row could make this collide with an existing id; confirm.
    alert_id = db.session.query(WazeAlert).count() + 1

    # NOTE(review): for the city used below one would expect latitude ~31 and
    # longitude ~34; these values look swapped - confirm before relying on them.
    longitude, latitude = 31.0, 34.0
    point_str = "POINT({0} {1})".format(longitude, latitude)

    waze_alert = WazeAlert(
        id=alert_id,
        city='באר שבע',
        confidence=2,
        created_at=datetime.datetime.now(),
        longitude=longitude,
        latitude=latitude,
        magvar=190,
        number_thumbs_up=1,
        report_rating=5,
        reliability=10,
        alert_type='ACCIDENT',
        alert_subtype='',
        street='דרך מצדה',
        road_type=3,
        geom=point_str,
    )
    db.session.add(waze_alert)
    db.session.commit()

    return waze_alert
|
|
||
|
|
||
def _delete_waze_alert(waze_alert_id):
    """
    Delete the Waze alert row with the given id and commit the change.

    :param waze_alert_id: value of the ``id`` column of the row to remove
    """
    rows_with_id = db.session.query(WazeAlert).filter_by(id=waze_alert_id)
    rows_with_id.delete()
    db.session.commit()
Uh oh!
There was an error while loading. Please reload this page.