# doctest: +SKIP_FILE
import json
import geopandas
import pandas as pd
import pyrosm
from pyrosm import OSM, get_data, data
# Use pyogrio as the geopandas I/O engine.
geopandas.options.io_engine = "pyogrio"
# Available regions
print(data.available)
# Whole-country variant, kept for reference:
# POLAND = get_data('Poland', update=True, directory='/Users/matt/Desktop/')
# poland = OSM(POLAND)
# The value returned by get_data is handed straight to OSM(); per the pyrosm
# docs it is the path of the extract stored under `directory`.
WARSAW = get_data('Warsaw', directory='/Users/matt/Desktop/')
warsaw = OSM(WARSAW)
warsaw
# <pyrosm.pyrosm.OSM at 0x16945a750>
dir(warsaw)
# Captured output of dir(warsaw), commented out like the other recorded
# outputs in this file so it is not evaluated as a throwaway list literal:
# ['__class__', '__delattr__', '__dict__', '__dir__', '__doc__',
# '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__',
# '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__',
# '__lt__', '__module__', '__ne__', '__new__', '__reduce__',
# '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__',
# '__subclasshook__', '__weakref__', '_current_timestamp',
# '_data_bounding_box', '_get_network_filter', '_get_pbf_elements',
# '_node_coordinates', '_nodes', '_nodes_gdf', '_osh_file',
# '_read_pbf', '_relations', '_set_current_time', '_timestamp_changed',
# '_verbose', '_way_records', 'allowed_bbox_types', 'bounding_box',
# 'conf', 'file_size', 'filepath', 'get_boundaries', 'get_buildings',
# 'get_data_by_custom_criteria', 'get_landuse', 'get_natural',
# 'get_network', 'get_pois', 'keep_node_info', 'to_graph']
help(warsaw)
# Road network
drive_net = warsaw.get_network(network_type="driving")
drive_net.plot()
# Buildings
buildings = warsaw.get_buildings()
buildings.plot()
# Points of interest
pois = warsaw.get_pois()
# Keep only the rows whose `amenity` value is "restaurant".
restaurants = pois[pois['amenity'] == "restaurant"]
restaurants.shape
# (2374, 96)
restaurants.head()
# lat timestamp changeset id lon version tags ... taxi agrarian dry_cleaning lottery ticket water hotel
# 0 52.308403 0 0.0 30776210 20.769335 0 None ... NaN NaN NaN NaN NaN NaN NaN
# 1 52.176907 0 0.0 31005854 20.945950 0 None ... NaN NaN NaN NaN NaN NaN NaN
# 33 52.218987 0 0.0 201830837 20.974339 0 {"check_date:opening_hours":"2023-06-30","cuis... ... NaN NaN NaN NaN NaN NaN NaN
# 42 52.239948 0 0.0 247441607 21.061977 0 {"check_date:opening_hours":"2022-08-22","cuis... ... NaN NaN NaN NaN NaN NaN NaN
# 47 52.231236 0 0.0 247461210 21.012108 0 {"addr:city:simc":"0918123","brand":"Pizza Hut... ... NaN NaN NaN NaN NaN NaN NaN
# [5 rows x 96 columns]
restaurants.info(memory_usage="deep")
# <class 'geopandas.geodataframe.GeoDataFrame'>
# RangeIndex: 2384 entries, 0 to 2383
# Data columns (total 33 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 timestamp 2384 non-null uint32
# 1 visible 2384 non-null object
# 2 lat 2204 non-null float32
# 3 version 2384 non-null int32
# 4 lon 2204 non-null float32
# 5 tags 1937 non-null object
# 6 changeset 2204 non-null float64
# 7 id 2384 non-null int64
# 8 addr:city 868 non-null object
# 9 addr:country 23 non-null object
# 10 addr:housenumber 1245 non-null object
# 11 addr:housename 7 non-null object
# 12 addr:postcode 670 non-null object
# 13 addr:place 1 non-null object
# 14 addr:street 1264 non-null object
# 15 email 289 non-null object
# 16 name 2340 non-null object
# 17 opening_hours 1211 non-null object
# 18 operator 43 non-null object
# 19 phone 727 non-null object
# 20 ref 8 non-null object
# 21 url 5 non-null object
# 22 website 819 non-null object
# 23 amenity 2384 non-null object
# 24 bar 10 non-null object
# 25 internet_access 59 non-null object
# 26 source 423 non-null object
# 27 start_date 22 non-null object
# 28 wikipedia 1 non-null object
# 29 geometry 2384 non-null geometry
# 30 osm_type 2384 non-null object
# 31 building 160 non-null object
# 32 building:levels 113 non-null object
# dtypes: float32(2), float64(1), geometry(1), int32(1), int64(1), object(26), uint32(1)
# memory usage: 2.3 MB
def get_tags(obj):
    """Extract the ``cuisine`` entry from a JSON-encoded tag string.

    Used as the mapper for the ``tags`` column, whose cells are either JSON
    objects serialized as ``str`` or non-string placeholders for "no tags".

    Parameters
    ----------
    obj : object
        One cell of the ``tags`` column.

    Returns
    -------
    str or pandas.NA
        The cuisine value with every ``;`` replaced by ``,``. ``pd.NA`` is
        returned when ``obj`` is not a string, cannot be parsed as JSON, is
        not a JSON object, or carries no string-valued ``cuisine`` key.
    """
    if not isinstance(obj, str):
        return pd.NA
    try:
        # Named `tags` rather than `data` so the module-level `data` import
        # from pyrosm is not shadowed.
        tags = json.loads(obj)
    except json.JSONDecodeError:
        # An empty or malformed cell must not abort a whole-column map.
        return pd.NA
    if not isinstance(tags, dict):
        return pd.NA
    cuisine = tags.get('cuisine')
    if not isinstance(cuisine, str):
        return pd.NA
    return cuisine.replace(';', ',')
# Restaurant POIs only, reduced to a handful of columns; the raw `tags`
# cell is replaced by whatever get_tags returns for it.
restaurants = warsaw.get_pois(custom_filter={'amenity': ['restaurant']})
restaurants = restaurants.loc[
    :,
    ['name','lon','lat','addr:street','addr:housenumber','tags','geometry'],
]
restaurants = restaurants.assign(tags=restaurants['tags'].map(get_tags))
# Look at a single restaurant, one attribute per row.
der_elefant = restaurants[restaurants['name'] == "Der Elefant"]
der_elefant.T
# NOTE(review): "shop" is mapped to False, yet the `shop` column is read
# below -- confirm how pyrosm interprets a False filter value.
custom_filter = {'amenity': True, "shop": False}
pois = warsaw.get_pois(custom_filter=custom_filter)
# poi_type is the amenity value, falling back to the shop value where the
# amenity is missing.
pois["poi_type"] = pois["amenity"].fillna(pois["shop"])
ax = pois.plot(
    column='poi_type',
    markersize=3,
    figsize=(12, 12),
    legend=True,
    legend_kwds={'loc': 'upper left', 'ncol': 5, 'bbox_to_anchor': (1, 1)},
)
# >>> pois.info(memory_usage='deep')
# <class 'geopandas.geodataframe.GeoDataFrame'>
# RangeIndex: 124219 entries, 0 to 124218
# Data columns (total 88 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 tags 86500 non-null object
# 1 id 124219 non-null int64
# 2 visible 124115 non-null object
# 3 lon 66761 non-null float32
# 4 timestamp 124219 non-null uint32
# 5 changeset 66865 non-null float64
# 6 version 124219 non-null int32
# 7 lat 66761 non-null float32
# 8 addr:city 11066 non-null object
# 9 addr:country 378 non-null object
# 10 addr:full 1 non-null object
# 11 addr:housenumber 16014 non-null object
# 12 addr:housename 123 non-null object
# 13 addr:postcode 9308 non-null object
# 14 addr:place 34 non-null object
# 15 addr:street 16288 non-null object
# 16 email 3891 non-null object
# 17 name 31224 non-null object
# 18 opening_hours 16768 non-null object
# 19 operator 10757 non-null object
# 20 phone 6726 non-null object
# 21 ref 5291 non-null object
# 22 url 102 non-null object
# 23 website 8720 non-null object
# 24 amenity 107032 non-null object
# 25 atm 258 non-null object
# 26 bicycle_parking 5081 non-null object
# 27 bicycle_rental 339 non-null object
# 28 bar 17 non-null object
# 29 building 5238 non-null object
# 30 building:levels 2858 non-null object
# 31 drinking_water 63 non-null object
# 32 fast_food 16 non-null object
# 33 fountain 59 non-null object
# 34 fuel 4 non-null object
# 35 internet_access 395 non-null object
# 36 kindergarten 1 non-null object
# 37 landuse 633 non-null object
# 38 office 139 non-null object
# 39 parking 22509 non-null object
# 40 post_office 6 non-null object
# 41 social_facility 202 non-null object
# 42 source 3026 non-null object
# 43 start_date 276 non-null object
# 44 wikipedia 354 non-null object
# 45 alcohol 1 non-null object
# 46 bed 1 non-null object
# 47 bicycle 29 non-null object
# 48 books 5 non-null object
# 49 car 1 non-null object
# 50 charity 1 non-null object
# 51 clothes 351 non-null object
# 52 coffee 2 non-null object
# 53 collector 10 non-null object
# 54 computer 1 non-null object
# 55 craft 234 non-null object
# 56 e-cigarette 1 non-null object
# 57 fireplace 8 non-null object
# 58 furniture 3 non-null object
# 59 hairdresser 8 non-null object
# 60 massage 2 non-null object
# 61 medical_supply 1 non-null object
# 62 model 94 non-null object
# 63 motorcycle 5 non-null object
# 64 music 1 non-null object
# 65 organic 52 non-null object
# 66 outdoor 3 non-null object
# 67 pet 4 non-null object
# 68 religion 570 non-null object
# 69 second_hand 233 non-null object
# 70 shoes 12 non-null object
# 71 shop 17384 non-null object
# 72 swimming_pool 1 non-null object
# 73 trade 48 non-null object
# 74 vacuum_cleaner 3 non-null object
# 75 wholesale 20 non-null object
# 76 wine 1 non-null object
# 77 geometry 124219 non-null geometry
# 78 osm_type 124219 non-null object
# 79 arts_centre 1 non-null object
# 80 car_wash 1 non-null object
# 81 police 3 non-null object
# 82 taxi 2 non-null object
# 83 agrarian 1 non-null object
# 84 dry_cleaning 1 non-null object
# 85 lottery 1 non-null object
# 86 ticket 3 non-null object
# 87 water 38 non-null object
# dtypes: float32(2), float64(1), geometry(1), int32(1), int64(1), object(81), uint32(1)
# memory usage: 273.8 MB
# Land use
landuse = warsaw.get_landuse()
# Colour the plot by the `landuse` column and show a legend.
landuse.plot(
    column='landuse',
    figsize=(10, 6),
    legend=True,
)
# >>> landuse.info(memory_usage='deep')
# <class 'geopandas.geodataframe.GeoDataFrame'>
# RangeIndex: 145562 entries, 0 to 145561
# Data columns (total 20 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 tags 145561 non-null object
# 1 id 145562 non-null int64
# 2 visible 9 non-null object
# 3 lon 9 non-null float32
# 4 timestamp 145562 non-null uint32
# 5 changeset 748 non-null float64
# 6 version 145562 non-null int32
# 7 lat 9 non-null float32
# 8 landuse 145562 non-null object
# 9 military 28 non-null object
# 10 residential 2812 non-null object
# 11 geometry 145562 non-null geometry
# 12 osm_type 145562 non-null object
# 13 basin 2 non-null object
# 14 cemetery 12 non-null object
# 15 construction 414 non-null object
# 16 depot 11 non-null object
# 17 industrial 99 non-null object
# 18 meadow 1 non-null object
# 19 railway 1 non-null object
# dtypes: float32(2), float64(1), geometry(1), int32(1), int64(1), object(13), uint32(1)
# memory usage: 64.9 MB
# Natural features
natural = warsaw.get_natural()
# Colour the plot by the `natural` column and show a legend.
natural.plot(
    column='natural',
    figsize=(10, 6),
    legend=True,
)
# Boundaries
boundaries = warsaw.get_boundaries()
# Outlines only: no fill, blue edges.
boundaries.plot(facecolor="none", edgecolor="blue")
# Distinct boundary names. Read from the `boundaries` frame fetched above:
# no `selected_boundary` is defined in this script, and repeating the
# identical get_boundaries() call is unnecessary.
boundaries['name'].unique()
# Requirements: pip install geopandas pyogrio pyrosm
# Columns kept in the exported restaurant layer.
COLUMNS = [
    'lat', 'lon',
    'addr:city', 'addr:street', 'addr:housenumber',
    'name', 'amenity',
    'geometry', 'osm_type',
]
# Read OSM data
WARSZAWA = pyrosm.get_data('Warsaw', directory='/Users/matt/Desktop/')
warsaw = pyrosm.OSM(WARSZAWA)
pois = warsaw.get_pois()
# pois.info(memory_usage='deep')
# Restaurant rows only, restricted to COLUMNS.
restaurants = pois.loc[pois['amenity'] == "restaurant", COLUMNS]
# No driver or file extension is given, so the output format is whatever
# to_file selects for such a path.
restaurants.to_file('/Users/matt/Desktop/restaurants')