6.1. About Geopandas

6.1.1. Open Street Map

# doctest: +SKIP_FILE

import json
import geopandas
import pandas as pd
import pyrosm
from pyrosm import OSM, get_data, data

# Use pyogrio as the geopandas I/O engine for the rest of the script.
geopandas.options.io_engine = "pyogrio"


# Available regions
print(data.available)


# Country-level alternative, left disabled:
# POLAND = get_data('Poland', update=True, directory='/Users/matt/Desktop/')
# poland = OSM(POLAND)

# Obtain the 'Warsaw' dataset (kept under the given directory) and open it.
# NOTE(review): the directory is a machine-specific absolute path - adjust
# it before running elsewhere.
WARSAW = get_data('Warsaw', directory='/Users/matt/Desktop/')
warsaw = OSM(WARSAW)

# REPL-style inspection; captured output is kept as comments.
warsaw
# <pyrosm.pyrosm.OSM at 0x16945a750>

dir(warsaw)
# ['__class__', '__delattr__', '__dict__', '__dir__', '__doc__',
#  '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__',
#  '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__',
#  '__lt__', '__module__', '__ne__', '__new__', '__reduce__',
#  '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__',
#  '__subclasshook__', '__weakref__', '_current_timestamp',
#  '_data_bounding_box', '_get_network_filter', '_get_pbf_elements',
#  '_node_coordinates', '_nodes', '_nodes_gdf', '_osh_file',
#  '_read_pbf', '_relations', '_set_current_time', '_timestamp_changed',
#  '_verbose', '_way_records', 'allowed_bbox_types', 'bounding_box',
#  'conf', 'file_size', 'filepath', 'get_boundaries', 'get_buildings',
#  'get_data_by_custom_criteria', 'get_landuse', 'get_natural',
#  'get_network', 'get_pois', 'keep_node_info', 'to_graph']

help(warsaw)


# Road networks: read the network with network_type="driving" and plot it.
drive_net = warsaw.get_network(network_type="driving")
drive_net.plot()

# Buildings: read them with the default settings and plot them.
buildings = warsaw.get_buildings()
buildings.plot()

# Points of interest, read with the default filter.
pois = warsaw.get_pois()

# Keep only the rows whose 'amenity' column equals "restaurant".
restaurants = pois.query('amenity == "restaurant"')

# (rows, columns) of the selection; sample output follows.
restaurants.shape
# (2374, 96)

# Preview of the first five rows; sample output follows.
restaurants.head(5)
#           lat  timestamp  changeset         id        lon  version                                               tags  ... taxi agrarian dry_cleaning lottery ticket water hotel
# 0   52.308403          0        0.0   30776210  20.769335        0                                               None  ...  NaN      NaN          NaN     NaN    NaN   NaN   NaN
# 1   52.176907          0        0.0   31005854  20.945950        0                                               None  ...  NaN      NaN          NaN     NaN    NaN   NaN   NaN
# 33  52.218987          0        0.0  201830837  20.974339        0  {"check_date:opening_hours":"2023-06-30","cuis...  ...  NaN      NaN          NaN     NaN    NaN   NaN   NaN
# 42  52.239948          0        0.0  247441607  21.061977        0  {"check_date:opening_hours":"2022-08-22","cuis...  ...  NaN      NaN          NaN     NaN    NaN   NaN   NaN
# 47  52.231236          0        0.0  247461210  21.012108        0  {"addr:city:simc":"0918123","brand":"Pizza Hut...  ...  NaN      NaN          NaN     NaN    NaN   NaN   NaN
# [5 rows x 96 columns]

# Per-column summary (non-null counts, dtypes) with deep memory accounting.
# NOTE(review): the captured output below (2384 rows, 33 columns) does not
# match restaurants.shape above (2374, 96) - presumably it was recorded from
# a different run or from a custom-filtered query; confirm and refresh.
restaurants.info(memory_usage='deep')
# <class 'geopandas.geodataframe.GeoDataFrame'>
# RangeIndex: 2384 entries, 0 to 2383
# Data columns (total 33 columns):
#  #   Column            Non-Null Count  Dtype
# ---  ------            --------------  -----
#  0   timestamp         2384 non-null   uint32
#  1   visible           2384 non-null   object
#  2   lat               2204 non-null   float32
#  3   version           2384 non-null   int32
#  4   lon               2204 non-null   float32
#  5   tags              1937 non-null   object
#  6   changeset         2204 non-null   float64
#  7   id                2384 non-null   int64
#  8   addr:city         868 non-null    object
#  9   addr:country      23 non-null     object
#  10  addr:housenumber  1245 non-null   object
#  11  addr:housename    7 non-null      object
#  12  addr:postcode     670 non-null    object
#  13  addr:place        1 non-null      object
#  14  addr:street       1264 non-null   object
#  15  email             289 non-null    object
#  16  name              2340 non-null   object
#  17  opening_hours     1211 non-null   object
#  18  operator          43 non-null     object
#  19  phone             727 non-null    object
#  20  ref               8 non-null      object
#  21  url               5 non-null      object
#  22  website           819 non-null    object
#  23  amenity           2384 non-null   object
#  24  bar               10 non-null     object
#  25  internet_access   59 non-null     object
#  26  source            423 non-null    object
#  27  start_date        22 non-null     object
#  28  wikipedia         1 non-null      object
#  29  geometry          2384 non-null   geometry
#  30  osm_type          2384 non-null   object
#  31  building          160 non-null    object
#  32  building:levels   113 non-null    object
# dtypes: float32(2), float64(1), geometry(1), int32(1), int64(1), object(26), uint32(1)
# memory usage: 2.3 MB


def get_tags(obj):
    """Extract the ``cuisine`` value from a JSON-encoded ``tags`` cell.

    Args:
        obj: One cell of the ``tags`` column. A JSON object serialized as
            ``str`` is expected; any other value is treated as missing.

    Returns:
        The ``cuisine`` value with ``;`` separators replaced by ``,``, or
        ``pd.NA`` when ``obj`` is not a string, does not decode to a JSON
        object, or has no ``cuisine`` key.

    Raises:
        json.JSONDecodeError: If ``obj`` is a string that is not valid JSON.
    """
    if not isinstance(obj, str):
        return pd.NA
    # Named ``tags`` (not ``data``) so the module-level ``data`` import from
    # pyrosm is not shadowed inside this function.
    tags = json.loads(obj)
    # Valid JSON is not necessarily an object: a JSON list or string would
    # pass a bare ``in`` test and then fail on the key lookup.
    if not isinstance(tags, dict) or 'cuisine' not in tags:
        return pd.NA
    return tags['cuisine'].replace(';', ',')


# Restaurants only, narrowed to a handful of columns; the raw ``tags`` cell
# is then replaced by whatever get_tags() extracts from it.
restaurants = warsaw.get_pois(custom_filter={'amenity': ['restaurant']})
restaurants = restaurants.loc[:, [
    'name',
    'lon',
    'lat',
    'addr:street',
    'addr:housenumber',
    'tags',
    'geometry',
]]
restaurants = restaurants.assign(tags=restaurants['tags'].map(get_tags))


# Look up one restaurant by name and display it with its fields as rows.
der_elefant = restaurants.query('name == "Der Elefant"')
der_elefant.T



# Read every amenity and every shop. Both keys must be enabled: the lines
# below read pois["shop"], and pyrosm documents filter values as either True
# or a list of tag values (the original passed False for "shop").
custom_filter = {'amenity': True, "shop": True}
pois = warsaw.get_pois(custom_filter=custom_filter)

# One label per POI: the amenity value where present, otherwise the shop value.
pois["poi_type"] = pois["amenity"]
pois["poi_type"] = pois["poi_type"].fillna(pois["shop"])

# Colour the points by that label; the legend is anchored outside the axes.
ax = pois.plot(
    column='poi_type',
    markersize=3,
    figsize=(12, 12),
    legend=True,
    legend_kwds=dict(loc='upper left', ncol=5, bbox_to_anchor=(1, 1)),
)

# >>> pois.info(memory_usage='deep')
# <class 'geopandas.geodataframe.GeoDataFrame'>
# RangeIndex: 124219 entries, 0 to 124218
# Data columns (total 88 columns):
#  #   Column            Non-Null Count   Dtype
# ---  ------            --------------   -----
#  0   tags              86500 non-null   object
#  1   id                124219 non-null  int64
#  2   visible           124115 non-null  object
#  3   lon               66761 non-null   float32
#  4   timestamp         124219 non-null  uint32
#  5   changeset         66865 non-null   float64
#  6   version           124219 non-null  int32
#  7   lat               66761 non-null   float32
#  8   addr:city         11066 non-null   object
#  9   addr:country      378 non-null     object
#  10  addr:full         1 non-null       object
#  11  addr:housenumber  16014 non-null   object
#  12  addr:housename    123 non-null     object
#  13  addr:postcode     9308 non-null    object
#  14  addr:place        34 non-null      object
#  15  addr:street       16288 non-null   object
#  16  email             3891 non-null    object
#  17  name              31224 non-null   object
#  18  opening_hours     16768 non-null   object
#  19  operator          10757 non-null   object
#  20  phone             6726 non-null    object
#  21  ref               5291 non-null    object
#  22  url               102 non-null     object
#  23  website           8720 non-null    object
#  24  amenity           107032 non-null  object
#  25  atm               258 non-null     object
#  26  bicycle_parking   5081 non-null    object
#  27  bicycle_rental    339 non-null     object
#  28  bar               17 non-null      object
#  29  building          5238 non-null    object
#  30  building:levels   2858 non-null    object
#  31  drinking_water    63 non-null      object
#  32  fast_food         16 non-null      object
#  33  fountain          59 non-null      object
#  34  fuel              4 non-null       object
#  35  internet_access   395 non-null     object
#  36  kindergarten      1 non-null       object
#  37  landuse           633 non-null     object
#  38  office            139 non-null     object
#  39  parking           22509 non-null   object
#  40  post_office       6 non-null       object
#  41  social_facility   202 non-null     object
#  42  source            3026 non-null    object
#  43  start_date        276 non-null     object
#  44  wikipedia         354 non-null     object
#  45  alcohol           1 non-null       object
#  46  bed               1 non-null       object
#  47  bicycle           29 non-null      object
#  48  books             5 non-null       object
#  49  car               1 non-null       object
#  50  charity           1 non-null       object
#  51  clothes           351 non-null     object
#  52  coffee            2 non-null       object
#  53  collector         10 non-null      object
#  54  computer          1 non-null       object
#  55  craft             234 non-null     object
#  56  e-cigarette       1 non-null       object
#  57  fireplace         8 non-null       object
#  58  furniture         3 non-null       object
#  59  hairdresser       8 non-null       object
#  60  massage           2 non-null       object
#  61  medical_supply    1 non-null       object
#  62  model             94 non-null      object
#  63  motorcycle        5 non-null       object
#  64  music             1 non-null       object
#  65  organic           52 non-null      object
#  66  outdoor           3 non-null       object
#  67  pet               4 non-null       object
#  68  religion          570 non-null     object
#  69  second_hand       233 non-null     object
#  70  shoes             12 non-null      object
#  71  shop              17384 non-null   object
#  72  swimming_pool     1 non-null       object
#  73  trade             48 non-null      object
#  74  vacuum_cleaner    3 non-null       object
#  75  wholesale         20 non-null      object
#  76  wine              1 non-null       object
#  77  geometry          124219 non-null  geometry
#  78  osm_type          124219 non-null  object
#  79  arts_centre       1 non-null       object
#  80  car_wash          1 non-null       object
#  81  police            3 non-null       object
#  82  taxi              2 non-null       object
#  83  agrarian          1 non-null       object
#  84  dry_cleaning      1 non-null       object
#  85  lottery           1 non-null       object
#  86  ticket            3 non-null       object
#  87  water             38 non-null      object
# dtypes: float32(2), float64(1), geometry(1), int32(1), int64(1), object(81), uint32(1)
# memory usage: 273.8 MB

# Land-use type: read the land-use layer and colour it by the 'landuse' column.
landuse = warsaw.get_landuse()
landuse.plot(column='landuse', legend=True, figsize=(10,6))
# >>> landuse.info(memory_usage='deep')
# <class 'geopandas.geodataframe.GeoDataFrame'>
# RangeIndex: 145562 entries, 0 to 145561
# Data columns (total 20 columns):
#  #   Column        Non-Null Count   Dtype
# ---  ------        --------------   -----
#  0   tags          145561 non-null  object
#  1   id            145562 non-null  int64
#  2   visible       9 non-null       object
#  3   lon           9 non-null       float32
#  4   timestamp     145562 non-null  uint32
#  5   changeset     748 non-null     float64
#  6   version       145562 non-null  int32
#  7   lat           9 non-null       float32
#  8   landuse       145562 non-null  object
#  9   military      28 non-null      object
#  10  residential   2812 non-null    object
#  11  geometry      145562 non-null  geometry
#  12  osm_type      145562 non-null  object
#  13  basin         2 non-null       object
#  14  cemetery      12 non-null      object
#  15  construction  414 non-null     object
#  16  depot         11 non-null      object
#  17  industrial    99 non-null      object
#  18  meadow        1 non-null       object
#  19  railway       1 non-null       object
# dtypes: float32(2), float64(1), geometry(1), int32(1), int64(1), object(13), uint32(1)
# memory usage: 64.9 MB

# Natural features: read the layer and colour it by the 'natural' column.
natural = warsaw.get_natural()
natural.plot(column='natural', legend=True, figsize=(10,6))

# Boundaries: read them once and draw the outlines only (no fill).
boundaries = warsaw.get_boundaries()
boundaries.plot(facecolor="none", edgecolor="blue")


# Distinct boundary names. The original referenced an undefined
# `selected_boundary` here (NameError) right after re-reading the same
# boundaries; the frame read above is used instead.
boundaries['name'].unique()




# pip install geopandas pyogrio pyrosm
# Columns kept in the exported restaurants layer.
COLUMNS = [
    'lat', 'lon',
    'addr:city', 'addr:street', 'addr:housenumber',
    'name', 'amenity',
    'geometry', 'osm_type',
]


# Open the Warsaw dataset and read its points of interest.
WARSZAWA = pyrosm.get_data('Warsaw', directory='/Users/matt/Desktop/')
warsaw = pyrosm.OSM(WARSZAWA)
pois = warsaw.get_pois()
# To inspect the available columns: pois.info(memory_usage='deep')

# Keep restaurants only, then narrow the frame to the export columns.
restaurants = pois.query('amenity == "restaurant"')
restaurants = restaurants[COLUMNS]


# Write the selection to disk; no driver or file extension is specified.
restaurants.to_file('/Users/matt/Desktop/restaurants')