1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
|
"""
DOM parsing for the OpenStreetMap API.
"""
from datetime import datetime
import xml.dom.minidom
import xml.parsers.expat
import logging
from typing import Any, Union, Optional
from xml.dom.minidom import Element
from . import errors
from . import xmlbuilder
logger = logging.getLogger(__name__)
def OsmResponseToDom(
    response: bytes, tag: str, single: bool = False, allow_empty: bool = False
) -> Union[Element, list[Element]]:
    """
    Returns the (sub-) DOM parsed from an OSM response.

    The response is parsed as XML and all ``tag`` elements located below the
    first ``<osm>`` element are collected.

    :param response: raw XML payload to parse
    :param tag: name of the elements to extract
    :param single: if true, return only the first matching element instead
        of a list
    :param allow_empty: if true, return an empty list (even when ``single``
        is set) when the ``<osm>`` element or any ``tag`` element is missing
    :returns: the first matching element if ``single`` is true, otherwise a
        list of all matching elements
    :raises errors.XmlResponseInvalidError: if the payload is not well-formed
        XML, or if nothing matches and ``allow_empty`` is false
    """
    try:
        dom = xml.dom.minidom.parseString(response)
        osm_dom = dom.getElementsByTagName("osm")[0]
        all_data = osm_dom.getElementsByTagName(tag)
        # Indexing here (rather than later) lets one IndexError handler cover
        # both the missing <osm> element and the missing ``tag`` elements.
        first_element = all_data[0]
    except IndexError as e:
        if allow_empty:
            return []
        # Chain explicitly so the original error is kept as __cause__.
        raise errors.XmlResponseInvalidError(
            f"The XML response from the OSM API is invalid: {e!r}"
        ) from e
    except xml.parsers.expat.ExpatError as e:
        raise errors.XmlResponseInvalidError(
            f"The XML response from the OSM API is invalid: {e!r}"
        ) from e

    if single:
        return first_element
    return list(all_data)
def dom_parse_node(dom_element: Element) -> dict[str, Any]:
    """
    Build the NodeData dictionary for a node element.

    The converted attributes of ``dom_element`` form the base of the
    dictionary; its tags are stored under the ``"tag"`` key.
    """
    return {
        **_dom_get_attributes(dom_element),
        "tag": _dom_get_tag(dom_element),
    }
def dom_parse_way(dom_element: Element) -> dict[str, Any]:
    """
    Build the WayData dictionary for a way element.

    On top of the converted attributes, the tags are stored under ``"tag"``
    and the referenced node ids under ``"nd"``.
    """
    way_data = _dom_get_attributes(dom_element)
    way_data.update(
        tag=_dom_get_tag(dom_element),
        nd=_dom_get_nd(dom_element),
    )
    return way_data
def dom_parse_relation(dom_element: Element) -> dict[str, Any]:
    """
    Build the RelationData dictionary for a relation element.

    On top of the converted attributes, the tags are stored under ``"tag"``
    and the relation members under ``"member"``.
    """
    attributes = _dom_get_attributes(dom_element)
    tags = _dom_get_tag(dom_element)
    members = _dom_get_member(dom_element)
    return {**attributes, "tag": tags, "member": members}
def dom_parse_changeset(
    dom_element: Element, include_discussion: bool = False
) -> dict[str, Any]:
    """
    Build the ChangesetData dictionary for a changeset element.

    The converted attributes are extended with the tags under ``"tag"``.
    The ``"discussion"`` key is only added when ``include_discussion`` is
    true.
    """
    changeset = _dom_get_attributes(dom_element)
    changeset["tag"] = _dom_get_tag(dom_element)
    if not include_discussion:
        return changeset

    changeset["discussion"] = _dom_get_discussion(dom_element)
    return changeset
def dom_parse_note(dom_element: Element) -> dict[str, Any]:
    """
    Build the NoteData dictionary for a note element.

    Starts from the converted attributes of ``dom_element`` and then sets
    ``id``, ``status``, ``date_created`` and ``date_closed`` from the values
    returned by ``xmlbuilder._get_xml_value`` (the two dates are additionally
    passed through ``_parse_date``). The note's comments are stored under
    ``"comments"``.
    """

    def field(name: str) -> Any:
        # Shorthand for looking up a named value on this note element.
        return xmlbuilder._get_xml_value(dom_element, name)

    note = _dom_get_attributes(dom_element)
    note.update(
        id=field("id"),
        status=field("status"),
        date_created=_parse_date(field("date_created")),
        date_closed=_parse_date(field("date_closed")),
        comments=_dom_get_comments(dom_element),
    )
    return note
def _dom_get_attributes(dom_element: Element) -> dict[str, Any]:
    """
    Convert the XML attributes of a dom_element into a dictionary.

    Attributes with a known name are run through a converter (``int``,
    ``float``, a ``"true"`` comparison, or ``_parse_date``); any other
    attribute keeps its raw string value. Only ``KeyError`` is handled, so
    a failing ``int``/``float`` conversion propagates to the caller.
    """

    def as_bool(raw: str) -> bool:
        return raw == "true"

    # Converters grouped by target type; names not listed stay untouched.
    converters: dict[str, Any] = {
        **dict.fromkeys(
            ("uid", "changeset", "version", "id", "ref", "comments_count"),
            int,
        ),
        **dict.fromkeys(("lat", "lon"), float),
        **dict.fromkeys(("open", "visible"), as_bool),
        **dict.fromkeys(
            ("timestamp", "created_at", "closed_at", "date"), _parse_date
        ),
    }

    parsed: dict[str, Any] = {}
    for name, raw in dom_element.attributes.items():
        try:
            value = converters[name](raw)
        except KeyError:
            value = raw
        parsed[name] = value
    return parsed
def _dom_get_tag(dom_element: Element) -> dict[str, str]:
    """
    Collect the ``<tag>`` elements below a dom_element as a dictionary.

    Each tag contributes its ``k`` attribute as key and its ``v`` attribute
    as value; if a key occurs more than once, the last occurrence wins.
    """
    return {
        tag_node.attributes["k"].value: tag_node.attributes["v"].value
        for tag_node in dom_element.getElementsByTagName("tag")
    }
def _dom_get_nd(dom_element: Element) -> list[int]:
    """
    Returns the list of node references of a dom_element.

    Every ``<nd>`` element below ``dom_element`` contributes the integer
    value of its ``ref`` attribute, in document order.

    :raises KeyError: if an ``<nd>`` element has no ``ref`` attribute
    :raises ValueError: if a ``ref`` attribute is not a valid integer
    """
    return [
        int(nd.attributes["ref"].value)
        for nd in dom_element.getElementsByTagName("nd")
    ]
def _dom_get_discussion(dom_element: Element) -> list[dict[str, Any]]:
    """
    Returns the list of discussion comments of a dom_element.

    Only the first ``<discussion>`` element is considered. Each ``<comment>``
    below it becomes a dictionary of its converted attributes plus a
    ``"text"`` entry. If there is no ``<discussion>`` element the list is
    empty; an ``IndexError`` raised anywhere during extraction is swallowed
    and whatever was collected up to that point is returned.
    """
    comments: list[dict[str, Any]] = []
    try:
        discussion_node = dom_element.getElementsByTagName("discussion")[0]
        for comment_node in discussion_node.getElementsByTagName("comment"):
            entry = _dom_get_attributes(comment_node)
            entry.update(text=xmlbuilder._get_xml_value(comment_node, "text"))
            comments.append(entry)
    except IndexError:
        pass
    return comments
def _dom_get_comments(dom_element: Element) -> list[dict[str, Any]]:
    """
    Returns the list of note comments of a dom_element.

    One dictionary is produced per ``<comment>`` element, with the keys
    ``date``, ``action``, ``text``, ``html``, ``uid`` and ``user``. All
    values come from ``xmlbuilder._get_xml_value``; ``date`` is additionally
    passed through ``_parse_date``.
    """
    read = xmlbuilder._get_xml_value
    return [
        {
            "date": _parse_date(read(comment_node, "date")),
            "action": read(comment_node, "action"),
            "text": read(comment_node, "text"),
            "html": read(comment_node, "html"),
            "uid": read(comment_node, "uid"),
            "user": read(comment_node, "user"),
        }
        for comment_node in dom_element.getElementsByTagName("comment")
    ]
def _dom_get_member(dom_element: Element) -> list[dict[str, Any]]:
    """
    Returns the relation members found below a dom_element.

    Each ``<member>`` element is represented by the dictionary of its
    converted attributes.
    """
    return [
        _dom_get_attributes(member_node)
        for member_node in dom_element.getElementsByTagName("member")
    ]
def _parse_date(date_string: Optional[str]) -> Union[datetime, str, None]:
    """
    Parse a timestamp string into a ``datetime``.

    Two layouts are tried in order: ``%Y-%m-%d %H:%M:%S UTC`` and
    ``%Y-%m-%dT%H:%M:%SZ``. Neither format carries a timezone directive, so
    the resulting ``datetime`` is naive.

    :param date_string: the timestamp to parse, or ``None``
    :returns: the parsed ``datetime``; if no format matches, the input is
        returned unchanged (``None`` stays ``None``)
    """
    if date_string is None:
        # Nothing to parse: return directly instead of provoking (and
        # logging) a TypeError from strptime for every known format.
        return None

    date_formats = ("%Y-%m-%d %H:%M:%S UTC", "%Y-%m-%dT%H:%M:%SZ")
    for date_format in date_formats:
        try:
            return datetime.strptime(date_string, date_format)
        except (ValueError, TypeError):
            # TypeError is still handled so that non-string input keeps being
            # returned unchanged. Lazy %-args avoid formatting the message
            # when debug logging is disabled.
            logger.debug("%s does not match %s", date_string, date_format)
    return date_string
|