georss.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. # Support for the GeoRSS format
  2. # Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
  3. # Copyright 2002-2008 Mark Pilgrim
  4. # All rights reserved.
  5. #
  6. # This file is a part of feedparser.
  7. #
  8. # Redistribution and use in source and binary forms, with or without
  9. # modification, are permitted provided that the following conditions are met:
  10. #
  11. # * Redistributions of source code must retain the above copyright notice,
  12. # this list of conditions and the following disclaimer.
  13. # * Redistributions in binary form must reproduce the above copyright notice,
  14. # this list of conditions and the following disclaimer in the documentation
  15. # and/or other materials provided with the distribution.
  16. #
  17. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
  18. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. # POSSIBILITY OF SUCH DAMAGE.
  28. # Required for Python 3.6 compatibility.
  29. from __future__ import generator_stop
  30. from ..util import FeedParserDict
  31. class Namespace(object):
  32. supported_namespaces = {
  33. 'http://www.w3.org/2003/01/geo/wgs84_pos#': 'geo',
  34. 'http://www.georss.org/georss': 'georss',
  35. 'http://www.opengis.net/gml': 'gml',
  36. }
  37. def __init__(self):
  38. self.ingeometry = 0
  39. super(Namespace, self).__init__()
  40. def _start_georssgeom(self, attrs_d):
  41. self.push('geometry', 0)
  42. context = self._get_context()
  43. context['where'] = FeedParserDict()
  44. _start_georss_point = _start_georssgeom
  45. _start_georss_line = _start_georssgeom
  46. _start_georss_polygon = _start_georssgeom
  47. _start_georss_box = _start_georssgeom
  48. def _save_where(self, geometry):
  49. context = self._get_context()
  50. context['where'].update(geometry)
  51. def _end_georss_point(self):
  52. geometry = _parse_georss_point(self.pop('geometry'))
  53. if geometry:
  54. self._save_where(geometry)
  55. def _end_georss_line(self):
  56. geometry = _parse_georss_line(self.pop('geometry'))
  57. if geometry:
  58. self._save_where(geometry)
  59. def _end_georss_polygon(self):
  60. this = self.pop('geometry')
  61. geometry = _parse_georss_polygon(this)
  62. if geometry:
  63. self._save_where(geometry)
  64. def _end_georss_box(self):
  65. geometry = _parse_georss_box(self.pop('geometry'))
  66. if geometry:
  67. self._save_where(geometry)
  68. def _start_where(self, attrs_d):
  69. self.push('where', 0)
  70. context = self._get_context()
  71. context['where'] = FeedParserDict()
  72. _start_georss_where = _start_where
  73. def _parse_srs_attrs(self, attrs_d):
  74. srs_name = attrs_d.get('srsname')
  75. try:
  76. srs_dimension = int(attrs_d.get('srsdimension', '2'))
  77. except ValueError:
  78. srs_dimension = 2
  79. context = self._get_context()
  80. if 'where' not in context:
  81. context['where'] = {}
  82. context['where']['srsName'] = srs_name
  83. context['where']['srsDimension'] = srs_dimension
  84. def _start_gml_point(self, attrs_d):
  85. self._parse_srs_attrs(attrs_d)
  86. self.ingeometry = 1
  87. self.push('geometry', 0)
  88. def _start_gml_linestring(self, attrs_d):
  89. self._parse_srs_attrs(attrs_d)
  90. self.ingeometry = 'linestring'
  91. self.push('geometry', 0)
  92. def _start_gml_polygon(self, attrs_d):
  93. self._parse_srs_attrs(attrs_d)
  94. self.push('geometry', 0)
  95. def _start_gml_exterior(self, attrs_d):
  96. self.push('geometry', 0)
  97. def _start_gml_linearring(self, attrs_d):
  98. self.ingeometry = 'polygon'
  99. self.push('geometry', 0)
  100. def _start_gml_pos(self, attrs_d):
  101. self.push('pos', 0)
  102. def _end_gml_pos(self):
  103. this = self.pop('pos')
  104. context = self._get_context()
  105. srs_name = context['where'].get('srsName')
  106. srs_dimension = context['where'].get('srsDimension', 2)
  107. swap = True
  108. if srs_name and "EPSG" in srs_name:
  109. epsg = int(srs_name.split(":")[-1])
  110. swap = bool(epsg in _geogCS)
  111. geometry = _parse_georss_point(this, swap=swap, dims=srs_dimension)
  112. if geometry:
  113. self._save_where(geometry)
  114. def _start_gml_poslist(self, attrs_d):
  115. self.push('pos', 0)
  116. def _end_gml_poslist(self):
  117. this = self.pop('pos')
  118. context = self._get_context()
  119. srs_name = context['where'].get('srsName')
  120. srs_dimension = context['where'].get('srsDimension', 2)
  121. swap = True
  122. if srs_name and "EPSG" in srs_name:
  123. epsg = int(srs_name.split(":")[-1])
  124. swap = bool(epsg in _geogCS)
  125. geometry = _parse_poslist(
  126. this, self.ingeometry, swap=swap, dims=srs_dimension)
  127. if geometry:
  128. self._save_where(geometry)
  129. def _end_geom(self):
  130. self.ingeometry = 0
  131. self.pop('geometry')
  132. _end_gml_point = _end_geom
  133. _end_gml_linestring = _end_geom
  134. _end_gml_linearring = _end_geom
  135. _end_gml_exterior = _end_geom
  136. _end_gml_polygon = _end_geom
  137. def _end_where(self):
  138. self.pop('where')
  139. _end_georss_where = _end_where
# GeoRSS geometry parsers. Each returns a dict with 'type' and 'coordinates'
# items, or None in the case of a parsing error.
  142. def _parse_poslist(value, geom_type, swap=True, dims=2):
  143. if geom_type == 'linestring':
  144. return _parse_georss_line(value, swap, dims)
  145. elif geom_type == 'polygon':
  146. ring = _parse_georss_line(value, swap, dims)
  147. return {'type': 'Polygon', 'coordinates': (ring['coordinates'],)}
  148. else:
  149. return None
  150. def _gen_georss_coords(value, swap=True, dims=2):
  151. # A generator of (lon, lat) pairs from a string of encoded GeoRSS
  152. # coordinates. Converts to floats and swaps order.
  153. latlons = (float(ll) for ll in value.replace(',', ' ').split())
  154. while True:
  155. try:
  156. t = [next(latlons), next(latlons)][::swap and -1 or 1]
  157. if dims == 3:
  158. t.append(next(latlons))
  159. yield tuple(t)
  160. except StopIteration:
  161. return
  162. def _parse_georss_point(value, swap=True, dims=2):
  163. # A point contains a single latitude-longitude pair, separated by
  164. # whitespace. We'll also handle comma separators.
  165. try:
  166. coords = list(_gen_georss_coords(value, swap, dims))
  167. return {'type': 'Point', 'coordinates': coords[0]}
  168. except (IndexError, ValueError):
  169. return None
  170. def _parse_georss_line(value, swap=True, dims=2):
  171. # A line contains a space separated list of latitude-longitude pairs in
  172. # WGS84 coordinate reference system, with each pair separated by
  173. # whitespace. There must be at least two pairs.
  174. try:
  175. coords = list(_gen_georss_coords(value, swap, dims))
  176. return {'type': 'LineString', 'coordinates': coords}
  177. except (IndexError, ValueError):
  178. return None
  179. def _parse_georss_polygon(value, swap=True, dims=2):
  180. # A polygon contains a space separated list of latitude-longitude pairs,
  181. # with each pair separated by whitespace. There must be at least four
  182. # pairs, with the last being identical to the first (so a polygon has a
  183. # minimum of three actual points).
  184. try:
  185. ring = list(_gen_georss_coords(value, swap, dims))
  186. except (IndexError, ValueError):
  187. return None
  188. if len(ring) < 4:
  189. return None
  190. return {'type': 'Polygon', 'coordinates': (ring,)}
  191. def _parse_georss_box(value, swap=True, dims=2):
  192. # A bounding box is a rectangular region, often used to define the extents
  193. # of a map or a rough area of interest. A box contains two space separate
  194. # latitude-longitude pairs, with each pair separated by whitespace. The
  195. # first pair is the lower corner, the second is the upper corner.
  196. try:
  197. coords = list(_gen_georss_coords(value, swap, dims))
  198. return {'type': 'Box', 'coordinates': tuple(coords)}
  199. except (IndexError, ValueError):
  200. return None
  201. # The list of EPSG codes for geographic (latitude/longitude) coordinate
  202. # systems to support decoding of GeoRSS GML profiles.
  203. _geogCS = [
  204. 3819, 3821, 3824, 3889, 3906, 4001, 4002, 4003, 4004, 4005, 4006, 4007, 4008,
  205. 4009, 4010, 4011, 4012, 4013, 4014, 4015, 4016, 4018, 4019, 4020, 4021, 4022,
  206. 4023, 4024, 4025, 4027, 4028, 4029, 4030, 4031, 4032, 4033, 4034, 4035, 4036,
  207. 4041, 4042, 4043, 4044, 4045, 4046, 4047, 4052, 4053, 4054, 4055, 4075, 4081,
  208. 4120, 4121, 4122, 4123, 4124, 4125, 4126, 4127, 4128, 4129, 4130, 4131, 4132,
  209. 4133, 4134, 4135, 4136, 4137, 4138, 4139, 4140, 4141, 4142, 4143, 4144, 4145,
  210. 4146, 4147, 4148, 4149, 4150, 4151, 4152, 4153, 4154, 4155, 4156, 4157, 4158,
  211. 4159, 4160, 4161, 4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 4170, 4171,
  212. 4172, 4173, 4174, 4175, 4176, 4178, 4179, 4180, 4181, 4182, 4183, 4184, 4185,
  213. 4188, 4189, 4190, 4191, 4192, 4193, 4194, 4195, 4196, 4197, 4198, 4199, 4200,
  214. 4201, 4202, 4203, 4204, 4205, 4206, 4207, 4208, 4209, 4210, 4211, 4212, 4213,
  215. 4214, 4215, 4216, 4218, 4219, 4220, 4221, 4222, 4223, 4224, 4225, 4226, 4227,
  216. 4228, 4229, 4230, 4231, 4232, 4233, 4234, 4235, 4236, 4237, 4238, 4239, 4240,
  217. 4241, 4242, 4243, 4244, 4245, 4246, 4247, 4248, 4249, 4250, 4251, 4252, 4253,
  218. 4254, 4255, 4256, 4257, 4258, 4259, 4260, 4261, 4262, 4263, 4264, 4265, 4266,
  219. 4267, 4268, 4269, 4270, 4271, 4272, 4273, 4274, 4275, 4276, 4277, 4278, 4279,
  220. 4280, 4281, 4282, 4283, 4284, 4285, 4286, 4287, 4288, 4289, 4291, 4292, 4293,
  221. 4294, 4295, 4296, 4297, 4298, 4299, 4300, 4301, 4302, 4303, 4304, 4306, 4307,
  222. 4308, 4309, 4310, 4311, 4312, 4313, 4314, 4315, 4316, 4317, 4318, 4319, 4322,
  223. 4324, 4326, 4463, 4470, 4475, 4483, 4490, 4555, 4558, 4600, 4601, 4602, 4603,
  224. 4604, 4605, 4606, 4607, 4608, 4609, 4610, 4611, 4612, 4613, 4614, 4615, 4616,
  225. 4617, 4618, 4619, 4620, 4621, 4622, 4623, 4624, 4625, 4626, 4627, 4628, 4629,
  226. 4630, 4631, 4632, 4633, 4634, 4635, 4636, 4637, 4638, 4639, 4640, 4641, 4642,
  227. 4643, 4644, 4645, 4646, 4657, 4658, 4659, 4660, 4661, 4662, 4663, 4664, 4665,
  228. 4666, 4667, 4668, 4669, 4670, 4671, 4672, 4673, 4674, 4675, 4676, 4677, 4678,
  229. 4679, 4680, 4681, 4682, 4683, 4684, 4685, 4686, 4687, 4688, 4689, 4690, 4691,
  230. 4692, 4693, 4694, 4695, 4696, 4697, 4698, 4699, 4700, 4701, 4702, 4703, 4704,
  231. 4705, 4706, 4707, 4708, 4709, 4710, 4711, 4712, 4713, 4714, 4715, 4716, 4717,
  232. 4718, 4719, 4720, 4721, 4722, 4723, 4724, 4725, 4726, 4727, 4728, 4729, 4730,
  233. 4731, 4732, 4733, 4734, 4735, 4736, 4737, 4738, 4739, 4740, 4741, 4742, 4743,
  234. 4744, 4745, 4746, 4747, 4748, 4749, 4750, 4751, 4752, 4753, 4754, 4755, 4756,
  235. 4757, 4758, 4759, 4760, 4761, 4762, 4763, 4764, 4765, 4801, 4802, 4803, 4804,
  236. 4805, 4806, 4807, 4808, 4809, 4810, 4811, 4813, 4814, 4815, 4816, 4817, 4818,
  237. 4819, 4820, 4821, 4823, 4824, 4901, 4902, 4903, 4904, 4979,
  238. ]