greek.py 3.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. # Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
  2. # Copyright 2002-2008 Mark Pilgrim
  3. # All rights reserved.
  4. #
  5. # This file is a part of feedparser.
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions are met:
  9. #
  10. # * Redistributions of source code must retain the above copyright notice,
  11. # this list of conditions and the following disclaimer.
  12. # * Redistributions in binary form must reproduce the above copyright notice,
  13. # this list of conditions and the following disclaimer in the documentation
  14. # and/or other materials provided with the distribution.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
  17. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  26. # POSSIBILITY OF SUCH DAMAGE.
  27. import re
  28. from .rfc822 import _parse_date_rfc822
  29. # Unicode strings for Greek date strings
  30. _greek_months = {
  31. '\u0399\u03b1\u03bd': 'Jan', # c9e1ed in iso-8859-7
  32. '\u03a6\u03b5\u03b2': 'Feb', # d6e5e2 in iso-8859-7
  33. '\u039c\u03ac\u03ce': 'Mar', # ccdcfe in iso-8859-7
  34. '\u039c\u03b1\u03ce': 'Mar', # cce1fe in iso-8859-7
  35. '\u0391\u03c0\u03c1': 'Apr', # c1f0f1 in iso-8859-7
  36. '\u039c\u03ac\u03b9': 'May', # ccdce9 in iso-8859-7
  37. '\u039c\u03b1\u03ca': 'May', # cce1fa in iso-8859-7
  38. '\u039c\u03b1\u03b9': 'May', # cce1e9 in iso-8859-7
  39. '\u0399\u03bf\u03cd\u03bd': 'Jun', # c9effded in iso-8859-7
  40. '\u0399\u03bf\u03bd': 'Jun', # c9efed in iso-8859-7
  41. '\u0399\u03bf\u03cd\u03bb': 'Jul', # c9effdeb in iso-8859-7
  42. '\u0399\u03bf\u03bb': 'Jul', # c9f9eb in iso-8859-7
  43. '\u0391\u03cd\u03b3': 'Aug', # c1fde3 in iso-8859-7
  44. '\u0391\u03c5\u03b3': 'Aug', # c1f5e3 in iso-8859-7
  45. '\u03a3\u03b5\u03c0': 'Sep', # d3e5f0 in iso-8859-7
  46. '\u039f\u03ba\u03c4': 'Oct', # cfeaf4 in iso-8859-7
  47. '\u039d\u03bf\u03ad': 'Nov', # cdefdd in iso-8859-7
  48. '\u039d\u03bf\u03b5': 'Nov', # cdefe5 in iso-8859-7
  49. '\u0394\u03b5\u03ba': 'Dec', # c4e5ea in iso-8859-7
  50. }
  51. _greek_wdays = {
  52. '\u039a\u03c5\u03c1': 'Sun', # caf5f1 in iso-8859-7
  53. '\u0394\u03b5\u03c5': 'Mon', # c4e5f5 in iso-8859-7
  54. '\u03a4\u03c1\u03b9': 'Tue', # d4f1e9 in iso-8859-7
  55. '\u03a4\u03b5\u03c4': 'Wed', # d4e5f4 in iso-8859-7
  56. '\u03a0\u03b5\u03bc': 'Thu', # d0e5ec in iso-8859-7
  57. '\u03a0\u03b1\u03c1': 'Fri', # d0e1f1 in iso-8859-7
  58. '\u03a3\u03b1\u03b2': 'Sat', # d3e1e2 in iso-8859-7
  59. }
  60. _greek_date_format_re = re.compile(r'([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)')
  61. def _parse_date_greek(date_string):
  62. """Parse a string according to a Greek 8-bit date format."""
  63. m = _greek_date_format_re.match(date_string)
  64. if not m:
  65. return
  66. wday = _greek_wdays[m.group(1)]
  67. month = _greek_months[m.group(3)]
  68. rfc822date = '%(wday)s, %(day)s %(month)s %(year)s %(hour)s:%(minute)s:%(second)s %(zonediff)s' % \
  69. {
  70. 'wday': wday,
  71. 'day': m.group(2),
  72. 'month': month,
  73. 'year': m.group(4),
  74. 'hour': m.group(5),
  75. 'minute': m.group(6),
  76. 'second': m.group(7),
  77. 'zonediff': m.group(8),
  78. }
  79. return _parse_date_rfc822(rfc822date)