w3dtf.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. # Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
  2. # Copyright 2002-2008 Mark Pilgrim
  3. # All rights reserved.
  4. #
  5. # This file is a part of feedparser.
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions are met:
  9. #
  10. # * Redistributions of source code must retain the above copyright notice,
  11. # this list of conditions and the following disclaimer.
  12. # * Redistributions in binary form must reproduce the above copyright notice,
  13. # this list of conditions and the following disclaimer in the documentation
  14. # and/or other materials provided with the distribution.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
  17. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  26. # POSSIBILITY OF SUCH DAMAGE.
  27. import datetime
  # Lookup table mapping lowercase timezone names to whole-hour offsets.
  # `_parse_date_w3dtf` uses the value as the hour offset to subtract from
  # the parsed local time; names missing from the table are treated as 0.
  #
  # NOTE(review): the single-letter entries ('a', 'n', 'm', 'y') appear to
  # follow the military-zone signs from RFC 822, which RFC 2822 section 4.3
  # describes as non-standard -- confirm before changing them.
  timezonenames = {
      # Zero offset
      'ut': 0,
      'gmt': 0,
      'z': 0,
      # -3 / -4
      'adt': -3,
      'ast': -4,
      'at': -4,
      # -4 / -5
      'edt': -4,
      'est': -5,
      'et': -5,
      # -5 / -6
      'cdt': -5,
      'cst': -6,
      'ct': -6,
      # -6 / -7
      'mdt': -6,
      'mst': -7,
      'mt': -7,
      # -7 / -8
      'pdt': -7,
      'pst': -8,
      'pt': -8,
      # Single-letter zones
      'a': -1,
      'n': 1,
      'm': -12,
      'y': 12,
  }
  38. # W3 date and time format parser
  39. # http://www.w3.org/TR/NOTE-datetime
  40. # Also supports MSSQL-style datetimes as defined at:
  41. # http://msdn.microsoft.com/en-us/library/ms186724.aspx
  42. # (basically, allow a space as a date/time/timezone separator)
  def _parse_date_w3dtf(datestr):
      """Parse a W3DTF or MSSQL-style date string into a UTC time tuple.

      Accepted shapes are ``YYYY[-MM[-DD]]`` optionally followed by a time,
      separated from the date either by ``T`` (W3DTF) or by whitespace
      (MSSQL style). The time is ``HH[:MM[:SS[.fraction]]]`` and may be
      followed by ``Z``, a numeric ``+HH:MM`` / ``-HH:MM`` offset, or (in the
      MSSQL style, as a third whitespace-separated field) a timezone name
      looked up in ``timezonenames``. Missing month/day default to 1, missing
      minutes/seconds default to 0, fractional seconds are truncated, and an
      unknown timezone name is treated as an offset of 0.

      :param datestr: the date string to parse
      :returns: a ``time.struct_time`` in UTC, or ``None`` if ``datestr`` is
          empty, malformed, or describes a value outside the range that
          ``datetime`` can represent
      """
      if not datestr.strip():
          return None

      parts = datestr.lower().split('t')
      if len(parts) == 1:
          # This may be a date only, or may be an MSSQL-style date
          parts = parts[0].split()
          if len(parts) == 1:
              # Treat this as a date only
              parts.append('00:00:00z')
      elif len(parts) > 2:
          return None

      date_fields = parts[0].split('-', 2)
      if not date_fields or len(date_fields[0]) != 4:
          return None
      # Ensure that there are 3 date fields. Using '1' sets the default
      # month to January and the default day to the 1st of the month.
      date_fields.extend(['1'] * (3 - len(date_fields)))
      try:
          year, month, day = [int(field) for field in date_fields]
      except ValueError:
          # The date contained a non-integer field.
          return None

      if parts[1].endswith('z'):
          parts[1] = parts[1][:-1]
          parts.append('z')

      # Split a numeric timezone offset, if any, off the end of the time.
      # A '-' takes priority over a '+'; with neither present the whole
      # string is the time and the offset is empty.
      clock = parts[1]
      loc = clock.find('-')
      if loc == -1:
          loc = clock.find('+')
      if loc == -1:
          loc = len(clock)
      # If this is an MSSQL-style date then parts[2] already contains
      # the timezone information, so `append()` will not affect it.
      parts.append(clock[loc:])
      parts[1] = clock[:loc]

      time_fields = parts[1].split(':', 2)
      # Ensure that there are 3 time fields. Using '0' means that the
      # minutes and seconds, if missing, will default to 0.
      time_fields.extend(['0'] * (3 - len(time_fields)))

      tzinfo = parts[2]
      if tzinfo[:1] in ('-', '+'):
          try:
              tzhour = int(tzinfo[1:3])
              tzmin = int(tzinfo[4:])
          except ValueError:
              return None
          if tzinfo.startswith('-'):
              tzhour = -tzhour
              tzmin = -tzmin
      else:
          tzhour = timezonenames.get(tzinfo, 0)
          tzmin = 0

      try:
          hour, minute, second = [int(float(field)) for field in time_fields]
      except (OverflowError, ValueError):
          # `int(float(...))` raises OverflowError for infinities such as
          # 'inf' or '1e999', and ValueError for NaN or non-numeric text.
          return None

      # Build the timestamp and the timezone delta, then shift to UTC.
      # Any of these steps can raise OverflowError when a component is
      # too large (for example a 20-digit month or offset), so they all
      # share one handler.
      try:
          stamp = datetime.datetime(year, month, day, hour, minute, second)
          delta = datetime.timedelta(hours=tzhour, minutes=tzmin)
          # Return the date and timestamp in a UTC 9-tuple
          return (stamp - delta).utctimetuple()
      except (OverflowError, ValueError):
          # IronPython throws ValueErrors instead of OverflowErrors
          return None