# iso8601.py
# Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
# Copyright 2002-2008 Mark Pilgrim
# All rights reserved.
#
# This file is a part of feedparser.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import re
import time
# ISO-8601 date parsing routines written by Fazal Majid.
# The ISO 8601 standard is very convoluted and irregular - a full ISO 8601
# parser is beyond the scope of feedparser and would be a worthwhile addition
# to the Python library.
# A single regular expression cannot parse ISO 8601 date formats into groups
# as the standard is highly irregular (for instance is 030104 2003-01-04 or
# 0301-04-01), so we use templates instead.
# Please note the order in templates is significant because we need a
# greedy match.
  38. _iso8601_tmpl = [
  39. 'YYYY-?MM-?DD',
  40. 'YYYY-0MM?-?DD',
  41. 'YYYY-MM',
  42. 'YYYY-?OOO',
  43. 'YY-?MM-?DD',
  44. 'YY-?OOO',
  45. 'YYYY',
  46. '-YY-?MM',
  47. '-OOO',
  48. '-YY',
  49. '--MM-?DD',
  50. '--MM',
  51. '---DD',
  52. 'CC',
  53. '',
  54. ]
  55. _iso8601_re = [
  56. tmpl.replace(
  57. 'YYYY', r'(?P<year>\d{4})').replace(
  58. 'YY', r'(?P<year>\d\d)').replace(
  59. 'MM', r'(?P<month>[01]\d)').replace(
  60. 'DD', r'(?P<day>[0123]\d)').replace(
  61. 'OOO', r'(?P<ordinal>[0123]\d\d)').replace(
  62. 'CC', r'(?P<century>\d\d$)')
  63. + r'(T?(?P<hour>\d{2}):(?P<minute>\d{2})'
  64. + r'(:(?P<second>\d{2}))?'
  65. + r'(\.(?P<fracsecond>\d+))?'
  66. + r'(?P<tz>[+-](?P<tzhour>\d{2})(:(?P<tzmin>\d{2}))?|Z)?)?'
  67. for tmpl in _iso8601_tmpl]
  68. try:
  69. del tmpl
  70. except NameError:
  71. pass
  72. _iso8601_matches = [re.compile(regex).match for regex in _iso8601_re]
  73. try:
  74. del regex
  75. except NameError:
  76. pass
  77. def _parse_date_iso8601(date_string):
  78. """Parse a variety of ISO-8601-compatible formats like 20040105"""
  79. m = None
  80. for _iso8601_match in _iso8601_matches:
  81. m = _iso8601_match(date_string)
  82. if m:
  83. break
  84. if not m:
  85. return
  86. if m.span() == (0, 0):
  87. return
  88. params = m.groupdict()
  89. ordinal = params.get('ordinal', 0)
  90. if ordinal:
  91. ordinal = int(ordinal)
  92. else:
  93. ordinal = 0
  94. year = params.get('year', '--')
  95. if not year or year == '--':
  96. year = time.gmtime()[0]
  97. elif len(year) == 2:
  98. # ISO 8601 assumes current century, i.e. 93 -> 2093, NOT 1993
  99. year = 100 * int(time.gmtime()[0] / 100) + int(year)
  100. else:
  101. year = int(year)
  102. month = params.get('month', '-')
  103. if not month or month == '-':
  104. # ordinals are NOT normalized by mktime, we simulate them
  105. # by setting month=1, day=ordinal
  106. if ordinal:
  107. month = 1
  108. else:
  109. month = time.gmtime()[1]
  110. month = int(month)
  111. day = params.get('day', 0)
  112. if not day:
  113. # see above
  114. if ordinal:
  115. day = ordinal
  116. elif params.get('century', 0) or \
  117. params.get('year', 0) or params.get('month', 0):
  118. day = 1
  119. else:
  120. day = time.gmtime()[2]
  121. else:
  122. day = int(day)
  123. # special case of the century - is the first year of the 21st century
  124. # 2000 or 2001 ? The debate goes on...
  125. if 'century' in params:
  126. year = (int(params['century']) - 1) * 100 + 1
  127. # in ISO 8601 most fields are optional
  128. for field in ['hour', 'minute', 'second', 'tzhour', 'tzmin']:
  129. if not params.get(field, None):
  130. params[field] = 0
  131. hour = int(params.get('hour', 0))
  132. minute = int(params.get('minute', 0))
  133. second = int(float(params.get('second', 0)))
  134. # weekday is normalized by mktime(), we can ignore it
  135. weekday = 0
  136. daylight_savings_flag = -1
  137. tm = [year, month, day, hour, minute, second, weekday,
  138. ordinal, daylight_savings_flag]
  139. # ISO 8601 time zone adjustments
  140. tz = params.get('tz')
  141. if tz and tz != 'Z':
  142. if tz[0] == '-':
  143. tm[3] += int(params.get('tzhour', 0))
  144. tm[4] += int(params.get('tzmin', 0))
  145. elif tz[0] == '+':
  146. tm[3] -= int(params.get('tzhour', 0))
  147. tm[4] -= int(params.get('tzmin', 0))
  148. else:
  149. return None
  150. # Python's time.mktime() is a wrapper around the ANSI C mktime(3c)
  151. # which is guaranteed to normalize d/m/y/h/m/s.
  152. # Many implementations have bugs, but we'll pretend they don't.
  153. return time.localtime(time.mktime(tuple(tm)))