korean.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. # Copyright 2010-2023 Kurt McKee <contactme@kurtmckee.org>
  2. # Copyright 2002-2008 Mark Pilgrim
  3. # All rights reserved.
  4. #
  5. # This file is a part of feedparser.
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions are met:
  9. #
  10. # * Redistributions of source code must retain the above copyright notice,
  11. # this list of conditions and the following disclaimer.
  12. # * Redistributions in binary form must reproduce the above copyright notice,
  13. # this list of conditions and the following disclaimer in the documentation
  14. # and/or other materials provided with the distribution.
  15. #
  16. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
  17. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  26. # POSSIBILITY OF SUCH DAMAGE.
  27. import re
  28. from .w3dtf import _parse_date_w3dtf
  29. # 8-bit date handling routines written by ytrewq1.
  30. _korean_year = '\ub144' # b3e2 in euc-kr
  31. _korean_month = '\uc6d4' # bff9 in euc-kr
  32. _korean_day = '\uc77c' # c0cf in euc-kr
  33. _korean_am = '\uc624\uc804' # bfc0 c0fc in euc-kr
  34. _korean_pm = '\uc624\ud6c4' # bfc0 c8c4 in euc-kr
  35. _korean_onblog_date_re = re.compile(
  36. r'(\d{4})%s\s+(\d{2})%s\s+(\d{2})%s\s+(\d{2}):(\d{2}):(\d{2})'
  37. % (_korean_year, _korean_month, _korean_day)
  38. )
  39. _korean_nate_date_re = re.compile(
  40. r'(\d{4})-(\d{2})-(\d{2})\s+(%s|%s)\s+(\d{,2}):(\d{,2}):(\d{,2})'
  41. % (_korean_am, _korean_pm))
  42. def _parse_date_onblog(dateString):
  43. """Parse a string according to the OnBlog 8-bit date format"""
  44. m = _korean_onblog_date_re.match(dateString)
  45. if not m:
  46. return
  47. w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
  48. {'year': m.group(1), 'month': m.group(2), 'day': m.group(3),
  49. 'hour': m.group(4), 'minute': m.group(5), 'second': m.group(6),
  50. 'zonediff': '+09:00'}
  51. return _parse_date_w3dtf(w3dtfdate)
  52. def _parse_date_nate(dateString):
  53. """Parse a string according to the Nate 8-bit date format"""
  54. m = _korean_nate_date_re.match(dateString)
  55. if not m:
  56. return
  57. hour = int(m.group(5))
  58. ampm = m.group(4)
  59. if ampm == _korean_pm:
  60. hour += 12
  61. hour = str(hour)
  62. if len(hour) == 1:
  63. hour = '0' + hour
  64. w3dtfdate = '%(year)s-%(month)s-%(day)sT%(hour)s:%(minute)s:%(second)s%(zonediff)s' % \
  65. {
  66. 'year': m.group(1),
  67. 'month': m.group(2),
  68. 'day': m.group(3),
  69. 'hour': hour,
  70. 'minute': m.group(6),
  71. 'second': m.group(7),
  72. 'zonediff': '+09:00',
  73. }
  74. return _parse_date_w3dtf(w3dtfdate)