[wsj] Add new extractor (Fixes #4854)

This commit is contained in:
Philipp Hagemeister 2015-02-03 10:58:28 +01:00
parent 1a6373ef39
commit 9bb8e0a3f9
5 changed files with 95 additions and 1 deletions

View file

@@ -701,7 +701,7 @@ def unified_strdate(date_str, day_first=True):
# %z (UTC offset) is only supported in python>=3.2
date_str = re.sub(r' ?(\+|-)[0-9]{2}:?[0-9]{2}$', '', date_str)
# Remove AM/PM + timezone
-date_str = re.sub(r'(?i)\s*(?:AM|PM)\s+[A-Z]+', '', date_str)
+date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
format_expressions = [
'%d %B %Y',