Improve geo bypass mechanism

* Rename options to preffixly match with --geo-verification-proxy
* Introduce _GEO_COUNTRIES for extractors
* Implement faking IP right away for sites with known geo restriction
This commit is contained in:
Sergey M․ 2017-02-19 03:53:23 +07:00 committed by Sergey M
parent 0a840f584c
commit 4248dad92b
13 changed files with 72 additions and 31 deletions

View file

@ -323,10 +323,15 @@ class InfoExtractor(object):
_real_extract() methods and define a _VALID_URL regexp.
Probably, they should also be added to the list of extractors.
_BYPASS_GEO attribute may be set to False in order to disable
_GEO_BYPASS attribute may be set to False in order to disable
geo restriction bypass mechanisms for a particular extractor.
Though it won't disable explicit geo restriction bypass based on
country code provided with geo_bypass_country.
country code provided with geo_bypass_country. (experimental)
_GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
countries for this extractor. One of these countries will be used by
geo restriction bypass mechanism right away in order to bypass
geo restriction, of course, if the mechanism is not disabled. (experimental)
Finally, the _WORKING attribute should be set to False for broken IEs
in order to warn the users and skip the tests.
@ -335,7 +340,8 @@ class InfoExtractor(object):
_ready = False
_downloader = None
_x_forwarded_for_ip = None
_BYPASS_GEO = True
_GEO_BYPASS = True
_GEO_COUNTRIES = None
_WORKING = True
def __init__(self, downloader=None):
@ -370,14 +376,28 @@ class InfoExtractor(object):
def initialize(self):
"""Initializes an instance (authentication, etc)."""
if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None)
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
self.__initialize_geo_bypass()
if not self._ready:
self._real_initialize()
self._ready = True
def __initialize_geo_bypass(self):
if not self._x_forwarded_for_ip:
country_code = self._downloader.params.get('geo_bypass_country', None)
# If there is no explicit country for geo bypass specified and
# the extractor is known to be geo restricted let's fake IP
# as X-Forwarded-For right away.
if (not country_code and
self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and
self._GEO_COUNTRIES):
country_code = random.choice(self._GEO_COUNTRIES)
if country_code:
self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
if self._downloader.params.get('verbose', False):
self._downloader.to_stdout(
'[debug] Using fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
def extract(self, url):
"""Extracts URL information and returns it in list of dicts."""
try:
@ -389,16 +409,8 @@ class InfoExtractor(object):
ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
return ie_result
except GeoRestrictedError as e:
if (not self._downloader.params.get('geo_bypass_country', None) and
self._BYPASS_GEO and
self._downloader.params.get('geo_bypass', True) and
not self._x_forwarded_for_ip and
e.countries):
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(e.countries))
if self._x_forwarded_for_ip:
self.report_warning(
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
continue
if self.__maybe_fake_ip_and_retry(e.countries):
continue
raise
except ExtractorError:
raise
@ -407,6 +419,19 @@ class InfoExtractor(object):
except (KeyError, StopIteration) as e:
raise ExtractorError('An extractor error has occurred.', cause=e)
def __maybe_fake_ip_and_retry(self, countries):
if (not self._downloader.params.get('geo_bypass_country', None) and
self._GEO_BYPASS and
self._downloader.params.get('geo_bypass', True) and
not self._x_forwarded_for_ip and
countries):
self._x_forwarded_for_ip = GeoUtils.random_ipv4(random.choice(countries))
if self._x_forwarded_for_ip:
self.report_warning(
'Video is geo restricted. Retrying extraction with fake %s IP as X-Forwarded-For.' % self._x_forwarded_for_ip)
return True
return False
def set_downloader(self, downloader):
"""Sets the downloader for this IE."""
self._downloader = downloader