Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/02_concepts/05_proxy_management.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ When no `session_id` is provided, your custom proxy URLs are rotated round-robin

### Apify proxy configuration

With Apify Proxy, you can select specific proxy groups to use, or countries to connect from. This allows you to get better proxy performance after some initial research.
With Apify Proxy, you can select specific proxy groups to use, or countries to connect from. For even finer control, you can also target a specific subdivision (e.g. a US state) using the `subdivision_code` parameter alongside `country_code`. This allows you to get better proxy performance after some initial research.

<RunnableCodeBlock className="language-python" language="python">
{ApifyProxyConfig}
</RunnableCodeBlock>

Now your connections using proxy_url will use only Residential proxies from the US. Note that you must first get access to a proxy group before you are able to use it. You can find your available proxy groups in the [proxy dashboard](https://console.apify.com/proxy).
Now your connections using `proxy_url` will use only Residential proxies from California, US. The `subdivision_code` accepts a 1–3 character ISO 3166-2 code (e.g. `CA` for California) and currently only works for the United States (`country_code='US'`). Note that you must first get access to a proxy group before you are able to use it. You can find your available proxy groups in the [proxy dashboard](https://console.apify.com/proxy).

If you don't specify any proxy groups, automatic proxy selection will be used.

Expand Down
1 change: 1 addition & 0 deletions docs/02_concepts/code/05_apify_proxy_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ async def main() -> None:
proxy_cfg = await Actor.create_proxy_configuration(
groups=['RESIDENTIAL'],
country_code='US',
subdivision_code='CA',
)

if not proxy_cfg:
Expand Down
5 changes: 5 additions & 0 deletions src/apify/_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,7 @@ async def create_proxy_configuration(
password: str | None = None,
groups: list[str] | None = None,
country_code: str | None = None,
subdivision_code: str | None = None,
proxy_urls: list[str | None] | None = None,
new_url_function: _NewUrlFunction | None = None,
) -> ProxyConfiguration | None:
Expand All @@ -1332,6 +1333,8 @@ async def create_proxy_configuration(
if available.
groups: Proxy groups which the Apify Proxy should use, if provided.
country_code: Country which the Apify Proxy should use, if provided.
subdivision_code: Subdivision (e.g. US state) which the Apify Proxy should use, if provided.
Requires `country_code` to be set. 1-3 character ISO 3166-2 code of uppercase letters/digits (e.g. `CA` for California).
proxy_urls: Custom proxy server URLs which should be rotated through.
new_url_function: Function which returns a custom proxy URL to be used.

Expand All @@ -1342,6 +1345,7 @@ async def create_proxy_configuration(
if actor_proxy_input is not None:
if actor_proxy_input.get('useApifyProxy', False):
country_code = country_code or actor_proxy_input.get('apifyProxyCountry')
subdivision_code = subdivision_code or actor_proxy_input.get('apifyProxySubdivision')
groups = groups or actor_proxy_input.get('apifyProxyGroups')
else:
proxy_urls = actor_proxy_input.get('proxyUrls', [])
Expand All @@ -1352,6 +1356,7 @@ async def create_proxy_configuration(
password=password,
groups=groups,
country_code=country_code,
subdivision_code=subdivision_code,
proxy_urls=proxy_urls,
new_url_function=new_url_function,
_actor_config=self.configuration,
Expand Down
28 changes: 26 additions & 2 deletions src/apify/_proxy_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
# ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9)
SUBDIVISION_CODE_REGEX = re.compile(r'^[A-Z0-9]{1,3}$')
SESSION_ID_MAX_LENGTH = 50


Expand Down Expand Up @@ -89,6 +91,13 @@ class ProxyInfo(CrawleeProxyInfo):
This parameter is optional, by default, the proxy uses all available proxy servers from all countries.
"""

subdivision_code: str | None = None
"""If set, the proxy will use IP addresses geolocated to the specified subdivision (e.g. US state).
Requires `country_code` to be set. The subdivision code must be a 1-3 character ISO 3166-2 code
consisting of uppercase letters and digits (e.g. `CA` for California). Currently only supported for
the United States (`country_code='US'`).
"""


@docs_group('Configuration')
class ProxyConfiguration(CrawleeProxyConfiguration):
Expand All @@ -111,6 +120,7 @@ def __init__(
password: str | None = None,
groups: list[str] | None = None,
country_code: str | None = None,
subdivision_code: str | None = None,
proxy_urls: list[str | None] | None = None,
new_url_function: _NewUrlFunction | None = None,
tiered_proxy_urls: list[list[str | None]] | None = None,
Expand All @@ -126,6 +136,9 @@ def __init__(
if available.
groups: Proxy groups which the Apify Proxy should use, if provided.
country_code: Country which the Apify Proxy should use, if provided.
subdivision_code: Subdivision (e.g. US state) which the Apify Proxy should use, if provided.
Requires `country_code` to be set. 1-3 character ISO 3166-2 code of uppercase letters/digits
(e.g. `CA` for California).
proxy_urls: Custom proxy server URLs which should be rotated through.
new_url_function: Function which returns a custom proxy URL to be used.
tiered_proxy_urls: Proxy URLs arranged into tiers
Expand All @@ -141,11 +154,17 @@ def __init__(
country_code = str(country_code)
_check(country_code, label='country_code', pattern=COUNTRY_CODE_REGEX)

if subdivision_code:
if not country_code:
raise ValueError('ProxyConfiguration: Cannot set "subdivision_code" without "country_code".')
subdivision_code = str(subdivision_code)
_check(subdivision_code, label='subdivision_code', pattern=SUBDIVISION_CODE_REGEX)

if (proxy_urls or new_url_function or tiered_proxy_urls) and (groups or country_code):
raise ValueError(
'Cannot combine custom proxies with Apify Proxy!'
' It is not allowed to set "proxy_urls" or "new_url_function" combined with'
' "groups" or "country_code".'
' "groups", "country_code", or "subdivision_code".'
)

if proxy_urls and any('apify.com' in (url or '') for url in proxy_urls):
Expand Down Expand Up @@ -176,6 +195,7 @@ def __init__(

self._groups = list(groups) if groups else []
self._country_code = country_code
self._subdivision_code = subdivision_code

async def initialize(self) -> None:
"""Check if using proxy, if so, check the access.
Expand Down Expand Up @@ -247,6 +267,7 @@ async def new_proxy_info(
proxy_tier=proxy_info.proxy_tier,
groups=self._groups,
country_code=self._country_code or None,
subdivision_code=self._subdivision_code or None,
)

return ProxyInfo(
Expand Down Expand Up @@ -309,7 +330,10 @@ def _get_username(self, session_id: int | str | None = None) -> str:
if session_id is not None:
parts.append(f'session-{session_id}')
if self._country_code:
parts.append(f'country-{self._country_code}')
if self._subdivision_code:
parts.append(f'country-{self._country_code}_{self._subdivision_code}')
else:
parts.append(f'country-{self._country_code}')

if not parts:
return 'auto'
Expand Down
19 changes: 17 additions & 2 deletions tests/unit/actor/test_actor_create_proxy_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,22 @@ def request_handler(request: Request, response: Response) -> Response:
== f'http://groups-{"+".join(groups)},country-{country_code}:{DUMMY_PASSWORD}@proxy.apify.com:8000'
)

assert len(patched_apify_client.calls['user']['get']) == 2 # ty: ignore[unresolved-attribute]
assert call_mock.call_count == 2
subdivision = 'CA'
proxy_configuration = await Actor.create_proxy_configuration(
actor_proxy_input={
'useApifyProxy': True,
'apifyProxyGroups': groups,
'apifyProxyCountry': country_code,
'apifyProxySubdivision': subdivision,
}
)
assert proxy_configuration is not None
assert (
await proxy_configuration.new_url()
== f'http://groups-{"+".join(groups)},country-{country_code}_{subdivision}:{DUMMY_PASSWORD}@proxy.apify.com:8000'
)

assert len(patched_apify_client.calls['user']['get']) == 3 # ty: ignore[unresolved-attribute]
assert call_mock.call_count == 3

await Actor.exit()
33 changes: 33 additions & 0 deletions tests/unit/test_proxy_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ def test_invalid_arguments() -> None:
with pytest.raises(ValueError, match=match_pattern):
ProxyConfiguration(country_code=invalid_country_code) # ty: ignore[invalid-argument-type]

for invalid_subdivision_code in ['California', 'ca', 'ABCD', 'A1b']:
escaped = re.escape(str(invalid_subdivision_code))
match_pattern = f'Value {escaped} of argument subdivision_code does not match pattern'
with pytest.raises(ValueError, match=match_pattern):
ProxyConfiguration(country_code='US', subdivision_code=invalid_subdivision_code)

with pytest.raises(ValueError, match=r'Cannot set "subdivision_code" without "country_code"'):
ProxyConfiguration(subdivision_code='CA')

with pytest.raises(ValueError, match=r'Exactly one of .* must be specified'):
ProxyConfiguration(
proxy_urls=['http://proxy.com:1111'],
Expand All @@ -105,6 +114,9 @@ def test_invalid_arguments() -> None:
new_url_function=lambda session_id=None, request=None: 'http://proxy.com:2222', groups=['GROUP1']
)

with pytest.raises(ValueError, match=r'Cannot combine custom proxies with Apify Proxy'):
ProxyConfiguration(proxy_urls=['http://proxy.com:1111'], country_code='US', subdivision_code='CA')


async def test_new_url_basic() -> None:
groups = ['GROUP1', 'GROUP2']
Expand All @@ -124,6 +136,26 @@ async def test_new_url_basic() -> None:
assert proxy_url == f'http://{expected_username}:{password}@{expected_hostname}:{expected_port}'


async def test_new_url_with_subdivision() -> None:
    """Check that a subdivision is appended to the country part of the proxy username."""
    proxy_groups = ['RESIDENTIAL']
    secret = 'abcd1234'
    country, state = 'US', 'CA'

    config = ProxyConfiguration(
        groups=proxy_groups,
        password=secret,
        country_code=country,
        subdivision_code=state,
    )

    # The subdivision is expected to be joined to the country with an underscore.
    joined_groups = '+'.join(proxy_groups)
    username = f'groups-{joined_groups},country-{country}_{state}'
    expected_url = f'http://{username}:{secret}@proxy.apify.com:8000'

    assert await config.new_url() == expected_url


async def test_new_url_with_session_ids() -> None:
groups = ['GROUP1', 'GROUP2']
password = 'abcd1234'
Expand Down Expand Up @@ -287,6 +319,7 @@ async def test_new_proxy_info_basic_construction() -> None:
'port': expected_port,
'groups': groups,
'country_code': country_code,
'subdivision_code': None,
'username': expected_username,
'password': password,
'proxy_tier': None,
Expand Down
Loading