From 528352edd2e720ccd2d2f1fb9f540e047c2fa965 Mon Sep 17 00:00:00 2001 From: Simon Smallchua <40650011+simonsmallchua@users.noreply.github.com> Date: Tue, 12 May 2026 08:31:39 +1000 Subject: [PATCH 1/3] Drop Worker-id suffix from UA --- internal/crawler/crawler.go | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/internal/crawler/crawler.go b/internal/crawler/crawler.go index 1c93f2af..5175a773 100644 --- a/internal/crawler/crawler.go +++ b/internal/crawler/crawler.go @@ -223,7 +223,6 @@ func buildRequestAttemptDiagnostics( type Crawler struct { config *Config colly *colly.Collector - id string metricsMap *sync.Map aia *aiaTransport probeClient *http.Client // Shared to avoid per-call transport leaks. @@ -291,23 +290,13 @@ func (t *tracingRoundTripper) RoundTrip(req *http.Request) (*http.Response, erro return t.transport.RoundTrip(req) } -func New(config *Config, id ...string) *Crawler { +func New(config *Config) *Crawler { if config == nil { config = DefaultConfig() } - crawlerID := "" - if len(id) > 0 { - crawlerID = id[0] - } - - userAgent := config.UserAgent - if crawlerID != "" { - userAgent = fmt.Sprintf("%s Worker-%s", config.UserAgent, crawlerID) - } - c := colly.NewCollector( - colly.UserAgent(userAgent), + colly.UserAgent(config.UserAgent), colly.MaxDepth(1), colly.Async(true), colly.AllowURLRevisit(), @@ -373,7 +362,6 @@ func New(config *Config, id ...string) *Crawler { return &Crawler{ config: config, colly: c, - id: crawlerID, metricsMap: metricsMap, aia: aiaRT, probeClient: &http.Client{ From 9c4d551767231a73c364b4552ff29b671d90bcf9 Mon Sep 17 00:00:00 2001 From: Simon Smallchua <40650011+simonsmallchua@users.noreply.github.com> Date: Tue, 12 May 2026 08:40:51 +1000 Subject: [PATCH 2/3] Add changelog entry for UA cleanup --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa274966..83cb119b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,11 @@ On merge, CI will: ## [Unreleased] -_Add unreleased changes here._ +### Changed + +- Crawler user agent is now always exactly `config.UserAgent`. Dropped the dead + ` Worker-` suffix branch in `crawler.New` along with the unused variadic + ID parameter and struct field. ## Full changelog history From c4cb28360c5d4a91513935dd5415b6b9ac9e6f55 Mon Sep 17 00:00:00 2001 From: Simon Smallchua <40650011+simonsmallchua@users.noreply.github.com> Date: Tue, 12 May 2026 08:45:19 +1000 Subject: [PATCH 3/3] Fix MD038 in changelog code span --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83cb119b..8bcdfe9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,8 +31,8 @@ On merge, CI will: ### Changed - Crawler user agent is now always exactly `config.UserAgent`. Dropped the dead - ` Worker-` suffix branch in `crawler.New` along with the unused variadic - ID parameter and struct field. + `Worker-` suffix branch in `crawler.New` along with the unused variadic ID + parameter and struct field. ## Full changelog history