Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 12 additions & 26 deletions .github/workflows/skin-designer-devices-update.yml
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
name: Skin Designer Device DB Update

# Runs only on master so it doesn't fire on every PR that touches the script.
# Each run pulls a small batch of the latest GSMArena entries and merges them
# into the bundled devices.json. Slow-and-steady avoids the 30+ minute scrapes
# that risk IP bans and CI cancellation.
# Walks GSMArena's curated brand listings once a week and merges any new
# phones/tablets into the bundled devices.json. The HTML cache is persisted
# across runs so re-fetches are limited to pages that actually changed.
on:
schedule:
# Every 6 hours. Each run only scrapes the "latest mobiles" page, so we
# accumulate fresh devices without ever doing a multi-thousand-page crawl.
- cron: '0 */6 * * *'
# Mondays at 03:00 UTC. Weekly is plenty — phones don't release that fast.
- cron: '0 3 * * 1'
workflow_dispatch:
inputs:
mode:
description: "trickle (~50 latest) or full (all curated brands)"
required: false
default: "trickle"
type: choice
options: [trickle, full]

permissions:
actions: write
Expand All @@ -28,7 +19,9 @@ jobs:
# Only run on the canonical repo's master, never on forks or feature branches.
if: github.repository == 'codenameone/CodenameOne' && github.ref == 'refs/heads/master'
runs-on: ubuntu-latest
timeout-minutes: 25
# Cold runs (empty cache) can take 30–60 min to walk the curated brands;
# warm runs are typically a few minutes.
timeout-minutes: 75

steps:
- name: Check out repository
Expand All @@ -47,17 +40,10 @@ jobs:
restore-keys: |
skin-designer-devicedb-cache-

- name: Trickle scrape (~50 latest)
if: github.event_name == 'schedule' || github.event.inputs.mode == 'trickle' || github.event.inputs.mode == ''
- name: Scrape curated brands
run: |
python3 scripts/skindesigner/tools/devicedb/build_devices_json.py \
--mode latest --limit 50 --delay 2.0 --max-pages 1

- name: Full scrape (all curated brands)
if: github.event_name == 'workflow_dispatch' && github.event.inputs.mode == 'full'
run: |
python3 scripts/skindesigner/tools/devicedb/build_devices_json.py \
--mode brands --delay 2.0 --max-pages 12
--delay 2.0 --max-pages 12

- name: Bail out if nothing changed
id: diff
Expand All @@ -79,8 +65,8 @@ jobs:
body: |
## Summary
- Regenerates `scripts/skindesigner/common/src/main/resources/devices.json`
by scraping a small batch of the latest devices from GSMArena and
merging into the existing catalog.
by walking the curated GSMArena brand pages and merging new
entries into the existing catalog.

Created automatically by the Skin Designer device DB workflow.
base: master
Expand Down
10 changes: 5 additions & 5 deletions scripts/skindesigner/tools/devicedb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,11 @@ curated list misses a brand someone needs):

## Refreshing in CI

`.github/workflows/skin-designer-devices-update.yml` runs the scraper on the
1st of each month and opens an automated PR if the JSON drifted. The HTML
cache is persisted across runs via `actions/cache` so the scrape only
re-fetches phones whose pages changed. Run it manually via the Actions tab
when you want a fresh dump.
`.github/workflows/skin-designer-devices-update.yml` runs the scraper every
Monday at 03:00 UTC and opens an automated PR only when device records
actually change. The HTML cache is persisted across runs via `actions/cache`
so scrapes after the first cold run only re-fetch phones whose pages
changed. Trigger it manually via the Actions tab when you want a fresh dump.

## Notes / caveats

Expand Down
142 changes: 48 additions & 94 deletions scripts/skindesigner/tools/devicedb/build_devices_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,20 @@ def merge(existing: list[dict], fresh: list[dict]) -> list[dict]:
return list(by_id.values())


def _devices_changed(existing: list[dict], merged: list[dict]) -> bool:
"""True iff the device records (ignoring order) actually differ."""
if len(existing) != len(merged):
return True
old_by_id = {r["id"]: r for r in existing}
new_by_id = {r["id"]: r for r in merged}
if old_by_id.keys() != new_by_id.keys():
return True
for k, old in old_by_id.items():
if old != new_by_id[k]:
return True
return False


def load_existing(path: str) -> list[dict]:
if not os.path.exists(path):
return []
Expand All @@ -342,62 +356,15 @@ def load_existing(path: str) -> list[dict]:
return []


# Matches an anchor wrapping a device thumbnail on the "latest mobiles"
# listing page: group(1) is the relative phone-page URL (slug-NNNN.php) and
# group(2) is the <img alt> text, which carries "<Brand> <Model>".
# Case-insensitive because GSMArena's markup is not consistently lowercased.
RE_LATEST_LINK = re.compile(
    r'<a href="([a-z0-9_()]+-\d+\.php)"[^>]*>\s*<img[^>]*alt="([^"]+)"[^>]*>\s*</a>',
    re.I,
)


def walk_latest(*, max_pages: int = 1, delay: float = 1.0) -> Iterable[tuple[str, str, str]]:
    """Yields (phone_url, brand, model) from latest-mobiles.php3.

    The "latest" page lists newly-added devices across every brand. It's the
    cheapest source of fresh data — one or two listing pages cover the last
    few weeks of releases.
    """
    visited: set[str] = set()
    pending: list[str] = ["latest-mobiles.php3"]
    fetched = 0
    while pending and fetched < max_pages:
        current = pending.pop(0)
        # Pagination discovery can enqueue the same page twice; drop repeats.
        if current in visited:
            continue
        visited.add(current)
        fetched += 1
        try:
            html = http_get(BASE + current, delay=delay)
        except RuntimeError as err:
            # Best-effort: a failed listing page shouldn't abort the walk.
            sys.stderr.write(f" ! latest page {current} failed: {err}\n")
            continue
        for match in RE_LATEST_LINK.finditer(html):
            phone_url = match.group(1)
            # Review pages share the URL pattern but aren't device spec pages.
            if phone_url.endswith("-review.php"):
                continue
            alt = unescape(match.group(2)).strip()
            # alt text is "<Brand> <Model>" — the first token is the brand;
            # when there's no space the whole alt doubles as the model.
            brand, sep, rest = alt.partition(" ")
            model = rest if sep else alt
            yield phone_url, brand, model
        # Follow pagination links of the form latest-mobiles-pN.php.
        for nav in RE_NAV_PAGE.finditer(html):
            next_page = nav.group(1)
            if next_page.startswith("latest-mobiles") and next_page not in visited:
                pending.append(next_page)


def main() -> int:
ap = argparse.ArgumentParser(description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter)
ap.add_argument("--mode", choices=("brands", "latest"), default="brands",
help="brands = walk per-brand listings (large/slow); "
"latest = scrape recent additions only (fast/trickle)")
ap.add_argument("--brands", default=",".join(DEFAULT_BRANDS),
help="Comma-separated brand slugs (no .php). Use --full to walk all brands instead.")
ap.add_argument("--full", action="store_true",
help="Discover and walk every brand on makers.php3 (only with --mode brands)")
help="Discover and walk every brand on makers.php3")
ap.add_argument("--max-pages", type=int, default=10,
help="Cap on listing pages walked per brand (or per latest run)")
help="Cap on listing pages walked per brand")
ap.add_argument("--min-year", type=int, default=2014,
help="Drop devices announced before this year")
ap.add_argument("--delay", type=float, default=1.0,
Expand All @@ -415,11 +382,25 @@ def main() -> int:
seen: set[str] = set()
total_phones = 0

if args.mode == "latest":
sys.stderr.write(f"Trickle-scraping latest-mobiles "
f"(max-pages={args.max_pages}, limit={args.limit or 'none'}, "
f"delay={args.delay}s)…\n")
for url, brand, model in walk_latest(max_pages=args.max_pages, delay=args.delay):
if args.full:
sys.stderr.write("Discovering all brands…\n")
all_brands = discover_brands()
target = [(s, l) for s, l in all_brands if s.endswith(tuple(f"-{i}" for i in range(1000)))]
if not target:
target = all_brands
else:
slugs = [b.strip() for b in args.brands.split(",") if b.strip()]
try:
label_map = dict(discover_brands())
except Exception:
label_map = {}
target = [(s, brand_label_from_slug(s, label_map)) for s in slugs]

sys.stderr.write(f"Scraping {len(target)} brand(s) at {args.delay}s/request…\n")

for slug, label in target:
sys.stderr.write(f"\n=== {label} [{slug}] ===\n")
for url, model in walk_brand(slug, max_pages=args.max_pages, delay=args.delay):
if url in seen:
continue
seen.add(url)
Expand All @@ -429,60 +410,33 @@ def main() -> int:
sys.stderr.write(f" ! skip {url}: {err}\n")
continue
specs = parse_phone_page(html)
rec = normalise(brand, model, specs)
rec = normalise(label, model, specs)
if rec is not None and rec["year"] >= args.min_year:
fresh.append(rec)
total_phones += 1
if args.limit and total_phones >= args.limit:
sys.stderr.write(f" · hit --limit {args.limit}, stopping\n")
break
else:
if args.full:
sys.stderr.write("Discovering all brands…\n")
all_brands = discover_brands()
target = [(s, l) for s, l in all_brands if s.endswith(tuple(f"-{i}" for i in range(1000)))]
if not target:
target = all_brands
else:
slugs = [b.strip() for b in args.brands.split(",") if b.strip()]
try:
label_map = dict(discover_brands())
except Exception:
label_map = {}
target = [(s, brand_label_from_slug(s, label_map)) for s in slugs]

sys.stderr.write(f"Scraping {len(target)} brand(s) at {args.delay}s/request…\n")

for slug, label in target:
sys.stderr.write(f"\n=== {label} [{slug}] ===\n")
for url, model in walk_brand(slug, max_pages=args.max_pages, delay=args.delay):
if url in seen:
continue
seen.add(url)
try:
html = http_get(BASE + url, delay=args.delay, use_cache=use_cache)
except RuntimeError as err:
sys.stderr.write(f" ! skip {url}: {err}\n")
continue
specs = parse_phone_page(html)
rec = normalise(label, model, specs)
if rec is not None and rec["year"] >= args.min_year:
fresh.append(rec)
total_phones += 1
if args.limit and total_phones >= args.limit:
sys.stderr.write(f" · hit --limit {args.limit}, stopping\n")
break
if args.limit and total_phones >= args.limit:
break
if args.limit and total_phones >= args.limit:
break

merged = merge(existing, fresh)
merged.sort(key=lambda r: (-(r["year"] or 0), r["brand"].lower(), r["name"].lower()))

# Don't rewrite the file when no device record actually changed. The
# envelope (version/generator/source) shifting is not a reason to spam
# PRs; only real device-data changes warrant a commit.
if not _devices_changed(existing, merged):
sys.stderr.write(
f"\nNo device records changed ({len(fresh)} scraped, "
f"{len(merged)} total); leaving {args.out} untouched.\n"
)
return 0

payload = {
"version": 2,
"generator": "build_devices_json.py",
"source": BASE,
"fresh_count": len(fresh),
"count": len(merged),
"devices": merged,
}
Expand Down
Loading