diff --git a/demos/BERT.ipynb b/demos/BERT.ipynb
index 9338fd30e..0b6efb8bf 100644
--- a/demos/BERT.ipynb
+++ b/demos/BERT.ipynb
@@ -28,16 +28,21 @@
},
{
"cell_type": "code",
- "execution_count": 39,
- "metadata": {},
+ "execution_count": 1,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:16.632770Z",
+ "iopub.status.busy": "2026-05-07T23:58:16.632549Z",
+ "iopub.status.idle": "2026-05-07T23:58:16.663150Z",
+ "shell.execute_reply": "2026-05-07T23:58:16.662858Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Running as a Jupyter notebook - intended for development only!\n",
- "The autoreload extension is already loaded. To reload it, use:\n",
- " %reload_ext autoreload\n"
+ "Running as a Jupyter notebook - intended for development only!\n"
]
}
],
@@ -77,8 +82,15 @@
},
{
"cell_type": "code",
- "execution_count": 40,
- "metadata": {},
+ "execution_count": 2,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:16.680332Z",
+ "iopub.status.busy": "2026-05-07T23:58:16.680240Z",
+ "iopub.status.idle": "2026-05-07T23:58:17.094703Z",
+ "shell.execute_reply": "2026-05-07T23:58:17.094502Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -101,27 +113,34 @@
},
{
"cell_type": "code",
- "execution_count": 41,
- "metadata": {},
+ "execution_count": 3,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:17.095708Z",
+ "iopub.status.busy": "2026-05-07T23:58:17.095633Z",
+ "iopub.status.idle": "2026-05-07T23:58:17.766649Z",
+ "shell.execute_reply": "2026-05-07T23:58:17.766420Z"
+ }
+ },
"outputs": [
{
"data": {
"text/html": [
- "
\n",
+ "\n",
" "
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 41,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -135,8 +154,15 @@
},
{
"cell_type": "code",
- "execution_count": 42,
- "metadata": {},
+ "execution_count": 4,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:17.767701Z",
+ "iopub.status.busy": "2026-05-07T23:58:17.767614Z",
+ "iopub.status.idle": "2026-05-07T23:58:20.235678Z",
+ "shell.execute_reply": "2026-05-07T23:58:20.235292Z"
+ }
+ },
"outputs": [],
"source": [
"# Import stuff\n",
@@ -149,8 +175,15 @@
},
{
"cell_type": "code",
- "execution_count": 43,
- "metadata": {},
+ "execution_count": 5,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:20.236982Z",
+ "iopub.status.busy": "2026-05-07T23:58:20.236839Z",
+ "iopub.status.idle": "2026-05-07T23:58:20.263136Z",
+ "shell.execute_reply": "2026-05-07T23:58:20.262870Z"
+ }
+ },
"outputs": [],
"source": [
"# NBVAL_IGNORE_OUTPUT\n",
@@ -169,13 +202,27 @@
},
{
"cell_type": "code",
- "execution_count": 44,
- "metadata": {},
+ "execution_count": 6,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:20.264217Z",
+ "iopub.status.busy": "2026-05-07T23:58:20.264152Z",
+ "iopub.status.idle": "2026-05-07T23:58:21.584024Z",
+ "shell.execute_reply": "2026-05-07T23:58:21.583632Z"
+ }
+ },
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n"
+ ]
+ },
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "1d4b75dcfcbf488da7196992cde5c9bb",
+ "model_id": "291186d81cdf425f85c9c6accf8f3170",
"version_major": 2,
"version_minor": 0
},
@@ -207,8 +254,15 @@
},
{
"cell_type": "code",
- "execution_count": 45,
- "metadata": {},
+ "execution_count": 7,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:21.585622Z",
+ "iopub.status.busy": "2026-05-07T23:58:21.585530Z",
+ "iopub.status.idle": "2026-05-07T23:58:21.634792Z",
+ "shell.execute_reply": "2026-05-07T23:58:21.634568Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -237,8 +291,15 @@
},
{
"cell_type": "code",
- "execution_count": 46,
- "metadata": {},
+ "execution_count": 8,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:21.635905Z",
+ "iopub.status.busy": "2026-05-07T23:58:21.635834Z",
+ "iopub.status.idle": "2026-05-07T23:58:21.686269Z",
+ "shell.execute_reply": "2026-05-07T23:58:21.686039Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -258,6 +319,124 @@
"print(f'Prediction: \"{predictions}\"')"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Inspecting BERT internals: cache and hooks\n",
+ "\n",
+ "Everything you'd do on a `HookedTransformer` (run-with-cache, run-with-hooks, the `hooks(...)` context manager) works the same way on the BERT bridge. Two differences worth pointing out as we go:\n",
+ "\n",
+ "- **Bidirectional attention.** GPT-style models have a causal mask, so attention patterns are lower-triangular. BERT attends in both directions, so the pattern is dense — every token can see every other token.\n",
+ "- **Targets live mid-sequence.** With causal LMs you usually intervene at the final position; with BERT-style MLM the position of interest is wherever you put `[MASK]`. Almost all the mech-interp recipes carry over once you remember to index there."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Cache: peek at activations and confirm bidirectionality\n",
+ "\n",
+ "`run_with_cache` exposes every hook point the bridge installs. Below we grab the residual stream after layer 0 and the attention pattern, then quantify that the pattern is genuinely two-sided (a causal model would have zero mass above the diagonal)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:21.687327Z",
+ "iopub.status.busy": "2026-05-07T23:58:21.687268Z",
+ "iopub.status.idle": "2026-05-07T23:58:21.732798Z",
+ "shell.execute_reply": "2026-05-07T23:58:21.732587Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "resid_post (layer 0): (1, 8, 768)\n",
+ "attn pattern (layer 0):(1, 12, 8, 8)\n",
+ "attention mass above diagonal: 3.04\n",
+ "attention mass below diagonal: 2.76\n"
+ ]
+ }
+ ],
+ "source": [
+ "prompt = \"The [MASK] is bright today.\"\n",
+ "tokens = bert.tokenizer(prompt, return_tensors=\"pt\")[\"input_ids\"]\n",
+ "mask_pos = (tokens[0] == bert.tokenizer.mask_token_id).nonzero()[0].item()\n",
+ "\n",
+ "_, cache = bert.run_with_cache(tokens)\n",
+ "print(f\"resid_post (layer 0): {tuple(cache['blocks.0.hook_resid_post'].shape)}\")\n",
+ "print(f\"attn pattern (layer 0):{tuple(cache['blocks.0.attn.hook_pattern'].shape)}\")\n",
+ "\n",
+ "# Confirm the attention pattern is bidirectional, not lower-triangular like a causal model.\n",
+ "pattern = cache[\"blocks.0.attn.hook_pattern\"][0, 0] # layer 0, head 0\n",
+ "upper = pattern.triu(diagonal=1).sum().item()\n",
+ "lower = pattern.tril(diagonal=-1).sum().item()\n",
+ "print(f\"attention mass above diagonal: {upper:.2f}\")\n",
+ "print(f\"attention mass below diagonal: {lower:.2f}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Hooks: ablate an attention head and watch the logit shift\n",
+ "\n",
+ "Same `bert.hooks(...)` context manager as `HookedTransformer`. We zero one head in layer 0 for the duration of the call. A single-head ablation rarely flips the top-1 prediction (the model is too redundant for that), but it does shift the logit at the target token — and that shift is the signal mech-interp work usually cares about."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:21.733792Z",
+ "iopub.status.busy": "2026-05-07T23:58:21.733733Z",
+ "iopub.status.idle": "2026-05-07T23:58:21.789116Z",
+ "shell.execute_reply": "2026-05-07T23:58:21.788914Z"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "target token: 'sun'\n",
+ "baseline logit('sun'): 14.524\n",
+ "ablated logit('sun'): 15.029\n",
+ "shift: +0.504\n"
+ ]
+ }
+ ],
+ "source": [
+ "def mask_logits(model, tokens, mask_pos):\n",
+ " return model(tokens)[0, mask_pos]\n",
+ "\n",
+ "\n",
+ "def ablate_layer_0_head_5(z, hook):\n",
+ " z[..., 5, :] = 0\n",
+ " return z\n",
+ "\n",
+ "\n",
+ "baseline = mask_logits(bert, tokens, mask_pos)\n",
+ "target_id = baseline.argmax().item()\n",
+ "target_str = bert.tokenizer.decode([target_id])\n",
+ "\n",
+ "with bert.hooks(fwd_hooks=[(\"blocks.0.attn.hook_z\", ablate_layer_0_head_5)]):\n",
+ " ablated = mask_logits(bert, tokens, mask_pos)\n",
+ "\n",
+ "# Top-1 is usually robust to a single-head ablation in an early layer — the\n",
+ "# clearer signal is the logit shift at the target token.\n",
+ "print(f\"target token: {target_str!r}\")\n",
+ "print(f\"baseline logit({target_str!r}): {baseline[target_id].item():7.3f}\")\n",
+ "print(f\"ablated logit({target_str!r}): {ablated[target_id].item():7.3f}\")\n",
+ "print(f\"shift: {(ablated[target_id] - baseline[target_id]).item():+7.3f}\")"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -270,13 +449,20 @@
},
{
"cell_type": "code",
- "execution_count": 47,
- "metadata": {},
+ "execution_count": 11,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:21.790081Z",
+ "iopub.status.busy": "2026-05-07T23:58:21.790022Z",
+ "iopub.status.idle": "2026-05-07T23:58:22.688419Z",
+ "shell.execute_reply": "2026-05-07T23:58:22.688161Z"
+ }
+ },
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "9cea0cab8d47422098f5bcec64a74126",
+ "model_id": "fde8bcf6ba14465096ce2180a2d3b4f5",
"version_major": 2,
"version_minor": 0
},
@@ -329,8 +515,15 @@
},
{
"cell_type": "code",
- "execution_count": 48,
- "metadata": {},
+ "execution_count": 12,
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2026-05-07T23:58:22.689564Z",
+ "iopub.status.busy": "2026-05-07T23:58:22.689502Z",
+ "iopub.status.idle": "2026-05-07T23:58:22.728869Z",
+ "shell.execute_reply": "2026-05-07T23:58:22.728661Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
@@ -382,7 +575,734 @@
"pygments_lexer": "ipython3",
"version": "3.12.12"
},
- "orig_nbformat": 4
+ "widgets": {
+ "application/vnd.jupyter.widget-state+json": {
+ "state": {
+ "0cb2ba7cf783495f8a767b504946ffaa": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "0e8ff295a5b2417d99bd81afcf735f79": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "15d93f98328748e8872d387981d43976": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_69abc6e9c5ac46e7977bb152a2b08545",
+ "max": 202.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_f1d52b5dcaf147348ca4f3bec321356e",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 202.0
+ }
+ },
+ "17e19169de9b4ccca06920c27e4611ec": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_54ca0ea7b1874a58a07c4477f69dd9e5",
+ "placeholder": "",
+ "style": "IPY_MODEL_429f1c9017dd4f3d9e10c5b126bf7216",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 202/202 [00:00<00:00, 5456.37it/s, Materializing param=cls.predictions.transform.dense.weight]"
+ }
+ },
+ "18b80fe7ba924f9f9c0585c3f4899ae4": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "291186d81cdf425f85c9c6accf8f3170": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_f6152742c7584aac81fdc110c9f55a8d",
+ "IPY_MODEL_15d93f98328748e8872d387981d43976",
+ "IPY_MODEL_17e19169de9b4ccca06920c27e4611ec"
+ ],
+ "layout": "IPY_MODEL_e5c30e8c00c5440b868f5c1df9c2e25e",
+ "tabbable": null,
+ "tooltip": null
+ }
+ },
+ "429f1c9017dd4f3d9e10c5b126bf7216": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "4d9a89e7bc43452190bf282deb77b1e2": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "54ca0ea7b1874a58a07c4477f69dd9e5": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "69abc6e9c5ac46e7977bb152a2b08545": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "77033e4e1062414c85e095a2f6dc0e94": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "790fc4cf502643f98677aaedc5c66e95": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "background": null,
+ "description_width": "",
+ "font_size": null,
+ "text_color": null
+ }
+ },
+ "82e11d27e7ed43bcb8c6402353094b47": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "98ff75826c2d4415bf2c9d9ac1f0bbe9": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_77033e4e1062414c85e095a2f6dc0e94",
+ "placeholder": "",
+ "style": "IPY_MODEL_18b80fe7ba924f9f9c0585c3f4899ae4",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "Loading weights: 100%"
+ }
+ },
+ "a44ef0f1d6194a149e5191d8197a1ff8": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "FloatProgressModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "FloatProgressModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "ProgressView",
+ "bar_style": "success",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_0e8ff295a5b2417d99bd81afcf735f79",
+ "max": 201.0,
+ "min": 0.0,
+ "orientation": "horizontal",
+ "style": "IPY_MODEL_b913cb229aa542ee99d1f0d111c4080a",
+ "tabbable": null,
+ "tooltip": null,
+ "value": 201.0
+ }
+ },
+ "b913cb229aa542ee99d1f0d111c4080a": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "d6621c3a124d4aef954c5e5845ca59af": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "d7c6004161924be4a237098954c2ee23": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_d6621c3a124d4aef954c5e5845ca59af",
+ "placeholder": "",
+ "style": "IPY_MODEL_790fc4cf502643f98677aaedc5c66e95",
+ "tabbable": null,
+ "tooltip": null,
+ "value": " 201/201 [00:00<00:00, 5484.00it/s, Materializing param=cls.seq_relationship.weight]"
+ }
+ },
+ "e5c30e8c00c5440b868f5c1df9c2e25e": {
+ "model_module": "@jupyter-widgets/base",
+ "model_module_version": "2.0.0",
+ "model_name": "LayoutModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/base",
+ "_model_module_version": "2.0.0",
+ "_model_name": "LayoutModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "LayoutView",
+ "align_content": null,
+ "align_items": null,
+ "align_self": null,
+ "border_bottom": null,
+ "border_left": null,
+ "border_right": null,
+ "border_top": null,
+ "bottom": null,
+ "display": null,
+ "flex": null,
+ "flex_flow": null,
+ "grid_area": null,
+ "grid_auto_columns": null,
+ "grid_auto_flow": null,
+ "grid_auto_rows": null,
+ "grid_column": null,
+ "grid_gap": null,
+ "grid_row": null,
+ "grid_template_areas": null,
+ "grid_template_columns": null,
+ "grid_template_rows": null,
+ "height": null,
+ "justify_content": null,
+ "justify_items": null,
+ "left": null,
+ "margin": null,
+ "max_height": null,
+ "max_width": null,
+ "min_height": null,
+ "min_width": null,
+ "object_fit": null,
+ "object_position": null,
+ "order": null,
+ "overflow": null,
+ "padding": null,
+ "right": null,
+ "top": null,
+ "visibility": null,
+ "width": null
+ }
+ },
+ "f1d52b5dcaf147348ca4f3bec321356e": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "ProgressStyleModel",
+ "state": {
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "ProgressStyleModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/base",
+ "_view_module_version": "2.0.0",
+ "_view_name": "StyleView",
+ "bar_color": null,
+ "description_width": ""
+ }
+ },
+ "f6152742c7584aac81fdc110c9f55a8d": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HTMLModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HTMLModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HTMLView",
+ "description": "",
+ "description_allow_html": false,
+ "layout": "IPY_MODEL_82e11d27e7ed43bcb8c6402353094b47",
+ "placeholder": "",
+ "style": "IPY_MODEL_4d9a89e7bc43452190bf282deb77b1e2",
+ "tabbable": null,
+ "tooltip": null,
+ "value": "Loading weights: 100%"
+ }
+ },
+ "fde8bcf6ba14465096ce2180a2d3b4f5": {
+ "model_module": "@jupyter-widgets/controls",
+ "model_module_version": "2.0.0",
+ "model_name": "HBoxModel",
+ "state": {
+ "_dom_classes": [],
+ "_model_module": "@jupyter-widgets/controls",
+ "_model_module_version": "2.0.0",
+ "_model_name": "HBoxModel",
+ "_view_count": null,
+ "_view_module": "@jupyter-widgets/controls",
+ "_view_module_version": "2.0.0",
+ "_view_name": "HBoxView",
+ "box_style": "",
+ "children": [
+ "IPY_MODEL_98ff75826c2d4415bf2c9d9ac1f0bbe9",
+ "IPY_MODEL_a44ef0f1d6194a149e5191d8197a1ff8",
+ "IPY_MODEL_d7c6004161924be4a237098954c2ee23"
+ ],
+ "layout": "IPY_MODEL_0cb2ba7cf783495f8a767b504946ffaa",
+ "tabbable": null,
+ "tooltip": null
+ }
+ }
+ },
+ "version_major": 2,
+ "version_minor": 0
+ }
+ }
},
"nbformat": 4,
"nbformat_minor": 2
diff --git a/tests/integration/test_hooked_encoder_properties.py b/tests/integration/test_hooked_encoder_properties.py
new file mode 100644
index 000000000..bdbda39bf
--- /dev/null
+++ b/tests/integration/test_hooked_encoder_properties.py
@@ -0,0 +1,171 @@
+"""Convenience-property tests for ``HookedEncoder``.
+
+Closes the last open ask in #277 — verify each ``W_*`` / ``b_*`` / circuit
+property has the right shape AND aliases the right underlying parameter, so
+property-level mech-interp work doesn't silently read the wrong tensor.
+
+Uses a randomly-initialized small encoder (no HF download) so the tests run
+fast and deterministically.
+"""
+
+from __future__ import annotations
+
+import pytest
+import torch
+
+from transformer_lens import FactoredMatrix, HookedEncoder, HookedTransformerConfig
+
+D_MODEL = 12
+D_HEAD = 4
+N_HEADS = D_MODEL // D_HEAD
+D_MLP = 4 * D_MODEL
+N_CTX = 5
+N_LAYERS = 3
+D_VOCAB = 22
+
+
+@pytest.fixture
+def model() -> HookedEncoder:
+ cfg = HookedTransformerConfig(
+ d_head=D_HEAD,
+ d_model=D_MODEL,
+ n_ctx=N_CTX,
+ n_layers=N_LAYERS,
+ act_fn="gelu",
+ d_vocab=D_VOCAB,
+ )
+ encoder = HookedEncoder(cfg)
+ # HookedEncoder uses torch.empty() for params and does no init pass; the
+ # uninitialized memory may contain NaNs, which break torch.equal comparisons.
+ torch.manual_seed(0)
+ for p in encoder.parameters():
+ torch.nn.init.normal_(p, std=0.02)
+ return encoder
+
+
+# ---------------------------------------------------------------------------
+# Embed / unembed
+# ---------------------------------------------------------------------------
+
+
+def test_W_U(model: HookedEncoder):
+ assert model.W_U.shape == (D_MODEL, D_VOCAB)
+ assert model.W_U is model.unembed.W_U
+
+
+def test_b_U(model: HookedEncoder):
+ assert model.b_U.shape == (D_VOCAB,)
+ assert model.b_U is model.unembed.b_U
+
+
+def test_W_E(model: HookedEncoder):
+ assert model.W_E.shape == (D_VOCAB, D_MODEL)
+ assert model.W_E is model.embed.embed.W_E
+
+
+def test_W_pos(model: HookedEncoder):
+ assert model.W_pos.shape == (N_CTX, D_MODEL)
+ assert model.W_pos is model.embed.pos_embed.W_pos
+
+
+@pytest.mark.xfail(
+ reason=(
+ "HookedEncoder.W_E_pos return annotation 'd_vocab+n_ctx d_model' references "
+ "unbound dimension names (no input args supply them), so the jaxtyping import-hook "
+ "can't resolve the sum at runtime. Same annotation exists on HookedTransformer.W_E_pos; "
+ "fixing it is a separate API-touch."
+ ),
+ strict=True,
+)
+def test_W_E_pos(model: HookedEncoder):
+ assert model.W_E_pos.shape == (D_VOCAB + N_CTX, D_MODEL)
+ # Concatenation, so identity doesn't apply — verify the slices match.
+ assert torch.equal(model.W_E_pos[:D_VOCAB], model.W_E)
+ assert torch.equal(model.W_E_pos[D_VOCAB:], model.W_pos)
+
+
+# ---------------------------------------------------------------------------
+# Per-layer attention weights/biases — stacked across blocks
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.parametrize("attr", ["W_Q", "W_K", "W_V"])
+def test_attn_qkv_weight(model: HookedEncoder, attr: str):
+ stacked = getattr(model, attr)
+ assert stacked.shape == (N_LAYERS, N_HEADS, D_MODEL, D_HEAD)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(stacked[layer_idx], getattr(block.attn, attr))
+
+
+def test_W_O(model: HookedEncoder):
+ assert model.W_O.shape == (N_LAYERS, N_HEADS, D_HEAD, D_MODEL)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(model.W_O[layer_idx], block.attn.W_O)
+
+
+@pytest.mark.parametrize("attr", ["b_Q", "b_K", "b_V"])
+def test_attn_qkv_bias(model: HookedEncoder, attr: str):
+ stacked = getattr(model, attr)
+ assert stacked.shape == (N_LAYERS, N_HEADS, D_HEAD)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(stacked[layer_idx], getattr(block.attn, attr))
+
+
+def test_b_O(model: HookedEncoder):
+ assert model.b_O.shape == (N_LAYERS, D_MODEL)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(model.b_O[layer_idx], block.attn.b_O)
+
+
+# ---------------------------------------------------------------------------
+# Per-layer MLP weights/biases — stacked across blocks
+# ---------------------------------------------------------------------------
+
+
+def test_W_in(model: HookedEncoder):
+ assert model.W_in.shape == (N_LAYERS, D_MODEL, D_MLP)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(model.W_in[layer_idx], block.mlp.W_in)
+
+
+def test_W_out(model: HookedEncoder):
+ assert model.W_out.shape == (N_LAYERS, D_MLP, D_MODEL)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(model.W_out[layer_idx], block.mlp.W_out)
+
+
+def test_b_in(model: HookedEncoder):
+ assert model.b_in.shape == (N_LAYERS, D_MLP)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(model.b_in[layer_idx], block.mlp.b_in)
+
+
+def test_b_out(model: HookedEncoder):
+ assert model.b_out.shape == (N_LAYERS, D_MODEL)
+ for layer_idx, block in enumerate(model.blocks):
+ assert torch.equal(model.b_out[layer_idx], block.mlp.b_out)
+
+
+# ---------------------------------------------------------------------------
+# Factored circuits
+# ---------------------------------------------------------------------------
+
+
+def test_QK_circuit(model: HookedEncoder):
+ qk = model.QK
+ assert isinstance(qk, FactoredMatrix)
+ # Left factor is W_Q [..., d_model, d_head]; right factor is W_K transposed
+ # to [..., d_head, d_model]. Their product would be [..., d_model, d_model].
+ assert qk.A.shape == (N_LAYERS, N_HEADS, D_MODEL, D_HEAD)
+ assert qk.B.shape == (N_LAYERS, N_HEADS, D_HEAD, D_MODEL)
+ assert torch.equal(qk.A, model.W_Q)
+ assert torch.equal(qk.B, model.W_K.transpose(-2, -1))
+
+
+def test_OV_circuit(model: HookedEncoder):
+ ov = model.OV
+ assert isinstance(ov, FactoredMatrix)
+ assert ov.A.shape == (N_LAYERS, N_HEADS, D_MODEL, D_HEAD)
+ assert ov.B.shape == (N_LAYERS, N_HEADS, D_HEAD, D_MODEL)
+ assert torch.equal(ov.A, model.W_V)
+ assert torch.equal(ov.B, model.W_O)
diff --git a/tests/unit/components/mlps/test_gated_mlp.py b/tests/unit/components/mlps/test_gated_mlp.py
index abb0b7b8c..a7518c85f 100644
--- a/tests/unit/components/mlps/test_gated_mlp.py
+++ b/tests/unit/components/mlps/test_gated_mlp.py
@@ -3,6 +3,7 @@
import pytest
import torch
import torch.nn as nn
+import torch.nn.functional as F
from transformer_lens.components import GatedMLP, LayerNorm
from transformer_lens.utils import solu
@@ -39,3 +40,42 @@ def test_forward(cfg: Dict[str, Any]):
x = torch.randn(2, 10, cfg["d_model"])
output = model(x)
assert output.shape == (2, 10, cfg["d_model"])
+
+
+def test_forward_matches_reference_equation():
+ """Numeric equivalence vs a hand-rolled gated-MLP reference (issue #264).
+
+ Closes the original ask in the thread: build an "equivalent gated MLP in
+ pytorch" and confirm the component matches it under ``torch.allclose``.
+ Uses ``silu`` so the LN-activation branch is not exercised — which keeps the
+ reference equation in its documented form.
+ """
+ cfg: Dict[str, Any] = {
+ "n_layers": 1,
+ "n_ctx": 16,
+ "d_head": 32,
+ "d_model": 64,
+ "d_mlp": 128,
+ "dtype": torch.float32,
+ "act_fn": "silu",
+ "normalization_type": None,
+ "load_in_4bit": False,
+ }
+ torch.manual_seed(0)
+ model = GatedMLP(cfg).eval()
+ # Randomize the params so the test isn't run against zero-bias defaults.
+ for p in model.parameters():
+ torch.nn.init.normal_(p, std=0.02)
+
+ x = torch.randn(2, 5, cfg["d_model"])
+ actual = model(x)
+
+ # Reference: mlp_out = (silu(x @ W_gate) * (x @ W_in) + b_in) @ W_out + b_out.
+ # GatedMLP uses F.linear with .T.contiguous() to match HF accumulation order;
+ # mirror that here so the two compute graphs agree to tight fp32 tolerance.
+ pre_act = F.linear(x, model.W_gate.T.contiguous())
+ pre_linear = F.linear(x, model.W_in.T.contiguous())
+ post_act = F.silu(pre_act) * pre_linear + model.b_in
+ expected = F.linear(post_act, model.W_out.T.contiguous(), model.b_out)
+
+ assert torch.allclose(actual, expected, atol=1e-6)
diff --git a/transformer_lens/config/HookedTransformerConfig.py b/transformer_lens/config/HookedTransformerConfig.py
index 8f818b36b..6e4b95150 100644
--- a/transformer_lens/config/HookedTransformerConfig.py
+++ b/transformer_lens/config/HookedTransformerConfig.py
@@ -151,6 +151,12 @@ class HookedTransformerConfig(TransformerLensConfig):
use_hook_tokens (bool): Will add a hook point on the token input to
HookedTransformer.forward, which lets you cache or intervene on the tokens.
Defaults to False.
+ gated_mlp (bool): If True, the MLP layer uses a gated formulation
+ (SwiGLU/GeGLU-style): ``mlp_out = W_out @ (act_fn(W_gate @ x) * (W_in @ x))``,
+ with an extra ``W_gate`` weight matrix alongside ``W_in`` and ``W_out``. Used by
+ LLaMA, Mistral, Gemma, Qwen and similar families. When False (default), the MLP
+ is the plain ``mlp_out = W_out @ act_fn(W_in @ x)`` form. ``loading_from_pretrained``
+ sets this automatically per architecture; only set manually for a custom config.
default_prepend_bos (bool, optional): Default behavior of whether to prepend the BOS token when the
methods of HookedTransformer process input text to tokenize (only when input is a string).
Defaults to True - even for models not explicitly trained with this, heads often use the
diff --git a/transformer_lens/factories/architecture_adapter_factory.py b/transformer_lens/factories/architecture_adapter_factory.py
index b5432aff1..02129b856 100644
--- a/transformer_lens/factories/architecture_adapter_factory.py
+++ b/transformer_lens/factories/architecture_adapter_factory.py
@@ -116,6 +116,7 @@
"Qwen3_5ForCausalLM": Qwen3_5ArchitectureAdapter,
"StableLmForCausalLM": StableLmArchitectureAdapter,
"T5ForConditionalGeneration": T5ArchitectureAdapter,
+ "MT5ForConditionalGeneration": T5ArchitectureAdapter,
"XGLMForCausalLM": XGLMArchitectureAdapter,
"NanoGPTForCausalLM": NanogptArchitectureAdapter,
"MinGPTForCausalLM": MingptArchitectureAdapter,
diff --git a/transformer_lens/model_bridge/generalized_components/attention.py b/transformer_lens/model_bridge/generalized_components/attention.py
index 22504b294..89d6203ab 100644
--- a/transformer_lens/model_bridge/generalized_components/attention.py
+++ b/transformer_lens/model_bridge/generalized_components/attention.py
@@ -58,6 +58,8 @@ def __init__(
requires_position_embeddings: bool = False,
requires_attention_mask: bool = False,
attention_mask_4d: bool = False,
+ requires_relative_position_bias: bool = False,
+ is_cross_attention: bool = False,
optional: bool = False,
):
"""Initialize the attention bridge.
@@ -78,6 +80,9 @@ def __init__(
(e.g., GPTNeoX/Pythia). Defaults to False.
attention_mask_4d: If True, generate 4D attention_mask [batch, 1, tgt_len, src_len]
instead of 2D [batch, seq_len]. Required for OPT. Defaults to False.
+ requires_relative_position_bias: T5/mT5-style relative attention; supplies a
+ zero ``position_bias`` so HF's forward skips its ``cache_position[-1]`` fallback.
+ is_cross_attention: Encoder-decoder cross-attention; supplies ``key_value_states``.
"""
if conversion_rule is None:
conversion_rule = AttentionAutoConversion(config)
@@ -122,6 +127,8 @@ def __init__(
self.requires_position_embeddings = requires_position_embeddings
self.requires_attention_mask = requires_attention_mask
self.attention_mask_4d = attention_mask_4d
+ self.requires_relative_position_bias = requires_relative_position_bias
+ self.is_cross_attention = is_cross_attention
self._layer_idx: Optional[int] = None
def set_original_component(self, original_component: torch.nn.Module) -> None:
@@ -212,6 +219,16 @@ def get_random_inputs(
else:
# Generate 2D attention mask [batch, seq_len] for most models
inputs["attention_mask"] = torch.ones(batch_size, seq_len, device=device)
+ if self.requires_relative_position_bias:
+ # Zero bias short-circuits HF's None-cache_position fallback in T5Attention.
+ n_heads = self.config.n_heads if self.config and hasattr(self.config, "n_heads") else 1
+ inputs["position_bias"] = torch.zeros(
+ 1, n_heads, seq_len, seq_len, device=device, dtype=dtype
+ )
+ if self.is_cross_attention:
+ inputs["key_value_states"] = torch.randn(
+ batch_size, seq_len, d_model, device=device, dtype=dtype
+ )
return inputs
def _setup_qkv_hook_reshaping(self) -> None:
diff --git a/transformer_lens/model_bridge/sources/transformers.py b/transformer_lens/model_bridge/sources/transformers.py
index 522553068..b7c4656f4 100644
--- a/transformer_lens/model_bridge/sources/transformers.py
+++ b/transformer_lens/model_bridge/sources/transformers.py
@@ -232,6 +232,7 @@ def determine_architecture_from_hf_config(hf_config):
"openelm": "OpenELMForCausalLM",
"stablelm": "StableLmForCausalLM",
"t5": "T5ForConditionalGeneration",
+ "mt5": "MT5ForConditionalGeneration",
}
if model_type in model_type_mappings:
architectures.append(model_type_mappings[model_type])
diff --git a/transformer_lens/model_bridge/supported_architectures/t5.py b/transformer_lens/model_bridge/supported_architectures/t5.py
index 6e2e51822..3fdc1179b 100644
--- a/transformer_lens/model_bridge/supported_architectures/t5.py
+++ b/transformer_lens/model_bridge/supported_architectures/t5.py
@@ -113,6 +113,7 @@ def __init__(self, cfg: Any) -> None:
"v": LinearBridge(name="v"),
"o": LinearBridge(name="o"),
},
+ requires_relative_position_bias=True,
),
"ln2": RMSNormalizationBridge(name="layer.1.layer_norm", config=self.cfg),
"mlp": encoder_mlp,
@@ -142,6 +143,7 @@ def __init__(self, cfg: Any) -> None:
"v": LinearBridge(name="v"),
"o": LinearBridge(name="o"),
},
+ requires_relative_position_bias=True,
),
"ln2": RMSNormalizationBridge(name="layer.1.layer_norm", config=self.cfg),
"cross_attn": AttentionBridge(
@@ -153,6 +155,8 @@ def __init__(self, cfg: Any) -> None:
"v": LinearBridge(name="v"),
"o": LinearBridge(name="o"),
},
+ requires_relative_position_bias=True,
+ is_cross_attention=True,
),
"ln3": RMSNormalizationBridge(name="layer.2.layer_norm", config=self.cfg),
"mlp": decoder_mlp,
diff --git a/transformer_lens/tools/model_registry/__init__.py b/transformer_lens/tools/model_registry/__init__.py
index 63d61d54c..dd79d58c2 100644
--- a/transformer_lens/tools/model_registry/__init__.py
+++ b/transformer_lens/tools/model_registry/__init__.py
@@ -89,6 +89,7 @@
"Qwen3_5ForCausalLM",
"StableLmForCausalLM",
"T5ForConditionalGeneration",
+ "MT5ForConditionalGeneration",
"XGLMForCausalLM",
}
diff --git a/transformer_lens/tools/model_registry/data/architecture_gaps.json b/transformer_lens/tools/model_registry/data/architecture_gaps.json
index b664ac22a..19723ef94 100644
--- a/transformer_lens/tools/model_registry/data/architecture_gaps.json
+++ b/transformer_lens/tools/model_registry/data/architecture_gaps.json
@@ -1,14 +1,36 @@
{
- "generated_at": "2026-04-16",
+ "generated_at": "2026-05-08T15:02:52.940447Z",
"scan_info": {
- "total_scanned": 4839,
- "task_filter": "text-generation",
+ "task_filter": [
+ "text-generation",
+ "text2text-generation"
+ ],
+ "total_scanned": 5270,
"min_downloads": 500,
- "scan_duration_seconds": 4.9
+ "merged_from_runs": 2
},
- "total_unsupported_architectures": 413,
- "total_unsupported_models": 1364,
+ "total_unsupported_architectures": 439,
+ "total_unsupported_models": 1748,
"gaps": [
+ {
+ "architecture_id": "MarianMTModel",
+ "total_models": 169,
+ "total_downloads": 8456318,
+ "min_param_count": 14843019,
+ "sample_models": [
+ "Helsinki-NLP/opus-mt-nl-en",
+ "Helsinki-NLP/opus-mt-en-de",
+ "Helsinki-NLP/opus-mt-fr-en",
+ "Helsinki-NLP/opus-mt-tr-en",
+ "Helsinki-NLP/opus-mt-de-en",
+ "Helsinki-NLP/opus-mt-ko-en",
+ "Helsinki-NLP/opus-mt-en-fr",
+ "Helsinki-NLP/opus-mt-ru-en",
+ "Helsinki-NLP/opus-mt-en-ru",
+ "Helsinki-NLP/opus-mt-en-es"
+ ],
+ "relevancy_score": 100.0
+ },
{
"architecture_id": "Qwen3_5ForConditionalGeneration",
"total_models": 75,
@@ -85,6 +107,25 @@
],
"relevancy_score": 74.5
},
+ {
+ "architecture_id": "BartForConditionalGeneration",
+ "total_models": 83,
+ "total_downloads": 7451844,
+ "min_param_count": 28176,
+ "sample_models": [
+ "KomeijiForce/bart-large-emojilm",
+ "antalvdb/bart-base-spelling-nl",
+ "lmqg/bart-large-squad-qg",
+ "kengurukleo/deutsch_a2_transformer",
+ "shibing624/bart4csc-base-chinese",
+ "SkitCon/gec-spanish-BARTO-SYNTHETIC",
+ "cive202/humanize-ai-text-bart-base",
+ "Tianlin668/MentalBART",
+ "Nargizi/screeve-lemmatizer",
+ "KomeijiForce/bart-large-emojilm-e2t"
+ ],
+ "relevancy_score": 73.7
+ },
{
"architecture_id": "DeepseekV32ForCausalLM",
"total_models": 12,
@@ -142,6 +183,25 @@
],
"relevancy_score": 60.7
},
+ {
+ "architecture_id": "M2M100ForConditionalGeneration",
+ "total_models": 21,
+ "total_downloads": 4231967,
+ "min_param_count": 332735488,
+ "sample_models": [
+ "dsfsi/nso-en-m2m100-gov",
+ "facebook/m2m100_1.2B",
+ "facebook/nllb-200-distilled-600M",
+ "facebook/m2m100_418M",
+ "facebook/nllb-200-3.3B",
+ "facebook/nllb-200-distilled-1.3B",
+ "facebook/nllb-200-1.3B",
+ "Babelscape/mrebel-base",
+ "Xenova/nllb-200-distilled-600M",
+ "alirezamsh/small100"
+ ],
+ "relevancy_score": 58.8
+ },
{
"architecture_id": "Glm4MoeForCausalLM",
"total_models": 15,
@@ -163,8 +223,8 @@
},
{
"architecture_id": "T5GemmaForConditionalGeneration",
- "total_models": 13,
- "total_downloads": 1127923,
+ "total_models": 22,
+ "total_downloads": 2307747,
"min_param_count": 312517632,
"sample_models": [
"google/t5gemma-s-s-prefixlm",
@@ -200,23 +260,23 @@
"relevancy_score": 57.3
},
{
- "architecture_id": "BartForConditionalGeneration",
- "total_models": 11,
- "total_downloads": 695299,
- "min_param_count": 6044480,
+ "architecture_id": "MBartForConditionalGeneration",
+ "total_models": 35,
+ "total_downloads": 689744,
+ "min_param_count": 131603038,
"sample_models": [
- "KomeijiForce/bart-large-emojilm",
- "antalvdb/bart-base-spelling-nl",
- "lmqg/bart-large-squad-qg",
- "kengurukleo/deutsch_a2_transformer",
- "shibing624/bart4csc-base-chinese",
- "SkitCon/gec-spanish-BARTO-SYNTHETIC",
- "cive202/humanize-ai-text-bart-base",
- "Tianlin668/MentalBART",
- "Nargizi/screeve-lemmatizer",
- "KomeijiForce/bart-large-emojilm-e2t"
+ "Pravopysnyk/best-unlp",
+ "DeepPavlov/mbart-large-50-ru-persona-chat",
+ "sn4kebyt3/ru-bart-large",
+ "MRNH/mbart-italian-grammar-corrector",
+ "MRNH/mbart-german-grammar-corrector",
+ "MRNH/mbart-russian-grammar-corrector",
+ "ai4bharat/IndicBART",
+ "facebook/mbart-large-50-one-to-many-mmt",
+ "moussaKam/mbarthez",
+ "facebook/mbart-large-50-many-to-many-mmt"
],
- "relevancy_score": 55.8
+ "relevancy_score": 57.2
},
{
"architecture_id": "BaichuanForCausalLM",
@@ -406,6 +466,23 @@
],
"relevancy_score": 50.6
},
+ {
+ "architecture_id": "PegasusForConditionalGeneration",
+ "total_models": 8,
+ "total_downloads": 417097,
+ "min_param_count": 568796007,
+ "sample_models": [
+ "google/pegasus-xsum",
+ "human-centered-summarization/financial-summarization-pegasus",
+ "tuner007/pegasus_paraphrase",
+ "google/pegasus-cnn_dailymail",
+ "google/pegasus-large",
+ "nsi319/legal-pegasus",
+ "zaemyung/DElIteraTeR-PEGASUS-Multi-Sent-Revision-Generator",
+ "google/pegasus-pubmed"
+ ],
+ "relevancy_score": 50.5
+ },
{
"architecture_id": "MT5ForConditionalGeneration",
"total_models": 13,
@@ -561,8 +638,8 @@
},
{
"architecture_id": "BloomModel",
- "total_models": 8,
- "total_downloads": 40679,
+ "total_models": 9,
+ "total_downloads": 41582,
"min_param_count": 16156544,
"sample_models": [
"bigscience/bigscience-small-testing",
@@ -594,6 +671,20 @@
],
"relevancy_score": 47.7
},
+ {
+ "architecture_id": "FSMTForConditionalGeneration",
+ "total_models": 5,
+ "total_downloads": 123986,
+ "min_param_count": 271847424,
+ "sample_models": [
+ "stas/tiny-wmt19-en-de",
+ "facebook/wmt19-ru-en",
+ "facebook/wmt19-en-de",
+ "facebook/wmt19-de-en",
+ "facebook/wmt19-en-ru"
+ ],
+ "relevancy_score": 47.1
+ },
{
"architecture_id": "NemotronForCausalLM",
"total_models": 5,
@@ -608,6 +699,19 @@
],
"relevancy_score": 47.0
},
+ {
+ "architecture_id": "BlenderbotForConditionalGeneration",
+ "total_models": 5,
+ "total_downloads": 137109,
+ "min_param_count": 364810568,
+ "sample_models": [
+ "thu-coai/blenderbot-400M-esconv",
+ "facebook/blenderbot-3B",
+ "facebook/blenderbot-400M-distill",
+ "nilotpaldhar2004/blenderbot-chatbot"
+ ],
+ "relevancy_score": 47.0
+ },
{
"architecture_id": "HyenaDNAForCausalLM",
"total_models": 6,
@@ -686,6 +790,24 @@
],
"relevancy_score": 46.0
},
+ {
+ "architecture_id": "LEDForConditionalGeneration",
+ "total_models": 9,
+ "total_downloads": 44482,
+ "min_param_count": 161894745,
+ "sample_models": [
+ "allenai/led-base-16384",
+ "allenai/led-large-16384",
+ "pszemraj/led-large-book-summary",
+ "pszemraj/led-base-book-summary",
+ "allenai/led-large-16384-arxiv",
+ "nsi319/legal-led-base-16384",
+ "allenai/PRIMERA",
+ "patrickvonplaten/led-large-16384-pubmed",
+ "soumitsr/led-base-article-digestor"
+ ],
+ "relevancy_score": 45.9
+ },
{
"architecture_id": "SDARForCausalLM",
"total_models": 8,
@@ -750,6 +872,16 @@
],
"relevancy_score": 45.4
},
+ {
+ "architecture_id": "ProphetNetForConditionalGeneration",
+ "total_models": 1,
+ "total_downloads": 93577,
+ "min_param_count": 391321600,
+ "sample_models": [
+ "microsoft/prophetnet-large-uncased"
+ ],
+ "relevancy_score": 45.4
+ },
{
"architecture_id": "ArceeForCausalLM",
"total_models": 4,
@@ -779,6 +911,33 @@
],
"relevancy_score": 45.1
},
+ {
+ "architecture_id": "IndicTransForConditionalGeneration",
+ "total_models": 6,
+ "total_downloads": 44512,
+ "min_param_count": 228316160,
+ "sample_models": [
+ "ai4bharat/indictrans2-en-indic-dist-200M",
+ "ai4bharat/indictrans2-indic-en-1B",
+ "ai4bharat/indictrans2-en-indic-1B",
+ "ai4bharat/indictrans2-indic-en-dist-200M",
+ "ai4bharat/indictrans2-indic-indic-dist-320M",
+ "ai4bharat/indictrans2-indic-indic-1B"
+ ],
+ "relevancy_score": 45.1
+ },
+ {
+ "architecture_id": "T5Gemma2ForConditionalGeneration",
+ "total_models": 3,
+ "total_downloads": 65800,
+ "min_param_count": 786029296,
+ "sample_models": [
+ "google/t5gemma-2-1b-1b",
+ "google/t5gemma-2-270m-270m",
+ "google/t5gemma-2-4b-4b"
+ ],
+ "relevancy_score": 45.1
+ },
{
"architecture_id": "LlavaQwen2ForCausalLM",
"total_models": 5,
@@ -807,6 +966,21 @@
],
"relevancy_score": 44.8
},
+ {
+ "architecture_id": "LongT5ForConditionalGeneration",
+ "total_models": 6,
+ "total_downloads": 35411,
+ "min_param_count": 222903552,
+ "sample_models": [
+ "google/long-t5-tglobal-base",
+ "google/long-t5-tglobal-xl",
+ "google/long-t5-local-base",
+ "Stancld/longt5-tglobal-large-16384-pubmed-3k_steps",
+ "google/long-t5-tglobal-large",
+ "agemagician/mlong-t5-tglobal-base"
+ ],
+ "relevancy_score": 44.6
+ },
{
"architecture_id": "SeedOssForCausalLM",
"total_models": 4,
@@ -922,21 +1096,6 @@
],
"relevancy_score": 43.1
},
- {
- "architecture_id": "MBartForConditionalGeneration",
- "total_models": 6,
- "total_downloads": 7712,
- "min_param_count": 379691717,
- "sample_models": [
- "Pravopysnyk/best-unlp",
- "DeepPavlov/mbart-large-50-ru-persona-chat",
- "sn4kebyt3/ru-bart-large",
- "MRNH/mbart-italian-grammar-corrector",
- "MRNH/mbart-german-grammar-corrector",
- "MRNH/mbart-russian-grammar-corrector"
- ],
- "relevancy_score": 43.0
- },
{
"architecture_id": "DeciLMForCausalLM",
"total_models": 13,
@@ -956,6 +1115,23 @@
],
"relevancy_score": 42.9
},
+ {
+ "architecture_id": "EncoderDecoderModel",
+ "total_models": 8,
+ "total_downloads": 12696,
+ "min_param_count": 221337,
+ "sample_models": [
+ "optimum-internal-testing/tiny-random-encoder-decoder-gpt2-bert",
+ "mrm8488/bert2bert_shared-spanish-finetuned-summarization",
+ "google/bert2bert_L-24_wmt_de_en",
+ "patrickvonplaten/bert2bert-cnn_dailymail-fp16",
+ "mohitsha/tiny-random-testing-bert2gpt2",
+ "cahya/bert2bert-indonesian-summarization",
+ "patrickvonplaten/bert2bert_cnn_daily_mail",
+ "google/roberta2roberta_L-24_bbc"
+ ],
+ "relevancy_score": 42.9
+ },
{
"architecture_id": "DogeForCausalLM",
"total_models": 6,
@@ -1086,6 +1262,16 @@
],
"relevancy_score": 41.6
},
+ {
+ "architecture_id": "GlmAsrForConditionalGeneration",
+ "total_models": 1,
+ "total_downloads": 101459,
+ "min_param_count": 2257843200,
+ "sample_models": [
+ "zai-org/GLM-ASR-Nano-2512"
+ ],
+ "relevancy_score": 41.6
+ },
{
"architecture_id": "OLMoForCausalLM",
"total_models": 7,
@@ -1356,6 +1542,16 @@
],
"relevancy_score": 38.8
},
+ {
+ "architecture_id": "T5Model",
+ "total_models": 1,
+ "total_downloads": 4694,
+ "min_param_count": 222903552,
+ "sample_models": [
+ "sonoisa/t5-base-japanese"
+ ],
+ "relevancy_score": 38.8
+ },
{
"architecture_id": "Lfm2MoeForCausalLM",
"total_models": 7,
@@ -1372,6 +1568,18 @@
],
"relevancy_score": 38.6
},
+ {
+ "architecture_id": "Florence2ForConditionalGeneration",
+ "total_models": 3,
+ "total_downloads": 3407,
+ "min_param_count": 3549945,
+ "sample_models": [
+ "onnx-community/Florence-2-base-ft",
+ "Xenova/tiny-random-Florence2ForConditionalGeneration",
+ "onnx-community/Florence-2-large-ft"
+ ],
+ "relevancy_score": 38.6
+ },
{
"architecture_id": "GatedDeltaNetForCausalLM",
"total_models": 1,
@@ -1466,8 +1674,8 @@
},
{
"architecture_id": "T5EncoderModel",
- "total_models": 1,
- "total_downloads": 117289,
+ "total_models": 2,
+ "total_downloads": 200188,
"min_param_count": 4762310656,
"sample_models": [
"XLabs-AI/xflux_text_encoders"
@@ -1511,15 +1719,15 @@
"relevancy_score": 37.6
},
{
- "architecture_id": "Moondream",
+ "architecture_id": "PegasusXForConditionalGeneration",
"total_models": 2,
- "total_downloads": 11437,
- "min_param_count": 1857482608,
+ "total_downloads": 2431,
+ "min_param_count": 568667136,
"sample_models": [
- "vikhyatk/moondream1",
- "zesquirrelnator/moondream2-finetuneV2"
+ "google/pegasus-x-base",
+ "pszemraj/pegasus-x-large-book-summary"
],
- "relevancy_score": 37.5
+ "relevancy_score": 37.6
},
{
"architecture_id": "ForCausalLM",
@@ -1533,32 +1741,47 @@
"relevancy_score": 37.5
},
{
- "architecture_id": "Autoencoder",
- "total_models": 1,
- "total_downloads": 2349,
- "min_param_count": 75832064,
+ "architecture_id": "Moondream",
+ "total_models": 2,
+ "total_downloads": 11437,
+ "min_param_count": 1857482608,
"sample_models": [
- "cccczshao/CALM-Autoencoder"
+ "vikhyatk/moondream1",
+ "zesquirrelnator/moondream2-finetuneV2"
+ ],
+ "relevancy_score": 37.5
+ },
+ {
+ "architecture_id": "SwitchTransformersForConditionalGeneration",
+ "total_models": 3,
+ "total_downloads": 11807,
+ "min_param_count": 1978514688,
+ "sample_models": [
+ "google/switch-base-8",
+ "google/switch-base-16",
+ "google/switch-base-32"
],
"relevancy_score": 37.4
},
{
- "architecture_id": "BlenderbotForConditionalGeneration",
+ "architecture_id": "Autoencoder",
"total_models": 1,
- "total_downloads": 2337,
- "min_param_count": 364810568,
+ "total_downloads": 2349,
+ "min_param_count": 75832064,
"sample_models": [
- "thu-coai/blenderbot-400M-esconv"
+ "cccczshao/CALM-Autoencoder"
],
"relevancy_score": 37.4
},
{
- "architecture_id": "TransformerForCausalLM",
- "total_models": 1,
- "total_downloads": 13828,
- "min_param_count": 1364297728,
+ "architecture_id": "Qwen2AudioForConditionalGeneration",
+ "total_models": 3,
+ "total_downloads": 429859,
+ "min_param_count": 8388083712,
"sample_models": [
- "fla-hub/transformer-1.3B-100B"
+ "Qwen/Qwen2-Audio-7B-Instruct",
+ "Qwen/Qwen2-Audio-7B",
+ "skoneru/qwen2_st_ft_v2"
],
"relevancy_score": 37.3
},
@@ -1593,14 +1816,14 @@
"relevancy_score": 37.3
},
{
- "architecture_id": "Plamo3ForCausalLM",
+ "architecture_id": "TransformerForCausalLM",
"total_models": 1,
- "total_downloads": 13053,
- "min_param_count": 2603344384,
+ "total_downloads": 13828,
+ "min_param_count": 1364297728,
"sample_models": [
- "pfnet/plamo-3-nict-2b-base"
+ "fla-hub/transformer-1.3B-100B"
],
- "relevancy_score": 37.2
+ "relevancy_score": 37.3
},
{
"architecture_id": "D3LMForMaskedLM",
@@ -1613,14 +1836,14 @@
"relevancy_score": 37.2
},
{
- "architecture_id": "MoEGPTForCausalLM",
+ "architecture_id": "Plamo3ForCausalLM",
"total_models": 1,
- "total_downloads": 2001,
- "min_param_count": 149603328,
+ "total_downloads": 13053,
+ "min_param_count": 2603344384,
"sample_models": [
- "arnomatic/german-moe-gpt-v8-pretrained"
+ "pfnet/plamo-3-nict-2b-base"
],
- "relevancy_score": 37.1
+ "relevancy_score": 37.2
},
{
"architecture_id": "LanceAI",
@@ -1632,6 +1855,16 @@
],
"relevancy_score": 37.1
},
+ {
+ "architecture_id": "MoEGPTForCausalLM",
+ "total_models": 1,
+ "total_downloads": 2001,
+ "min_param_count": 149603328,
+ "sample_models": [
+ "arnomatic/german-moe-gpt-v8-pretrained"
+ ],
+ "relevancy_score": 37.1
+ },
{
"architecture_id": "HGRNBitForCausalLM",
"total_models": 2,
@@ -1799,22 +2032,22 @@
"relevancy_score": 35.4
},
{
- "architecture_id": "SoraForSLM",
+ "architecture_id": "CircuitGPTForCausalLM",
"total_models": 1,
- "total_downloads": 915,
- "min_param_count": 450707456,
+ "total_downloads": 908,
+ "min_param_count": 419124736,
"sample_models": [
- "Conlanger-LLM-CLEM/Sorie"
+ "openai/circuit-sparsity"
],
"relevancy_score": 35.4
},
{
- "architecture_id": "CircuitGPTForCausalLM",
+ "architecture_id": "SoraForSLM",
"total_models": 1,
- "total_downloads": 908,
- "min_param_count": 419124736,
+ "total_downloads": 915,
+ "min_param_count": 450707456,
"sample_models": [
- "openai/circuit-sparsity"
+ "Conlanger-LLM-CLEM/Sorie"
],
"relevancy_score": 35.4
},
@@ -1829,6 +2062,17 @@
],
"relevancy_score": 35.3
},
+ {
+ "architecture_id": "AudioFlamingo3ForConditionalGeneration",
+ "total_models": 2,
+ "total_downloads": 197941,
+ "min_param_count": 8267215360,
+ "sample_models": [
+ "nvidia/audio-flamingo-3-hf",
+ "nvidia/music-flamingo-hf"
+ ],
+ "relevancy_score": 35.3
+ },
{
"architecture_id": "DotLMForCausalLM",
"total_models": 1,
@@ -1965,6 +2209,17 @@
],
"relevancy_score": 34.3
},
+ {
+ "architecture_id": "MusicFlamingoForConditionalGeneration",
+ "total_models": 2,
+ "total_downloads": 120851,
+ "min_param_count": 8267215360,
+ "sample_models": [
+ "nvidia/music-flamingo-2601-hf",
+ "nvidia/music-flamingo-think-2601-hf"
+ ],
+ "relevancy_score": 34.2
+ },
{
"architecture_id": "GPT2CompetitiveMoE",
"total_models": 1,
@@ -2251,22 +2506,22 @@
"relevancy_score": 31.0
},
{
- "architecture_id": "VeridianForCausalLM",
+ "architecture_id": "AeroForConditionalGeneration",
"total_models": 1,
- "total_downloads": 738,
- "min_param_count": 1659913728,
+ "total_downloads": 717,
+ "min_param_count": 2416221184,
"sample_models": [
- "MagistrTheOne/veridian-beta"
+ "lmms-lab/Aero-1-Audio"
],
"relevancy_score": 30.9
},
{
- "architecture_id": "AeroForConditionalGeneration",
+ "architecture_id": "VeridianForCausalLM",
"total_models": 1,
- "total_downloads": 717,
- "min_param_count": 2416221184,
+ "total_downloads": 738,
+ "min_param_count": 1659913728,
"sample_models": [
- "lmms-lab/Aero-1-Audio"
+ "MagistrTheOne/veridian-beta"
],
"relevancy_score": 30.9
},
@@ -2282,22 +2537,22 @@
"relevancy_score": 30.6
},
{
- "architecture_id": "Lfm2Prototype1ForCausalLM",
+ "architecture_id": "HymbaForCausalLM",
"total_models": 1,
- "total_downloads": 633,
- "min_param_count": 1212304128,
+ "total_downloads": 629,
+ "min_param_count": 1522797824,
"sample_models": [
- "nntsuzu/LFM2-SFT-Prototype01-1.2B-JP"
+ "nvidia/Hymba-1.5B-Instruct"
],
"relevancy_score": 30.6
},
{
- "architecture_id": "HymbaForCausalLM",
+ "architecture_id": "Lfm2Prototype1ForCausalLM",
"total_models": 1,
- "total_downloads": 629,
- "min_param_count": 1522797824,
+ "total_downloads": 633,
+ "min_param_count": 1212304128,
"sample_models": [
- "nvidia/Hymba-1.5B-Instruct"
+ "nntsuzu/LFM2-SFT-Prototype01-1.2B-JP"
],
"relevancy_score": 30.6
},
@@ -2336,26 +2591,24 @@
"relevancy_score": 30.4
},
{
- "architecture_id": "TinyChartPhiForCausalLM",
+ "architecture_id": "GPTSanJapaneseForConditionalGeneration",
"total_models": 1,
- "total_downloads": 3544,
- "min_param_count": 3189407648,
+ "total_downloads": 599,
+ "min_param_count": 2779000992,
"sample_models": [
- "mPLUG/TinyChart-3B-768"
+ "Tanrei/GPTSAN-japanese"
],
"relevancy_score": 30.3
},
{
- "architecture_id": "PersimmonForCausalLM",
- "total_models": 3,
- "total_downloads": 12223,
- "min_param_count": 8823735296,
+ "architecture_id": "TinyChartPhiForCausalLM",
+ "total_models": 1,
+ "total_downloads": 3544,
+ "min_param_count": 3189407648,
"sample_models": [
- "adept/persimmon-8b-chat",
- "adept/persimmon-8b-base",
- "pszemraj/perSLIMmon-8b-base"
+ "mPLUG/TinyChart-3B-768"
],
- "relevancy_score": 30.2
+ "relevancy_score": 30.3
},
{
"architecture_id": "FlexOlmoForCausalLM",
@@ -2369,6 +2622,18 @@
],
"relevancy_score": 30.2
},
+ {
+ "architecture_id": "PersimmonForCausalLM",
+ "total_models": 3,
+ "total_downloads": 12223,
+ "min_param_count": 8823735296,
+ "sample_models": [
+ "adept/persimmon-8b-chat",
+ "adept/persimmon-8b-base",
+ "pszemraj/perSLIMmon-8b-base"
+ ],
+ "relevancy_score": 30.2
+ },
{
"architecture_id": "Phi3SmallForCausalLM",
"total_models": 2,
@@ -2604,22 +2869,22 @@
"relevancy_score": 27.0
},
{
- "architecture_id": "JetMoEForCausalLM",
+ "architecture_id": "InternVLChatModel",
"total_models": 1,
- "total_downloads": 4808,
- "min_param_count": 8522237952,
+ "total_downloads": 758,
+ "min_param_count": 3712637952,
"sample_models": [
- "jetmoe/jetmoe-8b"
+ "numind/NuExtract-2-4B-experimental"
],
"relevancy_score": 27.0
},
{
- "architecture_id": "InternVLChatModel",
+ "architecture_id": "JetMoEForCausalLM",
"total_models": 1,
- "total_downloads": 758,
- "min_param_count": 3712637952,
+ "total_downloads": 4808,
+ "min_param_count": 8522237952,
"sample_models": [
- "numind/NuExtract-2-4B-experimental"
+ "jetmoe/jetmoe-8b"
],
"relevancy_score": 27.0
},
@@ -2634,6 +2899,16 @@
],
"relevancy_score": 26.8
},
+ {
+ "architecture_id": "T5GemmaVoiceForConditionalGeneration",
+ "total_models": 1,
+ "total_downloads": 736,
+ "min_param_count": 5314418949,
+ "sample_models": [
+ "Aratako/T5Gemma-TTS-2b-2b"
+ ],
+ "relevancy_score": 26.8
+ },
{
"architecture_id": "InternLMForCausalLM",
"total_models": 4,
@@ -2647,6 +2922,19 @@
],
"relevancy_score": 26.7
},
+ {
+ "architecture_id": "UMT5ForConditionalGeneration",
+ "total_models": 4,
+ "total_downloads": 115924,
+ "min_param_count": null,
+ "sample_models": [
+ "google/umt5-xxl",
+ "google/umt5-base",
+ "google/umt5-small",
+ "google/umt5-xl"
+ ],
+ "relevancy_score": 26.7
+ },
{
"architecture_id": "SarvamMoEForCausalLM",
"total_models": 2,
@@ -2779,6 +3067,17 @@
],
"relevancy_score": 26.0
},
+ {
+ "architecture_id": "PLBartForConditionalGeneration",
+ "total_models": 2,
+ "total_downloads": 102095,
+ "min_param_count": null,
+ "sample_models": [
+ "uclanlp/plbart-base",
+ "uclanlp/plbart-java-cs"
+ ],
+ "relevancy_score": 25.8
+ },
{
"architecture_id": "Qwen2VLAudioForConditionalGeneration",
"total_models": 1,
@@ -2789,6 +3088,17 @@
],
"relevancy_score": 25.8
},
+ {
+ "architecture_id": "BlenderbotSmallForConditionalGeneration",
+ "total_models": 2,
+ "total_downloads": 94677,
+ "min_param_count": null,
+ "sample_models": [
+ "facebook/blenderbot_small-90M",
+ "facebook/blenderbot-90M"
+ ],
+ "relevancy_score": 25.7
+ },
{
"architecture_id": "Esm2LlamaInstructForCausalLM",
"total_models": 1,
@@ -2888,22 +3198,22 @@
"relevancy_score": 24.4
},
{
- "architecture_id": "HCXVisionForCausalLM",
+ "architecture_id": "Param2MoEForCausalLM",
"total_models": 1,
- "total_downloads": 59628,
- "min_param_count": null,
+ "total_downloads": 9131,
+ "min_param_count": 17151140480,
"sample_models": [
- "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
+ "bharatgenai/Param2-17B-A2.4B-Thinking"
],
"relevancy_score": 24.4
},
{
- "architecture_id": "Param2MoEForCausalLM",
+ "architecture_id": "HCXVisionForCausalLM",
"total_models": 1,
- "total_downloads": 9131,
- "min_param_count": 17151140480,
+ "total_downloads": 59628,
+ "min_param_count": null,
"sample_models": [
- "bharatgenai/Param2-17B-A2.4B-Thinking"
+ "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
],
"relevancy_score": 24.4
},
@@ -2952,22 +3262,22 @@
"relevancy_score": 24.0
},
{
- "architecture_id": "BunnyLlamaForCausalLM",
+ "architecture_id": "MiniCPMSALAForCausalLM",
"total_models": 1,
- "total_downloads": 1147,
- "min_param_count": 8479990848,
+ "total_downloads": 1142,
+ "min_param_count": 9477203968,
"sample_models": [
- "typhoon-ai/llama-3-typhoon-v1.5-8b-vision-preview"
+ "openbmb/MiniCPM-SALA"
],
"relevancy_score": 23.9
},
{
- "architecture_id": "MiniCPMSALAForCausalLM",
+ "architecture_id": "BunnyLlamaForCausalLM",
"total_models": 1,
- "total_downloads": 1142,
- "min_param_count": 9477203968,
+ "total_downloads": 1147,
+ "min_param_count": 8479990848,
"sample_models": [
- "openbmb/MiniCPM-SALA"
+ "typhoon-ai/llama-3-typhoon-v1.5-8b-vision-preview"
],
"relevancy_score": 23.9
},
@@ -3023,22 +3333,22 @@
"relevancy_score": 23.0
},
{
- "architecture_id": "Qwen2Model",
+ "architecture_id": "Qwen2VLForConditionalGeneration",
"total_models": 1,
- "total_downloads": 717,
- "min_param_count": 7070619136,
+ "total_downloads": 714,
+ "min_param_count": 8291375616,
"sample_models": [
- "NewBeeKing/MemPO_Qwen2.5-SFT-RL"
+ "typhoon-ai/typhoon2-qwen2vl-7b-vision-instruct"
],
"relevancy_score": 22.9
},
{
- "architecture_id": "Qwen2VLForConditionalGeneration",
+ "architecture_id": "Qwen2Model",
"total_models": 1,
- "total_downloads": 714,
- "min_param_count": 8291375616,
+ "total_downloads": 717,
+ "min_param_count": 7070619136,
"sample_models": [
- "typhoon-ai/typhoon2-qwen2vl-7b-vision-instruct"
+ "NewBeeKing/MemPO_Qwen2.5-SFT-RL"
],
"relevancy_score": 22.9
},
@@ -3064,6 +3374,16 @@
],
"relevancy_score": 22.8
},
+ {
+ "architecture_id": "ICONNForCausalLM",
+ "total_models": 1,
+ "total_downloads": 669,
+ "min_param_count": 7833409536,
+ "sample_models": [
+ "ICONNAI/ICONN-1-Mini-Beta"
+ ],
+ "relevancy_score": 22.7
+ },
{
"architecture_id": "Gemma4ForCausalLM",
"total_models": 1,
@@ -3084,16 +3404,6 @@
],
"relevancy_score": 22.7
},
- {
- "architecture_id": "ICONNForCausalLM",
- "total_models": 1,
- "total_downloads": 669,
- "min_param_count": 7833409536,
- "sample_models": [
- "ICONNAI/ICONN-1-Mini-Beta"
- ],
- "relevancy_score": 22.7
- },
{
"architecture_id": "BailingMoeV2_5ForCausalLM",
"total_models": 1,
@@ -3147,42 +3457,42 @@
"relevancy_score": 22.2
},
{
- "architecture_id": "SolarForCausalLM",
+ "architecture_id": "RecaLLMLlamaForCausalLM",
"total_models": 1,
- "total_downloads": 20538,
- "min_param_count": null,
+ "total_downloads": 514,
+ "min_param_count": 8030294016,
"sample_models": [
- "upstage/solar-pro-preview-instruct"
+ "kswhitecross/RecaLLM-Llama-3.1-8B"
],
"relevancy_score": 22.1
},
{
- "architecture_id": "GptOssPuzzleForCausalLM",
+ "architecture_id": "RecaLLMQwen2ForCausalLM",
"total_models": 1,
- "total_downloads": 20294,
- "min_param_count": 90837823680,
+ "total_downloads": 503,
+ "min_param_count": 7612785152,
"sample_models": [
- "nvidia/gpt-oss-puzzle-88B"
+ "kswhitecross/RecaLLM-Qwen2.5-7B"
],
"relevancy_score": 22.1
},
{
- "architecture_id": "RecaLLMLlamaForCausalLM",
+ "architecture_id": "GptOssPuzzleForCausalLM",
"total_models": 1,
- "total_downloads": 514,
- "min_param_count": 8030294016,
+ "total_downloads": 20294,
+ "min_param_count": 90837823680,
"sample_models": [
- "kswhitecross/RecaLLM-Llama-3.1-8B"
+ "nvidia/gpt-oss-puzzle-88B"
],
"relevancy_score": 22.1
},
{
- "architecture_id": "RecaLLMQwen2ForCausalLM",
+ "architecture_id": "SolarForCausalLM",
"total_models": 1,
- "total_downloads": 503,
- "min_param_count": 7612785152,
+ "total_downloads": 20538,
+ "min_param_count": null,
"sample_models": [
- "kswhitecross/RecaLLM-Qwen2.5-7B"
+ "upstage/solar-pro-preview-instruct"
],
"relevancy_score": 22.1
},
@@ -3254,16 +3564,6 @@
],
"relevancy_score": 21.3
},
- {
- "architecture_id": "MiniCPM3ForCausalLM",
- "total_models": 1,
- "total_downloads": 13980,
- "min_param_count": null,
- "sample_models": [
- "openbmb/MiniCPM3-4B"
- ],
- "relevancy_score": 21.3
- },
{
"architecture_id": "ArcticForCausalLM",
"total_models": 1,
@@ -3284,6 +3584,16 @@
],
"relevancy_score": 21.3
},
+ {
+ "architecture_id": "MiniCPM3ForCausalLM",
+ "total_models": 1,
+ "total_downloads": 13980,
+ "min_param_count": null,
+ "sample_models": [
+ "openbmb/MiniCPM3-4B"
+ ],
+ "relevancy_score": 21.3
+ },
{
"architecture_id": "Dots1ForCausalLM",
"total_models": 2,
@@ -3435,24 +3745,24 @@
"relevancy_score": 20.0
},
{
- "architecture_id": "ModernBertDecoderForCausalLM",
+ "architecture_id": "GPT2Model",
"total_models": 2,
- "total_downloads": 5499,
+ "total_downloads": 5444,
"min_param_count": null,
"sample_models": [
- "jhu-clsp/ettin-decoder-400m",
- "jhu-clsp/ettin-decoder-32m"
+ "keshan/sinhala-gpt2",
+ "cerebras/Cerebras-GPT-13B"
],
"relevancy_score": 19.9
},
{
- "architecture_id": "GPT2Model",
+ "architecture_id": "ModernBertDecoderForCausalLM",
"total_models": 2,
- "total_downloads": 5444,
+ "total_downloads": 5499,
"min_param_count": null,
"sample_models": [
- "keshan/sinhala-gpt2",
- "cerebras/Cerebras-GPT-13B"
+ "jhu-clsp/ettin-decoder-400m",
+ "jhu-clsp/ettin-decoder-32m"
],
"relevancy_score": 19.9
},
@@ -3546,24 +3856,24 @@
"relevancy_score": 19.5
},
{
- "architecture_id": "RobertaForCausalLM",
+ "architecture_id": "MossForCausalLM",
"total_models": 2,
- "total_downloads": 4508,
+ "total_downloads": 4419,
"min_param_count": null,
"sample_models": [
- "uf-aice-lab/math-roberta",
- "gokceuludogan/ChemBERTaLM"
+ "OpenMOSS-Team/moss-moon-003-sft",
+ "OpenMOSS-Team/moss-moon-003-base"
],
"relevancy_score": 19.4
},
{
- "architecture_id": "MossForCausalLM",
+ "architecture_id": "RobertaForCausalLM",
"total_models": 2,
- "total_downloads": 4419,
+ "total_downloads": 4508,
"min_param_count": null,
"sample_models": [
- "OpenMOSS-Team/moss-moon-003-sft",
- "OpenMOSS-Team/moss-moon-003-base"
+ "uf-aice-lab/math-roberta",
+ "gokceuludogan/ChemBERTaLM"
],
"relevancy_score": 19.4
},
@@ -3589,24 +3899,24 @@
"relevancy_score": 19.4
},
{
- "architecture_id": "BartForCausalLM",
+ "architecture_id": "TranceptionLMHeadModel",
"total_models": 2,
- "total_downloads": 4230,
+ "total_downloads": 4204,
"min_param_count": null,
"sample_models": [
- "sanchit-gandhi/tiny-random-bart-fp16",
- "hf-tiny-model-private/tiny-random-BartForCausalLM"
+ "PascalNotin/Tranception_Large",
+ "PascalNotin/Tranception_Small"
],
"relevancy_score": 19.3
},
{
- "architecture_id": "TranceptionLMHeadModel",
+ "architecture_id": "BartForCausalLM",
"total_models": 2,
- "total_downloads": 4204,
+ "total_downloads": 4230,
"min_param_count": null,
"sample_models": [
- "PascalNotin/Tranception_Large",
- "PascalNotin/Tranception_Small"
+ "sanchit-gandhi/tiny-random-bart-fp16",
+ "hf-tiny-model-private/tiny-random-BartForCausalLM"
],
"relevancy_score": 19.3
},
@@ -3682,22 +3992,22 @@
"relevancy_score": 18.6
},
{
- "architecture_id": "LongcatCausalLM",
+ "architecture_id": "TransfoXLLMHeadModel",
"total_models": 1,
- "total_downloads": 3733,
- "min_param_count": 561862880256,
+ "total_downloads": 3677,
+ "min_param_count": null,
"sample_models": [
- "meituan-longcat/LongCat-Flash-Thinking-2601"
+ "transfo-xl/transfo-xl-wt103"
],
"relevancy_score": 18.4
},
{
- "architecture_id": "TransfoXLLMHeadModel",
+ "architecture_id": "LongcatCausalLM",
"total_models": 1,
- "total_downloads": 3677,
- "min_param_count": null,
+ "total_downloads": 3733,
+ "min_param_count": 561862880256,
"sample_models": [
- "transfo-xl/transfo-xl-wt103"
+ "meituan-longcat/LongCat-Flash-Thinking-2601"
],
"relevancy_score": 18.4
},
@@ -3724,7 +4034,18 @@
"relevancy_score": 18.2
},
{
- "architecture_id": "MyAwesomeModelForCausalLM",
+ "architecture_id": "BigBirdPegasusForConditionalGeneration",
+ "total_models": 2,
+ "total_downloads": 2950,
+ "min_param_count": null,
+ "sample_models": [
+ "google/bigbird-pegasus-large-arxiv",
+ "google/bigbird-pegasus-large-pubmed"
+ ],
+ "relevancy_score": 18.1
+ },
+ {
+ "architecture_id": "MyAwesomeModelForCausalLM",
"total_models": 1,
"total_downloads": 3144,
"min_param_count": null,
@@ -3754,12 +4075,12 @@
"relevancy_score": 17.9
},
{
- "architecture_id": "QHEARTForECGQA",
+ "architecture_id": "TAMELM",
"total_models": 1,
- "total_downloads": 2916,
+ "total_downloads": 2877,
"min_param_count": null,
"sample_models": [
- "Manhph2211/Q-HEART"
+ "reaperdoesntknow/TameForCasualLM"
],
"relevancy_score": 17.9
},
@@ -3774,12 +4095,12 @@
"relevancy_score": 17.9
},
{
- "architecture_id": "TAMELM",
+ "architecture_id": "QHEARTForECGQA",
"total_models": 1,
- "total_downloads": 2877,
+ "total_downloads": 2916,
"min_param_count": null,
"sample_models": [
- "reaperdoesntknow/TameForCasualLM"
+ "Manhph2211/Q-HEART"
],
"relevancy_score": 17.9
},
@@ -3795,22 +4116,22 @@
"relevancy_score": 17.8
},
{
- "architecture_id": "CoherenceMomentumModel",
+ "architecture_id": "CPMAntForCausalLM",
"total_models": 1,
- "total_downloads": 2795,
+ "total_downloads": 2778,
"min_param_count": null,
"sample_models": [
- "aisingapore/coherence-momentum"
+ "openbmb/cpm-ant-10b"
],
"relevancy_score": 17.8
},
{
- "architecture_id": "CPMAntForCausalLM",
+ "architecture_id": "ThinkerLM",
"total_models": 1,
- "total_downloads": 2778,
+ "total_downloads": 2726,
"min_param_count": null,
"sample_models": [
- "openbmb/cpm-ant-10b"
+ "prskid1000/micro-Omni"
],
"relevancy_score": 17.8
},
@@ -3825,12 +4146,12 @@
"relevancy_score": 17.8
},
{
- "architecture_id": "ThinkerLM",
+ "architecture_id": "CoherenceMomentumModel",
"total_models": 1,
- "total_downloads": 2726,
+ "total_downloads": 2795,
"min_param_count": null,
"sample_models": [
- "prskid1000/micro-Omni"
+ "aisingapore/coherence-momentum"
],
"relevancy_score": 17.8
},
@@ -3845,42 +4166,32 @@
"relevancy_score": 17.7
},
{
- "architecture_id": "MoEGPT2",
- "total_models": 1,
- "total_downloads": 2577,
- "min_param_count": null,
- "sample_models": [
- "NamrataThakur/Small_Language_Model_MOE_127M_Pretrained"
- ],
- "relevancy_score": 17.6
- },
- {
- "architecture_id": "GPT2",
+ "architecture_id": "TeleFLMForCausalLM",
"total_models": 1,
- "total_downloads": 2566,
+ "total_downloads": 2492,
"min_param_count": null,
"sample_models": [
- "NamrataThakur/Small_Language_Model_MHA_53M_Pretrained"
+ "CofeAI/Tele-FLM-1T"
],
"relevancy_score": 17.6
},
{
- "architecture_id": "GQAGPT2",
+ "architecture_id": "JiRackTernary1B",
"total_models": 1,
- "total_downloads": 2551,
+ "total_downloads": 2525,
"min_param_count": null,
"sample_models": [
- "NamrataThakur/Small_Language_Model_GQA_48M_Pretrained"
+ "kgrabko/JiRackTernary_1b"
],
"relevancy_score": 17.6
},
{
- "architecture_id": "JiRackTernary1B",
+ "architecture_id": "GPT2",
"total_models": 1,
- "total_downloads": 2525,
+ "total_downloads": 2566,
"min_param_count": null,
"sample_models": [
- "kgrabko/JiRackTernary_1b"
+ "NamrataThakur/Small_Language_Model_MHA_53M_Pretrained"
],
"relevancy_score": 17.6
},
@@ -3895,24 +4206,24 @@
"relevancy_score": 17.6
},
{
- "architecture_id": "TeleFLMForCausalLM",
+ "architecture_id": "MoEGPT2",
"total_models": 1,
- "total_downloads": 2492,
+ "total_downloads": 2577,
"min_param_count": null,
"sample_models": [
- "CofeAI/Tele-FLM-1T"
+ "NamrataThakur/Small_Language_Model_MOE_127M_Pretrained"
],
"relevancy_score": 17.6
},
{
- "architecture_id": "SeerAttnLlamaForCausalLM",
+ "architecture_id": "GQAGPT2",
"total_models": 1,
- "total_downloads": 2413,
+ "total_downloads": 2551,
"min_param_count": null,
"sample_models": [
- "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates"
+ "NamrataThakur/Small_Language_Model_GQA_48M_Pretrained"
],
- "relevancy_score": 17.5
+ "relevancy_score": 17.6
},
{
"architecture_id": "Speech2TextTransformerForConditionalGeneration",
@@ -3925,14 +4236,14 @@
"relevancy_score": 17.5
},
{
- "architecture_id": "WhisperMixStyleForConditionalGeneration",
+ "architecture_id": "SeerAttnLlamaForCausalLM",
"total_models": 1,
- "total_downloads": 2299,
+ "total_downloads": 2413,
"min_param_count": null,
"sample_models": [
- "wago5090/mixstyle_multi-s"
+ "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates"
],
- "relevancy_score": 17.4
+ "relevancy_score": 17.5
},
{
"architecture_id": "Videollama2Qwen2ForCausalLM",
@@ -3944,6 +4255,16 @@
],
"relevancy_score": 17.4
},
+ {
+ "architecture_id": "WhisperMixStyleForConditionalGeneration",
+ "total_models": 1,
+ "total_downloads": 2299,
+ "min_param_count": null,
+ "sample_models": [
+ "wago5090/mixstyle_multi-s"
+ ],
+ "relevancy_score": 17.4
+ },
{
"architecture_id": "DenseLLM",
"total_models": 1,
@@ -3985,32 +4306,32 @@
"relevancy_score": 17.3
},
{
- "architecture_id": "Typhoon2Audio2AudioForConditionalGeneration",
+ "architecture_id": "TFGPT2LMHeadModel",
"total_models": 1,
- "total_downloads": 2205,
+ "total_downloads": 2178,
"min_param_count": null,
"sample_models": [
- "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct"
+ "mymusise/gpt2-medium-chinese"
],
"relevancy_score": 17.3
},
{
- "architecture_id": "TFGPT2LMHeadModel",
+ "architecture_id": "GPTModelForTextGeneration",
"total_models": 1,
- "total_downloads": 2178,
+ "total_downloads": 2169,
"min_param_count": null,
"sample_models": [
- "mymusise/gpt2-medium-chinese"
+ "samkeet/GPT_124M-Instruct"
],
"relevancy_score": 17.3
},
{
- "architecture_id": "GPTModelForTextGeneration",
+ "architecture_id": "Typhoon2Audio2AudioForConditionalGeneration",
"total_models": 1,
- "total_downloads": 2169,
+ "total_downloads": 2205,
"min_param_count": null,
"sample_models": [
- "samkeet/GPT_124M-Instruct"
+ "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct"
],
"relevancy_score": 17.3
},
@@ -4026,114 +4347,114 @@
"relevancy_score": 17.2
},
{
- "architecture_id": "LlaMAForCausalLM",
+ "architecture_id": "RobertaPreLayerNormForCausalLM",
"total_models": 1,
- "total_downloads": 2140,
+ "total_downloads": 2067,
"min_param_count": null,
"sample_models": [
- "circulus/alpaca-7b"
+ "hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "GeoVForCausalLM",
+ "architecture_id": "EnergyTransformer",
"total_models": 1,
- "total_downloads": 2137,
+ "total_downloads": 2087,
"min_param_count": null,
"sample_models": [
- "GeoV/GeoV-9b"
+ "cccczshao/CALM-M"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "ElectraForCausalLM",
+ "architecture_id": "BlenderbotForCausalLM",
"total_models": 1,
- "total_downloads": 2128,
+ "total_downloads": 2066,
"min_param_count": null,
"sample_models": [
- "smeoni/nbme-electra-large-generator"
+ "hf-tiny-model-private/tiny-random-BlenderbotForCausalLM"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "XModelForCausalLM",
+ "architecture_id": "PegasusForCausalLM",
"total_models": 1,
- "total_downloads": 2098,
+ "total_downloads": 2077,
"min_param_count": null,
"sample_models": [
- "XiaoduoAILab/Xmodel_LM"
+ "hf-tiny-model-private/tiny-random-PegasusForCausalLM"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "EnergyTransformer",
+ "architecture_id": "GeoVForCausalLM",
"total_models": 1,
- "total_downloads": 2087,
+ "total_downloads": 2137,
"min_param_count": null,
"sample_models": [
- "cccczshao/CALM-M"
+ "GeoV/GeoV-9b"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "PegasusForCausalLM",
+ "architecture_id": "XModelForCausalLM",
"total_models": 1,
- "total_downloads": 2077,
+ "total_downloads": 2098,
"min_param_count": null,
"sample_models": [
- "hf-tiny-model-private/tiny-random-PegasusForCausalLM"
+ "XiaoduoAILab/Xmodel_LM"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "RobertaPreLayerNormForCausalLM",
+ "architecture_id": "ElectraForCausalLM",
"total_models": 1,
- "total_downloads": 2067,
+ "total_downloads": 2128,
"min_param_count": null,
"sample_models": [
- "hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM"
+ "smeoni/nbme-electra-large-generator"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "BlenderbotForCausalLM",
+ "architecture_id": "LlaMAForCausalLM",
"total_models": 1,
- "total_downloads": 2066,
+ "total_downloads": 2140,
"min_param_count": null,
"sample_models": [
- "hf-tiny-model-private/tiny-random-BlenderbotForCausalLM"
+ "circulus/alpaca-7b"
],
"relevancy_score": 17.2
},
{
- "architecture_id": "MonkeyLMHeadModel",
+ "architecture_id": "PointLLMLlamaForCausalLM",
"total_models": 2,
- "total_downloads": 1519,
+ "total_downloads": 1534,
"min_param_count": null,
"sample_models": [
- "echo840/Monkey-Chat",
- "echo840/Monkey"
+ "RunsenXu/PointLLM_7B_v1.1_init",
+ "RunsenXu/PointLLM_7B_v1.2"
],
"relevancy_score": 17.1
},
{
- "architecture_id": "PointLLMLlamaForCausalLM",
+ "architecture_id": "MonkeyLMHeadModel",
"total_models": 2,
- "total_downloads": 1534,
+ "total_downloads": 1519,
"min_param_count": null,
"sample_models": [
- "RunsenXu/PointLLM_7B_v1.1_init",
- "RunsenXu/PointLLM_7B_v1.2"
+ "echo840/Monkey-Chat",
+ "echo840/Monkey"
],
"relevancy_score": 17.1
},
{
- "architecture_id": "MvpForCausalLM",
+ "architecture_id": "DebertaV2ForCausalLM",
"total_models": 1,
- "total_downloads": 2039,
+ "total_downloads": 1979,
"min_param_count": null,
"sample_models": [
- "hf-tiny-model-private/tiny-random-MvpForCausalLM"
+ "ltg/deberta-xxlarge-fixed"
],
"relevancy_score": 17.1
},
@@ -4158,41 +4479,41 @@
"relevancy_score": 17.1
},
{
- "architecture_id": "DebertaV2ForCausalLM",
+ "architecture_id": "MvpForCausalLM",
"total_models": 1,
- "total_downloads": 1979,
+ "total_downloads": 2039,
"min_param_count": null,
"sample_models": [
- "ltg/deberta-xxlarge-fixed"
+ "hf-tiny-model-private/tiny-random-MvpForCausalLM"
],
"relevancy_score": 17.1
},
{
- "architecture_id": "TelechatForCausalLM",
+ "architecture_id": "OtterForConditionalGeneration",
"total_models": 2,
- "total_downloads": 1456,
+ "total_downloads": 1489,
"min_param_count": null,
"sample_models": [
- "Tele-AI/telechat-7B",
- "Tele-AI/TeleChat-12B"
+ "luodian/OTTER-Video-LLaMA7B-DenseCaption",
+ "luodian/OTTER-MPT1B-RPJama-Init"
],
"relevancy_score": 17.0
},
{
- "architecture_id": "OtterForConditionalGeneration",
+ "architecture_id": "TelechatForCausalLM",
"total_models": 2,
- "total_downloads": 1489,
+ "total_downloads": 1456,
"min_param_count": null,
"sample_models": [
- "luodian/OTTER-Video-LLaMA7B-DenseCaption",
- "luodian/OTTER-MPT1B-RPJama-Init"
+ "Tele-AI/telechat-7B",
+ "Tele-AI/TeleChat-12B"
],
"relevancy_score": 17.0
},
{
"architecture_id": "LSGBartForConditionalGeneration",
- "total_models": 1,
- "total_downloads": 1887,
+ "total_models": 2,
+ "total_downloads": 2605,
"min_param_count": null,
"sample_models": [
"morenolq/LEGIT-BART-LSG-4096"
@@ -4221,14 +4542,14 @@
"relevancy_score": 16.8
},
{
- "architecture_id": "LlavaCrystalForCausalLM",
+ "architecture_id": "NorT5ForConditionalGeneration",
"total_models": 1,
- "total_downloads": 1620,
+ "total_downloads": 1754,
"min_param_count": null,
"sample_models": [
- "LLM360/CrystalChat-7B-Web2Code"
+ "ltg/nort5-base-en-no-translation"
],
- "relevancy_score": 16.6
+ "relevancy_score": 16.7
},
{
"architecture_id": "InternLM2ForRewardModel",
@@ -4241,14 +4562,14 @@
"relevancy_score": 16.6
},
{
- "architecture_id": "MobilintEagle3Qwen2ForCausalLM",
+ "architecture_id": "LlavaCrystalForCausalLM",
"total_models": 1,
- "total_downloads": 1550,
+ "total_downloads": 1620,
"min_param_count": null,
"sample_models": [
- "mobilint/EAGLE3-JPharmatron-7B"
+ "LLM360/CrystalChat-7B-Web2Code"
],
- "relevancy_score": 16.5
+ "relevancy_score": 16.6
},
{
"architecture_id": "MobileLLMForCausalLM",
@@ -4260,6 +4581,16 @@
],
"relevancy_score": 16.5
},
+ {
+ "architecture_id": "MobilintEagle3Qwen2ForCausalLM",
+ "total_models": 1,
+ "total_downloads": 1550,
+ "min_param_count": null,
+ "sample_models": [
+ "mobilint/EAGLE3-JPharmatron-7B"
+ ],
+ "relevancy_score": 16.5
+ },
{
"architecture_id": "GeoChatLlamaForCausalLM",
"total_models": 1,
@@ -4301,22 +4632,22 @@
"relevancy_score": 16.1
},
{
- "architecture_id": "CambrianLlamaForCausalLM",
+ "architecture_id": "JiRackTernaryModel",
"total_models": 1,
- "total_downloads": 1209,
+ "total_downloads": 1195,
"min_param_count": null,
"sample_models": [
- "nyu-visionx/cambrian-8b"
+ "kgrabko/JiRackTernary_70b"
],
"relevancy_score": 16.0
},
{
- "architecture_id": "JiRackTernaryModel",
+ "architecture_id": "CambrianLlamaForCausalLM",
"total_models": 1,
- "total_downloads": 1195,
+ "total_downloads": 1209,
"min_param_count": null,
"sample_models": [
- "kgrabko/JiRackTernary_70b"
+ "nyu-visionx/cambrian-8b"
],
"relevancy_score": 16.0
},
@@ -4361,32 +4692,32 @@
"relevancy_score": 15.5
},
{
- "architecture_id": "TransnormerForCausalLM",
+ "architecture_id": "SOVYN85M",
"total_models": 1,
"total_downloads": 957,
"min_param_count": null,
"sample_models": [
- "OpenNLPLab/TransNormerLLM-385M"
+ "SOVYN/SOVYN-85M"
],
"relevancy_score": 15.5
},
{
- "architecture_id": "SOVYN85M",
+ "architecture_id": "TransnormerForCausalLM",
"total_models": 1,
"total_downloads": 957,
"min_param_count": null,
"sample_models": [
- "SOVYN/SOVYN-85M"
+ "OpenNLPLab/TransNormerLLM-385M"
],
"relevancy_score": 15.5
},
{
- "architecture_id": "ShikraLlamaForCausalLM",
+ "architecture_id": "ZsGPT2LMHeadModel",
"total_models": 1,
- "total_downloads": 931,
+ "total_downloads": 913,
"min_param_count": null,
"sample_models": [
- "shikras/shikra-7b-delta-v1"
+ "claritylab/zero-shot-vanilla-gpt2"
],
"relevancy_score": 15.4
},
@@ -4400,16 +4731,6 @@
],
"relevancy_score": 15.4
},
- {
- "architecture_id": "ZsGPT2LMHeadModel",
- "total_models": 1,
- "total_downloads": 913,
- "min_param_count": null,
- "sample_models": [
- "claritylab/zero-shot-vanilla-gpt2"
- ],
- "relevancy_score": 15.4
- },
{
"architecture_id": "LlamaModel",
"total_models": 1,
@@ -4421,14 +4742,14 @@
"relevancy_score": 15.4
},
{
- "architecture_id": "AquilaDenseForCausalLM",
+ "architecture_id": "ShikraLlamaForCausalLM",
"total_models": 1,
- "total_downloads": 855,
+ "total_downloads": 931,
"min_param_count": null,
"sample_models": [
- "BAAI/AquilaDense-7B"
+ "shikras/shikra-7b-delta-v1"
],
- "relevancy_score": 15.2
+ "relevancy_score": 15.4
},
{
"architecture_id": "HumanGPTForCausalLM",
@@ -4441,22 +4762,22 @@
"relevancy_score": 15.2
},
{
- "architecture_id": "Phi4FlashForCausalLM",
+ "architecture_id": "AquilaDenseForCausalLM",
"total_models": 1,
- "total_downloads": 841,
+ "total_downloads": 855,
"min_param_count": null,
"sample_models": [
- "microsoft/Phi-4-mini-flash-reasoning"
+ "BAAI/AquilaDense-7B"
],
"relevancy_score": 15.2
},
{
- "architecture_id": "EmuForCausalLM",
+ "architecture_id": "Phi4FlashForCausalLM",
"total_models": 1,
- "total_downloads": 822,
+ "total_downloads": 841,
"min_param_count": null,
"sample_models": [
- "BAAI/Emu2-Chat"
+ "microsoft/Phi-4-mini-flash-reasoning"
],
"relevancy_score": 15.2
},
@@ -4471,14 +4792,24 @@
"relevancy_score": 15.2
},
{
- "architecture_id": "FlamingoForCausalLM",
+ "architecture_id": "CodeT5pEncoderDecoderModel",
"total_models": 1,
- "total_downloads": 796,
+ "total_downloads": 891,
"min_param_count": null,
"sample_models": [
- "babylm/flamingo-2024"
+ "Salesforce/codet5p-2b"
],
- "relevancy_score": 15.1
+ "relevancy_score": 15.2
+ },
+ {
+ "architecture_id": "EmuForCausalLM",
+ "total_models": 1,
+ "total_downloads": 822,
+ "min_param_count": null,
+ "sample_models": [
+ "BAAI/Emu2-Chat"
+ ],
+ "relevancy_score": 15.2
},
{
"architecture_id": "VStreamLlamaForCausalLM",
@@ -4500,6 +4831,16 @@
],
"relevancy_score": 15.1
},
+ {
+ "architecture_id": "FlamingoForCausalLM",
+ "total_models": 1,
+ "total_downloads": 796,
+ "min_param_count": null,
+ "sample_models": [
+ "babylm/flamingo-2024"
+ ],
+ "relevancy_score": 15.1
+ },
{
"architecture_id": "MoELLaVAQWenForCausalLM",
"total_models": 1,
@@ -4511,22 +4852,22 @@
"relevancy_score": 15.0
},
{
- "architecture_id": "YayiForCausalLM",
+ "architecture_id": "GPT",
"total_models": 1,
- "total_downloads": 721,
+ "total_downloads": 720,
"min_param_count": null,
"sample_models": [
- "wenge-research/yayi2-30b"
+ "LH-Tech-AI/Apex-1.5-Coder-Instruct-350M"
],
"relevancy_score": 14.9
},
{
- "architecture_id": "GPT",
+ "architecture_id": "YayiForCausalLM",
"total_models": 1,
- "total_downloads": 720,
+ "total_downloads": 721,
"min_param_count": null,
"sample_models": [
- "LH-Tech-AI/Apex-1.5-Coder-Instruct-350M"
+ "wenge-research/yayi2-30b"
],
"relevancy_score": 14.9
},
@@ -4551,12 +4892,12 @@
"relevancy_score": 14.8
},
{
- "architecture_id": "SDARMoeForCausalLM",
+ "architecture_id": "GPTBigCodeLMHeadModel",
"total_models": 1,
- "total_downloads": 675,
- "min_param_count": 30532122624,
+ "total_downloads": 664,
+ "min_param_count": null,
"sample_models": [
- "JetLM/SDAR-30B-A3B-Chat-b32"
+ "bigcode/santacoderpack"
],
"relevancy_score": 14.7
},
@@ -4581,22 +4922,22 @@
"relevancy_score": 14.7
},
{
- "architecture_id": "GPTBigCodeLMHeadModel",
+ "architecture_id": "MobiLlamaForCausalLM",
"total_models": 1,
- "total_downloads": 664,
+ "total_downloads": 663,
"min_param_count": null,
"sample_models": [
- "bigcode/santacoderpack"
+ "MBZUAI/MobiLlama-05B"
],
"relevancy_score": 14.7
},
{
- "architecture_id": "MobiLlamaForCausalLM",
+ "architecture_id": "SDARMoeForCausalLM",
"total_models": 1,
- "total_downloads": 663,
- "min_param_count": null,
+ "total_downloads": 675,
+ "min_param_count": 30532122624,
"sample_models": [
- "MBZUAI/MobiLlama-05B"
+ "JetLM/SDAR-30B-A3B-Chat-b32"
],
"relevancy_score": 14.7
},
@@ -4631,22 +4972,22 @@
"relevancy_score": 14.6
},
{
- "architecture_id": "CacaForCausalLM",
+ "architecture_id": "Llama2ForCausalLM",
"total_models": 1,
- "total_downloads": 607,
+ "total_downloads": 593,
"min_param_count": null,
"sample_models": [
- "Lyon28/caca-1B-untrained"
+ "llmware/dragon-llama-7b-v0"
],
"relevancy_score": 14.5
},
{
- "architecture_id": "LingoWhaleForCausalLM",
+ "architecture_id": "MPLUGOwl2LlamaForCausalLM",
"total_models": 1,
- "total_downloads": 599,
+ "total_downloads": 597,
"min_param_count": null,
"sample_models": [
- "deeplang-ai/LingoWhale-8B"
+ "q-future/q-align-quality"
],
"relevancy_score": 14.5
},
@@ -4661,42 +5002,42 @@
"relevancy_score": 14.5
},
{
- "architecture_id": "MPLUGOwl2LlamaForCausalLM",
+ "architecture_id": "CacaForCausalLM",
"total_models": 1,
- "total_downloads": 597,
+ "total_downloads": 607,
"min_param_count": null,
"sample_models": [
- "q-future/q-align-quality"
+ "Lyon28/caca-1B-untrained"
],
"relevancy_score": 14.5
},
{
- "architecture_id": "Llama2ForCausalLM",
+ "architecture_id": "LingoWhaleForCausalLM",
"total_models": 1,
- "total_downloads": 593,
+ "total_downloads": 599,
"min_param_count": null,
"sample_models": [
- "llmware/dragon-llama-7b-v0"
+ "deeplang-ai/LingoWhale-8B"
],
"relevancy_score": 14.5
},
{
- "architecture_id": "GLaMMForCausalLM",
+ "architecture_id": "OpenBAForConditionalGeneration",
"total_models": 1,
- "total_downloads": 588,
+ "total_downloads": 581,
"min_param_count": null,
"sample_models": [
- "MBZUAI/GLaMM-FullScope"
+ "OpenNLG/OpenBA-V1-Based"
],
"relevancy_score": 14.4
},
{
- "architecture_id": "OpenBAForConditionalGeneration",
+ "architecture_id": "GLaMMForCausalLM",
"total_models": 1,
- "total_downloads": 581,
+ "total_downloads": 588,
"min_param_count": null,
"sample_models": [
- "OpenNLG/OpenBA-V1-Based"
+ "MBZUAI/GLaMM-FullScope"
],
"relevancy_score": 14.4
},
@@ -4731,42 +5072,42 @@
"relevancy_score": 14.3
},
{
- "architecture_id": "M2M100ForConditionalGeneration",
+ "architecture_id": "HgrnForCausalLM",
"total_models": 1,
- "total_downloads": 545,
+ "total_downloads": 534,
"min_param_count": null,
"sample_models": [
- "dsfsi/nso-en-m2m100-gov"
+ "OpenNLPLab/HGRN-150M"
],
- "relevancy_score": 14.3
+ "relevancy_score": 14.2
},
{
- "architecture_id": "AprielHForCausalLM",
+ "architecture_id": "LlavaMistralForCausalLM",
"total_models": 1,
- "total_downloads": 539,
+ "total_downloads": 530,
"min_param_count": null,
"sample_models": [
- "ServiceNow-AI/Apriel-H1-15b-Thinker-SFT"
+ "NousResearch/Nous-Hermes-2-Vision-Alpha"
],
"relevancy_score": 14.2
},
{
- "architecture_id": "HgrnForCausalLM",
+ "architecture_id": "VSMForCausalLM",
"total_models": 1,
- "total_downloads": 534,
+ "total_downloads": 522,
"min_param_count": null,
"sample_models": [
- "OpenNLPLab/HGRN-150M"
+ "craigwu/seal_vsm_7b"
],
"relevancy_score": 14.2
},
{
- "architecture_id": "LlavaMistralForCausalLM",
+ "architecture_id": "AprielHForCausalLM",
"total_models": 1,
- "total_downloads": 530,
+ "total_downloads": 539,
"min_param_count": null,
"sample_models": [
- "NousResearch/Nous-Hermes-2-Vision-Alpha"
+ "ServiceNow-AI/Apriel-H1-15b-Thinker-SFT"
],
"relevancy_score": 14.2
},
@@ -4781,14 +5122,14 @@
"relevancy_score": 14.2
},
{
- "architecture_id": "VSMForCausalLM",
+ "architecture_id": "XLMProphetNetForConditionalGeneration",
"total_models": 1,
- "total_downloads": 522,
+ "total_downloads": 531,
"min_param_count": null,
"sample_models": [
- "craigwu/seal_vsm_7b"
+ "microsoft/xprophetnet-large-wiki100-cased"
],
- "relevancy_score": 14.2
+ "relevancy_score": 14.0
}
]
}
\ No newline at end of file
diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json
index c0fb70907..46454e788 100644
--- a/transformer_lens/tools/model_registry/data/supported_models.json
+++ b/transformer_lens/tools/model_registry/data/supported_models.json
@@ -1,14 +1,14 @@
{
- "generated_at": "2026-04-16",
+ "generated_at": "2026-05-08",
"scan_info": {
- "total_scanned": 4839,
- "task_filter": "text-generation",
+ "total_scanned": 431,
+ "task_filter": "text2text-generation",
"min_downloads": 500,
- "scan_duration_seconds": 4.9
+ "scan_duration_seconds": 0.4
},
- "total_architectures": 50,
- "total_models": 9068,
- "total_verified": 711,
+ "total_architectures": 51,
+ "total_models": 9352,
+ "total_verified": 716,
"models": [
{
"architecture_id": "Qwen3NextForCausalLM",
@@ -125399,6 +125399,3982 @@
"phase4_score": 92.0,
"phase7_score": null,
"phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google-t5/t5-small",
+ "status": 1,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Full verification completed",
+ "phase1_score": 100.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 97.6,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google-t5/t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/flan-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "E-MIMIC/inclusively-reformulation-it5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "prithivida/parrot_paraphraser_on_T5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "allenai/unifiedqa-t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/flan-t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "IlyaGusev/rut5_base_headline_gen_telegram",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/flan-t5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/madlad400-3b-mt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google-t5/t5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-v1_1-xxl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/byt5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "vennify/t5-base-grammar-correction",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Rostlab/prot_t5_xl_uniref50",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "iarfmoose/t5-base-question-generator",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "patrickvonplaten/t5-tiny-random",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/byt5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "fabiochiu/t5-base-tag-generation",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "prithivida/grammar_error_correcter_v1",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/flan-t5-xl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "autogluon/chronos-t5-tiny",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ElnaggarLab/ankh-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "jbochi/madlad400-3b-mt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "trl-internal-testing/tiny-T5ForConditionalGeneration",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-v1_1-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ybelkada/tiny-random-T5ForConditionalGeneration-calibrated",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py311-pylingual-v1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "optimum/t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Rostlab/ProstT5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "autogluon/chronos-t5-mini",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "charsiu/g2p_multilingual_byT5_small_100",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Salesforce/codet5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-efficient-tiny",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "QizhiPei/biot5-plus-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Babelscape/t5-base-summarization-claim-extractor",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/byt5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "autogluon/chronos-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "peft-internal-testing/tiny-random-T5ForConditionalGeneration-calibrated",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "peft-internal-testing/tiny-T5ForConditionalGeneration",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "bigscience/T0pp",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py310-pylingual-v1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "utrobinmv/t5_translate_en_ru_zh_small_1024",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-v1_1-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "autogluon/chronos-t5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-v1_1-xl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "peft-internal-testing/tiny-random-t5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-small-e2e-qg",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "gokaygokay/Lamini-Prompt-Enchance",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "lytang/MiniCheck-Flan-T5-Large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py312-pylingual-v1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ybelkada/flan-t5-xl-sharded-bf16",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py313-pylingual-v1.1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "RussianNLP/FRED-T5-Summarizer",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cnicu/t5-small-booksum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "gokaygokay/Lamini-Prompt-Enchance-Long",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-v1_1-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Salesforce/codet5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/flan-t5-xxl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/flan-ul2",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "sonoisa/t5-base-japanese-v1.1",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Unbabel/gec-t5_small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "GT4SD/multitask-text-and-chemistry-t5-base-augm",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Rostlab/ProstT5_fp16",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Falconsai/text_summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "charsiu/g2p_multilingual_byT5_tiny_16_layers_100",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "roborovski/superprompt-v1",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Salesforce/codet5p-220m",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "UrukHan/t5-russian-spell",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cahya/t5-base-indonesian-summarization-cased",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "lizhuang144/flan-t5-base-VG-factual-sg",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "d0rj/rut5-base-summ",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "castorini/monot5-base-msmarco",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-small-qg-hl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "deep-learning-analytics/automatic-title-generation",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-base-finetuned-sarcasm-twitter",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "trohith89/KDTS_T5_Summary_FineTune",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py39-pylingual-v1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-small-finetuned-common_gen",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "sagawa/ReactionT5v2-forward",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cointegrated/rut5-base-absum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "FinText/Chronos_Tiny_2023_US",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Langboat/mengzi-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "laituan245/molt5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-base-finetuned-emotion",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "VietAI/vit5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/ul2",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "yatharth97/T5-base-10K-summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ramsrigouthamg/t5_paraphraser",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "chronbmm/sanskrit5-multitask",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Xenova/flan-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "MingZhong/unieval-fact",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-base-e2e-qg",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "kaiyuy/leandojo-lean4-retriever-tacgen-byt5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "autogluon/chronos-t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "echarlaix/t5-small-openvino",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "deepvk/kazRush-ru-kk",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "peft-internal-testing/tiny-random-T5ForConditionalGeneration",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-base-qg-hl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "grammarly/coedit-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "deepvk/kazRush-kk-ru",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Exscientia/IgT5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Xenova/t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/madlad400-7b-mt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "MingZhong/unieval-sum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "RUNorm/RUNorm-kirillizator",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-efficient-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "pyterrier-quality/qt5-tiny",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "kaiyuy/leandojo-lean4-tacgen-byt5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-base-finetuned-question-generation-ap",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py38-pylingual-v1.3-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "royweiss1/T5_MiddleSentences",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mesolitica/translation-t5-small-standard-bahasa-cased-v2",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "retrieva-jp/t5-small-short",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "MingZhong/unieval-dialog",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5_xxl_true_nli_mixture",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/madlad400-10b-mt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py314-pylingual-v4-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "razent/SciFive-base-Pubmed_PMC",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "RUNorm/RUNorm-normalizer-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-base-lm-adapt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Falconsai/medical_summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "lizhuang144/flan-t5-base-VG-factual-sg-id",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "uer/t5-base-chinese-cluecorpussmall",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "plguillou/t5-base-fr-sum-cnndm",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Xenova/LaMini-Flan-T5-783M",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "IlyaGusev/rut5_base_sum_gazeta",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ElnaggarLab/ankh-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "csebuetnlp/banglat5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "svjack/comet-atomic-en",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "allegro/plt5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Ateeqq/news-title-generator",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "retrieva-jp/t5-base-long",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ai-forever/ruT5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Salesforce/codet5p-770m",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "CohereLabs/aya-101",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-base-finetuned-span-sentiment-extraction",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py37-pylingual-v1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ai-forever/sage-fredt5-distilled-95m",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "VietAI/envit5-translation",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "bond005/ruT5-ASR-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "yigagilbert/salt_language_ID",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Xenova/flan-t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "hossboll/clinical-t5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Xenova/LaMini-Flan-T5-77M",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "pszemraj/flan-t5-large-grammar-synthesis",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "razent/SciFive-large-Pubmed",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "bigscience/T0_3B",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "recogna-nlp/ptt5-base-summ-xlsum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "pszemraj/grammar-synthesis-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-efficient-tiny-nl2",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "gsarti/it5-base-news-summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "chanind/frame-semantic-transformer-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cointegrated/rut5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "VietAI/vit5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cointegrated/rut5-small-normalizer",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "jbochi/candle-coedit-quantized",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "csebuetnlp/banglat5_small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Abdou/arabic-tashkeel-flan-t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "castorini/monot5-3b-msmarco-10k",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-small-lm-adapt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-small-qa-qg-hl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Xenova/LaMini-Flan-T5-248M",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "castorini/doc2query-t5-base-msmarco",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "dkarthikeyan1/tcrt5_ft_tcrdb",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cointegrated/rut5-base-multitask",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Wikidepia/IndoT5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "fangyuan/hotpotqa_abstractive",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Salesforce/codet5-base-multi-sum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-cola",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "nrl-ai/vn-diacritic-vit5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "czearing/article-title-generator",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "VietAI/vit5-base-vietnews-summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "uer/t5-small-chinese-cluecorpussmall",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "razent/SciFive-base-PMC",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-base-finetuned-common_gen",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "chanind/frame-semantic-transformer-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "VietAI/envit5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-xl-lm-adapt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ElnaggarLab/ankh3-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "laituan245/molt5-base-smiles2caption",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "GPT2LMHeadModel",
+ "model_id": "togatogah/jinen-v1-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-mrpc",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-qnli",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-mnli",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Pushkar27/GriceBench-Repair",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "LazarusNLP/IndoNanoT5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-rte",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "philschmid/flan-t5-base-samsum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-qqp",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-sst2",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cjvt/t5-slo-word-spelling-corrector",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "laituan245/molt5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "DeepMount00/OCR_corrector",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "popkek00/mt5-small-valutazione-cadute",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "allegro/plt5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Soyoung97/RankT5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "allenai/tk-instruct-base-def-pos",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mesolitica/finetune-qa-t5-small-standard-bahasa-cased",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "tanganke/flan-t5-base_glue-stsb",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "allenai/unifiedqa-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "khairi/life2lang-small-it",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "GT4SD/multitask-text-and-chemistry-t5-base-standard",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "snrspeaks/t5-one-line-summary",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "visheratin/t5-efficient-mini-grammar-correction",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "minhtoan/t5-translate-vietnamese-nom",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "unicamp-dl/ptt5-v2-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "kevinscaria/atsc_tk-instruct-base-def-pos-neg-neut-combined",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-small-finetuned-text-simplification",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "syssec-utd/py36-pylingual-v1-statement",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "GPT2LMHeadModel",
+ "model_id": "togatogah/jinen-v1-xsmall",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ai-forever/FRED-T5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ldenoue/Title_Generation_T5Small_Model",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "mrm8488/t5-base-finetuned-wikiSQL",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-11b-ssm-nq",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "MaRiOrOsSi/t5-base-finetuned-question-answering",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "textplus-bbaw/transnormer-19c-beta-v02",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "QizhiPei/biot5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "KomeijiForce/t5-base-emojilm",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "penelitianpsmatematika/medical-text-generation-t5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "yunhuijang/7e62tq9m",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "microsoft/codereviewer",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ConvLab/t5-small-dst-multiwoz21",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "TeraSpace/dialofred",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "csebuetnlp/banglat5_nmt_bn_en",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Michau/t5-base-en-generate-headline",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "pengold/t5-vietnamese-summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "utrobinmv/t5_translate_en_ru_zh_base_200",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Var3n/hmByT5_anno",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "knowledgator/flan-t5-large-for-classification",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "lcw99/t5-base-korean-text-summary",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "Qwen2ForCausalLM",
+ "model_id": "stukenov/sozkz-fix-qwen-500m-kk-gec-v3",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "Qwen2ForCausalLM",
+ "model_id": "stukenov/sozkz-fix-qwen-500m-kk-gec-v4",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-base-qa-qg-hl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "paust/pko-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "cointegrated/rut5-base-paraphraser",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "RUNorm/RUNorm-normalizer-medium",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "megagonlabs/t5-base-japanese-web",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ibm-research/regen-disambiguation",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "google/t5-efficient-mini",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ThomasNLG/t5-qa_squad2neg-en",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "Salesforce/codet5p-220m-py",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "ThomasNLG/t5-qg_squad1-en",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "d0rj/FRED-T5-large-instruct",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "lmqg/t5-base-tweetqa-qa",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "csebuetnlp/banglat5_nmt_en_bn",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "KETI-AIR/ke-t5-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "osunlp/attrscore-flan-t5-xl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "chentong00/propositionizer-wiki-flan-t5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "laituan245/molt5-large-caption2smiles",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "valhalla/t5-small-qg-prepend",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "lmqg/flan-t5-base-squad-qg",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "sagawa/ReactionT5v2-retrosynthesis",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "T5ForConditionalGeneration",
+ "model_id": "glonor/byt5-arabic-diacritization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "google/mt5-small",
+ "status": 1,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Full verification completed",
+ "phase1_score": 100.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 89.3,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "google/mt5-large",
+ "status": 1,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Full verification completed",
+ "phase1_score": 100.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 88.7,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "google/mt5-base",
+ "status": 1,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Full verification completed",
+ "phase1_score": 100.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 76.3,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "csebuetnlp/mT5_multilingual_XLSum",
+ "status": 3,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 36/282 components failed (36 critical)",
+ "phase1_score": 50.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 88.3,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "persiannlp/mt5-small-parsinlu-opus-translation_fa_en",
+ "status": 3,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 24/190 components failed (24 critical)",
+ "phase1_score": 50.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 82.8,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "optimum-internal-testing/tiny-random-mt5",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "persiannlp/mt5-base-parsinlu-opus-translation_fa_en",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "google/mt5-xl",
+ "status": 1,
+ "verified_date": "2026-05-08",
+ "metadata": null,
+ "note": "Full verification completed",
+ "phase1_score": 100.0,
+ "phase2_score": 100.0,
+ "phase3_score": null,
+ "phase4_score": 88.4,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "knowledgator/SMILES2IUPAC-canonical-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "kriton/greek-text-summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "cointegrated/rut5-small",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "deutsche-telekom/mt5-small-sum-de-en-v1",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-xl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "csebuetnlp/mT5_m2o_arabic_crossSum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-xxl",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "knowledgator/IUPAC2SMILES-canonical-base",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "indonlp/cendol-mt5-small-inst",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "HiTZ/Medical-mT5-large",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "intelia-lab-uah/mt0-base_AE_SQAC",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "intelia-lab-uah/mt0-base_QG_SQAC",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "thanathorn/mt5-cpe-kmutt-thai-sentence-sum",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-xxl-mt",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "eunsour/en-ko-transliterator",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "Buseak/md_mt5_0109_v8",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "bigscience/mt0-xxl-p3",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "persiannlp/mt5-small-parsinlu-translation_en_fa",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
+ },
+ {
+ "architecture_id": "MT5ForConditionalGeneration",
+ "model_id": "ozcangundes/mt5-small-turkish-summarization",
+ "status": 0,
+ "verified_date": null,
+ "metadata": null,
+ "note": null,
+ "phase1_score": null,
+ "phase2_score": null,
+ "phase3_score": null,
+ "phase4_score": null,
+ "phase7_score": null,
+ "phase8_score": null
}
]
}
diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json
index c87d21798..500076dc9 100644
--- a/transformer_lens/tools/model_registry/data/verification_history.json
+++ b/transformer_lens/tools/model_registry/data/verification_history.json
@@ -1,5 +1,5 @@
{
- "last_updated": "2026-04-21T20:10:35.469418",
+ "last_updated": "2026-05-08T10:21:54.476921",
"records": [
{
"model_id": "Macropodus/macbert4mdcspell_v1",
@@ -11850,6 +11850,86 @@
"notes": "Full verification completed",
"invalidated": false,
"invalidation_reason": null
+ },
+ {
+ "model_id": "google/mt5-small",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 24/190 components failed (24 critical)",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "persiannlp/mt5-small-parsinlu-opus-translation_fa_en",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 24/190 components failed (24 critical)",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "google/mt5-xl",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/558 components failed (72 critical)",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "google/mt5-small",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Full verification completed",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "google/mt5-base",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Full verification completed",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "google/mt5-large",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Full verification completed",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "google/mt5-xl",
+ "architecture_id": "MT5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Full verification completed",
+ "invalidated": false,
+ "invalidation_reason": null
+ },
+ {
+ "model_id": "google-t5/t5-small",
+ "architecture_id": "T5ForConditionalGeneration",
+ "verified_date": "2026-05-08",
+ "verified_by": "verify_models",
+ "transformerlens_version": null,
+ "notes": "Full verification completed",
+ "invalidated": false,
+ "invalidation_reason": null
}
]
}
diff --git a/transformer_lens/tools/model_registry/hf_scraper.py b/transformer_lens/tools/model_registry/hf_scraper.py
index df6b11bc9..19fb6bace 100644
--- a/transformer_lens/tools/model_registry/hf_scraper.py
+++ b/transformer_lens/tools/model_registry/hf_scraper.py
@@ -248,8 +248,13 @@ def scrape_all_models(
# count data inline with the listing, avoiding per-model API calls.
# With ~1000 models per page, a full scan of 200K+ models needs only
# ~200 paginated requests (well within the 1000 req / 5 min limit).
+ # Use ``filter`` rather than ``pipeline_tag`` so encoder-decoder models
+ # are discoverable: HF assigns T5/mT5 a primary pipeline_tag of
+ # "translation" (or None for mT5) and only lists "text2text-generation"
+ # in the broader tag list. ``filter`` matches against tags, ``pipeline_tag``
+ # only against the canonical primary tag.
list_kwargs: dict = {
- "pipeline_tag": task,
+ "filter": task,
"sort": "downloads",
"expand": ["config", "safetensors"],
}