diff --git a/demos/BERT.ipynb b/demos/BERT.ipynb index 9338fd30e..0b6efb8bf 100644 --- a/demos/BERT.ipynb +++ b/demos/BERT.ipynb @@ -28,16 +28,21 @@ }, { "cell_type": "code", - "execution_count": 39, - "metadata": {}, + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:16.632770Z", + "iopub.status.busy": "2026-05-07T23:58:16.632549Z", + "iopub.status.idle": "2026-05-07T23:58:16.663150Z", + "shell.execute_reply": "2026-05-07T23:58:16.662858Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Running as a Jupyter notebook - intended for development only!\n", - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" + "Running as a Jupyter notebook - intended for development only!\n" ] } ], @@ -77,8 +82,15 @@ }, { "cell_type": "code", - "execution_count": 40, - "metadata": {}, + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:16.680332Z", + "iopub.status.busy": "2026-05-07T23:58:16.680240Z", + "iopub.status.idle": "2026-05-07T23:58:17.094703Z", + "shell.execute_reply": "2026-05-07T23:58:17.094502Z" + } + }, "outputs": [ { "name": "stdout", @@ -101,27 +113,34 @@ }, { "cell_type": "code", - "execution_count": 41, - "metadata": {}, + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:17.095708Z", + "iopub.status.busy": "2026-05-07T23:58:17.095633Z", + "iopub.status.idle": "2026-05-07T23:58:17.766649Z", + "shell.execute_reply": "2026-05-07T23:58:17.766420Z" + } + }, "outputs": [ { "data": { "text/html": [ - "
\n", + "
\n", " " ], "text/plain": [ - "" + "" ] }, - "execution_count": 41, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -135,8 +154,15 @@ }, { "cell_type": "code", - "execution_count": 42, - "metadata": {}, + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:17.767701Z", + "iopub.status.busy": "2026-05-07T23:58:17.767614Z", + "iopub.status.idle": "2026-05-07T23:58:20.235678Z", + "shell.execute_reply": "2026-05-07T23:58:20.235292Z" + } + }, "outputs": [], "source": [ "# Import stuff\n", @@ -149,8 +175,15 @@ }, { "cell_type": "code", - "execution_count": 43, - "metadata": {}, + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:20.236982Z", + "iopub.status.busy": "2026-05-07T23:58:20.236839Z", + "iopub.status.idle": "2026-05-07T23:58:20.263136Z", + "shell.execute_reply": "2026-05-07T23:58:20.262870Z" + } + }, "outputs": [], "source": [ "# NBVAL_IGNORE_OUTPUT\n", @@ -169,13 +202,27 @@ }, { "cell_type": "code", - "execution_count": 44, - "metadata": {}, + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:20.264217Z", + "iopub.status.busy": "2026-05-07T23:58:20.264152Z", + "iopub.status.idle": "2026-05-07T23:58:21.584024Z", + "shell.execute_reply": "2026-05-07T23:58:21.583632Z" + } + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "1d4b75dcfcbf488da7196992cde5c9bb", + "model_id": "291186d81cdf425f85c9c6accf8f3170", "version_major": 2, "version_minor": 0 }, @@ -207,8 +254,15 @@ }, { "cell_type": "code", - "execution_count": 45, - "metadata": {}, + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:21.585622Z", + "iopub.status.busy": "2026-05-07T23:58:21.585530Z", + "iopub.status.idle": "2026-05-07T23:58:21.634792Z", + "shell.execute_reply": "2026-05-07T23:58:21.634568Z" + } + }, "outputs": [ { "name": "stdout", @@ -237,8 +291,15 @@ }, { "cell_type": "code", - "execution_count": 46, - "metadata": {}, + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:21.635905Z", + "iopub.status.busy": "2026-05-07T23:58:21.635834Z", + "iopub.status.idle": "2026-05-07T23:58:21.686269Z", + "shell.execute_reply": "2026-05-07T23:58:21.686039Z" + } + }, "outputs": [ { "name": "stdout", @@ -258,6 +319,124 @@ "print(f'Prediction: \"{predictions}\"')" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inspecting BERT internals: cache and hooks\n", + "\n", + "Everything you'd do on a `HookedTransformer` (run-with-cache, run-with-hooks, the `hooks(...)` context manager) works the same way on the BERT bridge. Two differences worth pointing out as we go:\n", + "\n", + "- **Bidirectional attention.** GPT-style models have a causal mask, so attention patterns are lower-triangular. BERT attends in both directions, so the pattern is dense — every token can see every other token.\n", + "- **Targets live mid-sequence.** With causal LMs you usually intervene at the final position; with BERT-style MLM the position of interest is wherever you put `[MASK]`. Almost all the mech-interp recipes carry over once you remember to index there." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cache: peek at activations and confirm bidirectionality\n", + "\n", + "`run_with_cache` exposes every hook point the bridge installs. Below we grab the residual stream after layer 0 and the attention pattern, then quantify that the pattern is genuinely two-sided (a causal model would have zero mass above the diagonal)." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:21.687327Z", + "iopub.status.busy": "2026-05-07T23:58:21.687268Z", + "iopub.status.idle": "2026-05-07T23:58:21.732798Z", + "shell.execute_reply": "2026-05-07T23:58:21.732587Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "resid_post (layer 0): (1, 8, 768)\n", + "attn pattern (layer 0):(1, 12, 8, 8)\n", + "attention mass above diagonal: 3.04\n", + "attention mass below diagonal: 2.76\n" + ] + } + ], + "source": [ + "prompt = \"The [MASK] is bright today.\"\n", + "tokens = bert.tokenizer(prompt, return_tensors=\"pt\")[\"input_ids\"]\n", + "mask_pos = (tokens[0] == bert.tokenizer.mask_token_id).nonzero()[0].item()\n", + "\n", + "_, cache = bert.run_with_cache(tokens)\n", + "print(f\"resid_post (layer 0): {tuple(cache['blocks.0.hook_resid_post'].shape)}\")\n", + "print(f\"attn pattern (layer 0):{tuple(cache['blocks.0.attn.hook_pattern'].shape)}\")\n", + "\n", + "# Confirm the attention pattern is bidirectional, not lower-triangular like a causal model.\n", + "pattern = cache[\"blocks.0.attn.hook_pattern\"][0, 0] # layer 0, head 0\n", + "upper = pattern.triu(diagonal=1).sum().item()\n", + "lower = pattern.tril(diagonal=-1).sum().item()\n", + "print(f\"attention mass above diagonal: {upper:.2f}\")\n", + "print(f\"attention mass below diagonal: {lower:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hooks: ablate an attention head and watch the logit shift\n", + "\n", + "Same `bert.hooks(...)` context manager as `HookedTransformer`. We zero one head in layer 0 for the duration of the call. A single-head ablation rarely flips the top-1 prediction (the model is too redundant for that), but it does shift the logit at the target token — and that shift is the signal mech-interp work usually cares about." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:21.733792Z", + "iopub.status.busy": "2026-05-07T23:58:21.733733Z", + "iopub.status.idle": "2026-05-07T23:58:21.789116Z", + "shell.execute_reply": "2026-05-07T23:58:21.788914Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "target token: 'sun'\n", + "baseline logit('sun'): 14.524\n", + "ablated logit('sun'): 15.029\n", + "shift: +0.504\n" + ] + } + ], + "source": [ + "def mask_logits(model, tokens, mask_pos):\n", + " return model(tokens)[0, mask_pos]\n", + "\n", + "\n", + "def ablate_layer_0_head_5(z, hook):\n", + " z[..., 5, :] = 0\n", + " return z\n", + "\n", + "\n", + "baseline = mask_logits(bert, tokens, mask_pos)\n", + "target_id = baseline.argmax().item()\n", + "target_str = bert.tokenizer.decode([target_id])\n", + "\n", + "with bert.hooks(fwd_hooks=[(\"blocks.0.attn.hook_z\", ablate_layer_0_head_5)]):\n", + " ablated = mask_logits(bert, tokens, mask_pos)\n", + "\n", + "# Top-1 is usually robust to a single-head ablation in an early layer — the\n", + "# clearer signal is the logit shift at the target token.\n", + "print(f\"target token: {target_str!r}\")\n", + "print(f\"baseline logit({target_str!r}): {baseline[target_id].item():7.3f}\")\n", + "print(f\"ablated logit({target_str!r}): {ablated[target_id].item():7.3f}\")\n", + "print(f\"shift: {(ablated[target_id] - baseline[target_id]).item():+7.3f}\")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -270,13 +449,20 @@ }, { "cell_type": "code", - "execution_count": 47, - "metadata": {}, + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:21.790081Z", + "iopub.status.busy": "2026-05-07T23:58:21.790022Z", + "iopub.status.idle": "2026-05-07T23:58:22.688419Z", + "shell.execute_reply": "2026-05-07T23:58:22.688161Z" + } + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "9cea0cab8d47422098f5bcec64a74126", + "model_id": "fde8bcf6ba14465096ce2180a2d3b4f5", "version_major": 2, "version_minor": 0 }, @@ -329,8 +515,15 @@ }, { "cell_type": "code", - "execution_count": 48, - "metadata": {}, + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2026-05-07T23:58:22.689564Z", + "iopub.status.busy": "2026-05-07T23:58:22.689502Z", + "iopub.status.idle": "2026-05-07T23:58:22.728869Z", + "shell.execute_reply": "2026-05-07T23:58:22.728661Z" + } + }, "outputs": [ { "name": "stdout", @@ -382,7 +575,734 @@ "pygments_lexer": "ipython3", "version": "3.12.12" }, - "orig_nbformat": 4 + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "0cb2ba7cf783495f8a767b504946ffaa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0e8ff295a5b2417d99bd81afcf735f79": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "15d93f98328748e8872d387981d43976": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_69abc6e9c5ac46e7977bb152a2b08545", + "max": 202.0, + "min": 0.0, + "orientation": "horizontal", + "style": "IPY_MODEL_f1d52b5dcaf147348ca4f3bec321356e", + "tabbable": null, + "tooltip": null, + "value": 202.0 + } + }, + "17e19169de9b4ccca06920c27e4611ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_54ca0ea7b1874a58a07c4477f69dd9e5", + "placeholder": "​", + "style": "IPY_MODEL_429f1c9017dd4f3d9e10c5b126bf7216", + "tabbable": null, + "tooltip": null, + "value": " 202/202 [00:00<00:00, 5456.37it/s, Materializing param=cls.predictions.transform.dense.weight]" + } + }, + "18b80fe7ba924f9f9c0585c3f4899ae4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "291186d81cdf425f85c9c6accf8f3170": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f6152742c7584aac81fdc110c9f55a8d", + "IPY_MODEL_15d93f98328748e8872d387981d43976", + "IPY_MODEL_17e19169de9b4ccca06920c27e4611ec" + ], + "layout": "IPY_MODEL_e5c30e8c00c5440b868f5c1df9c2e25e", + "tabbable": null, + "tooltip": null + } + }, + "429f1c9017dd4f3d9e10c5b126bf7216": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "4d9a89e7bc43452190bf282deb77b1e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "54ca0ea7b1874a58a07c4477f69dd9e5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "69abc6e9c5ac46e7977bb152a2b08545": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "77033e4e1062414c85e095a2f6dc0e94": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "790fc4cf502643f98677aaedc5c66e95": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "background": null, + "description_width": "", + "font_size": null, + "text_color": null + } + }, + "82e11d27e7ed43bcb8c6402353094b47": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "98ff75826c2d4415bf2c9d9ac1f0bbe9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_77033e4e1062414c85e095a2f6dc0e94", + "placeholder": "​", + "style": "IPY_MODEL_18b80fe7ba924f9f9c0585c3f4899ae4", + "tabbable": null, + "tooltip": null, + "value": "Loading weights: 100%" + } + }, + "a44ef0f1d6194a149e5191d8197a1ff8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_0e8ff295a5b2417d99bd81afcf735f79", + "max": 201.0, + "min": 0.0, + "orientation": "horizontal", + "style": "IPY_MODEL_b913cb229aa542ee99d1f0d111c4080a", + "tabbable": null, + "tooltip": null, + "value": 201.0 + } + }, + "b913cb229aa542ee99d1f0d111c4080a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d6621c3a124d4aef954c5e5845ca59af": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d7c6004161924be4a237098954c2ee23": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_d6621c3a124d4aef954c5e5845ca59af", + "placeholder": "​", + "style": "IPY_MODEL_790fc4cf502643f98677aaedc5c66e95", + "tabbable": null, + "tooltip": null, + "value": " 201/201 [00:00<00:00, 5484.00it/s, Materializing param=cls.seq_relationship.weight]" + } + }, + "e5c30e8c00c5440b868f5c1df9c2e25e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "2.0.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "2.0.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border_bottom": null, + "border_left": null, + "border_right": null, + "border_top": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f1d52b5dcaf147348ca4f3bec321356e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "2.0.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f6152742c7584aac81fdc110c9f55a8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HTMLView", + "description": "", + "description_allow_html": false, + "layout": "IPY_MODEL_82e11d27e7ed43bcb8c6402353094b47", + "placeholder": "​", + "style": "IPY_MODEL_4d9a89e7bc43452190bf282deb77b1e2", + "tabbable": null, + "tooltip": null, + "value": "Loading weights: 100%" + } + }, + "fde8bcf6ba14465096ce2180a2d3b4f5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "2.0.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "2.0.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "2.0.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_98ff75826c2d4415bf2c9d9ac1f0bbe9", + "IPY_MODEL_a44ef0f1d6194a149e5191d8197a1ff8", + "IPY_MODEL_d7c6004161924be4a237098954c2ee23" + ], + "layout": "IPY_MODEL_0cb2ba7cf783495f8a767b504946ffaa", + "tabbable": null, + "tooltip": null + } + } + }, + "version_major": 2, + "version_minor": 0 + } + } }, "nbformat": 4, "nbformat_minor": 2 diff --git a/tests/integration/test_hooked_encoder_properties.py b/tests/integration/test_hooked_encoder_properties.py new file mode 100644 index 000000000..bdbda39bf --- /dev/null +++ b/tests/integration/test_hooked_encoder_properties.py @@ -0,0 +1,171 @@ +"""Convenience-property tests for ``HookedEncoder``. + +Closes the last open ask in #277 — verify each ``W_*`` / ``b_*`` / circuit +property has the right shape AND aliases the right underlying parameter, so +property-level mech-interp work doesn't silently read the wrong tensor. + +Uses a randomly-initialized small encoder (no HF download) so the tests run +fast and deterministically. +""" + +from __future__ import annotations + +import pytest +import torch + +from transformer_lens import FactoredMatrix, HookedEncoder, HookedTransformerConfig + +D_MODEL = 12 +D_HEAD = 4 +N_HEADS = D_MODEL // D_HEAD +D_MLP = 4 * D_MODEL +N_CTX = 5 +N_LAYERS = 3 +D_VOCAB = 22 + + +@pytest.fixture +def model() -> HookedEncoder: + cfg = HookedTransformerConfig( + d_head=D_HEAD, + d_model=D_MODEL, + n_ctx=N_CTX, + n_layers=N_LAYERS, + act_fn="gelu", + d_vocab=D_VOCAB, + ) + encoder = HookedEncoder(cfg) + # HookedEncoder uses torch.empty() for params and does no init pass; the + # uninitialized memory contains NaNs which break torch.equal comparisons. + torch.manual_seed(0) + for p in encoder.parameters(): + torch.nn.init.normal_(p, std=0.02) + return encoder + + +# --------------------------------------------------------------------------- +# Embed / unembed +# --------------------------------------------------------------------------- + + +def test_W_U(model: HookedEncoder): + assert model.W_U.shape == (D_MODEL, D_VOCAB) + assert model.W_U is model.unembed.W_U + + +def test_b_U(model: HookedEncoder): + assert model.b_U.shape == (D_VOCAB,) + assert model.b_U is model.unembed.b_U + + +def test_W_E(model: HookedEncoder): + assert model.W_E.shape == (D_VOCAB, D_MODEL) + assert model.W_E is model.embed.embed.W_E + + +def test_W_pos(model: HookedEncoder): + assert model.W_pos.shape == (N_CTX, D_MODEL) + assert model.W_pos is model.embed.pos_embed.W_pos + + +@pytest.mark.xfail( + reason=( + "HookedEncoder.W_E_pos return annotation 'd_vocab+n_ctx d_model' references " + "unbound dimension names (no input args supply them), so the jaxtyping import-hook " + "can't resolve the sum at runtime. Same annotation exists on HookedTransformer.W_E_pos; " + "fixing it is a separate API-touch." + ), + strict=True, +) +def test_W_E_pos(model: HookedEncoder): + assert model.W_E_pos.shape == (D_VOCAB + N_CTX, D_MODEL) + # Concatenation, so identity doesn't apply — verify the slices match. + assert torch.equal(model.W_E_pos[:D_VOCAB], model.W_E) + assert torch.equal(model.W_E_pos[D_VOCAB:], model.W_pos) + + +# --------------------------------------------------------------------------- +# Per-layer attention weights/biases — stacked across blocks +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("attr", ["W_Q", "W_K", "W_V"]) +def test_attn_qkv_weight(model: HookedEncoder, attr: str): + stacked = getattr(model, attr) + assert stacked.shape == (N_LAYERS, N_HEADS, D_MODEL, D_HEAD) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(stacked[layer_idx], getattr(block.attn, attr)) + + +def test_W_O(model: HookedEncoder): + assert model.W_O.shape == (N_LAYERS, N_HEADS, D_HEAD, D_MODEL) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(model.W_O[layer_idx], block.attn.W_O) + + +@pytest.mark.parametrize("attr", ["b_Q", "b_K", "b_V"]) +def test_attn_qkv_bias(model: HookedEncoder, attr: str): + stacked = getattr(model, attr) + assert stacked.shape == (N_LAYERS, N_HEADS, D_HEAD) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(stacked[layer_idx], getattr(block.attn, attr)) + + +def test_b_O(model: HookedEncoder): + assert model.b_O.shape == (N_LAYERS, D_MODEL) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(model.b_O[layer_idx], block.attn.b_O) + + +# --------------------------------------------------------------------------- +# Per-layer MLP weights/biases — stacked across blocks +# --------------------------------------------------------------------------- + + +def test_W_in(model: HookedEncoder): + assert model.W_in.shape == (N_LAYERS, D_MODEL, D_MLP) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(model.W_in[layer_idx], block.mlp.W_in) + + +def test_W_out(model: HookedEncoder): + assert model.W_out.shape == (N_LAYERS, D_MLP, D_MODEL) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(model.W_out[layer_idx], block.mlp.W_out) + + +def test_b_in(model: HookedEncoder): + assert model.b_in.shape == (N_LAYERS, D_MLP) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(model.b_in[layer_idx], block.mlp.b_in) + + +def test_b_out(model: HookedEncoder): + assert model.b_out.shape == (N_LAYERS, D_MODEL) + for layer_idx, block in enumerate(model.blocks): + assert torch.equal(model.b_out[layer_idx], block.mlp.b_out) + + +# --------------------------------------------------------------------------- +# Factored circuits +# --------------------------------------------------------------------------- + + +def test_QK_circuit(model: HookedEncoder): + qk = model.QK + assert isinstance(qk, FactoredMatrix) + # Left factor is W_Q [..., d_model, d_head]; right factor is W_K transposed + # to [..., d_head, d_model]. Their product would be [..., d_model, d_model]. + assert qk.A.shape == (N_LAYERS, N_HEADS, D_MODEL, D_HEAD) + assert qk.B.shape == (N_LAYERS, N_HEADS, D_HEAD, D_MODEL) + assert torch.equal(qk.A, model.W_Q) + assert torch.equal(qk.B, model.W_K.transpose(-2, -1)) + + +def test_OV_circuit(model: HookedEncoder): + ov = model.OV + assert isinstance(ov, FactoredMatrix) + assert ov.A.shape == (N_LAYERS, N_HEADS, D_MODEL, D_HEAD) + assert ov.B.shape == (N_LAYERS, N_HEADS, D_HEAD, D_MODEL) + assert torch.equal(ov.A, model.W_V) + assert torch.equal(ov.B, model.W_O) diff --git a/tests/unit/components/mlps/test_gated_mlp.py b/tests/unit/components/mlps/test_gated_mlp.py index abb0b7b8c..a7518c85f 100644 --- a/tests/unit/components/mlps/test_gated_mlp.py +++ b/tests/unit/components/mlps/test_gated_mlp.py @@ -3,6 +3,7 @@ import pytest import torch import torch.nn as nn +import torch.nn.functional as F from transformer_lens.components import GatedMLP, LayerNorm from transformer_lens.utils import solu @@ -39,3 +40,42 @@ def test_forward(cfg: Dict[str, Any]): x = torch.randn(2, 10, cfg["d_model"]) output = model(x) assert output.shape == (2, 10, cfg["d_model"]) + + +def test_forward_matches_reference_equation(): + """Numeric equivalence vs a hand-rolled gated-MLP reference (issue #264). + + Closes the original ask in the thread: build an "equivalent gated MLP in + pytorch" and confirm the component matches it under ``torch.allclose``. + Uses ``silu`` so the LN-activation branch is not exercised — that keeps the + reference equation to the documented form. + """ + cfg: Dict[str, Any] = { + "n_layers": 1, + "n_ctx": 16, + "d_head": 32, + "d_model": 64, + "d_mlp": 128, + "dtype": torch.float32, + "act_fn": "silu", + "normalization_type": None, + "load_in_4bit": False, + } + torch.manual_seed(0) + model = GatedMLP(cfg).eval() + # Randomize the params so the test isn't run against zero-bias defaults. + for p in model.parameters(): + torch.nn.init.normal_(p, std=0.02) + + x = torch.randn(2, 5, cfg["d_model"]) + actual = model(x) + + # Reference: mlp_out = (silu(x @ W_gate) * (x @ W_in) + b_in) @ W_out + b_out. + # GatedMLP uses F.linear with .T.contiguous() to match HF accumulation order; + # mirror that here so the two compute graphs are bitwise comparable in fp32. + pre_act = F.linear(x, model.W_gate.T.contiguous()) + pre_linear = F.linear(x, model.W_in.T.contiguous()) + post_act = F.silu(pre_act) * pre_linear + model.b_in + expected = F.linear(post_act, model.W_out.T.contiguous(), model.b_out) + + assert torch.allclose(actual, expected, atol=1e-6) diff --git a/transformer_lens/config/HookedTransformerConfig.py b/transformer_lens/config/HookedTransformerConfig.py index 8f818b36b..6e4b95150 100644 --- a/transformer_lens/config/HookedTransformerConfig.py +++ b/transformer_lens/config/HookedTransformerConfig.py @@ -151,6 +151,12 @@ class HookedTransformerConfig(TransformerLensConfig): use_hook_tokens (bool): Will add a hook point on the token input to HookedTransformer.forward, which lets you cache or intervene on the tokens. Defaults to False. + gated_mlp (bool): If True, the MLP layer uses a gated formulation + (SwiGLU/GeGLU-style): ``mlp_out = W_out @ (act_fn(W_gate @ x) * (W_in @ x))``, + with an extra ``W_gate`` weight matrix alongside ``W_in`` and ``W_out``. Used by + LLaMA, Mistral, Gemma, Qwen and similar families. When False (default), the MLP + is the plain ``mlp_out = W_out @ act_fn(W_in @ x)`` form. ``loading_from_pretrained`` + sets this automatically per architecture; only set manually for a custom config. default_prepend_bos (bool, optional): Default behavior of whether to prepend the BOS token when the methods of HookedTransformer process input text to tokenize (only when input is a string). Defaults to True - even for models not explicitly trained with this, heads often use the diff --git a/transformer_lens/factories/architecture_adapter_factory.py b/transformer_lens/factories/architecture_adapter_factory.py index b5432aff1..02129b856 100644 --- a/transformer_lens/factories/architecture_adapter_factory.py +++ b/transformer_lens/factories/architecture_adapter_factory.py @@ -116,6 +116,7 @@ "Qwen3_5ForCausalLM": Qwen3_5ArchitectureAdapter, "StableLmForCausalLM": StableLmArchitectureAdapter, "T5ForConditionalGeneration": T5ArchitectureAdapter, + "MT5ForConditionalGeneration": T5ArchitectureAdapter, "XGLMForCausalLM": XGLMArchitectureAdapter, "NanoGPTForCausalLM": NanogptArchitectureAdapter, "MinGPTForCausalLM": MingptArchitectureAdapter, diff --git a/transformer_lens/model_bridge/generalized_components/attention.py b/transformer_lens/model_bridge/generalized_components/attention.py index 22504b294..89d6203ab 100644 --- a/transformer_lens/model_bridge/generalized_components/attention.py +++ b/transformer_lens/model_bridge/generalized_components/attention.py @@ -58,6 +58,8 @@ def __init__( requires_position_embeddings: bool = False, requires_attention_mask: bool = False, attention_mask_4d: bool = False, + requires_relative_position_bias: bool = False, + is_cross_attention: bool = False, optional: bool = False, ): """Initialize the attention bridge. @@ -78,6 +80,9 @@ def __init__( (e.g., GPTNeoX/Pythia). Defaults to False. attention_mask_4d: If True, generate 4D attention_mask [batch, 1, tgt_len, src_len] instead of 2D [batch, seq_len]. Required for OPT. Defaults to False. + requires_relative_position_bias: T5/mT5-style relative attention; supplies a + zero ``position_bias`` so HF's forward skips its ``cache_position[-1]`` fallback. + is_cross_attention: Encoder-decoder cross-attention; supplies ``key_value_states``. """ if conversion_rule is None: conversion_rule = AttentionAutoConversion(config) @@ -122,6 +127,8 @@ def __init__( self.requires_position_embeddings = requires_position_embeddings self.requires_attention_mask = requires_attention_mask self.attention_mask_4d = attention_mask_4d + self.requires_relative_position_bias = requires_relative_position_bias + self.is_cross_attention = is_cross_attention self._layer_idx: Optional[int] = None def set_original_component(self, original_component: torch.nn.Module) -> None: @@ -212,6 +219,16 @@ def get_random_inputs( else: # Generate 2D attention mask [batch, seq_len] for most models inputs["attention_mask"] = torch.ones(batch_size, seq_len, device=device) + if self.requires_relative_position_bias: + # Zero bias short-circuits HF's None-cache_position fallback in T5Attention. + n_heads = self.config.n_heads if self.config and hasattr(self.config, "n_heads") else 1 + inputs["position_bias"] = torch.zeros( + 1, n_heads, seq_len, seq_len, device=device, dtype=dtype + ) + if self.is_cross_attention: + inputs["key_value_states"] = torch.randn( + batch_size, seq_len, d_model, device=device, dtype=dtype + ) return inputs def _setup_qkv_hook_reshaping(self) -> None: diff --git a/transformer_lens/model_bridge/sources/transformers.py b/transformer_lens/model_bridge/sources/transformers.py index 522553068..b7c4656f4 100644 --- a/transformer_lens/model_bridge/sources/transformers.py +++ b/transformer_lens/model_bridge/sources/transformers.py @@ -232,6 +232,7 @@ def determine_architecture_from_hf_config(hf_config): "openelm": "OpenELMForCausalLM", "stablelm": "StableLmForCausalLM", "t5": "T5ForConditionalGeneration", + "mt5": "MT5ForConditionalGeneration", } if model_type in model_type_mappings: architectures.append(model_type_mappings[model_type]) diff --git a/transformer_lens/model_bridge/supported_architectures/t5.py b/transformer_lens/model_bridge/supported_architectures/t5.py index 6e2e51822..3fdc1179b 100644 --- a/transformer_lens/model_bridge/supported_architectures/t5.py +++ b/transformer_lens/model_bridge/supported_architectures/t5.py @@ -113,6 +113,7 @@ def __init__(self, cfg: Any) -> None: "v": LinearBridge(name="v"), "o": LinearBridge(name="o"), }, + requires_relative_position_bias=True, ), "ln2": RMSNormalizationBridge(name="layer.1.layer_norm", config=self.cfg), "mlp": encoder_mlp, @@ -142,6 +143,7 @@ def __init__(self, cfg: Any) -> None: "v": LinearBridge(name="v"), "o": LinearBridge(name="o"), }, + requires_relative_position_bias=True, ), "ln2": RMSNormalizationBridge(name="layer.1.layer_norm", config=self.cfg), "cross_attn": AttentionBridge( @@ -153,6 +155,8 @@ def __init__(self, cfg: Any) -> None: "v": LinearBridge(name="v"), "o": LinearBridge(name="o"), }, + requires_relative_position_bias=True, + is_cross_attention=True, ), "ln3": RMSNormalizationBridge(name="layer.2.layer_norm", config=self.cfg), "mlp": decoder_mlp, diff --git a/transformer_lens/tools/model_registry/__init__.py b/transformer_lens/tools/model_registry/__init__.py index 63d61d54c..dd79d58c2 100644 --- a/transformer_lens/tools/model_registry/__init__.py +++ b/transformer_lens/tools/model_registry/__init__.py @@ -89,6 +89,7 @@ "Qwen3_5ForCausalLM", "StableLmForCausalLM", "T5ForConditionalGeneration", + "MT5ForConditionalGeneration", "XGLMForCausalLM", } diff --git a/transformer_lens/tools/model_registry/data/architecture_gaps.json b/transformer_lens/tools/model_registry/data/architecture_gaps.json index b664ac22a..19723ef94 100644 --- a/transformer_lens/tools/model_registry/data/architecture_gaps.json +++ b/transformer_lens/tools/model_registry/data/architecture_gaps.json @@ -1,14 +1,36 @@ { - "generated_at": "2026-04-16", + "generated_at": "2026-05-08T15:02:52.940447Z", "scan_info": { - "total_scanned": 4839, - "task_filter": "text-generation", + "task_filter": [ + "text-generation", + "text2text-generation" + ], + "total_scanned": 5270, "min_downloads": 500, - "scan_duration_seconds": 4.9 + "merged_from_runs": 2 }, - "total_unsupported_architectures": 413, - "total_unsupported_models": 1364, + "total_unsupported_architectures": 439, + "total_unsupported_models": 1748, "gaps": [ + { + "architecture_id": "MarianMTModel", + "total_models": 169, + "total_downloads": 8456318, + "min_param_count": 14843019, + "sample_models": [ + "Helsinki-NLP/opus-mt-nl-en", + "Helsinki-NLP/opus-mt-en-de", + "Helsinki-NLP/opus-mt-fr-en", + "Helsinki-NLP/opus-mt-tr-en", + "Helsinki-NLP/opus-mt-de-en", + "Helsinki-NLP/opus-mt-ko-en", + "Helsinki-NLP/opus-mt-en-fr", + "Helsinki-NLP/opus-mt-ru-en", + "Helsinki-NLP/opus-mt-en-ru", + "Helsinki-NLP/opus-mt-en-es" + ], + "relevancy_score": 100.0 + }, { "architecture_id": "Qwen3_5ForConditionalGeneration", "total_models": 75, @@ -85,6 +107,25 @@ ], "relevancy_score": 74.5 }, + { + "architecture_id": "BartForConditionalGeneration", + "total_models": 83, + "total_downloads": 7451844, + "min_param_count": 28176, + "sample_models": [ + "KomeijiForce/bart-large-emojilm", + "antalvdb/bart-base-spelling-nl", + "lmqg/bart-large-squad-qg", + "kengurukleo/deutsch_a2_transformer", + "shibing624/bart4csc-base-chinese", + "SkitCon/gec-spanish-BARTO-SYNTHETIC", + "cive202/humanize-ai-text-bart-base", + "Tianlin668/MentalBART", + "Nargizi/screeve-lemmatizer", + "KomeijiForce/bart-large-emojilm-e2t" + ], + "relevancy_score": 73.7 + }, { "architecture_id": "DeepseekV32ForCausalLM", "total_models": 12, @@ -142,6 +183,25 @@ ], "relevancy_score": 60.7 }, + { + "architecture_id": "M2M100ForConditionalGeneration", + "total_models": 21, + "total_downloads": 4231967, + "min_param_count": 332735488, + "sample_models": [ + "dsfsi/nso-en-m2m100-gov", + "facebook/m2m100_1.2B", + "facebook/nllb-200-distilled-600M", + "facebook/m2m100_418M", + "facebook/nllb-200-3.3B", + "facebook/nllb-200-distilled-1.3B", + "facebook/nllb-200-1.3B", + "Babelscape/mrebel-base", + "Xenova/nllb-200-distilled-600M", + "alirezamsh/small100" + ], + "relevancy_score": 58.8 + }, { "architecture_id": "Glm4MoeForCausalLM", "total_models": 15, @@ -163,8 +223,8 @@ }, { "architecture_id": "T5GemmaForConditionalGeneration", - "total_models": 13, - "total_downloads": 1127923, + "total_models": 22, + "total_downloads": 2307747, "min_param_count": 312517632, "sample_models": [ "google/t5gemma-s-s-prefixlm", @@ -200,23 +260,23 @@ "relevancy_score": 57.3 }, { - "architecture_id": "BartForConditionalGeneration", - "total_models": 11, - "total_downloads": 695299, - "min_param_count": 6044480, + "architecture_id": "MBartForConditionalGeneration", + "total_models": 35, + "total_downloads": 689744, + "min_param_count": 131603038, "sample_models": [ - "KomeijiForce/bart-large-emojilm", - "antalvdb/bart-base-spelling-nl", - "lmqg/bart-large-squad-qg", - "kengurukleo/deutsch_a2_transformer", - "shibing624/bart4csc-base-chinese", - "SkitCon/gec-spanish-BARTO-SYNTHETIC", - "cive202/humanize-ai-text-bart-base", - "Tianlin668/MentalBART", - "Nargizi/screeve-lemmatizer", - "KomeijiForce/bart-large-emojilm-e2t" + "Pravopysnyk/best-unlp", + "DeepPavlov/mbart-large-50-ru-persona-chat", + "sn4kebyt3/ru-bart-large", + "MRNH/mbart-italian-grammar-corrector", + "MRNH/mbart-german-grammar-corrector", + "MRNH/mbart-russian-grammar-corrector", + "ai4bharat/IndicBART", + "facebook/mbart-large-50-one-to-many-mmt", + "moussaKam/mbarthez", + "facebook/mbart-large-50-many-to-many-mmt" ], - "relevancy_score": 55.8 + "relevancy_score": 57.2 }, { "architecture_id": "BaichuanForCausalLM", @@ -406,6 +466,23 @@ ], "relevancy_score": 50.6 }, + { + "architecture_id": "PegasusForConditionalGeneration", + "total_models": 8, + "total_downloads": 417097, + "min_param_count": 568796007, + "sample_models": [ + "google/pegasus-xsum", + "human-centered-summarization/financial-summarization-pegasus", + "tuner007/pegasus_paraphrase", + "google/pegasus-cnn_dailymail", + "google/pegasus-large", + "nsi319/legal-pegasus", + "zaemyung/DElIteraTeR-PEGASUS-Multi-Sent-Revision-Generator", + "google/pegasus-pubmed" + ], + "relevancy_score": 50.5 + }, { "architecture_id": "MT5ForConditionalGeneration", "total_models": 13, @@ -561,8 +638,8 @@ }, { "architecture_id": "BloomModel", - "total_models": 8, - "total_downloads": 40679, + "total_models": 9, + "total_downloads": 41582, "min_param_count": 16156544, "sample_models": [ "bigscience/bigscience-small-testing", @@ -594,6 +671,20 @@ ], "relevancy_score": 47.7 }, + { + "architecture_id": "FSMTForConditionalGeneration", + "total_models": 5, + "total_downloads": 123986, + "min_param_count": 271847424, + "sample_models": [ + "stas/tiny-wmt19-en-de", + "facebook/wmt19-ru-en", + "facebook/wmt19-en-de", + "facebook/wmt19-de-en", + "facebook/wmt19-en-ru" + ], + "relevancy_score": 47.1 + }, { "architecture_id": "NemotronForCausalLM", "total_models": 5, @@ -608,6 +699,19 @@ ], "relevancy_score": 47.0 }, + { + "architecture_id": "BlenderbotForConditionalGeneration", + "total_models": 5, + "total_downloads": 137109, + "min_param_count": 364810568, + "sample_models": [ + "thu-coai/blenderbot-400M-esconv", + "facebook/blenderbot-3B", + "facebook/blenderbot-400M-distill", + "nilotpaldhar2004/blenderbot-chatbot" + ], + "relevancy_score": 47.0 + }, { "architecture_id": "HyenaDNAForCausalLM", "total_models": 6, @@ -686,6 +790,24 @@ ], "relevancy_score": 46.0 }, + { + "architecture_id": "LEDForConditionalGeneration", + "total_models": 9, + "total_downloads": 44482, + "min_param_count": 161894745, + "sample_models": [ + "allenai/led-base-16384", + "allenai/led-large-16384", + "pszemraj/led-large-book-summary", + "pszemraj/led-base-book-summary", + "allenai/led-large-16384-arxiv", + "nsi319/legal-led-base-16384", + "allenai/PRIMERA", + "patrickvonplaten/led-large-16384-pubmed", + "soumitsr/led-base-article-digestor" + ], + "relevancy_score": 45.9 + }, { "architecture_id": "SDARForCausalLM", "total_models": 8, @@ -750,6 +872,16 @@ ], "relevancy_score": 45.4 }, + { + "architecture_id": "ProphetNetForConditionalGeneration", + "total_models": 1, + "total_downloads": 93577, + "min_param_count": 391321600, + "sample_models": [ + "microsoft/prophetnet-large-uncased" + ], + "relevancy_score": 45.4 + }, { "architecture_id": "ArceeForCausalLM", "total_models": 4, @@ -779,6 +911,33 @@ ], "relevancy_score": 45.1 }, + { + "architecture_id": "IndicTransForConditionalGeneration", + "total_models": 6, + "total_downloads": 44512, + "min_param_count": 228316160, + "sample_models": [ + "ai4bharat/indictrans2-en-indic-dist-200M", + "ai4bharat/indictrans2-indic-en-1B", + "ai4bharat/indictrans2-en-indic-1B", + "ai4bharat/indictrans2-indic-en-dist-200M", + "ai4bharat/indictrans2-indic-indic-dist-320M", + "ai4bharat/indictrans2-indic-indic-1B" + ], + "relevancy_score": 45.1 + }, + { + "architecture_id": "T5Gemma2ForConditionalGeneration", + "total_models": 3, + "total_downloads": 65800, + "min_param_count": 786029296, + "sample_models": [ + "google/t5gemma-2-1b-1b", + "google/t5gemma-2-270m-270m", + "google/t5gemma-2-4b-4b" + ], + "relevancy_score": 45.1 + }, { "architecture_id": "LlavaQwen2ForCausalLM", "total_models": 5, @@ -807,6 +966,21 @@ ], "relevancy_score": 44.8 }, + { + "architecture_id": "LongT5ForConditionalGeneration", + "total_models": 6, + "total_downloads": 35411, + "min_param_count": 222903552, + "sample_models": [ + "google/long-t5-tglobal-base", + "google/long-t5-tglobal-xl", + "google/long-t5-local-base", + "Stancld/longt5-tglobal-large-16384-pubmed-3k_steps", + "google/long-t5-tglobal-large", + "agemagician/mlong-t5-tglobal-base" + ], + "relevancy_score": 44.6 + }, { "architecture_id": "SeedOssForCausalLM", "total_models": 4, @@ -922,21 +1096,6 @@ ], "relevancy_score": 43.1 }, - { - "architecture_id": "MBartForConditionalGeneration", - "total_models": 6, - "total_downloads": 7712, - "min_param_count": 379691717, - "sample_models": [ - "Pravopysnyk/best-unlp", - "DeepPavlov/mbart-large-50-ru-persona-chat", - "sn4kebyt3/ru-bart-large", - "MRNH/mbart-italian-grammar-corrector", - "MRNH/mbart-german-grammar-corrector", - "MRNH/mbart-russian-grammar-corrector" - ], - "relevancy_score": 43.0 - }, { "architecture_id": "DeciLMForCausalLM", "total_models": 13, @@ -956,6 +1115,23 @@ ], "relevancy_score": 42.9 }, + { + "architecture_id": "EncoderDecoderModel", + "total_models": 8, + "total_downloads": 12696, + "min_param_count": 221337, + "sample_models": [ + "optimum-internal-testing/tiny-random-encoder-decoder-gpt2-bert", + "mrm8488/bert2bert_shared-spanish-finetuned-summarization", + "google/bert2bert_L-24_wmt_de_en", + "patrickvonplaten/bert2bert-cnn_dailymail-fp16", + "mohitsha/tiny-random-testing-bert2gpt2", + "cahya/bert2bert-indonesian-summarization", + "patrickvonplaten/bert2bert_cnn_daily_mail", + "google/roberta2roberta_L-24_bbc" + ], + "relevancy_score": 42.9 + }, { "architecture_id": "DogeForCausalLM", "total_models": 6, @@ -1086,6 +1262,16 @@ ], "relevancy_score": 41.6 }, + { + "architecture_id": "GlmAsrForConditionalGeneration", + "total_models": 1, + "total_downloads": 101459, + "min_param_count": 2257843200, + "sample_models": [ + "zai-org/GLM-ASR-Nano-2512" + ], + "relevancy_score": 41.6 + }, { "architecture_id": "OLMoForCausalLM", "total_models": 7, @@ -1356,6 +1542,16 @@ ], "relevancy_score": 38.8 }, + { + "architecture_id": "T5Model", + "total_models": 1, + "total_downloads": 4694, + "min_param_count": 222903552, + "sample_models": [ + "sonoisa/t5-base-japanese" + ], + "relevancy_score": 38.8 + }, { "architecture_id": "Lfm2MoeForCausalLM", "total_models": 7, @@ -1372,6 +1568,18 @@ ], "relevancy_score": 38.6 }, + { + "architecture_id": "Florence2ForConditionalGeneration", + "total_models": 3, + "total_downloads": 3407, + "min_param_count": 3549945, + "sample_models": [ + "onnx-community/Florence-2-base-ft", + "Xenova/tiny-random-Florence2ForConditionalGeneration", + "onnx-community/Florence-2-large-ft" + ], + "relevancy_score": 38.6 + }, { "architecture_id": "GatedDeltaNetForCausalLM", "total_models": 1, @@ -1466,8 +1674,8 @@ }, { "architecture_id": "T5EncoderModel", - "total_models": 1, - "total_downloads": 117289, + "total_models": 2, + "total_downloads": 200188, "min_param_count": 4762310656, "sample_models": [ "XLabs-AI/xflux_text_encoders" @@ -1511,15 +1719,15 @@ "relevancy_score": 37.6 }, { - "architecture_id": "Moondream", + "architecture_id": "PegasusXForConditionalGeneration", "total_models": 2, - "total_downloads": 11437, - "min_param_count": 1857482608, + "total_downloads": 2431, + "min_param_count": 568667136, "sample_models": [ - "vikhyatk/moondream1", - "zesquirrelnator/moondream2-finetuneV2" + "google/pegasus-x-base", + "pszemraj/pegasus-x-large-book-summary" ], - "relevancy_score": 37.5 + "relevancy_score": 37.6 }, { "architecture_id": "ForCausalLM", @@ -1533,32 +1741,47 @@ "relevancy_score": 37.5 }, { - "architecture_id": "Autoencoder", - "total_models": 1, - "total_downloads": 2349, - "min_param_count": 75832064, + "architecture_id": "Moondream", + "total_models": 2, + "total_downloads": 11437, + "min_param_count": 1857482608, "sample_models": [ - "cccczshao/CALM-Autoencoder" + "vikhyatk/moondream1", + "zesquirrelnator/moondream2-finetuneV2" + ], + "relevancy_score": 37.5 + }, + { + "architecture_id": "SwitchTransformersForConditionalGeneration", + "total_models": 3, + "total_downloads": 11807, + "min_param_count": 1978514688, + "sample_models": [ + "google/switch-base-8", + "google/switch-base-16", + "google/switch-base-32" ], "relevancy_score": 37.4 }, { - "architecture_id": "BlenderbotForConditionalGeneration", + "architecture_id": "Autoencoder", "total_models": 1, - "total_downloads": 2337, - "min_param_count": 364810568, + "total_downloads": 2349, + "min_param_count": 75832064, "sample_models": [ - "thu-coai/blenderbot-400M-esconv" + "cccczshao/CALM-Autoencoder" ], "relevancy_score": 37.4 }, { - "architecture_id": "TransformerForCausalLM", - "total_models": 1, - "total_downloads": 13828, - "min_param_count": 1364297728, + "architecture_id": "Qwen2AudioForConditionalGeneration", + "total_models": 3, + "total_downloads": 429859, + "min_param_count": 8388083712, "sample_models": [ - "fla-hub/transformer-1.3B-100B" + "Qwen/Qwen2-Audio-7B-Instruct", + "Qwen/Qwen2-Audio-7B", + "skoneru/qwen2_st_ft_v2" ], "relevancy_score": 37.3 }, @@ -1593,14 +1816,14 @@ "relevancy_score": 37.3 }, { - "architecture_id": "Plamo3ForCausalLM", + "architecture_id": "TransformerForCausalLM", "total_models": 1, - "total_downloads": 13053, - "min_param_count": 2603344384, + "total_downloads": 13828, + "min_param_count": 1364297728, "sample_models": [ - "pfnet/plamo-3-nict-2b-base" + "fla-hub/transformer-1.3B-100B" ], - "relevancy_score": 37.2 + "relevancy_score": 37.3 }, { "architecture_id": "D3LMForMaskedLM", @@ -1613,14 +1836,14 @@ "relevancy_score": 37.2 }, { - "architecture_id": "MoEGPTForCausalLM", + "architecture_id": "Plamo3ForCausalLM", "total_models": 1, - "total_downloads": 2001, - "min_param_count": 149603328, + "total_downloads": 13053, + "min_param_count": 2603344384, "sample_models": [ - "arnomatic/german-moe-gpt-v8-pretrained" + "pfnet/plamo-3-nict-2b-base" ], - "relevancy_score": 37.1 + "relevancy_score": 37.2 }, { "architecture_id": "LanceAI", @@ -1632,6 +1855,16 @@ ], "relevancy_score": 37.1 }, + { + "architecture_id": "MoEGPTForCausalLM", + "total_models": 1, + "total_downloads": 2001, + "min_param_count": 149603328, + "sample_models": [ + "arnomatic/german-moe-gpt-v8-pretrained" + ], + "relevancy_score": 37.1 + }, { "architecture_id": "HGRNBitForCausalLM", "total_models": 2, @@ -1799,22 +2032,22 @@ "relevancy_score": 35.4 }, { - "architecture_id": "SoraForSLM", + "architecture_id": "CircuitGPTForCausalLM", "total_models": 1, - "total_downloads": 915, - "min_param_count": 450707456, + "total_downloads": 908, + "min_param_count": 419124736, "sample_models": [ - "Conlanger-LLM-CLEM/Sorie" + "openai/circuit-sparsity" ], "relevancy_score": 35.4 }, { - "architecture_id": "CircuitGPTForCausalLM", + "architecture_id": "SoraForSLM", "total_models": 1, - "total_downloads": 908, - "min_param_count": 419124736, + "total_downloads": 915, + "min_param_count": 450707456, "sample_models": [ - "openai/circuit-sparsity" + "Conlanger-LLM-CLEM/Sorie" ], "relevancy_score": 35.4 }, @@ -1829,6 +2062,17 @@ ], "relevancy_score": 35.3 }, + { + "architecture_id": "AudioFlamingo3ForConditionalGeneration", + "total_models": 2, + "total_downloads": 197941, + "min_param_count": 8267215360, + "sample_models": [ + "nvidia/audio-flamingo-3-hf", + "nvidia/music-flamingo-hf" + ], + "relevancy_score": 35.3 + }, { "architecture_id": "DotLMForCausalLM", "total_models": 1, @@ -1965,6 +2209,17 @@ ], "relevancy_score": 34.3 }, + { + "architecture_id": "MusicFlamingoForConditionalGeneration", + "total_models": 2, + "total_downloads": 120851, + "min_param_count": 8267215360, + "sample_models": [ + "nvidia/music-flamingo-2601-hf", + "nvidia/music-flamingo-think-2601-hf" + ], + "relevancy_score": 34.2 + }, { "architecture_id": "GPT2CompetitiveMoE", "total_models": 1, @@ -2251,22 +2506,22 @@ "relevancy_score": 31.0 }, { - "architecture_id": "VeridianForCausalLM", + "architecture_id": "AeroForConditionalGeneration", "total_models": 1, - "total_downloads": 738, - "min_param_count": 1659913728, + "total_downloads": 717, + "min_param_count": 2416221184, "sample_models": [ - "MagistrTheOne/veridian-beta" + "lmms-lab/Aero-1-Audio" ], "relevancy_score": 30.9 }, { - "architecture_id": "AeroForConditionalGeneration", + "architecture_id": "VeridianForCausalLM", "total_models": 1, - "total_downloads": 717, - "min_param_count": 2416221184, + "total_downloads": 738, + "min_param_count": 1659913728, "sample_models": [ - "lmms-lab/Aero-1-Audio" + "MagistrTheOne/veridian-beta" ], "relevancy_score": 30.9 }, @@ -2282,22 +2537,22 @@ "relevancy_score": 30.6 }, { - "architecture_id": "Lfm2Prototype1ForCausalLM", + "architecture_id": "HymbaForCausalLM", "total_models": 1, - "total_downloads": 633, - "min_param_count": 1212304128, + "total_downloads": 629, + "min_param_count": 1522797824, "sample_models": [ - "nntsuzu/LFM2-SFT-Prototype01-1.2B-JP" + "nvidia/Hymba-1.5B-Instruct" ], "relevancy_score": 30.6 }, { - "architecture_id": "HymbaForCausalLM", + "architecture_id": "Lfm2Prototype1ForCausalLM", "total_models": 1, - "total_downloads": 629, - "min_param_count": 1522797824, + "total_downloads": 633, + "min_param_count": 1212304128, "sample_models": [ - "nvidia/Hymba-1.5B-Instruct" + "nntsuzu/LFM2-SFT-Prototype01-1.2B-JP" ], "relevancy_score": 30.6 }, @@ -2336,26 +2591,24 @@ "relevancy_score": 30.4 }, { - "architecture_id": "TinyChartPhiForCausalLM", + "architecture_id": "GPTSanJapaneseForConditionalGeneration", "total_models": 1, - "total_downloads": 3544, - "min_param_count": 3189407648, + "total_downloads": 599, + "min_param_count": 2779000992, "sample_models": [ - "mPLUG/TinyChart-3B-768" + "Tanrei/GPTSAN-japanese" ], "relevancy_score": 30.3 }, { - "architecture_id": "PersimmonForCausalLM", - "total_models": 3, - "total_downloads": 12223, - "min_param_count": 8823735296, + "architecture_id": "TinyChartPhiForCausalLM", + "total_models": 1, + "total_downloads": 3544, + "min_param_count": 3189407648, "sample_models": [ - "adept/persimmon-8b-chat", - "adept/persimmon-8b-base", - "pszemraj/perSLIMmon-8b-base" + "mPLUG/TinyChart-3B-768" ], - "relevancy_score": 30.2 + "relevancy_score": 30.3 }, { "architecture_id": "FlexOlmoForCausalLM", @@ -2369,6 +2622,18 @@ ], "relevancy_score": 30.2 }, + { + "architecture_id": "PersimmonForCausalLM", + "total_models": 3, + "total_downloads": 12223, + "min_param_count": 8823735296, + "sample_models": [ + "adept/persimmon-8b-chat", + "adept/persimmon-8b-base", + "pszemraj/perSLIMmon-8b-base" + ], + "relevancy_score": 30.2 + }, { "architecture_id": "Phi3SmallForCausalLM", "total_models": 2, @@ -2604,22 +2869,22 @@ "relevancy_score": 27.0 }, { - "architecture_id": "JetMoEForCausalLM", + "architecture_id": "InternVLChatModel", "total_models": 1, - "total_downloads": 4808, - "min_param_count": 8522237952, + "total_downloads": 758, + "min_param_count": 3712637952, "sample_models": [ - "jetmoe/jetmoe-8b" + "numind/NuExtract-2-4B-experimental" ], "relevancy_score": 27.0 }, { - "architecture_id": "InternVLChatModel", + "architecture_id": "JetMoEForCausalLM", "total_models": 1, - "total_downloads": 758, - "min_param_count": 3712637952, + "total_downloads": 4808, + "min_param_count": 8522237952, "sample_models": [ - "numind/NuExtract-2-4B-experimental" + "jetmoe/jetmoe-8b" ], "relevancy_score": 27.0 }, @@ -2634,6 +2899,16 @@ ], "relevancy_score": 26.8 }, + { + "architecture_id": "T5GemmaVoiceForConditionalGeneration", + "total_models": 1, + "total_downloads": 736, + "min_param_count": 5314418949, + "sample_models": [ + "Aratako/T5Gemma-TTS-2b-2b" + ], + "relevancy_score": 26.8 + }, { "architecture_id": "InternLMForCausalLM", "total_models": 4, @@ -2647,6 +2922,19 @@ ], "relevancy_score": 26.7 }, + { + "architecture_id": "UMT5ForConditionalGeneration", + "total_models": 4, + "total_downloads": 115924, + "min_param_count": null, + "sample_models": [ + "google/umt5-xxl", + "google/umt5-base", + "google/umt5-small", + "google/umt5-xl" + ], + "relevancy_score": 26.7 + }, { "architecture_id": "SarvamMoEForCausalLM", "total_models": 2, @@ -2779,6 +3067,17 @@ ], "relevancy_score": 26.0 }, + { + "architecture_id": "PLBartForConditionalGeneration", + "total_models": 2, + "total_downloads": 102095, + "min_param_count": null, + "sample_models": [ + "uclanlp/plbart-base", + "uclanlp/plbart-java-cs" + ], + "relevancy_score": 25.8 + }, { "architecture_id": "Qwen2VLAudioForConditionalGeneration", "total_models": 1, @@ -2789,6 +3088,17 @@ ], "relevancy_score": 25.8 }, + { + "architecture_id": "BlenderbotSmallForConditionalGeneration", + "total_models": 2, + "total_downloads": 94677, + "min_param_count": null, + "sample_models": [ + "facebook/blenderbot_small-90M", + "facebook/blenderbot-90M" + ], + "relevancy_score": 25.7 + }, { "architecture_id": "Esm2LlamaInstructForCausalLM", "total_models": 1, @@ -2888,22 +3198,22 @@ "relevancy_score": 24.4 }, { - "architecture_id": "HCXVisionForCausalLM", + "architecture_id": "Param2MoEForCausalLM", "total_models": 1, - "total_downloads": 59628, - "min_param_count": null, + "total_downloads": 9131, + "min_param_count": 17151140480, "sample_models": [ - "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" + "bharatgenai/Param2-17B-A2.4B-Thinking" ], "relevancy_score": 24.4 }, { - "architecture_id": "Param2MoEForCausalLM", + "architecture_id": "HCXVisionForCausalLM", "total_models": 1, - "total_downloads": 9131, - "min_param_count": 17151140480, + "total_downloads": 59628, + "min_param_count": null, "sample_models": [ - "bharatgenai/Param2-17B-A2.4B-Thinking" + "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" ], "relevancy_score": 24.4 }, @@ -2952,22 +3262,22 @@ "relevancy_score": 24.0 }, { - "architecture_id": "BunnyLlamaForCausalLM", + "architecture_id": "MiniCPMSALAForCausalLM", "total_models": 1, - "total_downloads": 1147, - "min_param_count": 8479990848, + "total_downloads": 1142, + "min_param_count": 9477203968, "sample_models": [ - "typhoon-ai/llama-3-typhoon-v1.5-8b-vision-preview" + "openbmb/MiniCPM-SALA" ], "relevancy_score": 23.9 }, { - "architecture_id": "MiniCPMSALAForCausalLM", + "architecture_id": "BunnyLlamaForCausalLM", "total_models": 1, - "total_downloads": 1142, - "min_param_count": 9477203968, + "total_downloads": 1147, + "min_param_count": 8479990848, "sample_models": [ - "openbmb/MiniCPM-SALA" + "typhoon-ai/llama-3-typhoon-v1.5-8b-vision-preview" ], "relevancy_score": 23.9 }, @@ -3023,22 +3333,22 @@ "relevancy_score": 23.0 }, { - "architecture_id": "Qwen2Model", + "architecture_id": "Qwen2VLForConditionalGeneration", "total_models": 1, - "total_downloads": 717, - "min_param_count": 7070619136, + "total_downloads": 714, + "min_param_count": 8291375616, "sample_models": [ - "NewBeeKing/MemPO_Qwen2.5-SFT-RL" + "typhoon-ai/typhoon2-qwen2vl-7b-vision-instruct" ], "relevancy_score": 22.9 }, { - "architecture_id": "Qwen2VLForConditionalGeneration", + "architecture_id": "Qwen2Model", "total_models": 1, - "total_downloads": 714, - "min_param_count": 8291375616, + "total_downloads": 717, + "min_param_count": 7070619136, "sample_models": [ - "typhoon-ai/typhoon2-qwen2vl-7b-vision-instruct" + "NewBeeKing/MemPO_Qwen2.5-SFT-RL" ], "relevancy_score": 22.9 }, @@ -3064,6 +3374,16 @@ ], "relevancy_score": 22.8 }, + { + "architecture_id": "ICONNForCausalLM", + "total_models": 1, + "total_downloads": 669, + "min_param_count": 7833409536, + "sample_models": [ + "ICONNAI/ICONN-1-Mini-Beta" + ], + "relevancy_score": 22.7 + }, { "architecture_id": "Gemma4ForCausalLM", "total_models": 1, @@ -3084,16 +3404,6 @@ ], "relevancy_score": 22.7 }, - { - "architecture_id": "ICONNForCausalLM", - "total_models": 1, - "total_downloads": 669, - "min_param_count": 7833409536, - "sample_models": [ - "ICONNAI/ICONN-1-Mini-Beta" - ], - "relevancy_score": 22.7 - }, { "architecture_id": "BailingMoeV2_5ForCausalLM", "total_models": 1, @@ -3147,42 +3457,42 @@ "relevancy_score": 22.2 }, { - "architecture_id": "SolarForCausalLM", + "architecture_id": "RecaLLMLlamaForCausalLM", "total_models": 1, - "total_downloads": 20538, - "min_param_count": null, + "total_downloads": 514, + "min_param_count": 8030294016, "sample_models": [ - "upstage/solar-pro-preview-instruct" + "kswhitecross/RecaLLM-Llama-3.1-8B" ], "relevancy_score": 22.1 }, { - "architecture_id": "GptOssPuzzleForCausalLM", + "architecture_id": "RecaLLMQwen2ForCausalLM", "total_models": 1, - "total_downloads": 20294, - "min_param_count": 90837823680, + "total_downloads": 503, + "min_param_count": 7612785152, "sample_models": [ - "nvidia/gpt-oss-puzzle-88B" + "kswhitecross/RecaLLM-Qwen2.5-7B" ], "relevancy_score": 22.1 }, { - "architecture_id": "RecaLLMLlamaForCausalLM", + "architecture_id": "GptOssPuzzleForCausalLM", "total_models": 1, - "total_downloads": 514, - "min_param_count": 8030294016, + "total_downloads": 20294, + "min_param_count": 90837823680, "sample_models": [ - "kswhitecross/RecaLLM-Llama-3.1-8B" + "nvidia/gpt-oss-puzzle-88B" ], "relevancy_score": 22.1 }, { - "architecture_id": "RecaLLMQwen2ForCausalLM", + "architecture_id": "SolarForCausalLM", "total_models": 1, - "total_downloads": 503, - "min_param_count": 7612785152, + "total_downloads": 20538, + "min_param_count": null, "sample_models": [ - "kswhitecross/RecaLLM-Qwen2.5-7B" + "upstage/solar-pro-preview-instruct" ], "relevancy_score": 22.1 }, @@ -3254,16 +3564,6 @@ ], "relevancy_score": 21.3 }, - { - "architecture_id": "MiniCPM3ForCausalLM", - "total_models": 1, - "total_downloads": 13980, - "min_param_count": null, - "sample_models": [ - "openbmb/MiniCPM3-4B" - ], - "relevancy_score": 21.3 - }, { "architecture_id": "ArcticForCausalLM", "total_models": 1, @@ -3284,6 +3584,16 @@ ], "relevancy_score": 21.3 }, + { + "architecture_id": "MiniCPM3ForCausalLM", + "total_models": 1, + "total_downloads": 13980, + "min_param_count": null, + "sample_models": [ + "openbmb/MiniCPM3-4B" + ], + "relevancy_score": 21.3 + }, { "architecture_id": "Dots1ForCausalLM", "total_models": 2, @@ -3435,24 +3745,24 @@ "relevancy_score": 20.0 }, { - "architecture_id": "ModernBertDecoderForCausalLM", + "architecture_id": "GPT2Model", "total_models": 2, - "total_downloads": 5499, + "total_downloads": 5444, "min_param_count": null, "sample_models": [ - "jhu-clsp/ettin-decoder-400m", - "jhu-clsp/ettin-decoder-32m" + "keshan/sinhala-gpt2", + "cerebras/Cerebras-GPT-13B" ], "relevancy_score": 19.9 }, { - "architecture_id": "GPT2Model", + "architecture_id": "ModernBertDecoderForCausalLM", "total_models": 2, - "total_downloads": 5444, + "total_downloads": 5499, "min_param_count": null, "sample_models": [ - "keshan/sinhala-gpt2", - "cerebras/Cerebras-GPT-13B" + "jhu-clsp/ettin-decoder-400m", + "jhu-clsp/ettin-decoder-32m" ], "relevancy_score": 19.9 }, @@ -3546,24 +3856,24 @@ "relevancy_score": 19.5 }, { - "architecture_id": "RobertaForCausalLM", + "architecture_id": "MossForCausalLM", "total_models": 2, - "total_downloads": 4508, + "total_downloads": 4419, "min_param_count": null, "sample_models": [ - "uf-aice-lab/math-roberta", - "gokceuludogan/ChemBERTaLM" + "OpenMOSS-Team/moss-moon-003-sft", + "OpenMOSS-Team/moss-moon-003-base" ], "relevancy_score": 19.4 }, { - "architecture_id": "MossForCausalLM", + "architecture_id": "RobertaForCausalLM", "total_models": 2, - "total_downloads": 4419, + "total_downloads": 4508, "min_param_count": null, "sample_models": [ - "OpenMOSS-Team/moss-moon-003-sft", - "OpenMOSS-Team/moss-moon-003-base" + "uf-aice-lab/math-roberta", + "gokceuludogan/ChemBERTaLM" ], "relevancy_score": 19.4 }, @@ -3589,24 +3899,24 @@ "relevancy_score": 19.4 }, { - "architecture_id": "BartForCausalLM", + "architecture_id": "TranceptionLMHeadModel", "total_models": 2, - "total_downloads": 4230, + "total_downloads": 4204, "min_param_count": null, "sample_models": [ - "sanchit-gandhi/tiny-random-bart-fp16", - "hf-tiny-model-private/tiny-random-BartForCausalLM" + "PascalNotin/Tranception_Large", + "PascalNotin/Tranception_Small" ], "relevancy_score": 19.3 }, { - "architecture_id": "TranceptionLMHeadModel", + "architecture_id": "BartForCausalLM", "total_models": 2, - "total_downloads": 4204, + "total_downloads": 4230, "min_param_count": null, "sample_models": [ - "PascalNotin/Tranception_Large", - "PascalNotin/Tranception_Small" + "sanchit-gandhi/tiny-random-bart-fp16", + "hf-tiny-model-private/tiny-random-BartForCausalLM" ], "relevancy_score": 19.3 }, @@ -3682,22 +3992,22 @@ "relevancy_score": 18.6 }, { - "architecture_id": "LongcatCausalLM", + "architecture_id": "TransfoXLLMHeadModel", "total_models": 1, - "total_downloads": 3733, - "min_param_count": 561862880256, + "total_downloads": 3677, + "min_param_count": null, "sample_models": [ - "meituan-longcat/LongCat-Flash-Thinking-2601" + "transfo-xl/transfo-xl-wt103" ], "relevancy_score": 18.4 }, { - "architecture_id": "TransfoXLLMHeadModel", + "architecture_id": "LongcatCausalLM", "total_models": 1, - "total_downloads": 3677, - "min_param_count": null, + "total_downloads": 3733, + "min_param_count": 561862880256, "sample_models": [ - "transfo-xl/transfo-xl-wt103" + "meituan-longcat/LongCat-Flash-Thinking-2601" ], "relevancy_score": 18.4 }, @@ -3724,7 +4034,18 @@ "relevancy_score": 18.2 }, { - "architecture_id": "MyAwesomeModelForCausalLM", + "architecture_id": "BigBirdPegasusForConditionalGeneration", + "total_models": 2, + "total_downloads": 2950, + "min_param_count": null, + "sample_models": [ + "google/bigbird-pegasus-large-arxiv", + "google/bigbird-pegasus-large-pubmed" + ], + "relevancy_score": 18.1 + }, + { + "architecture_id": "MyAwesomeModelForCausalLM", "total_models": 1, "total_downloads": 3144, "min_param_count": null, @@ -3754,12 +4075,12 @@ "relevancy_score": 17.9 }, { - "architecture_id": "QHEARTForECGQA", + "architecture_id": "TAMELM", "total_models": 1, - "total_downloads": 2916, + "total_downloads": 2877, "min_param_count": null, "sample_models": [ - "Manhph2211/Q-HEART" + "reaperdoesntknow/TameForCasualLM" ], "relevancy_score": 17.9 }, @@ -3774,12 +4095,12 @@ "relevancy_score": 17.9 }, { - "architecture_id": "TAMELM", + "architecture_id": "QHEARTForECGQA", "total_models": 1, - "total_downloads": 2877, + "total_downloads": 2916, "min_param_count": null, "sample_models": [ - "reaperdoesntknow/TameForCasualLM" + "Manhph2211/Q-HEART" ], "relevancy_score": 17.9 }, @@ -3795,22 +4116,22 @@ "relevancy_score": 17.8 }, { - "architecture_id": "CoherenceMomentumModel", + "architecture_id": "CPMAntForCausalLM", "total_models": 1, - "total_downloads": 2795, + "total_downloads": 2778, "min_param_count": null, "sample_models": [ - "aisingapore/coherence-momentum" + "openbmb/cpm-ant-10b" ], "relevancy_score": 17.8 }, { - "architecture_id": "CPMAntForCausalLM", + "architecture_id": "ThinkerLM", "total_models": 1, - "total_downloads": 2778, + "total_downloads": 2726, "min_param_count": null, "sample_models": [ - "openbmb/cpm-ant-10b" + "prskid1000/micro-Omni" ], "relevancy_score": 17.8 }, @@ -3825,12 +4146,12 @@ "relevancy_score": 17.8 }, { - "architecture_id": "ThinkerLM", + "architecture_id": "CoherenceMomentumModel", "total_models": 1, - "total_downloads": 2726, + "total_downloads": 2795, "min_param_count": null, "sample_models": [ - "prskid1000/micro-Omni" + "aisingapore/coherence-momentum" ], "relevancy_score": 17.8 }, @@ -3845,42 +4166,32 @@ "relevancy_score": 17.7 }, { - "architecture_id": "MoEGPT2", - "total_models": 1, - "total_downloads": 2577, - "min_param_count": null, - "sample_models": [ - "NamrataThakur/Small_Language_Model_MOE_127M_Pretrained" - ], - "relevancy_score": 17.6 - }, - { - "architecture_id": "GPT2", + "architecture_id": "TeleFLMForCausalLM", "total_models": 1, - "total_downloads": 2566, + "total_downloads": 2492, "min_param_count": null, "sample_models": [ - "NamrataThakur/Small_Language_Model_MHA_53M_Pretrained" + "CofeAI/Tele-FLM-1T" ], "relevancy_score": 17.6 }, { - "architecture_id": "GQAGPT2", + "architecture_id": "JiRackTernary1B", "total_models": 1, - "total_downloads": 2551, + "total_downloads": 2525, "min_param_count": null, "sample_models": [ - "NamrataThakur/Small_Language_Model_GQA_48M_Pretrained" + "kgrabko/JiRackTernary_1b" ], "relevancy_score": 17.6 }, { - "architecture_id": "JiRackTernary1B", + "architecture_id": "GPT2", "total_models": 1, - "total_downloads": 2525, + "total_downloads": 2566, "min_param_count": null, "sample_models": [ - "kgrabko/JiRackTernary_1b" + "NamrataThakur/Small_Language_Model_MHA_53M_Pretrained" ], "relevancy_score": 17.6 }, @@ -3895,24 +4206,24 @@ "relevancy_score": 17.6 }, { - "architecture_id": "TeleFLMForCausalLM", + "architecture_id": "MoEGPT2", "total_models": 1, - "total_downloads": 2492, + "total_downloads": 2577, "min_param_count": null, "sample_models": [ - "CofeAI/Tele-FLM-1T" + "NamrataThakur/Small_Language_Model_MOE_127M_Pretrained" ], "relevancy_score": 17.6 }, { - "architecture_id": "SeerAttnLlamaForCausalLM", + "architecture_id": "GQAGPT2", "total_models": 1, - "total_downloads": 2413, + "total_downloads": 2551, "min_param_count": null, "sample_models": [ - "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates" + "NamrataThakur/Small_Language_Model_GQA_48M_Pretrained" ], - "relevancy_score": 17.5 + "relevancy_score": 17.6 }, { "architecture_id": "Speech2TextTransformerForConditionalGeneration", @@ -3925,14 +4236,14 @@ "relevancy_score": 17.5 }, { - "architecture_id": "WhisperMixStyleForConditionalGeneration", + "architecture_id": "SeerAttnLlamaForCausalLM", "total_models": 1, - "total_downloads": 2299, + "total_downloads": 2413, "min_param_count": null, "sample_models": [ - "wago5090/mixstyle_multi-s" + "SeerAttention/SeerAttention-Llama-3.1-8B-AttnGates" ], - "relevancy_score": 17.4 + "relevancy_score": 17.5 }, { "architecture_id": "Videollama2Qwen2ForCausalLM", @@ -3944,6 +4255,16 @@ ], "relevancy_score": 17.4 }, + { + "architecture_id": "WhisperMixStyleForConditionalGeneration", + "total_models": 1, + "total_downloads": 2299, + "min_param_count": null, + "sample_models": [ + "wago5090/mixstyle_multi-s" + ], + "relevancy_score": 17.4 + }, { "architecture_id": "DenseLLM", "total_models": 1, @@ -3985,32 +4306,32 @@ "relevancy_score": 17.3 }, { - "architecture_id": "Typhoon2Audio2AudioForConditionalGeneration", + "architecture_id": "TFGPT2LMHeadModel", "total_models": 1, - "total_downloads": 2205, + "total_downloads": 2178, "min_param_count": null, "sample_models": [ - "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct" + "mymusise/gpt2-medium-chinese" ], "relevancy_score": 17.3 }, { - "architecture_id": "TFGPT2LMHeadModel", + "architecture_id": "GPTModelForTextGeneration", "total_models": 1, - "total_downloads": 2178, + "total_downloads": 2169, "min_param_count": null, "sample_models": [ - "mymusise/gpt2-medium-chinese" + "samkeet/GPT_124M-Instruct" ], "relevancy_score": 17.3 }, { - "architecture_id": "GPTModelForTextGeneration", + "architecture_id": "Typhoon2Audio2AudioForConditionalGeneration", "total_models": 1, - "total_downloads": 2169, + "total_downloads": 2205, "min_param_count": null, "sample_models": [ - "samkeet/GPT_124M-Instruct" + "typhoon-ai/llama3.1-typhoon2-audio-8b-instruct" ], "relevancy_score": 17.3 }, @@ -4026,114 +4347,114 @@ "relevancy_score": 17.2 }, { - "architecture_id": "LlaMAForCausalLM", + "architecture_id": "RobertaPreLayerNormForCausalLM", "total_models": 1, - "total_downloads": 2140, + "total_downloads": 2067, "min_param_count": null, "sample_models": [ - "circulus/alpaca-7b" + "hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM" ], "relevancy_score": 17.2 }, { - "architecture_id": "GeoVForCausalLM", + "architecture_id": "EnergyTransformer", "total_models": 1, - "total_downloads": 2137, + "total_downloads": 2087, "min_param_count": null, "sample_models": [ - "GeoV/GeoV-9b" + "cccczshao/CALM-M" ], "relevancy_score": 17.2 }, { - "architecture_id": "ElectraForCausalLM", + "architecture_id": "BlenderbotForCausalLM", "total_models": 1, - "total_downloads": 2128, + "total_downloads": 2066, "min_param_count": null, "sample_models": [ - "smeoni/nbme-electra-large-generator" + "hf-tiny-model-private/tiny-random-BlenderbotForCausalLM" ], "relevancy_score": 17.2 }, { - "architecture_id": "XModelForCausalLM", + "architecture_id": "PegasusForCausalLM", "total_models": 1, - "total_downloads": 2098, + "total_downloads": 2077, "min_param_count": null, "sample_models": [ - "XiaoduoAILab/Xmodel_LM" + "hf-tiny-model-private/tiny-random-PegasusForCausalLM" ], "relevancy_score": 17.2 }, { - "architecture_id": "EnergyTransformer", + "architecture_id": "GeoVForCausalLM", "total_models": 1, - "total_downloads": 2087, + "total_downloads": 2137, "min_param_count": null, "sample_models": [ - "cccczshao/CALM-M" + "GeoV/GeoV-9b" ], "relevancy_score": 17.2 }, { - "architecture_id": "PegasusForCausalLM", + "architecture_id": "XModelForCausalLM", "total_models": 1, - "total_downloads": 2077, + "total_downloads": 2098, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-PegasusForCausalLM" + "XiaoduoAILab/Xmodel_LM" ], "relevancy_score": 17.2 }, { - "architecture_id": "RobertaPreLayerNormForCausalLM", + "architecture_id": "ElectraForCausalLM", "total_models": 1, - "total_downloads": 2067, + "total_downloads": 2128, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-RobertaPreLayerNormForCausalLM" + "smeoni/nbme-electra-large-generator" ], "relevancy_score": 17.2 }, { - "architecture_id": "BlenderbotForCausalLM", + "architecture_id": "LlaMAForCausalLM", "total_models": 1, - "total_downloads": 2066, + "total_downloads": 2140, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-BlenderbotForCausalLM" + "circulus/alpaca-7b" ], "relevancy_score": 17.2 }, { - "architecture_id": "MonkeyLMHeadModel", + "architecture_id": "PointLLMLlamaForCausalLM", "total_models": 2, - "total_downloads": 1519, + "total_downloads": 1534, "min_param_count": null, "sample_models": [ - "echo840/Monkey-Chat", - "echo840/Monkey" + "RunsenXu/PointLLM_7B_v1.1_init", + "RunsenXu/PointLLM_7B_v1.2" ], "relevancy_score": 17.1 }, { - "architecture_id": "PointLLMLlamaForCausalLM", + "architecture_id": "MonkeyLMHeadModel", "total_models": 2, - "total_downloads": 1534, + "total_downloads": 1519, "min_param_count": null, "sample_models": [ - "RunsenXu/PointLLM_7B_v1.1_init", - "RunsenXu/PointLLM_7B_v1.2" + "echo840/Monkey-Chat", + "echo840/Monkey" ], "relevancy_score": 17.1 }, { - "architecture_id": "MvpForCausalLM", + "architecture_id": "DebertaV2ForCausalLM", "total_models": 1, - "total_downloads": 2039, + "total_downloads": 1979, "min_param_count": null, "sample_models": [ - "hf-tiny-model-private/tiny-random-MvpForCausalLM" + "ltg/deberta-xxlarge-fixed" ], "relevancy_score": 17.1 }, @@ -4158,41 +4479,41 @@ "relevancy_score": 17.1 }, { - "architecture_id": "DebertaV2ForCausalLM", + "architecture_id": "MvpForCausalLM", "total_models": 1, - "total_downloads": 1979, + "total_downloads": 2039, "min_param_count": null, "sample_models": [ - "ltg/deberta-xxlarge-fixed" + "hf-tiny-model-private/tiny-random-MvpForCausalLM" ], "relevancy_score": 17.1 }, { - "architecture_id": "TelechatForCausalLM", + "architecture_id": "OtterForConditionalGeneration", "total_models": 2, - "total_downloads": 1456, + "total_downloads": 1489, "min_param_count": null, "sample_models": [ - "Tele-AI/telechat-7B", - "Tele-AI/TeleChat-12B" + "luodian/OTTER-Video-LLaMA7B-DenseCaption", + "luodian/OTTER-MPT1B-RPJama-Init" ], "relevancy_score": 17.0 }, { - "architecture_id": "OtterForConditionalGeneration", + "architecture_id": "TelechatForCausalLM", "total_models": 2, - "total_downloads": 1489, + "total_downloads": 1456, "min_param_count": null, "sample_models": [ - "luodian/OTTER-Video-LLaMA7B-DenseCaption", - "luodian/OTTER-MPT1B-RPJama-Init" + "Tele-AI/telechat-7B", + "Tele-AI/TeleChat-12B" ], "relevancy_score": 17.0 }, { "architecture_id": "LSGBartForConditionalGeneration", - "total_models": 1, - "total_downloads": 1887, + "total_models": 2, + "total_downloads": 2605, "min_param_count": null, "sample_models": [ "morenolq/LEGIT-BART-LSG-4096" @@ -4221,14 +4542,14 @@ "relevancy_score": 16.8 }, { - "architecture_id": "LlavaCrystalForCausalLM", + "architecture_id": "NorT5ForConditionalGeneration", "total_models": 1, - "total_downloads": 1620, + "total_downloads": 1754, "min_param_count": null, "sample_models": [ - "LLM360/CrystalChat-7B-Web2Code" + "ltg/nort5-base-en-no-translation" ], - "relevancy_score": 16.6 + "relevancy_score": 16.7 }, { "architecture_id": "InternLM2ForRewardModel", @@ -4241,14 +4562,14 @@ "relevancy_score": 16.6 }, { - "architecture_id": "MobilintEagle3Qwen2ForCausalLM", + "architecture_id": "LlavaCrystalForCausalLM", "total_models": 1, - "total_downloads": 1550, + "total_downloads": 1620, "min_param_count": null, "sample_models": [ - "mobilint/EAGLE3-JPharmatron-7B" + "LLM360/CrystalChat-7B-Web2Code" ], - "relevancy_score": 16.5 + "relevancy_score": 16.6 }, { "architecture_id": "MobileLLMForCausalLM", @@ -4260,6 +4581,16 @@ ], "relevancy_score": 16.5 }, + { + "architecture_id": "MobilintEagle3Qwen2ForCausalLM", + "total_models": 1, + "total_downloads": 1550, + "min_param_count": null, + "sample_models": [ + "mobilint/EAGLE3-JPharmatron-7B" + ], + "relevancy_score": 16.5 + }, { "architecture_id": "GeoChatLlamaForCausalLM", "total_models": 1, @@ -4301,22 +4632,22 @@ "relevancy_score": 16.1 }, { - "architecture_id": "CambrianLlamaForCausalLM", + "architecture_id": "JiRackTernaryModel", "total_models": 1, - "total_downloads": 1209, + "total_downloads": 1195, "min_param_count": null, "sample_models": [ - "nyu-visionx/cambrian-8b" + "kgrabko/JiRackTernary_70b" ], "relevancy_score": 16.0 }, { - "architecture_id": "JiRackTernaryModel", + "architecture_id": "CambrianLlamaForCausalLM", "total_models": 1, - "total_downloads": 1195, + "total_downloads": 1209, "min_param_count": null, "sample_models": [ - "kgrabko/JiRackTernary_70b" + "nyu-visionx/cambrian-8b" ], "relevancy_score": 16.0 }, @@ -4361,32 +4692,32 @@ "relevancy_score": 15.5 }, { - "architecture_id": "TransnormerForCausalLM", + "architecture_id": "SOVYN85M", "total_models": 1, "total_downloads": 957, "min_param_count": null, "sample_models": [ - "OpenNLPLab/TransNormerLLM-385M" + "SOVYN/SOVYN-85M" ], "relevancy_score": 15.5 }, { - "architecture_id": "SOVYN85M", + "architecture_id": "TransnormerForCausalLM", "total_models": 1, "total_downloads": 957, "min_param_count": null, "sample_models": [ - "SOVYN/SOVYN-85M" + "OpenNLPLab/TransNormerLLM-385M" ], "relevancy_score": 15.5 }, { - "architecture_id": "ShikraLlamaForCausalLM", + "architecture_id": "ZsGPT2LMHeadModel", "total_models": 1, - "total_downloads": 931, + "total_downloads": 913, "min_param_count": null, "sample_models": [ - "shikras/shikra-7b-delta-v1" + "claritylab/zero-shot-vanilla-gpt2" ], "relevancy_score": 15.4 }, @@ -4400,16 +4731,6 @@ ], "relevancy_score": 15.4 }, - { - "architecture_id": "ZsGPT2LMHeadModel", - "total_models": 1, - "total_downloads": 913, - "min_param_count": null, - "sample_models": [ - "claritylab/zero-shot-vanilla-gpt2" - ], - "relevancy_score": 15.4 - }, { "architecture_id": "LlamaModel", "total_models": 1, @@ -4421,14 +4742,14 @@ "relevancy_score": 15.4 }, { - "architecture_id": "AquilaDenseForCausalLM", + "architecture_id": "ShikraLlamaForCausalLM", "total_models": 1, - "total_downloads": 855, + "total_downloads": 931, "min_param_count": null, "sample_models": [ - "BAAI/AquilaDense-7B" + "shikras/shikra-7b-delta-v1" ], - "relevancy_score": 15.2 + "relevancy_score": 15.4 }, { "architecture_id": "HumanGPTForCausalLM", @@ -4441,22 +4762,22 @@ "relevancy_score": 15.2 }, { - "architecture_id": "Phi4FlashForCausalLM", + "architecture_id": "AquilaDenseForCausalLM", "total_models": 1, - "total_downloads": 841, + "total_downloads": 855, "min_param_count": null, "sample_models": [ - "microsoft/Phi-4-mini-flash-reasoning" + "BAAI/AquilaDense-7B" ], "relevancy_score": 15.2 }, { - "architecture_id": "EmuForCausalLM", + "architecture_id": "Phi4FlashForCausalLM", "total_models": 1, - "total_downloads": 822, + "total_downloads": 841, "min_param_count": null, "sample_models": [ - "BAAI/Emu2-Chat" + "microsoft/Phi-4-mini-flash-reasoning" ], "relevancy_score": 15.2 }, @@ -4471,14 +4792,24 @@ "relevancy_score": 15.2 }, { - "architecture_id": "FlamingoForCausalLM", + "architecture_id": "CodeT5pEncoderDecoderModel", "total_models": 1, - "total_downloads": 796, + "total_downloads": 891, "min_param_count": null, "sample_models": [ - "babylm/flamingo-2024" + "Salesforce/codet5p-2b" ], - "relevancy_score": 15.1 + "relevancy_score": 15.2 + }, + { + "architecture_id": "EmuForCausalLM", + "total_models": 1, + "total_downloads": 822, + "min_param_count": null, + "sample_models": [ + "BAAI/Emu2-Chat" + ], + "relevancy_score": 15.2 }, { "architecture_id": "VStreamLlamaForCausalLM", @@ -4500,6 +4831,16 @@ ], "relevancy_score": 15.1 }, + { + "architecture_id": "FlamingoForCausalLM", + "total_models": 1, + "total_downloads": 796, + "min_param_count": null, + "sample_models": [ + "babylm/flamingo-2024" + ], + "relevancy_score": 15.1 + }, { "architecture_id": "MoELLaVAQWenForCausalLM", "total_models": 1, @@ -4511,22 +4852,22 @@ "relevancy_score": 15.0 }, { - "architecture_id": "YayiForCausalLM", + "architecture_id": "GPT", "total_models": 1, - "total_downloads": 721, + "total_downloads": 720, "min_param_count": null, "sample_models": [ - "wenge-research/yayi2-30b" + "LH-Tech-AI/Apex-1.5-Coder-Instruct-350M" ], "relevancy_score": 14.9 }, { - "architecture_id": "GPT", + "architecture_id": "YayiForCausalLM", "total_models": 1, - "total_downloads": 720, + "total_downloads": 721, "min_param_count": null, "sample_models": [ - "LH-Tech-AI/Apex-1.5-Coder-Instruct-350M" + "wenge-research/yayi2-30b" ], "relevancy_score": 14.9 }, @@ -4551,12 +4892,12 @@ "relevancy_score": 14.8 }, { - "architecture_id": "SDARMoeForCausalLM", + "architecture_id": "GPTBigCodeLMHeadModel", "total_models": 1, - "total_downloads": 675, - "min_param_count": 30532122624, + "total_downloads": 664, + "min_param_count": null, "sample_models": [ - "JetLM/SDAR-30B-A3B-Chat-b32" + "bigcode/santacoderpack" ], "relevancy_score": 14.7 }, @@ -4581,22 +4922,22 @@ "relevancy_score": 14.7 }, { - "architecture_id": "GPTBigCodeLMHeadModel", + "architecture_id": "MobiLlamaForCausalLM", "total_models": 1, - "total_downloads": 664, + "total_downloads": 663, "min_param_count": null, "sample_models": [ - "bigcode/santacoderpack" + "MBZUAI/MobiLlama-05B" ], "relevancy_score": 14.7 }, { - "architecture_id": "MobiLlamaForCausalLM", + "architecture_id": "SDARMoeForCausalLM", "total_models": 1, - "total_downloads": 663, - "min_param_count": null, + "total_downloads": 675, + "min_param_count": 30532122624, "sample_models": [ - "MBZUAI/MobiLlama-05B" + "JetLM/SDAR-30B-A3B-Chat-b32" ], "relevancy_score": 14.7 }, @@ -4631,22 +4972,22 @@ "relevancy_score": 14.6 }, { - "architecture_id": "CacaForCausalLM", + "architecture_id": "Llama2ForCausalLM", "total_models": 1, - "total_downloads": 607, + "total_downloads": 593, "min_param_count": null, "sample_models": [ - "Lyon28/caca-1B-untrained" + "llmware/dragon-llama-7b-v0" ], "relevancy_score": 14.5 }, { - "architecture_id": "LingoWhaleForCausalLM", + "architecture_id": "MPLUGOwl2LlamaForCausalLM", "total_models": 1, - "total_downloads": 599, + "total_downloads": 597, "min_param_count": null, "sample_models": [ - "deeplang-ai/LingoWhale-8B" + "q-future/q-align-quality" ], "relevancy_score": 14.5 }, @@ -4661,42 +5002,42 @@ "relevancy_score": 14.5 }, { - "architecture_id": "MPLUGOwl2LlamaForCausalLM", + "architecture_id": "CacaForCausalLM", "total_models": 1, - "total_downloads": 597, + "total_downloads": 607, "min_param_count": null, "sample_models": [ - "q-future/q-align-quality" + "Lyon28/caca-1B-untrained" ], "relevancy_score": 14.5 }, { - "architecture_id": "Llama2ForCausalLM", + "architecture_id": "LingoWhaleForCausalLM", "total_models": 1, - "total_downloads": 593, + "total_downloads": 599, "min_param_count": null, "sample_models": [ - "llmware/dragon-llama-7b-v0" + "deeplang-ai/LingoWhale-8B" ], "relevancy_score": 14.5 }, { - "architecture_id": "GLaMMForCausalLM", + "architecture_id": "OpenBAForConditionalGeneration", "total_models": 1, - "total_downloads": 588, + "total_downloads": 581, "min_param_count": null, "sample_models": [ - "MBZUAI/GLaMM-FullScope" + "OpenNLG/OpenBA-V1-Based" ], "relevancy_score": 14.4 }, { - "architecture_id": "OpenBAForConditionalGeneration", + "architecture_id": "GLaMMForCausalLM", "total_models": 1, - "total_downloads": 581, + "total_downloads": 588, "min_param_count": null, "sample_models": [ - "OpenNLG/OpenBA-V1-Based" + "MBZUAI/GLaMM-FullScope" ], "relevancy_score": 14.4 }, @@ -4731,42 +5072,42 @@ "relevancy_score": 14.3 }, { - "architecture_id": "M2M100ForConditionalGeneration", + "architecture_id": "HgrnForCausalLM", "total_models": 1, - "total_downloads": 545, + "total_downloads": 534, "min_param_count": null, "sample_models": [ - "dsfsi/nso-en-m2m100-gov" + "OpenNLPLab/HGRN-150M" ], - "relevancy_score": 14.3 + "relevancy_score": 14.2 }, { - "architecture_id": "AprielHForCausalLM", + "architecture_id": "LlavaMistralForCausalLM", "total_models": 1, - "total_downloads": 539, + "total_downloads": 530, "min_param_count": null, "sample_models": [ - "ServiceNow-AI/Apriel-H1-15b-Thinker-SFT" + "NousResearch/Nous-Hermes-2-Vision-Alpha" ], "relevancy_score": 14.2 }, { - "architecture_id": "HgrnForCausalLM", + "architecture_id": "VSMForCausalLM", "total_models": 1, - "total_downloads": 534, + "total_downloads": 522, "min_param_count": null, "sample_models": [ - "OpenNLPLab/HGRN-150M" + "craigwu/seal_vsm_7b" ], "relevancy_score": 14.2 }, { - "architecture_id": "LlavaMistralForCausalLM", + "architecture_id": "AprielHForCausalLM", "total_models": 1, - "total_downloads": 530, + "total_downloads": 539, "min_param_count": null, "sample_models": [ - "NousResearch/Nous-Hermes-2-Vision-Alpha" + "ServiceNow-AI/Apriel-H1-15b-Thinker-SFT" ], "relevancy_score": 14.2 }, @@ -4781,14 +5122,14 @@ "relevancy_score": 14.2 }, { - "architecture_id": "VSMForCausalLM", + "architecture_id": "XLMProphetNetForConditionalGeneration", "total_models": 1, - "total_downloads": 522, + "total_downloads": 531, "min_param_count": null, "sample_models": [ - "craigwu/seal_vsm_7b" + "microsoft/xprophetnet-large-wiki100-cased" ], - "relevancy_score": 14.2 + "relevancy_score": 14.0 } ] } \ No newline at end of file diff --git a/transformer_lens/tools/model_registry/data/supported_models.json b/transformer_lens/tools/model_registry/data/supported_models.json index c0fb70907..46454e788 100644 --- a/transformer_lens/tools/model_registry/data/supported_models.json +++ b/transformer_lens/tools/model_registry/data/supported_models.json @@ -1,14 +1,14 @@ { - "generated_at": "2026-04-16", + "generated_at": "2026-05-08", "scan_info": { - "total_scanned": 4839, - "task_filter": "text-generation", + "total_scanned": 431, + "task_filter": "text2text-generation", "min_downloads": 500, - "scan_duration_seconds": 4.9 + "scan_duration_seconds": 0.4 }, - "total_architectures": 50, - "total_models": 9068, - "total_verified": 711, + "total_architectures": 51, + "total_models": 9352, + "total_verified": 716, "models": [ { "architecture_id": "Qwen3NextForCausalLM", @@ -125399,6 +125399,3982 @@ "phase4_score": 92.0, "phase7_score": null, "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google-t5/t5-small", + "status": 1, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 97.6, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google-t5/t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/flan-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "E-MIMIC/inclusively-reformulation-it5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "prithivida/parrot_paraphraser_on_T5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "allenai/unifiedqa-t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/flan-t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "IlyaGusev/rut5_base_headline_gen_telegram", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/flan-t5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/madlad400-3b-mt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google-t5/t5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-v1_1-xxl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/byt5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "vennify/t5-base-grammar-correction", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Rostlab/prot_t5_xl_uniref50", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "iarfmoose/t5-base-question-generator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "patrickvonplaten/t5-tiny-random", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/byt5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "fabiochiu/t5-base-tag-generation", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "prithivida/grammar_error_correcter_v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/flan-t5-xl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "autogluon/chronos-t5-tiny", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ElnaggarLab/ankh-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "jbochi/madlad400-3b-mt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "trl-internal-testing/tiny-T5ForConditionalGeneration", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-v1_1-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ybelkada/tiny-random-T5ForConditionalGeneration-calibrated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py311-pylingual-v1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "optimum/t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Rostlab/ProstT5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "autogluon/chronos-t5-mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "charsiu/g2p_multilingual_byT5_small_100", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Salesforce/codet5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-efficient-tiny", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "QizhiPei/biot5-plus-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Babelscape/t5-base-summarization-claim-extractor", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/byt5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "autogluon/chronos-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "peft-internal-testing/tiny-random-T5ForConditionalGeneration-calibrated", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "peft-internal-testing/tiny-T5ForConditionalGeneration", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "bigscience/T0pp", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py310-pylingual-v1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "utrobinmv/t5_translate_en_ru_zh_small_1024", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-v1_1-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "autogluon/chronos-t5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-v1_1-xl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "peft-internal-testing/tiny-random-t5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-small-e2e-qg", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "gokaygokay/Lamini-Prompt-Enchance", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "lytang/MiniCheck-Flan-T5-Large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py312-pylingual-v1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ybelkada/flan-t5-xl-sharded-bf16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py313-pylingual-v1.1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "RussianNLP/FRED-T5-Summarizer", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cnicu/t5-small-booksum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "gokaygokay/Lamini-Prompt-Enchance-Long", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-v1_1-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Salesforce/codet5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/flan-t5-xxl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/flan-ul2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "sonoisa/t5-base-japanese-v1.1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Unbabel/gec-t5_small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "GT4SD/multitask-text-and-chemistry-t5-base-augm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Rostlab/ProstT5_fp16", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Falconsai/text_summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "charsiu/g2p_multilingual_byT5_tiny_16_layers_100", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "roborovski/superprompt-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Salesforce/codet5p-220m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "UrukHan/t5-russian-spell", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cahya/t5-base-indonesian-summarization-cased", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "lizhuang144/flan-t5-base-VG-factual-sg", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "d0rj/rut5-base-summ", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "castorini/monot5-base-msmarco", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-small-qg-hl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "deep-learning-analytics/automatic-title-generation", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-base-finetuned-sarcasm-twitter", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "trohith89/KDTS_T5_Summary_FineTune", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py39-pylingual-v1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-small-finetuned-common_gen", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "sagawa/ReactionT5v2-forward", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cointegrated/rut5-base-absum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "FinText/Chronos_Tiny_2023_US", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Langboat/mengzi-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "laituan245/molt5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-base-finetuned-emotion", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "VietAI/vit5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/ul2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "yatharth97/T5-base-10K-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ramsrigouthamg/t5_paraphraser", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "chronbmm/sanskrit5-multitask", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Xenova/flan-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MingZhong/unieval-fact", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-base-e2e-qg", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "kaiyuy/leandojo-lean4-retriever-tacgen-byt5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "autogluon/chronos-t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "echarlaix/t5-small-openvino", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "deepvk/kazRush-ru-kk", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "peft-internal-testing/tiny-random-T5ForConditionalGeneration", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-base-qg-hl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "grammarly/coedit-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "deepvk/kazRush-kk-ru", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Exscientia/IgT5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Xenova/t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/madlad400-7b-mt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MingZhong/unieval-sum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "RUNorm/RUNorm-kirillizator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-efficient-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "pyterrier-quality/qt5-tiny", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "kaiyuy/leandojo-lean4-tacgen-byt5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-base-finetuned-question-generation-ap", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py38-pylingual-v1.3-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "royweiss1/T5_MiddleSentences", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mesolitica/translation-t5-small-standard-bahasa-cased-v2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "retrieva-jp/t5-small-short", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MingZhong/unieval-dialog", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5_xxl_true_nli_mixture", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/madlad400-10b-mt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py314-pylingual-v4-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "razent/SciFive-base-Pubmed_PMC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "RUNorm/RUNorm-normalizer-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-base-lm-adapt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Falconsai/medical_summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "lizhuang144/flan-t5-base-VG-factual-sg-id", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "uer/t5-base-chinese-cluecorpussmall", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "plguillou/t5-base-fr-sum-cnndm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Xenova/LaMini-Flan-T5-783M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "IlyaGusev/rut5_base_sum_gazeta", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ElnaggarLab/ankh-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "csebuetnlp/banglat5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "svjack/comet-atomic-en", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "allegro/plt5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Ateeqq/news-title-generator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "retrieva-jp/t5-base-long", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ai-forever/ruT5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Salesforce/codet5p-770m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "CohereLabs/aya-101", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-base-finetuned-span-sentiment-extraction", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py37-pylingual-v1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ai-forever/sage-fredt5-distilled-95m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "VietAI/envit5-translation", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "bond005/ruT5-ASR-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "yigagilbert/salt_language_ID", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Xenova/flan-t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "hossboll/clinical-t5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Xenova/LaMini-Flan-T5-77M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "pszemraj/flan-t5-large-grammar-synthesis", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "razent/SciFive-large-Pubmed", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "bigscience/T0_3B", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "recogna-nlp/ptt5-base-summ-xlsum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "pszemraj/grammar-synthesis-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-efficient-tiny-nl2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "gsarti/it5-base-news-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "chanind/frame-semantic-transformer-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cointegrated/rut5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "VietAI/vit5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cointegrated/rut5-small-normalizer", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "jbochi/candle-coedit-quantized", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "csebuetnlp/banglat5_small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Abdou/arabic-tashkeel-flan-t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "castorini/monot5-3b-msmarco-10k", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-small-lm-adapt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-small-qa-qg-hl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Xenova/LaMini-Flan-T5-248M", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "castorini/doc2query-t5-base-msmarco", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "dkarthikeyan1/tcrt5_ft_tcrdb", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cointegrated/rut5-base-multitask", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Wikidepia/IndoT5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "fangyuan/hotpotqa_abstractive", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Salesforce/codet5-base-multi-sum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-cola", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "nrl-ai/vn-diacritic-vit5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "czearing/article-title-generator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "VietAI/vit5-base-vietnews-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "uer/t5-small-chinese-cluecorpussmall", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "razent/SciFive-base-PMC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-base-finetuned-common_gen", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "chanind/frame-semantic-transformer-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "VietAI/envit5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-xl-lm-adapt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ElnaggarLab/ankh3-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "laituan245/molt5-base-smiles2caption", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "togatogah/jinen-v1-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-mrpc", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-qnli", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-mnli", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Pushkar27/GriceBench-Repair", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "LazarusNLP/IndoNanoT5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-rte", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "philschmid/flan-t5-base-samsum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-qqp", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-sst2", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cjvt/t5-slo-word-spelling-corrector", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "laituan245/molt5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "DeepMount00/OCR_corrector", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "popkek00/mt5-small-valutazione-cadute", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "allegro/plt5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Soyoung97/RankT5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "allenai/tk-instruct-base-def-pos", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mesolitica/finetune-qa-t5-small-standard-bahasa-cased", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "tanganke/flan-t5-base_glue-stsb", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "allenai/unifiedqa-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "khairi/life2lang-small-it", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "GT4SD/multitask-text-and-chemistry-t5-base-standard", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "snrspeaks/t5-one-line-summary", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "visheratin/t5-efficient-mini-grammar-correction", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "minhtoan/t5-translate-vietnamese-nom", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "unicamp-dl/ptt5-v2-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "kevinscaria/atsc_tk-instruct-base-def-pos-neg-neut-combined", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-small-finetuned-text-simplification", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "syssec-utd/py36-pylingual-v1-statement", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "GPT2LMHeadModel", + "model_id": "togatogah/jinen-v1-xsmall", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ai-forever/FRED-T5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ldenoue/Title_Generation_T5Small_Model", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "mrm8488/t5-base-finetuned-wikiSQL", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-11b-ssm-nq", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "MaRiOrOsSi/t5-base-finetuned-question-answering", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "textplus-bbaw/transnormer-19c-beta-v02", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "QizhiPei/biot5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "KomeijiForce/t5-base-emojilm", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "penelitianpsmatematika/medical-text-generation-t5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "yunhuijang/7e62tq9m", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "microsoft/codereviewer", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ConvLab/t5-small-dst-multiwoz21", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "TeraSpace/dialofred", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "csebuetnlp/banglat5_nmt_bn_en", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Michau/t5-base-en-generate-headline", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "pengold/t5-vietnamese-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "utrobinmv/t5_translate_en_ru_zh_base_200", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Var3n/hmByT5_anno", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "knowledgator/flan-t5-large-for-classification", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "lcw99/t5-base-korean-text-summary", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "stukenov/sozkz-fix-qwen-500m-kk-gec-v3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "Qwen2ForCausalLM", + "model_id": "stukenov/sozkz-fix-qwen-500m-kk-gec-v4", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-base-qa-qg-hl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "paust/pko-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "cointegrated/rut5-base-paraphraser", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "RUNorm/RUNorm-normalizer-medium", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "megagonlabs/t5-base-japanese-web", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ibm-research/regen-disambiguation", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "google/t5-efficient-mini", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ThomasNLG/t5-qa_squad2neg-en", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "Salesforce/codet5p-220m-py", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "ThomasNLG/t5-qg_squad1-en", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "d0rj/FRED-T5-large-instruct", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "lmqg/t5-base-tweetqa-qa", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "csebuetnlp/banglat5_nmt_en_bn", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "KETI-AIR/ke-t5-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "osunlp/attrscore-flan-t5-xl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "chentong00/propositionizer-wiki-flan-t5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "laituan245/molt5-large-caption2smiles", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "valhalla/t5-small-qg-prepend", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "lmqg/flan-t5-base-squad-qg", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "sagawa/ReactionT5v2-retrosynthesis", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "T5ForConditionalGeneration", + "model_id": "glonor/byt5-arabic-diacritization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "google/mt5-small", + "status": 1, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 89.3, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "google/mt5-large", + "status": 1, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 88.7, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "google/mt5-base", + "status": 1, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 76.3, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "csebuetnlp/mT5_multilingual_XLSum", + "status": 3, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 36/282 components failed (36 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 88.3, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "persiannlp/mt5-small-parsinlu-opus-translation_fa_en", + "status": 3, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 24/190 components failed (24 critical)", + "phase1_score": 50.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 82.8, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "optimum-internal-testing/tiny-random-mt5", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "persiannlp/mt5-base-parsinlu-opus-translation_fa_en", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "google/mt5-xl", + "status": 1, + "verified_date": "2026-05-08", + "metadata": null, + "note": "Full verification completed", + "phase1_score": 100.0, + "phase2_score": 100.0, + "phase3_score": null, + "phase4_score": 88.4, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "knowledgator/SMILES2IUPAC-canonical-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "kriton/greek-text-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "cointegrated/rut5-small", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "deutsche-telekom/mt5-small-sum-de-en-v1", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-xl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "csebuetnlp/mT5_m2o_arabic_crossSum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-xxl", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "knowledgator/IUPAC2SMILES-canonical-base", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "indonlp/cendol-mt5-small-inst", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "HiTZ/Medical-mT5-large", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "intelia-lab-uah/mt0-base_AE_SQAC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "intelia-lab-uah/mt0-base_QG_SQAC", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "thanathorn/mt5-cpe-kmutt-thai-sentence-sum", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-xxl-mt", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "eunsour/en-ko-transliterator", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "Buseak/md_mt5_0109_v8", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "bigscience/mt0-xxl-p3", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "persiannlp/mt5-small-parsinlu-translation_en_fa", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null + }, + { + "architecture_id": "MT5ForConditionalGeneration", + "model_id": "ozcangundes/mt5-small-turkish-summarization", + "status": 0, + "verified_date": null, + "metadata": null, + "note": null, + "phase1_score": null, + "phase2_score": null, + "phase3_score": null, + "phase4_score": null, + "phase7_score": null, + "phase8_score": null } ] } diff --git a/transformer_lens/tools/model_registry/data/verification_history.json b/transformer_lens/tools/model_registry/data/verification_history.json index c87d21798..500076dc9 100644 --- a/transformer_lens/tools/model_registry/data/verification_history.json +++ b/transformer_lens/tools/model_registry/data/verification_history.json @@ -1,5 +1,5 @@ { - "last_updated": "2026-04-21T20:10:35.469418", + "last_updated": "2026-05-08T10:21:54.476921", "records": [ { "model_id": "Macropodus/macbert4mdcspell_v1", @@ -11850,6 +11850,86 @@ "notes": "Full verification completed", "invalidated": false, "invalidation_reason": null + }, + { + "model_id": "google/mt5-small", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 24/190 components failed (24 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "persiannlp/mt5-small-parsinlu-opus-translation_fa_en", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 24/190 components failed (24 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/mt5-xl", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Below threshold: P1=50.0% < 100.0% (failed: all_components) \u2014 72/558 components failed (72 critical)", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/mt5-small", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/mt5-base", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/mt5-large", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google/mt5-xl", + "architecture_id": "MT5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null + }, + { + "model_id": "google-t5/t5-small", + "architecture_id": "T5ForConditionalGeneration", + "verified_date": "2026-05-08", + "verified_by": "verify_models", + "transformerlens_version": null, + "notes": "Full verification completed", + "invalidated": false, + "invalidation_reason": null } ] } diff --git a/transformer_lens/tools/model_registry/hf_scraper.py b/transformer_lens/tools/model_registry/hf_scraper.py index df6b11bc9..19fb6bace 100644 --- a/transformer_lens/tools/model_registry/hf_scraper.py +++ b/transformer_lens/tools/model_registry/hf_scraper.py @@ -248,8 +248,13 @@ def scrape_all_models( # count data inline with the listing, avoiding per-model API calls. # With ~1000 models per page, a full scan of 200K+ models needs only # ~200 paginated requests (well within the 1000 req / 5 min limit). + # Use ``filter`` rather than ``pipeline_tag`` so encoder-decoder models + # are discoverable: HF assigns T5/mT5 a primary pipeline_tag of + # "translation" (or None for mT5) and only lists "text2text-generation" + # in the broader tag list. ``filter`` matches against tags, ``pipeline_tag`` + # only against the canonical primary tag. list_kwargs: dict = { - "pipeline_tag": task, + "filter": task, "sort": "downloads", "expand": ["config", "safetensors"], }