{
  "base_url": "/bench/leaderboard/",
  "categories": [
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/1c55072b9054.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 1166.0,
            "total_p50_ms": 974.8963371384889,
            "ttft_p50_ms": 12.396014062687755
          },
          "model_id": "microsoft/Phi-3.5-mini-instruct",
          "run_id": "019e3b5d-8c8b-7fe2-bb9a-b6e1a3b5fd8a",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/20e28f89b2c6.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 789.0,
            "total_p50_ms": 1244.140700204298,
            "ttft_p50_ms": 57.572301011532545
          },
          "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
          "run_id": "019e3b6f-4f8a-7ac8-8794-193485f172df",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/508a99d9bb15.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 1589.0,
            "total_p50_ms": 1949.1334019694477,
            "ttft_p50_ms": 15.767814824357629
          },
          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
          "run_id": "019e3b30-b8df-73c9-8a7e-02378b644943",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/5c67f4a9812f.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 944.0,
            "total_p50_ms": 1141.41068700701,
            "ttft_p50_ms": 15.184570802375674
          },
          "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct",
          "run_id": "019e3b3f-81a0-7b16-b64c-b9d3cd0ba115",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/658af3aec8ea.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 1281.0,
            "total_p50_ms": 1698.8007170148194,
            "ttft_p50_ms": 14.671998098492622
          },
          "model_id": "Qwen/Qwen2.5-7B-Instruct",
          "run_id": "019e3b39-f3e2-7ab8-b53b-bf860dcbec21",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/7fe272247c5d.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 0.8,
            "pass_at_1_p05": 0.2,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 1653.0,
            "total_p50_ms": 4338.324517942965,
            "ttft_p50_ms": 28.08416704647243
          },
          "model_id": "meta-llama/Llama-3.1-70B-Instruct",
          "run_id": "019e3b93-5675-7669-88a2-3ab85410a4fc",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/b11d343924a6.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 0.8,
            "pass_at_1_p05": 0.2,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 824.0,
            "total_p50_ms": 1243.2377990335226,
            "ttft_p50_ms": 14.954894781112671
          },
          "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
          "run_id": "019e3b46-30cb-79cd-9322-13b7c0355a30",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/ffb60720b164.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 1521.0,
            "total_p50_ms": 2988.91265084967,
            "ttft_p50_ms": 17.903984989970922
          },
          "model_id": "google/gemma-2-9b-it",
          "run_id": "019e3b97-ce7a-73eb-8960-f573246264ed",
          "signed": true
        }
      ],
      "suite_id": "code.generation.humaneval-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/91867f6c7bb5.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 971.0,
            "total_p50_ms": 1260.9717741142958,
            "ttft_p50_ms": 18.070803955197334
          },
          "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct",
          "run_id": "019e3b3f-a902-7a6c-b6cc-b8ec0c603d14",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/a2f9906686ce.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "pass_at_1": 1.0,
            "pass_at_1_p05": 1.0,
            "pass_at_1_p50": 1.0,
            "pass_at_1_p95": 1.0,
            "timeout_rate": 0.0,
            "tokens_out_total": 807.0,
            "total_p50_ms": 1212.7999980002642,
            "ttft_p50_ms": 55.03687891177833
          },
          "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
          "run_id": "019e3b6f-7d82-78a3-a1d4-738f27df3c58",
          "signed": true
        }
      ],
      "suite_id": "code.generation.mbpp-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/010e9504589b.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.989010989010989,
            "energy_joules_total": 18636.700326055117,
            "joules_per_token": 1.880785177722789,
            "ok_rate": 1.0,
            "power_avg_w": 900.9042982456141,
            "power_peak_w": 937.418,
            "req_per_s_all": 4.335816725824134,
            "req_per_s_passing": 4.288170388177714,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 472.12755973836636,
            "total_p50_ms": 817.1103978529572,
            "total_p99_ms": 1095.8109518047409,
            "tpot_p50_ms": 7.040545851222973,
            "tpot_p99_ms": 8.692564945322625,
            "ttft_p50_ms": 20.768586080521345,
            "ttft_p99_ms": 87.91703225578829
          },
          "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
          "run_id": "019e3b45-8552-7705-ab62-1cb802fe5680",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/1c20a3df24fe.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9714285714285714,
            "cost_source": "registry:groq",
            "cost_usd_per_million_tokens": 0.64,
            "energy_joules_total": 43248.29586514904,
            "joules_per_token": 10.067107976058901,
            "ok_rate": 1.0,
            "power_avg_w": 2003.0753615384615,
            "power_peak_w": 2152.506,
            "req_per_s_all": 1.5869725755785926,
            "req_per_s_passing": 1.5416305019906327,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 194.78954813387523,
            "total_p50_ms": 2059.169305022806,
            "total_p99_ms": 2664.16737897787,
            "tpot_p50_ms": 15.861974667948415,
            "tpot_p99_ms": 16.095096299125924,
            "ttft_p50_ms": 46.744899125769734,
            "ttft_p99_ms": 1332.7900999737844
          },
          "model_id": "meta-llama/Llama-3.1-70B-Instruct",
          "run_id": "019e3b91-fedc-74a2-a660-44ccb0c4352b",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/43a8c771ef48.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9895833333333334,
            "energy_joules_total": 18542.71069004833,
            "joules_per_token": 1.6545650655883226,
            "ok_rate": 1.0,
            "power_avg_w": 908.619375,
            "power_peak_w": 947.635,
            "req_per_s_all": 4.6332446593174295,
            "req_per_s_passing": 4.584981694116206,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 540.8830510101086,
            "total_p50_ms": 822.2015289356932,
            "total_p99_ms": 929.0652440511616,
            "tpot_p50_ms": 6.26723448320578,
            "tpot_p99_ms": 6.504023637427547,
            "ttft_p50_ms": 26.835817494429648,
            "ttft_p99_ms": 52.97808477189303
          },
          "model_id": "Qwen/Qwen2.5-7B-Instruct",
          "run_id": "019e3b39-5d04-7afb-a506-fe3140835575",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/49e160db9a2a.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9736842105263158,
            "cost_source": "registry:groq",
            "cost_usd_per_million_tokens": 0.05750000000000001,
            "energy_joules_total": 15249.797054982093,
            "joules_per_token": 3.2626865757342944,
            "ok_rate": 1.0,
            "power_avg_w": 758.3005643274854,
            "power_peak_w": 943.2209999999999,
            "req_per_s_all": 1.8497336613654536,
            "req_per_s_passing": 1.8010564597505732,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 227.5172403479508,
            "total_p50_ms": 862.5175900524482,
            "total_p99_ms": 3020.528363280472,
            "tpot_p50_ms": 6.595119039205409,
            "tpot_p99_ms": 10.891121361793335,
            "ttft_p50_ms": 25.38680052384734,
            "ttft_p99_ms": 1677.528030264663
          },
          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
          "run_id": "019e3b2f-ef77-7467-b840-7fde4b6c308f",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/581e12c88e55.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9895833333333334,
            "energy_joules_total": 18653.887259589592,
            "joules_per_token": 1.685236901218682,
            "ok_rate": 1.0,
            "power_avg_w": 904.9749848101266,
            "power_peak_w": 944.3760000000001,
            "req_per_s_all": 4.592168252862021,
            "req_per_s_passing": 4.544333166894709,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 529.4865665721845,
            "total_p50_ms": 816.1463094875216,
            "total_p99_ms": 1091.6163505637078,
            "tpot_p50_ms": 6.261395598490408,
            "tpot_p99_ms": 8.241694629390349,
            "ttft_p50_ms": 26.426325901411474,
            "ttft_p99_ms": 74.96867093723111
          },
          "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct",
          "run_id": "019e3b3f-5ae6-7b96-afc7-48969c043d25",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/835fae0e126c.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9767441860465116,
            "energy_joules_total": 18004.607031374166,
            "joules_per_token": 3.829952569958342,
            "ok_rate": 1.0,
            "power_avg_w": 836.8075469879518,
            "power_peak_w": 857.025,
            "req_per_s_all": 1.9724451867934403,
            "req_per_s_passing": 1.9265743684959185,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 215.6387168166503,
            "total_p50_ms": 1981.2402590177953,
            "total_p99_ms": 2016.463664029725,
            "tpot_p50_ms": 15.34785290512689,
            "tpot_p99_ms": 15.79729331392915,
            "ttft_p50_ms": 42.49977902509272,
            "ttft_p99_ms": 198.21805385872682
          },
          "model_id": "Qwen/Qwen2-VL-7B-Instruct",
          "run_id": "019e3b85-e34e-706c-826e-d44636d028c2",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/91eac8a2d0c4.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9779411764705882,
            "energy_joules_total": 17330.98909072082,
            "joules_per_token": 1.1875420783007278,
            "ok_rate": 1.0,
            "power_avg_w": 861.8660266666667,
            "power_peak_w": 897.9069999999999,
            "req_per_s_all": 6.672941061126164,
            "req_per_s_passing": 6.525743831836616,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 716.0654547505533,
            "total_p50_ms": 531.3797079725191,
            "total_p99_ms": 786.9970788131468,
            "tpot_p50_ms": 4.6615383048033845,
            "tpot_p99_ms": 6.5718238133945555,
            "ttft_p50_ms": 18.407866591587663,
            "ttft_p99_ms": 265.3733468730934
          },
          "model_id": "microsoft/Phi-3.5-mini-instruct",
          "run_id": "019e3b5c-f170-7b43-b9b9-0362535c3e25",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/eb104ca63c1e.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.88,
            "energy_joules_total": 17249.046230131546,
            "joules_per_token": 5.937709545656298,
            "ok_rate": 1.0,
            "power_avg_w": 808.3084819277109,
            "power_peak_w": 854.62,
            "req_per_s_all": 1.1559036871105763,
            "req_per_s_passing": 1.0171952446573072,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 134.31600844224897,
            "total_p50_ms": 2661.4077310077846,
            "total_p99_ms": 3136.5972048975523,
            "tpot_p50_ms": 21.037106484835668,
            "tpot_p99_ms": 23.45798800388563,
            "ttft_p50_ms": 74.4268239941448,
            "ttft_p99_ms": 521.1921769287433
          },
          "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
          "run_id": "019e3b6f-22ca-7637-9ad5-66763c672117",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "0.21.0",
          "envelope_url": "/bench/leaderboard/envelopes/f0b1a3bfe520.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "compliance_rate": 0.9861111111111112,
            "energy_joules_total": 18850.937904138806,
            "joules_per_token": 2.308466556960422,
            "ok_rate": 1.0,
            "power_avg_w": 901.0111748768473,
            "power_peak_w": 937.794,
            "req_per_s_all": 3.39474627178891,
            "req_per_s_passing": 3.3475970180140644,
            "slo_hardware_class": "h100",
            "slo_template_resolved": "ttft<200ms, tpot<50ms, total<3000ms",
            "throughput_tok_per_s": 385.02080632539224,
            "total_p50_ms": 1095.4793834825978,
            "total_p99_ms": 1422.5878720986673,
            "tpot_p50_ms": 8.648013382794229,
            "tpot_p99_ms": 10.797570661576353,
            "ttft_p50_ms": 30.054199043661356,
            "ttft_p99_ms": 183.9538250933433
          },
          "model_id": "google/gemma-2-9b-it",
          "run_id": "019e3b97-3b4f-73a5-84d6-49db6f8586b4",
          "signed": true
        }
      ],
      "suite_id": "llm.inference.chatbot-short"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/2ad2480d566e.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "chrf_mean": 0.8818945161707197,
            "chrf_p50": 0.9032628142278107,
            "chrf_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 115.0,
            "total_p50_ms": 296.17225495167077,
            "ttft_p50_ms": 32.03408746048808
          },
          "model_id": "meta-llama/Llama-3.1-70B-Instruct",
          "run_id": "019e3b92-df93-7fdb-ba42-8b16b14b08fe",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/46af0fe8b536.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "chrf_mean": 0.7669739942851114,
            "chrf_p50": 0.7445362364499831,
            "chrf_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 181.0,
            "total_p50_ms": 154.87324842251837,
            "ttft_p50_ms": 11.996885994449258
          },
          "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
          "run_id": "019e3b46-0885-7cdd-a372-f8a5930a2429",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/66c2f6b73d93.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "chrf_mean": 0.8911504607000867,
            "chrf_p50": 0.9439452289259735,
            "chrf_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 114.0,
            "total_p50_ms": 115.51182297989726,
            "ttft_p50_ms": 14.11191700026393
          },
          "model_id": "Qwen/Qwen2.5-7B-Instruct",
          "run_id": "019e3b39-c5bb-7124-816d-7fc40457fe8f",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/d09f9fe94ca2.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "chrf_mean": 0.7462276977216808,
            "chrf_p50": 0.6871799290256919,
            "chrf_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 199.0,
            "total_p50_ms": 291.57503705937415,
            "ttft_p50_ms": 16.7668896028772
          },
          "model_id": "google/gemma-2-9b-it",
          "run_id": "019e3b97-89b8-78d1-942f-d5fbdfe20496",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/f8c8df08d41d.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "chrf_mean": 0.774943339565713,
            "chrf_p50": 0.7613628666260246,
            "chrf_p95": 0.9600128369704749,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 151.0,
            "total_p50_ms": 140.47661738004535,
            "ttft_p50_ms": 14.155130949802697
          },
          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
          "run_id": "019e3b30-7ea3-7549-9305-d47c44fa944c",
          "signed": true
        }
      ],
      "suite_id": "llm.mt.flores-200-mini-en-fr"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/1834d93414ca.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.875,
            "accuracy_p05": 0.35000000000000003,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 10.0,
            "total_p50_ms": 32.813978497870266,
            "ttft_p50_ms": 13.404008466750383
          },
          "model_id": "Qwen/Qwen2.5-7B-Instruct",
          "run_id": "019e3b39-8700-741e-b68b-38daaf934d8e",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/449321e3d448.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.75,
            "accuracy_p05": 0.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 10.0,
            "total_p50_ms": 23.73979240655899,
            "ttft_p50_ms": 14.118077466264367
          },
          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
          "run_id": "019e3b30-22d4-7b3f-8ea0-60c27cfa3a75",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/4fa1e1b0190b.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.75,
            "accuracy_p05": 0.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 208.0,
            "total_p50_ms": 31.71589458361268,
            "ttft_p50_ms": 11.241725413128734
          },
          "model_id": "microsoft/Phi-3.5-mini-instruct",
          "run_id": "019e3b5d-2482-754c-b3f9-bc5d986b6775",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/56036b3d02d0.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 10.0,
            "total_p50_ms": 49.15515461470932,
            "ttft_p50_ms": 32.531968085095286
          },
          "model_id": "meta-llama/Llama-3.1-70B-Instruct",
          "run_id": "019e3b92-3a2c-72dc-ae4f-bc5725addc7e",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/9a76ba989bf7.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.5,
            "accuracy_p05": 0.0,
            "accuracy_p50": 0.5,
            "accuracy_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 18.0,
            "total_p50_ms": 37.80392243061215,
            "ttft_p50_ms": 12.406649067997932
          },
          "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
          "run_id": "019e3b45-b5dc-706d-8451-de6a21e02d4e",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/aa70cf22464c.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 8.0,
            "n_samples": 8.0,
            "ok_rate": 1.0,
            "tokens_out_total": 10.0,
            "total_p50_ms": 50.91781844384968,
            "ttft_p50_ms": 17.078845528885722
          },
          "model_id": "google/gemma-2-9b-it",
          "run_id": "019e3b97-635f-7dcf-a7a7-ae8ab4f0e142",
          "signed": true
        }
      ],
      "suite_id": "llm.quality.arithmetic-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/172a64fbaa97.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 373.0,
            "total_p50_ms": 359.22502097673714,
            "ttft_p50_ms": 45.01181759405881
          },
          "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
          "run_id": "019e3b6f-9bdd-7f8c-bb0c-19b92c6cd379",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/77ee6cc698ab.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 150.0,
            "total_p50_ms": 176.79402697831392,
            "ttft_p50_ms": 29.440293554216623
          },
          "model_id": "Qwen/Qwen2-VL-7B-Instruct",
          "run_id": "019e3b85-fc73-7f06-b94d-d553f9b579a4",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/7f9d9b60b1f6.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 627.0,
            "total_p50_ms": 473.40473800431937,
            "ttft_p50_ms": 11.700316448695958
          },
          "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
          "run_id": "019e3b45-a60d-751c-8c16-6609935e9ab2",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/81af7eb9aff9.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 205.0,
            "total_p50_ms": 142.03195343725383,
            "ttft_p50_ms": 16.894257394596934
          },
          "model_id": "google/gemma-2-9b-it",
          "run_id": "019e3b97-5288-72d6-919b-3df6be2e8a30",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/8d32c41ae6ca.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 219.0,
            "total_p50_ms": 101.81045962963253,
            "ttft_p50_ms": 14.079780085012317
          },
          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
          "run_id": "019e3b30-0ded-75fb-88c1-f76a8f2a8d13",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/9d3ae7a8bf8e.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 943.0,
            "total_p50_ms": 499.619405483827,
            "ttft_p50_ms": 11.210122494958341
          },
          "model_id": "microsoft/Phi-3.5-mini-instruct",
          "run_id": "019e3b5d-1176-7e5d-9ef9-98861cbb1681",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/b63b9504c2bd.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 267.0,
            "total_p50_ms": 435.95292198006064,
            "ttft_p50_ms": 32.6144389109686
          },
          "model_id": "meta-llama/Llama-3.1-70B-Instruct",
          "run_id": "019e3b92-2429-71e2-af7f-4be1a52577ae",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/edd4737070e8.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 217.0,
            "total_p50_ms": 90.95699898898602,
            "ttft_p50_ms": 13.955260976217687
          },
          "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct",
          "run_id": "019e3b3f-be50-771d-b8c6-af9899083050",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/fe5e3b276d77.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 421.0,
            "total_p50_ms": 328.78762506879866,
            "ttft_p50_ms": 14.120946056209505
          },
          "model_id": "Qwen/Qwen2.5-7B-Instruct",
          "run_id": "019e3b39-7723-777d-84ff-686a8c659d19",
          "signed": true
        }
      ],
      "suite_id": "llm.quality.factual-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/15f036c3e160.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 1.0,
            "accuracy_p05": 1.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "drift_rate": 0.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 1.0,
            "persona_consistency_p50": 1.0,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1861.0,
            "total_p50_ms": 8591.797930421308,
            "ttft_p50_ms": 33.45352504402399
          },
          "model_id": "meta-llama/Llama-3.1-70B-Instruct",
          "run_id": "019e3b92-bf6e-7d98-9607-c9409191ccbc",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/341d320329a2.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.96,
            "accuracy_p05": 0.8400000000000001,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "drift_rate": 0.2,
            "mean_drift_turn": 4.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 0.96,
            "persona_consistency_p50": 1.0,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1620.0,
            "total_p50_ms": 1595.7139120437205,
            "ttft_p50_ms": 11.605069041252136
          },
          "model_id": "microsoft/Phi-3.5-mini-instruct",
          "run_id": "019e3b5d-67aa-720e-a3ee-2d8947cbfb1d",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/37d10d0956b7.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.76,
            "accuracy_p05": 0.32000000000000006,
            "accuracy_p50": 0.8,
            "accuracy_p95": 1.0,
            "drift_rate": 0.6,
            "mean_drift_turn": 2.6666666666666665,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 0.76,
            "persona_consistency_p50": 0.8,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1086.0,
            "total_p50_ms": 1210.5122227221727,
            "ttft_p50_ms": 14.475372852757573
          },
          "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct",
          "run_id": "019e3b3f-e948-7805-a193-cab7df6d0960",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/65908e52253d.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.96,
            "accuracy_p05": 0.8400000000000001,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "drift_rate": 0.2,
            "mean_drift_turn": 4.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 0.96,
            "persona_consistency_p50": 1.0,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1842.0,
            "total_p50_ms": 2869.612350128591,
            "ttft_p50_ms": 14.77262913249433
          },
          "model_id": "meta-llama/Llama-3.1-8B-Instruct",
          "run_id": "019e3b30-65a9-7861-80f9-dca6bb363757",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/6d91ef5acf07.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.9199999999999999,
            "accuracy_p05": 0.6799999999999999,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "drift_rate": 0.2,
            "mean_drift_turn": 2.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 0.9199999999999999,
            "persona_consistency_p50": 1.0,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1124.0,
            "total_p50_ms": 1731.7324923351407,
            "ttft_p50_ms": 14.25657793879509
          },
          "model_id": "Qwen/Qwen2.5-7B-Instruct",
          "run_id": "019e3b39-b322-7a92-aad2-b6166fcb8126",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/988cf429b546.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.8800000000000001,
            "accuracy_p05": 0.8,
            "accuracy_p50": 0.8,
            "accuracy_p95": 1.0,
            "drift_rate": 0.6,
            "mean_drift_turn": 2.6666666666666665,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 0.8800000000000001,
            "persona_consistency_p50": 0.8,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1350.0,
            "total_p50_ms": 2672.806394053623,
            "ttft_p50_ms": 57.53244902007282
          },
          "model_id": "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
          "run_id": "019e3b6f-dda4-7491-b6c9-b23803059aba",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/b1ba8b72d527.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.96,
            "accuracy_p05": 0.8400000000000001,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "drift_rate": 0.2,
            "mean_drift_turn": 4.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "persona_consistency_mean": 0.96,
            "persona_consistency_p50": 1.0,
            "persona_consistency_p95": 1.0,
            "tokens_out_total": 1698.0,
            "total_p50_ms": 2371.0469531361014,
            "ttft_p50_ms": 12.365081114694476
          },
          "model_id": "mistralai/Mistral-7B-Instruct-v0.3",
          "run_id": "019e3b45-f3f4-7559-af8e-69e3706e6c27",
          "signed": true
        },
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/ef9f4b42b9c2.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "n_ok": 0.0,
            "n_samples": 5.0,
            "ok_rate": 0.0
          },
          "model_id": "google/gemma-2-9b-it",
          "run_id": "019e3b97-72de-7e14-8576-f4e6f36c6c70",
          "signed": true
        }
      ],
      "suite_id": "llm.quality.persona-consistency-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/3db3db19d903.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.2,
            "accuracy_p05": 0.0,
            "accuracy_p50": 0.0,
            "accuracy_p95": 1.0,
            "n_ok": 10.0,
            "n_samples": 10.0,
            "ok_rate": 1.0,
            "tokens_out_total": 286.0,
            "total_p50_ms": 192.41042656358331,
            "ttft_p50_ms": 11.22528756968677
          },
          "model_id": "microsoft/Phi-3.5-mini-instruct",
          "run_id": "019e3b5d-39b2-7763-a9c2-b05da554850d",
          "signed": true
        }
      ],
      "suite_id": "llm.quality.reasoning-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/7b8d5bcf8a61.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.6,
            "accuracy_p05": 0.0,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "tokens_out_total": 13.0,
            "total_p50_ms": 84.88627313636243,
            "ttft_p50_ms": 84.88627313636243
          },
          "model_id": "Qwen/Qwen2-VL-7B-Instruct",
          "run_id": "019e3b85-85be-7975-8434-5e6de057cb2b",
          "signed": true
        }
      ],
      "suite_id": "vision.understanding.chart-qa-mini"
    },
    {
      "entries": [
        {
          "engine": "vllm",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/ca49934392e9.json",
          "hardware_class": "8x NVIDIA H100 80GB HBM3",
          "metrics": {
            "accuracy": 0.8,
            "accuracy_p05": 0.2,
            "accuracy_p50": 1.0,
            "accuracy_p95": 1.0,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "tokens_out_total": 36.0,
            "total_p50_ms": 160.05621291697025,
            "ttft_p50_ms": 160.05621291697025
          },
          "model_id": "Qwen/Qwen2-VL-7B-Instruct",
          "run_id": "019e3b85-73f3-7c52-81c3-0e2b87c0acc0",
          "signed": true
        }
      ],
      "suite_id": "vision.understanding.ocr-mini"
    },
    {
      "entries": [
        {
          "engine": "whisper-http",
          "engine_version": "unknown",
          "envelope_url": "/bench/leaderboard/envelopes/1cd8f74bac26.json",
          "hardware_class": "1x NVIDIA RTX 4000 Ada Generation Laptop GPU",
          "metrics": {
            "audio_path_resolved_count": 5.0,
            "energy_joules_total": 637.6923091058983,
            "joules_per_audio_second": 32.329141146053146,
            "n_ok": 5.0,
            "n_samples": 5.0,
            "ok_rate": 1.0,
            "power_avg_w": 11.512225364963504,
            "power_peak_w": 40.499,
            "total_audio_duration_s": 19.725,
            "total_p50_ms": 652.7784169884399,
            "wer_mean": 0.06999999999999999,
            "wer_p50": 0.0,
            "wer_p95": 0.21999999999999997
          },
          "model_id": "Systran/faster-whisper-large-v3",
          "run_id": "019e5ff4-79e4-7403-aa27-603a8fd37d02",
          "signed": true
        }
      ],
      "suite_id": "voice.transcription.librispeech-clean-mini"
    }
  ],
  "schema": "inferencebench-leaderboard.v1"
}