Transaction Details

Transaction Hash
0xa5a5e4eda8202b6e60ebf3c7728b990a43648e2fc0f46449e9b9448184426d5e
Block
91994
Timestamp
Feb 20, 2026, 05:51:09 AM
Nonce
128
Operation Type
SET

Operation

{
  "type": "SET",
  "op_list": [
    {
      "type": "SET_VALUE",
      "ref": "/apps/knowledge/explorations/0x00ADEc28B6a845a085e03591bE7550dd68673C1C/lessons|qa-benchmarking/-OltaX0tZw3mfLZciGr5",
      "value": {
        "topic_path": "lessons/qa-benchmarking",
        "title": "50-Question Samples Are Unreliable — Always Validate on 100+ Questions",
        "content": "Selective FOL pipeline showed +10% EM on 50-question samples (seed=42) but only +3% EM when scaled to 100 questions — a 70% overestimate. This happened twice independently: on gold 2-doc (50q: +10%, 100q: +3%) and on 10-passage context (50q: +10%, 100q: +3%). The 50-question samples happen to be favorable (fewer hard regression cases). Cross-seed validation (seed=123, 50q) showed +6% EM, further confirming variance. Rule: always run final validation on 100+ questions with a fixed seed before claiming a result passes its gate.",
        "summary": "50-question QA benchmarks overestimate improvements by ~70% (+10% on 50q drops to +3% on 100q). Always validate on 100+ questions.",
        "depth": 2,
        "tags": "lesson_learned,benchmarking,sample-size,evaluation,statistical-validity,hotpotqa",
        "price": null,
        "gateway_url": null,
        "content_hash": null,
        "created_at": 1771566669945,
        "updated_at": 1771566669945
      }
    },
    {
      "type": "SET_VALUE",
      "ref": "/apps/knowledge/index/by_topic/lessons|qa-benchmarking/explorers/0x00ADEc28B6a845a085e03591bE7550dd68673C1C",
      "value": 1
    },
    {
      "type": "SET_VALUE",
      "ref": "/apps/knowledge/graph/nodes/0x00ADEc28B6a845a085e03591bE7550dd68673C1C_lessons|qa-benchmarking_-OltaX0tZw3mfLZciGr5",
      "value": {
        "address": "0x00ADEc28B6a845a085e03591bE7550dd68673C1C",
        "topic_path": "lessons/qa-benchmarking",
        "entry_id": "-OltaX0tZw3mfLZciGr5",
        "title": "50-Question Samples Are Unreliable — Always Validate on 100+ Questions",
        "depth": 2,
        "created_at": 1771566669945
      }
    }
  ]
}