{
"type": "SET_VALUE",
"ref": "/apps/knowledge/topics/courses/direct-preference-optimization-your-language-model--dpo-direct-preference-optimization/.info",
"value": {
"title": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model —",
"description": "DPO introduces a simple classification loss that directly optimizes language model policies on human preference data, eliminating the need for reinforcement learning while maintaining theoretical equivalence to the RLHF objective.",
"created_at": 1775226524818,
"created_by": "0x64e3D107FB4B6012590c43F9318DfBf5F8f8e81D"
}
}