[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"dataset":3},{"title":4,"desc":5,"bannerImg":6,"date":7,"bannerLinks":8,"weight":9,"category":10,"description":5,"content":11,"metaBannerImg":178},"OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations","OmniDocBench is a comprehensive benchmark for evaluating AI in document parsing and content extraction.","\u002Fdatasets-banner-images\u002Fomnidocbench-banner.jpg","2025-05-25","{ \"Blog\":\"https:\u002F\u002Fwww.2077ai.com\u002Fblog\u002FOmniDocBench\",\"Paper\":\"https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.07626\", \"Hugging Face\":\"https:\u002F\u002Fhuggingface.co\u002Fpapers\u002F2412.07626\" }",5,"VLM",{"data":12,"body":15,"toc":173},{"title":13,"description":14},"Introduction","OmniDocBench is a comprehensive benchmark for evaluating AI in document parsing and content extraction. Addressing the limitations of existing benchmarks—namely their narrow coverage and simplistic evaluations—it provides high-quality annotations across 9 diverse sources, including academic papers, handwritten notes, and densely typeset newspapers. 
OmniDocBench effectively reveals weaknesses in top-performing models when they process complex layouts and content structures, highlighting its challenging nature and its potential to drive future progress in Document AI.",{"type":16,"children":17},"root",[18,26,31,58,64,89,94,100,109,115,121,127,132,139,144,150,154,160],{"type":19,"tag":20,"props":21,"children":23},"element","h1",{"id":22},"introduction",[24],{"type":25,"value":13},"text",{"type":19,"tag":27,"props":28,"children":29},"p",{},[30],{"type":25,"value":14},{"type":19,"tag":32,"props":33,"children":39},"div",{"className":34,"style":38},[35,36,37],"img-wrap","has-caption","center","width: 100%; position: relative; margin-bottom: 62px",[40,42,49,50],{"type":25,"value":41},"\n  ",{"type":19,"tag":43,"props":44,"children":48},"img",{"src":45,"alt":46,"style":47},"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251103\u002Fdata_diversity.png","Datasets Overview","width: 100%; max-height: 60vh; object-fit: contain; background: #141414; border-radius: 8px",[],{"type":25,"value":41},{"type":19,"tag":27,"props":51,"children":55},{"className":52,"style":54},[53],"img-text","position: absolute; top: calc(100% + 16px); left: 0; right: 0;text-align: center; overflow: hidden; white-space: nowrap; text-overflow: ellipsis; line-height: 22px; color: #A1A1A1; font-size: 14px",[56],{"type":25,"value":57},"\n    Datasets Overview\n  ",{"type":19,"tag":20,"props":59,"children":61},{"id":60},"dataset-overview",[62],{"type":25,"value":63},"Dataset Overview",{"type":19,"tag":27,"props":65,"children":66},{},[67,73,75,80,82,87],{"type":19,"tag":68,"props":69,"children":70},"strong",{},[71],{"type":25,"value":72},"OmniDocBench",{"type":25,"value":74}," contains ",{"type":19,"tag":68,"props":76,"children":77},{},[78],{"type":25,"value":79},"1355 pages across 9 distinct document types",{"type":25,"value":81},", with ",{"type":19,"tag":68,"props":83,"children":84},{},[85],{"type":25,"value":86},"over 100,000 
fine-grained annotations",{"type":25,"value":88},".",{"type":19,"tag":90,"props":91,"children":93},"donut-chart",{"data":92,"description":63,"title":63},"Academic Papers,129,Slides,241,Books,176,Textbooks,138,Exam Papers,184,Notes,116,Magazines,139,Financial Reports,81,Newspapers,151",[],{"type":19,"tag":20,"props":95,"children":97},{"id":96},"data-samples",[98],{"type":25,"value":99},"Data Samples",{"type":19,"tag":101,"props":102,"children":108},"iframe",{"src":103,"style":104,"frameBorder":105,"allowFullScreen":106,"loading":107},"https:\u002F\u002Fdataset.data4o.xyz\u002Fshare\u002Fdataset\u002Fpreview?datasetId=68cd25872815a79bf22539ec&env=en","width: 100%; height: 800px;","0",true,"lazy",[],{"type":19,"tag":20,"props":110,"children":112},{"id":111},"leaderboard",[113],{"type":25,"value":114},"Leaderboard",{"type":19,"tag":116,"props":117,"children":120},"doxhub-table",{"data":118,"head":119,"title":114},"91.93,PaddleOCR-VL,Specialized VLMs,0.9B,0.039,88.67,91.01,94.85,0.048,90.67,MinerU2.5,Specialized VLMs,1.2B,0.047,88.46,88.22,92.38,0.044,88.85,MonkeyOCR-pro-3B,Specialized VLMs,3B,0.075,87.25,86.78,90.63,0.128,88.41,dots.ocr,Specialized VLMs,3B,0.048,83.22,86.78,90.62,0.053,87.13,MonkeyOCR-3B,Specialized VLMs,3B,0.075,87.45,81.39,85.92,0.129,87.01,Deepseek-OCR,Specialized VLMs,3B,0.073,83.37,84.97,88.8,0.086,86.96,MonkeyOCR-pro-1.2B,Specialized VLMs,1.2B,0.084,85.02,84.24,89.02,0.13,85.59,Nanonets-OCR-s,Specialized VLMs,3B,0.093,85.9,80.14,85.57,0.108,85.56,MinerU2-VLM,Specialized VLMs,0.9B,0.078,80.95,83.54,87.66,0.086,81.79,olmOCR,Specialized VLMs,7B,0.096,86.04,68.92,74.77,0.121,83.21,Dolphin-1.5,Specialized VLMs,0.3B,0.092,80.78,78.06,84.1,0.08,80.98,POINTS-Reader,Specialized VLMs,3B,0.134,79.2,77.13,81.66,0.145,78.83,Mistral OCR,Specialized VLMs,-,0.164,82.84,70.03,78.04,0.144,74.82,OCRFlux,Specialized VLMs,3B,0.193,68.03,75.75,80.23,0.202,74.67,Dolphin,Specialized VLMs,0.3B,0.125,67.85,68.7,77.77,0.124,89.15,Qwen3-VL-235B-A22B-Instruct,General 
VLMs,235B,0.069,88.14,86.21,90.55,0.068,88.03,Gemini-2.5 Pro,General VLMs,-,0.075,85.82,85.71,90.29,0.097,87.02,Qwen2.5-VL,General VLMs,72B,0.094,88.27,82.15,86.22,0.102,82.67,InternVL3.5,General VLMs,241B,0.142,87.23,75,81.28,0.125,80.33,InternVL3,General VLMs,78B,0.131,83.42,70.64,77.74,0.113,75.02,GPT-4o,General VLMs,-,0.217,79.7,67.07,76.09,0.148,86.73,PP-StructureV3,Pipeline Tools,-,0.073,85.79,81.68,89.48,0.073,75.51,Mineru2-pipeline,Pipeline Tools,-,0.209,76.55,70.9,79.11,0.225,71.3,Marker-1.8.2,Pipeline Tools,-,0.206,76.66,57.88,71.17,0.25","Overall,Model,Model Type,Size,TextEdit,FormulaCDM,TableTEDS,TableTEDS-S,Read OrderEdit",[],{"type":19,"tag":20,"props":122,"children":124},{"id":123},"further-analysis",[125],{"type":25,"value":126},"Further Analysis",{"type":19,"tag":27,"props":128,"children":129},{},[130],{"type":25,"value":131},"To gain a deeper understanding of the performance of the evaluated models, this section presents the results of a series of detailed analysis experiments.",{"type":19,"tag":133,"props":134,"children":136},"h2",{"id":135},"vary-standards",[137],{"type":25,"value":138},"Varying Standards",{"type":19,"tag":140,"props":141,"children":143},"image-slider",{"images":142},"[{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F101%20The%20Vary%20Standards%20in%20parsing%20Header%2C%20Footers%2C%20and%20so%20on.png\", \"alt\": \"The Varying Standards in parsing Headers, Footers, and so on\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F102%20The%20Vary%20Standards%20in%20parsing%20Captions.png\",\"alt\": \"The Varying Standards in parsing Captions\" }]",[],{"type":19,"tag":133,"props":145,"children":147},{"id":146},"data-display",[148],{"type":25,"value":149},"Data Display",{"type":19,"tag":140,"props":151,"children":153},{"images":152},"[{ \"src\": 
\"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F01%20Academic%20Paper.png\", \"alt\": \"Academic Paper\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F02%20Books.png\",\"alt\": \"Books\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F03%20Colorful%20Textbook.png\",\"alt\": \"Colorful Textbook\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F04%20Notes.png\",\"alt\": \"Notes\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F05%20Magazines.png\",\"alt\": \"Magazines\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F06%20Financial%20Report.png\",\"alt\": \"Financial Report\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F07%20Newspaper.png\",\"alt\": \"Newspaper\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F08%20Exam%20Paper.png\",\"alt\": \"Exam Paper\" },{ \"src\": \"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002F20251027\u002F09%20Slides.png\",\"alt\": \"Slides\" }]",[],{"type":19,"tag":20,"props":155,"children":157},{"id":156},"bibtex",[158],{"type":25,"value":159},"BibTeX",{"type":19,"tag":161,"props":162,"children":167},"pre",{"className":163,"code":165,"language":156,"meta":166},[164],"language-bibtex","@misc{ouyang2024omnidocbench,\n  title         =     \"OmniDocBench: Benchmarking Diverse PDF Document Parsing with Comprehensive Annotations\",\n  author        =     \"Linke Ouyang and Yuan Qu and Hongbin Zhou and Jiawei Zhu and Rui Zhang and Qunshu Lin and Bin Wang and Zhiyuan Zhao and Man Jiang and Xiaomeng Zhao and Jin Shi and Fan Wu and Pei Chu and Minghao Liu and Zhenxiang Li and Chao Xu and Bo Zhang and Botian Shi and Zhongying Tu and Conghui 
He\",\n  eprint        =     \"2412.07626\",\n  archivePrefix =     \"arXiv\",\n  year          =     \"2024\",\n  primaryClass  =     \"cs.CV\",\n  url           =     \"https:\u002F\u002Farxiv.org\u002Fabs\u002F2412.07626\"\n}\n","",[168],{"type":19,"tag":169,"props":170,"children":171},"code",{"__ignoreMap":166},[172],{"type":25,"value":165},{"title":166,"searchDepth":174,"depth":174,"links":175},2,[176,177],{"id":135,"depth":174,"text":138},{"id":146,"depth":174,"text":149},"https:\u002F\u002Fdoxhub.s3.us-east-1.amazonaws.com\u002F2077ai\u002FBanner_dataset\u002Fbanner_omnidocbench.png"]