From d2256338b82730fef647a3cd8b65bc9040a7d73e Mon Sep 17 00:00:00 2001 From: Andrew Murray Date: Thu, 20 Apr 2023 23:15:20 +1000 Subject: [PATCH] Use later value for duplicate xref entries --- Tests/images/duplicate_xref_entry.pdf | Bin 0 -> 3326 bytes Tests/test_pdfparser.py | 6 ++++++ src/PIL/PdfParser.py | 9 +++------ 3 files changed, 9 insertions(+), 6 deletions(-) create mode 100644 Tests/images/duplicate_xref_entry.pdf diff --git a/Tests/images/duplicate_xref_entry.pdf b/Tests/images/duplicate_xref_entry.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f57a57d61c6ad0649448af316f689db4e7f1322e GIT binary patch literal 3326 zcmeH}S#Z-v7{?{cr{WAa2{dFvvpL)(9cpQ1`C?K!#KsU>hQvt=rPBu=n?xDMN@V3? zrVr4YH+r{EK<~cMmR?V#rRN(xfxeX9?GyB-54hh-K4N>ADRh|WbXZv$>)UU4zfY_G z_mg)x7QYVrWZsL?8cFITgHlUqSjlG91%yQ(Ju+loMBs-qnleu`UPdBPQ&R&2yfC&j zLy!dCA+!3)F536e(v=uhw)HjrEf+<1Cht5G`bra+-sVeSG25c>$rMtTYEd|@%5sv zb=~dleWCt!B9>*jqc?JWSQ_y8WrcSs#sUif`UBv~_gs z=392`ymePPL&hg2m8rZwH@~pBwENDx?!M>V`|jWOz=IDx{K%t^J^sX@C!c!ynP;DS z{)HD`dilsJufF#B8*jaR?45Vtd;fzEKl=ESPe1$o#K}{qzxw){Z@>HghaZ1B`|~fq z{`UJHf1dk`=EX9cnHF5l#A@>LKwcKBm9si%UaVySPR?Vsbz*zd#t}zywz*5%<7^q+ zfAH{8SGPZLW>rc%&adu~PkYbO)QrsjSz>!HDYJ57mApuq>@K|!@nqas$i;ijjE_d?$oguT~S4{zapYpI=RtG zD^+gN(?~Fbi>YXYuTMN!nR-movjp{>FEBbom^`ERL#(ubDQYeTWeoMj)=Q$~7iCGr zQb3eLyTD_cnz>+Sxn3=5WSkdKh&SV;R}>5c{6RF1N;VvTSd5-r*%pz*wKj~!dOtA$ z8(1?|j6`Z}7)O?k)wQIOh1yHEIiadLqD43Xl~CDDAXH%}H?AW3e2hKK>q$*F^1xCg z!h5}&w#U7fQ0ei1>{Y|1x3;JROyAZjw$z}gZa^JVs$e9QVPs^UK(M3LObXR5c!fz9 zM%5K2RkQYg%p@w5f6XK+u8Uz3)J$3f&5zJ3Ce`YmdR`_d;bp+P{8Yjv0*Rtjp^^mO zz6Mh01;_vmQ(DPLs@Q?kB|H6Nwu&tXpTnjN$OAh;8!FQchh?a>)ix>UuOpchia?32 z_QN4*8Oe-J5r;x%Jj>Sz1nc~yaCq4obrduJP{JUV=_e4GM+WpvBpSO@!$c_(4wy{* E8-kd> ]" assert pdf_repr(PdfBinary(b"\x90\x1F\xA0")) == b"<901FA0>" + + +def test_duplicate_xref_entry(): + pdf = PdfParser("Tests/images/duplicate_xref_entry.pdf") + assert pdf.xref_table.existing_entries[6][0] == 1197 + pdf.close() diff --git a/src/PIL/PdfParser.py b/src/PIL/PdfParser.py index 1b3cb52a2..dc1012f54 100644 --- a/src/PIL/PdfParser.py +++ b/src/PIL/PdfParser.py @@ -957,14 +957,11 @@ class PdfParser: check_format_condition(m, "xref entry not found") offset = m.end() is_free = m.group(3) == b"f" - generation = int(m.group(2)) if not is_free: + generation = int(m.group(2)) new_entry = (int(m.group(1)), generation) - check_format_condition( - i not in self.xref_table or self.xref_table[i] == new_entry, - "xref entry duplicated (and not identical)", - ) - self.xref_table[i] = new_entry + if i not in self.xref_table: + self.xref_table[i] = new_entry return offset def read_indirect(self, ref, max_nesting=-1):