@@ -54,6 +54,15 @@ void Pac_Convert(Ztring& ToConvert, const codepage& CodePage) {
54
54
}
55
55
}
56
56
57
+ // ---------------------------------------------------------------------------
58
+ enum Pac_format { // How a part of a PAC text line is encoded; a 0x1F byte followed by a signature announces the format of the next part
59
+ Format_8bit, // Default when no marker was found; content is read as ISO 8859-1
60
+ Format_Skip3, // 0x1F then "C0".."C9": 3-byte marker, skipped as "(Unknown)"
61
+ Format_Skip4, // 0x1F then "R.0".."R.9": 4-byte marker, skipped as "(Unknown)"
62
+ Format_Utf8, // 0x1F then EF BB BF: 4-byte "UTF-8 start" marker; content is read as UTF-8
63
+ Format_16bit, // 0x1F then "W16.": 5-byte "W16 start" marker; content is walked 2 bytes at a time
64
+ };
65
+
57
66
// ***************************************************************************
58
67
// Constructor/Destructor
59
68
// ***************************************************************************
@@ -253,107 +262,151 @@ void File_Pac::Data_Parse()
253
262
}
254
263
}
255
264
ZtringList Content;
256
- while (Element_Size - Element_Offset >= 1 ) {
257
- int64u Current;
258
- if (!Element_Code) {
259
- int8u Start;
260
- Peek_L1 (Start);
261
- if (Start != 0xFE ) {
262
- Trusted_IsNot (" 0xFE" );
263
- Skip_XX (Element_Size-Element_Offset, " (Unknown)" );
264
- break ;
265
- }
266
- Skip_L1 ( " 0xFE" );
267
- Skip_L1 ( " Horizontal alignment" );
268
- Skip_L1 ( " (Unknown)" );
269
- for (Current = Element_Offset; Current < Element_Size; Current++) {
270
- if (Buffer[Buffer_Offset + Current] == 0xFE ) {
265
+ auto NextPartIsStart = true ;
266
+ auto NextPartFormat = Format_8bit;
267
+ while (Element_Offset < Element_Size) {
268
+ if (NextPartIsStart) {
269
+ if (!Element_Code) {
270
+ int8u Start;
271
+ Peek_L1 (Start);
272
+ auto Size = Element_Size - Element_Offset;
273
+ if (Start != 0xFE || Size <= 2 ) {
274
+ Trusted_IsNot (" 0xFE" );
275
+ Skip_XX (Size , " (Unknown)" );
271
276
break ;
272
277
}
278
+ Skip_L1 ( " Line start marker" );
279
+ Skip_L1 ( " Horizontal alignment" );
280
+ Skip_L1 ( " (Unknown)" );
273
281
}
282
+ Content.resize (Content.size () + 1 );
283
+ NextPartIsStart = false ;
274
284
}
275
- else {
276
- Current = Element_Size;
277
- }
278
- Content.resize (Content.size () + 1 );
279
- auto Size = Current - Element_Offset;
280
- bool IsUtf8 = false ;
281
- bool IsW16 = false ;
282
- while (Size ) {
283
- if (Size > 2 ) {
284
- size_t Pos = Buffer_Offset + (size_t )Element_Offset;
285
- if (Buffer[Pos] == 0x1F && Buffer[Pos + 1 ] == ' C' && Buffer[Pos + 2 ] >= ' 0' && Buffer[Pos + 2 ] <= ' 9' ) {
286
- Skip_B3 ( " (Unknown)" );
287
- Size -= 3 ;
288
- }
289
- }
290
- if (Size >= 5 ) {
291
- int64u Probe;
292
- Peek_B5 (Probe);
293
- if (Probe >> 8 == 0x1FEFBBBF ) {
294
- Skip_B4 ( " UTF-8 start" );
295
- Size -= 4 ;
296
- IsUtf8 = true ;
297
- }
298
- if (Probe == 0x1F5731362ELL ) {
299
- Skip_B5 ( " W16 start" );
300
- Size -= 5 ;
301
- IsW16 = true ;
302
- }
303
- }
304
- if (IsUtf8) {
305
- int64u End = Element_Offset;
306
- for (; End < Element_Size; End++) {
307
- const auto Data = Buffer[Buffer_Offset + End];
308
- if (Data == 0x2E || Data == 0xFF ) {
285
+
286
+ auto Element_Middle = Element_Offset;
287
+ auto Format = NextPartFormat;
288
+ NextPartFormat = Format_8bit;
289
+ for (Element_Middle = Element_Offset; Element_Middle < Element_Size; Element_Middle += Format == Format_16bit ? 2 : 1 ) {
290
+ const auto Value = Buffer[Buffer_Offset + Element_Middle];
291
+ if (Value == 0x1F ) {
292
+ auto Buffer_Current = Buffer + Buffer_Offset + (size_t )Element_Middle + 1 ;
293
+ auto NextPartMaxSize = Element_Size - Element_Middle;
294
+ if (NextPartMaxSize >= 3 ) {
295
+ auto Probe = BigEndian2int16u (Buffer_Current);
296
+ if (Probe >= 0x4330 && Probe <= 0x4339 ) { // C0 - C9
297
+ NextPartFormat = Format_Skip3;
309
298
break ;
310
299
}
311
300
}
312
- Get_UTF8 (End - Element_Offset, Content.back (), " Content" );
313
- if (Element_Offset < Element_Size) {
314
- if (Buffer[Buffer_Offset + Element_Offset] == 0xFF ) {
315
- Skip_L1 ( " Dot" );
316
- Content.back () += __T (' .' );
301
+ if (NextPartMaxSize > 3 ) {
302
+ auto Probe = BigEndian2int24u (Buffer_Current);
303
+ if (Probe >= 0x522E30 && Probe <= 0x522E39 ) { // R.0 - R.9
304
+ NextPartFormat = Format_Skip4;
305
+ break ;
306
+ }
307
+ if (Probe == 0xEFBBBF ) {
308
+ NextPartFormat = Format_Utf8;
309
+ break ;
317
310
}
318
- else { // 0x2E
319
- Skip_B1 ( " UTF-8 end" );
320
- IsUtf8 = false ;
311
+ }
312
+ if (NextPartMaxSize >= 5 ) {
313
+ auto Probe = BigEndian2int32u (Buffer_Current);
314
+ if (Probe == 0x5731362E ) { // W16.
315
+ NextPartFormat = Format_16bit;
316
+ break ;
321
317
}
322
- Size -= 1 ;
323
318
}
324
- Count_UTF8++;
325
319
}
326
- else if (IsW16) {
327
- Skip_XX (Size , " Content" );
328
- Content.back ().resize (Size / 2 );
329
- Count_2byte++;
320
+ if (Value == 0xFE && !Element_Code) {
321
+ NextPartIsStart = true ;
322
+ break ;
330
323
}
331
- else {
332
- Get_ISO_8859_1 (Size , Content.back (), " Content" );
333
- bool Is8bit = false ;
334
- for (const auto & Character : Content.back ()) {
335
- if ((wchar_t )Character >= 0x80 ) {
336
- Is8bit = true ;
337
- }
324
+ }
325
+
326
+ // Current
327
+ auto Size = Element_Middle - Element_Offset;
328
+ switch (Format) {
329
+ case Format_8bit: {
330
+ Ztring Value;
331
+ Get_ISO_8859_1 (Size , Value, " Content" );
332
+ bool Is8bit = false ;
333
+ for (const auto & Character : Value) {
334
+ if ((unsigned )Character >= 0x80 ) {
335
+ Is8bit = true ;
338
336
}
339
- if (Is8bit) {
340
- Count_1byte8++;
341
- auto & Line = Content.back ();
342
- if (Line.size () < numeric_limits<int >::max ()) {
343
- int Max = (int )Line.size ();
344
- for (int i = Max; i > 0 ; i--) {
345
- const auto Character = Line[i];
346
- if ((wchar_t )Character >= 0xE0 ) {
347
- Line.erase (i, 1 ); // Not supported but we currently don't need the real text
348
- }
349
- }
337
+ }
338
+ if (Is8bit) {
339
+ Count_1byte8++;
340
+ for (auto i = Value.size () - 1 ; i; --i) {
341
+ const auto Character = Value[i];
342
+ if ((unsigned )Character >= 0xE0 ) {
343
+ Value.erase (i, 1 ); // Content reading not supported but no need for counting
350
344
}
351
345
}
352
- else {
353
- Count_1byte7++;
346
+ }
347
+ else if (Size ) {
348
+ Count_1byte7++;
349
+ }
350
+ Content.back () += Value;
351
+ break ;
352
+ }
353
+ case Format_Skip4:
354
+ case Format_Skip3: {
355
+ break ;
356
+ }
357
+ case Format_Utf8: {
358
+ auto Element_UTF8End = Element_Offset;
359
+ for (; Element_UTF8End < Element_Middle; ++Element_UTF8End) {
360
+ const auto Data = Buffer[Buffer_Offset + Element_UTF8End];
361
+ if (Data == 0x2E || Data == 0xFF ) {
362
+ Ztring Value;
363
+ Get_UTF8 (Element_UTF8End - Element_Offset, Value, " Content" );
364
+ Content.back () += Value;
365
+ if (Data == 0xFF ) {
366
+ Skip_L1 ( " Dot" );
367
+ Content.back () += __T (' .' );
368
+ continue ;
369
+ }
370
+ Skip_C1 ( " UTF-8 end" );
371
+ NextPartFormat = Format_8bit;
354
372
}
355
373
}
356
- Size = Current - Element_Offset;
374
+ Count_UTF8++;
375
+ break ;
376
+ }
377
+ case Format_16bit: {
378
+ auto Current = (int16u*)(Buffer + Buffer_Offset + (size_t )Element_Offset);
379
+ auto End = Current + Size / 2 ;
380
+ for (; Current < End; ++Current) {
381
+ Content.back () += (wchar_t )*Current; // Content reading not supported but no need for counting, we need only the values < 0x80
382
+ }
383
+ Skip_XX (Size , " W16 content" );
384
+ Count_2byte++;
385
+ break ;
386
+ }
387
+ }
388
+
389
+ // Next
390
+ switch (NextPartFormat) {
391
+ case Format_8bit: {
392
+ break ;
393
+ }
394
+ case Format_Skip3: {
395
+ Skip_C3 ( " (Unknown)" );
396
+ break ;
397
+ }
398
+ case Format_Skip4: {
399
+ Skip_C4 ( " (Unknown)" );
400
+ break ;
401
+ }
402
+ case Format_Utf8: {
403
+ Skip_C4 ( " UTF-8 start" );
404
+ break ;
405
+ }
406
+ case Format_16bit:{
407
+ Skip_C5 ( " W16 start" );
408
+ break ;
409
+ }
357
410
}
358
411
}
359
412
@@ -481,16 +534,18 @@ void File_Pac::Data_Parse()
481
534
for (const auto & Line : Content) {
482
535
auto CountOfCharsPerLine = Line.size ();
483
536
size_t Pos = 0 ;
484
- for (;;) {
485
- Pos = Line.find (' <' , Pos);
486
- if (Pos == string::npos) {
487
- break ;
537
+ bool ItalicBeginFound = false ;
538
+ for (const auto & Value : Line) {
539
+ if (Value < 0x20 ) { // Non printable
540
+ CountOfCharsPerLine--;
541
+ }
542
+ if (Value == ' <' ) {
543
+ ItalicBeginFound = true ;
488
544
}
489
- Pos = Line. find ( ' > ' , Pos + 1 );
490
- if (Pos == string::npos) {
491
- break ;
545
+ if (Value == ' < ' ) {
546
+ CountOfCharsPerLine -= 2 ; // < and > are markers of italic
547
+ ItalicBeginFound = false ;
492
548
}
493
- CountOfCharsPerLine += 2 ; // < and > are marker of italic
494
549
}
495
550
if (MaxCountOfCharsPerLine < CountOfCharsPerLine) {
496
551
MaxCountOfCharsPerLine = CountOfCharsPerLine;
0 commit comments