Skip to content

Commit 7ca99e3

Browse files
[WIP] Add an alternative method for reading files
Relying only on mmap seems to be very slow with some NAS devices
1 parent fce3a36 commit 7ca99e3

File tree

8 files changed

+231
-18
lines changed

8 files changed

+231
-18
lines changed

Source/CLI/Global.cpp

+43
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,40 @@ int global::SetHash(bool Value)
242242
return 0;
243243
}
244244

245+
//---------------------------------------------------------------------------
246+
int global::SetFileOpenMethod(const char* Value)
247+
{
248+
if (strcmp(Value, "mmap") == 0)
249+
{
250+
FileOpenMethod = filemap::method::mmap;
251+
return 0;
252+
}
253+
if (strcmp(Value, "fstream") == 0)
254+
{
255+
FileOpenMethod = filemap::method::fstream;
256+
return 0;
257+
}
258+
if (strcmp(Value, "fopen") == 0)
259+
{
260+
FileOpenMethod = filemap::method::fopen;
261+
return 0;
262+
}
263+
if (strcmp(Value, "open") == 0)
264+
{
265+
FileOpenMethod = filemap::method::open;
266+
return 0;
267+
}
268+
#if defined(_WIN32) || defined(_WINDOWS)
269+
if (strcmp(Value, "createfile") == 0)
270+
{
271+
FileOpenMethod = filemap::method::createfile;
272+
return 0;
273+
}
274+
#endif //defined(_WIN32) || defined(_WINDOWS)
275+
cerr << "Error: unknown io value '" << Value << "'." << endl;
276+
return 1;
277+
}
278+
245279
//---------------------------------------------------------------------------
246280
int global::SetAll(bool Value)
247281
{
@@ -432,6 +466,7 @@ int global::ManageCommandLine(const char* argv[], int argc)
432466
IgnoreLicenseKey = !License.IsSupported_License();
433467
SubLicenseId = 0;
434468
SubLicenseDur = 1;
469+
FileOpenMethod = (filemap::method)-1;
435470
ShowLicenseKey = false;
436471
StoreLicenseKey = false;
437472
DisplayCommand = false;
@@ -748,6 +783,14 @@ int global::ManageCommandLine(const char* argv[], int argc)
748783
if (auto Value = SetAcceptFiles())
749784
return Value;
750785
}
786+
else if (strcmp(argv[i], "--io") == 0)
787+
{
788+
if (i + 1 == argc)
789+
return Error_Missing(argv[i]);
790+
int Value = SetFileOpenMethod(argv[++i]);
791+
if (Value)
792+
return Value;
793+
}
751794
else if (!strcmp(argv[i], "-framerate"))
752795
{
753796
if (OptionsForOtherFiles)

Source/CLI/Global.h

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class global
3838
string LicenseKey;
3939
uint64_t SubLicenseId;
4040
uint64_t SubLicenseDur;
41+
filemap::method FileOpenMethod;
4142
bool IgnoreLicenseKey;
4243
bool ShowLicenseKey;
4344
bool StoreLicenseKey;
@@ -100,6 +101,7 @@ class global
100101
int SetFrameMd5An(bool Value);
101102
int SetFrameMd5FileName(const char* FileName);
102103
int SetHash(bool Value);
104+
int SetFileOpenMethod(const char* Value);
103105
int SetAll(bool Value);
104106

105107
private:

Source/CLI/Main.cpp

+9-7
Original file line numberDiff line numberDiff line change
@@ -89,19 +89,19 @@ struct parse_info
8989
bool IsContainer = false;
9090
bool Problem = false;
9191

92-
bool ParseFile_Input(input_base& Input, bool OverrideCheckPadding = false);
92+
bool ParseFile_Input(input_base& Input, bool OverrideCheckPadding = false, const string* FileName = nullptr);
9393
bool ParseFile_Input(input_base_uncompressed& SingleFile, input& Input, size_t Files_Pos);
9494
};
9595

9696
//---------------------------------------------------------------------------
97-
bool parse_info::ParseFile_Input(input_base& SingleFile, bool OverrideCheckPadding)
97+
bool parse_info::ParseFile_Input(input_base& SingleFile, bool OverrideCheckPadding, const string* FileName)
9898
{
9999
// Init
100100
SingleFile.Actions = Global.Actions;
101101
if (OverrideCheckPadding)
102102
SingleFile.Actions.set(Action_CheckPadding);
103103
SingleFile.Hashes = &Global.Hashes;
104-
SingleFile.FileName = &RAWcooked.OutputFileName;
104+
SingleFile.FileName = (!RAWcooked.OutputFileName.empty() || !FileName) ? &RAWcooked.OutputFileName : FileName;
105105
SingleFile.InputInfo = &InputInfo;
106106

107107
// Parse
@@ -481,7 +481,7 @@ int ParseFile_Uncompressed(parse_info& ParseInfo, size_t Files_Pos)
481481
}
482482

483483
//---------------------------------------------------------------------------
484-
int ParseFile_Compressed(parse_info& ParseInfo)
484+
int ParseFile_Compressed(parse_info& ParseInfo, const string* FileName)
485485
{
486486
// Init
487487
string OutputDirectoryName;
@@ -522,7 +522,8 @@ int ParseFile_Compressed(parse_info& ParseInfo)
522522
matroska* M = new matroska(OutputDirectoryName, &Global.Mode, Ask_Callback, Thread_Pool, &Global.Errors);
523523
M->Quiet = Global.Quiet;
524524
M->NoOutputCheck = NoOutputCheck;
525-
if (ParseInfo.ParseFile_Input(*M))
525+
M->OpenStyle = Global.FileOpenMethod;
526+
if (ParseInfo.ParseFile_Input(*M, false, FileName))
526527
{
527528
ReturnValue = 1;
528529
}
@@ -591,7 +592,7 @@ int ParseFile(size_t Files_Pos)
591592
return 1;
592593

593594
// Compressed content
594-
if (int Value = ParseFile_Compressed(ParseInfo))
595+
if (int Value = ParseFile_Compressed(ParseInfo, ParseInfo.Name))
595596
return Value;
596597
if (ParseInfo.IsDetected)
597598
return 0;
@@ -755,6 +756,7 @@ int main(int argc, const char* argv[])
755756
if (!Value)
756757
{
757758
// Configure for a 2nd pass
759+
auto OutputFileName = Global.OutputFileName;
758760
ParseInfo.Name = NULL;
759761
Global.OutputFileName = Global.Inputs[0];
760762
if (!Global.Actions[Action_Hash]) // If hashes are present in the file, output is checked by using hashes
@@ -772,7 +774,7 @@ int main(int argc, const char* argv[])
772774
// Parse (check mode)
773775
Global.Actions.set(Action_QuickCheckAfterEncode, !Global.Actions[Action_Check]);
774776
Global.Actions.set(Action_Decode, false); // Override config
775-
Value = ParseFile_Compressed(ParseInfo);
777+
Value = ParseFile_Compressed(ParseInfo, &OutputFileName);
776778
if (!Value && !ParseInfo.IsDetected)
777779
{
778780
cout << '\n' << "Error: " << Global.OutputFileName << endl;

Source/Lib/Compressed/Matroska/Matroska.cpp

+21-4
Original file line numberDiff line numberDiff line change
@@ -359,13 +359,16 @@ void matroska::ParseBuffer()
359359
// Check if we can indicate the system that we'll not need anymore memory below this value, without indicating it too much
360360
if (Buffer_Offset > Buffer_Offset_LowerLimit + 1024 * 1024 && Buffer_Offset < Buffer.Size()) // TODO: when multi-threaded frame decoding is implemented, we need to check that all thread don't need anymore memory below this value
361361
{
362-
FileMap->Remap();
362+
FileMap->Remap(Buffer_Offset, Buffer_Offset + 256 * 1024 * 1024);
363363
Buffer = *FileMap;
364+
if (OpenStyle == filemap::method::mmap)
365+
{
364366
if (ReversibilityData)
365367
ReversibilityData->SetBaseData(Buffer.Data());
366368
for (const auto& TrackInfo_Current : TrackInfo)
367369
if (TrackInfo_Current && TrackInfo_Current->ReversibilityData)
368370
TrackInfo_Current->ReversibilityData->SetBaseData(Buffer.Data());
371+
}
369372
Buffer_Offset_LowerLimit = Buffer_Offset;
370373
}
371374

@@ -376,13 +379,16 @@ void matroska::ParseBuffer()
376379
Buffer_Offset = Cluster_Offset;
377380
Cluster_Level = (size_t)-1;
378381

379-
FileMap->Remap();
382+
FileMap->Remap(Buffer_Offset, 256 * 1024 * 1024);
380383
Buffer = *FileMap;
384+
if (OpenStyle == filemap::method::mmap)
385+
{
381386
if (ReversibilityData)
382387
ReversibilityData->SetBaseData(Buffer.Data());
383388
for (const auto& TrackInfo_Current : TrackInfo)
384389
if (TrackInfo_Current && TrackInfo_Current->ReversibilityData)
385390
TrackInfo_Current->ReversibilityData->SetBaseData(Buffer.Data());
391+
}
386392
Buffer_Offset_LowerLimit = Buffer_Offset;
387393
}
388394
}
@@ -799,6 +805,11 @@ void matroska::Segment_Attachments_AttachedFile_FileData_RawCookedTrack_LibraryV
799805
//---------------------------------------------------------------------------
800806
void matroska::Segment_Cluster()
801807
{
808+
IsList = true;
809+
810+
if (FileMap2)
811+
return;
812+
802813
if (RAWcooked_LibraryName.empty())
803814
{
804815
memcpy(Cluster_Levels, Levels, sizeof(Levels));
@@ -808,8 +819,6 @@ void matroska::Segment_Cluster()
808819
return;
809820
}
810821

811-
IsList = true;
812-
813822
// Check if Hashes check is useful
814823
if (Hashes_FromRAWcooked)
815824
{
@@ -850,6 +859,14 @@ void matroska::Segment_Cluster()
850859
Errors->Error(IO_FileChecker, error::type::Undecodable, (error::generic::code)filechecker_issue::undecodable::Format_Undetected, string());
851860
if (ReversibilityData && !FrameWriter_Template->Compound)
852861
InitOutput_Find();
862+
863+
FileMap2 = FileMap;
864+
if (OpenStyle != filemap::method::mmap && this->FileName)
865+
{
866+
FileMap = new filemap;
867+
FileMap->Open_ReadMode(*this->FileName, OpenStyle, 0, 256 * 1024 * 1024);
868+
Buffer = *FileMap;
869+
}
853870
}
854871

855872
//---------------------------------------------------------------------------

Source/Lib/Utils/FileIO/FileIO.cpp

+137-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,11 @@
1111
#include "Lib/Utils/FileIO/FileIO.h"
1212
#include <iostream>
1313
#include <sstream>
14+
#include <fstream>
1415
#if defined(_WIN32) || defined(_WINDOWS)
1516
#include "windows.h"
17+
#include <stdio.h>
18+
#include <fcntl.h>
1619
#include <io.h> // File existence
1720
#include <direct.h> // Directory creation
1821
#define access _access_s
@@ -29,10 +32,96 @@
2932
//---------------------------------------------------------------------------
3033

3134
//---------------------------------------------------------------------------
32-
int filemap::Open_ReadMode(const char* FileName)
35+
// State kept by filemap when the file is read through a buffer instead of
// being memory-mapped (any method other than method::mmap).
struct private_buffered
{
    // Type-erased handle on the opened file. What it really points to depends
    // on the method in use: an ifstream*, a FILE*, a file descriptor cast to a
    // pointer, or a Windows HANDLE.
    // Initialized to null so that an instance never carries an indeterminate
    // handle before the open code assigns it.
    void* F = nullptr;

    // Offset bookkeeping between the file content and the start of the read
    // buffer; updated by the open/remap code.
    // NOTE(review): exact invariant is not visible from here — confirm.
    size_t Data_Shift = 0;

    // Size in bytes of the read buffer (End - Begin at open time).
    size_t MaxSize = 0;
};
41+
42+
//---------------------------------------------------------------------------
43+
int filemap::Open_ReadMode(const char* FileName, method NewStyle, size_t Begin, size_t End)
3344
{
3445
Close();
3546

47+
if (NewStyle != method::mmap)
48+
{
49+
Method = NewStyle;
50+
private_buffered* P = new private_buffered;
51+
P->MaxSize = End - Begin;
52+
size_t FileSize;
53+
54+
switch (Method)
55+
{
56+
default: // case style::fstream:
57+
{
58+
auto F = new ifstream(FileName, ios::binary);
59+
F->seekg(0, F->end);
60+
FileSize = F->tellg();
61+
F->seekg(Begin, F->beg);
62+
P->F = F;
63+
break;
64+
}
65+
case method::fopen:
66+
{
67+
auto F = fopen(FileName, "rb");
68+
fseek(F, 0, SEEK_END);
69+
FileSize = ftell(F);
70+
fseek(F, (long)Begin, SEEK_SET);
71+
P->F = F;
72+
break;
73+
}
74+
case method::open:
75+
{
76+
struct stat Fstat;
77+
if (stat(FileName, &Fstat))
78+
return 1;
79+
FileSize = Fstat.st_size;
80+
#if defined(_WIN32) || defined(_WINDOWS)
81+
auto F = _open(FileName, _O_BINARY | _O_RDONLY | _O_SEQUENTIAL, _S_IREAD);
82+
#else //defined(_WIN32) || defined(_WINDOWS)
83+
auto F = open(FileName, O_RDONLY);
84+
#endif //defined(_WIN32) || defined(_WINDOWS)
85+
if (F == -1)
86+
return 1;
87+
P->F = (void*)F;
88+
break;
89+
}
90+
#if defined(_WIN32) || defined(_WINDOWS)
91+
case method::createfile:
92+
{
93+
DWORD FileSizeHigh;
94+
auto NewFile = CreateFileA(FileName, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
95+
auto FileSizeLow = GetFileSize(NewFile, &FileSizeHigh);
96+
if ((FileSizeLow != INVALID_FILE_SIZE || GetLastError() == NO_ERROR) // If no error (special case with 32-bit max value)
97+
&& (!FileSizeHigh || sizeof(size_t) >= 8)) // Mapping 4+ GiB files is not supported in 32-bit mode
98+
{
99+
FileSize = ((size_t)FileSizeHigh) << 32 | FileSizeLow;
100+
}
101+
else
102+
return 1;
103+
if (Begin)
104+
{
105+
LARGE_INTEGER GoTo;
106+
GoTo.QuadPart = Begin;
107+
if (!SetFilePointerEx(NewFile, GoTo, nullptr, 0))
108+
return 1;
109+
P->Data_Shift = Begin;
110+
}
111+
P->F = NewFile;
112+
break;
113+
}
114+
#endif //defined(_WIN32) || defined(_WINDOWS)
115+
}
116+
117+
auto Buffer = new uint8_t[P->MaxSize];
118+
P->Data_Shift -= P->MaxSize;
119+
AssignBase(Buffer - P->Data_Shift, FileSize);
120+
Private2 = (decltype(Private2))P;
121+
122+
return Remap(Begin, End);
123+
}
124+
36125
size_t NewSize;
37126
#if defined(_WIN32) || defined(_WINDOWS)
38127
auto NewFile = CreateFileA(FileName, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
@@ -99,12 +188,58 @@ inline int munmap_const(const void* addr, size_t length)
99188
#pragma GCC diagnostic pop
100189
#endif
101190
#endif
102-
int filemap::Remap()
191+
int filemap::Remap(size_t Begin, size_t End)
103192
{
104193
// Special case for 0-byte files
105194
if (Empty())
106195
return 0;
107196

197+
if (Method != method::mmap)
198+
{
199+
auto P = (private_buffered*)Private2;
200+
auto Buffer = Data() + P->Data_Shift;
201+
auto Buffer_MaxSize = P->MaxSize;
202+
Begin -= P->Data_Shift;
203+
if (!End)
204+
End = Size();
205+
End -= P->Data_Shift;
206+
auto Buffer_Middle = Buffer + Begin;
207+
auto Buffer_Middle_Size = Buffer_MaxSize - Begin;
208+
memmove((void*)Buffer, (void*)Buffer_Middle, Buffer_Middle_Size);
209+
P->Data_Shift += Begin;
210+
AssignKeepSizeBase(Buffer - P->Data_Shift);
211+
Buffer += Buffer_Middle_Size;
212+
Buffer_MaxSize -= Buffer_Middle_Size;
213+
214+
switch (Method)
215+
{
216+
default: // case style::fstream:
217+
{
218+
auto F = (ifstream*)P->F;
219+
F->read((char*)Buffer, Buffer_MaxSize);
220+
break;
221+
}
222+
case method::fopen:
223+
{
224+
auto F = (FILE*)P->F;
225+
if (fread((char*)Buffer, Buffer_MaxSize, 1, F) != 1)
226+
return 1;
227+
break;
228+
}
229+
case method::open:
230+
return 1;
231+
#if defined(_WIN32) || defined(_WINDOWS)
232+
case method::createfile:
233+
{
234+
ReadFile(P->F, (LPVOID)Buffer, (DWORD)Buffer_MaxSize, nullptr, 0);
235+
break;
236+
}
237+
#endif //defined(_WIN32) || defined(_WINDOWS)
238+
}
239+
240+
return 0;
241+
}
242+
108243
// Close previous map
109244
if (Data())
110245
{

0 commit comments

Comments
 (0)