@@ -51,6 +51,8 @@ def runProcess(self):
51
51
52
52
self .saveRecords ()
53
53
self .commitChanges ()
54
+
55
+ logger .info (f'Ingested { len (self .records )} LOC records' )
54
56
55
57
56
58
def importLOCRecords (self , startTimeStamp = None ):
@@ -83,6 +85,9 @@ def importOpenAccessRecords(self, count, customTimeStamp):
83
85
# An HTTP error will occur when the sp parameter value
84
86
# passes the last page number of the collection search results
85
87
while sp < 100000 :
88
+ if self .ingestLimit and count >= self .ingestLimit :
89
+ break
90
+
86
91
openAccessURL = '{}&sp={}' .format (LOC_ROOT_OPEN_ACCESS , sp )
87
92
jsonData = self .fetchPageJSON (openAccessURL )
88
93
LOCData = jsonData .json ()
@@ -129,6 +134,9 @@ def importDigitizedRecords(self, count, customTimeStamp):
129
134
# An HTTP error will occur when the sp parameter value
130
135
# passes the last page number of the collection search results
131
136
while sp < 100000 :
137
+ if self .ingestLimit and count >= self .ingestLimit :
138
+ break
139
+
132
140
digitizedURL = '{}&sp={}' .format (LOC_ROOT_DIGIT , sp )
133
141
jsonData = self .fetchPageJSON (digitizedURL )
134
142
LOCData = jsonData .json ()
@@ -170,14 +178,17 @@ def processLOCRecord(self, record):
170
178
try :
171
179
LOCRec = LOCMapping (record )
172
180
LOCRec .applyMapping ()
181
+
182
+ if LOCRec .record .authors is None :
183
+ logger .warning (f'Unable to map author in LOC record { LOCRec .record } ' )
184
+ return
185
+
173
186
self .addHasPartMapping (record , LOCRec .record )
174
187
self .storePDFManifest (LOCRec .record )
175
188
self .storeEpubsInS3 (LOCRec .record )
176
189
self .addDCDWToUpdateList (LOCRec )
177
-
178
- except (MappingError , HTTPError , ConnectionError , IndexError , TypeError ) as e :
179
- logger .exception (e )
180
- logger .warn (LOCError ('Unable to process LOC record' ))
190
+ except Exception :
191
+ logger .exception (f'Unable to process LOC record' )
181
192
182
193
def addHasPartMapping (self , resultsRecord , record ):
183
194
if 'pdf' in resultsRecord ['resources' ][0 ].keys ():
0 commit comments