modify scripts
This commit is contained in:
@ -335,6 +335,11 @@ class RasterisedDocumentParser(DocumentParser):
|
||||
os.environ["OMP_THREAD_LIMIT"] = "1"
|
||||
VALID_TEXT_LENGTH = 50
|
||||
|
||||
# Skip the OCR process entirely to save time.
|
||||
self.text = "defautl text"
|
||||
self.log.debug("skipping reading file entirely.")
|
||||
return
|
||||
|
||||
if mime_type == "application/pdf":
|
||||
text_original = self.extract_text(None, document_path)
|
||||
original_has_text = (
|
||||
|
||||
Reference in New Issue
Block a user