modify scripts
This commit is contained in:
@ -335,6 +335,11 @@ class RasterisedDocumentParser(DocumentParser):
|
|||||||
os.environ["OMP_THREAD_LIMIT"] = "1"
|
os.environ["OMP_THREAD_LIMIT"] = "1"
|
||||||
VALID_TEXT_LENGTH = 50
|
VALID_TEXT_LENGTH = 50
|
||||||
|
|
||||||
|
# Skip the OCR process entirely to save time.
|
||||||
|
self.text = "defautl text"
|
||||||
|
self.log.debug("skipping reading file entirely.")
|
||||||
|
return
|
||||||
|
|
||||||
if mime_type == "application/pdf":
|
if mime_type == "application/pdf":
|
||||||
text_original = self.extract_text(None, document_path)
|
text_original = self.extract_text(None, document_path)
|
||||||
original_has_text = (
|
original_has_text = (
|
||||||
|
|||||||
Reference in New Issue
Block a user