modify scripts

This commit is contained in:
2025-11-07 09:03:35 +08:00
parent 808dbaa985
commit 17356c79f9

View File

@@ -335,6 +335,11 @@ class RasterisedDocumentParser(DocumentParser):
os.environ["OMP_THREAD_LIMIT"] = "1"
VALID_TEXT_LENGTH = 50
# Skip the OCR process entirely to save time.
self.text = "defautl text"
self.log.debug("skipping reading file entirely.")
return
if mime_type == "application/pdf":
text_original = self.extract_text(None, document_path)
original_has_text = (