Merge pull request #73 from RapidAI/develop

SWHL · web-flow · commit aa0fd63e8076 · 2025-04-08T09:23:59.000+08:00
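chore: optimize code (switch to the rapidocr package, re-export helpers from rapid_table.utils, add LoadImage and a colorlog-based Logger)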
diff --git a/demo.py b/demo.py
@@ -7,7 +7,7 @@
 
 from rapid_table import RapidTable, RapidTableInput, VisTable
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # Init
     ocr_engine = RapidOCR()
     vis_ocr = VisRes()
@@ -16,32 +16,45 @@
     table_engine = RapidTable(input_args)
     viser = VisTable()
 
-    img_path = "tests/test_files/table.jpg"
+    img_path = "https://raw.githubusercontent.com/RapidAI/RapidTable/refs/heads/main/tests/test_files/table.jpg"
 
     # OCR
-
-    rapid_ocr_output = ocr_engine(img_path, return_word_box=True)
+    rapid_ocr_output = ocr_engine(img_path)
     ocr_result = list(
         zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores)
     )
     table_results = table_engine(img_path, ocr_result)
+
     # 使用单字识别
     # word_results = rapid_ocr_output.word_results
     # ocr_result = [
     #     [word_result[2], word_result[0], word_result[1]] for word_result in word_results
     # ]
     # table_results = table_engine(img_path, ocr_result)
 
-    table_html_str, table_cell_bboxes = table_results.pred_html, table_results.cell_bboxes
+    table_html_str, table_cell_bboxes = (
+        table_results.pred_html,
+        table_results.cell_bboxes,
+    )
     # Save
     save_dir = Path("outputs")
     save_dir.mkdir(parents=True, exist_ok=True)
 
     save_html_path = save_dir / f"{Path(img_path).stem}.html"
-    save_drawed_path = save_dir / f"{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
-    save_logic_points_path = save_dir / f"{Path(img_path).stem}_table_col_row_vis{Path(img_path).suffix}"
+    save_drawed_path = (
+        save_dir / f"{Path(img_path).stem}_table_vis{Path(img_path).suffix}"
+    )
+    save_logic_points_path = (
+        save_dir / f"{Path(img_path).stem}_table_col_row_vis{Path(img_path).suffix}"
+    )
 
     # Visualize table rec result
-    vis_imged = viser(img_path, table_results, save_html_path, save_drawed_path, save_logic_points_path)
+    vis_imged = viser(
+        img_path,
+        table_results,
+        save_html_path,
+        save_drawed_path,
+        save_logic_points_path,
+    )
 
     print(f"The results has been saved {save_dir}")
diff --git a/rapid_table/__init__.py b/rapid_table/__init__.py
@@ -2,4 +2,4 @@
 # @Author: SWHL
 # @Contact: liekkaskono@163.com
 from .main import RapidTable, RapidTableInput
-from .utils.utils import VisTable
+from .utils import VisTable
diff --git a/rapid_table/main.py b/rapid_table/main.py
@@ -13,14 +13,12 @@
 import cv2
 import numpy as np
 
-from rapid_table.utils.download_model import DownloadModel
-from rapid_table.utils.logger import get_logger
-from rapid_table.utils.utils import LoadImage, VisTable
+from rapid_table.utils import DownloadModel, LoadImage, Logger, VisTable
 
 from .table_matcher import TableMatch
 from .table_structure import TableStructurer, TableStructureUnitable
 
-logger = get_logger("main")
+logger = Logger(logger_name=__name__).get_log()
 root_dir = Path(__file__).resolve().parent
 
 
@@ -78,7 +76,7 @@ def __init__(self, config: RapidTableInput):
         self.table_matcher = TableMatch()
 
         try:
-            self.ocr_engine = importlib.import_module("rapidocr_onnxruntime").RapidOCR()
+            self.ocr_engine = importlib.import_module("rapidocr").RapidOCR()
         except ModuleNotFoundError:
             self.ocr_engine = None
 
@@ -91,7 +89,7 @@ def __call__(
     ) -> RapidTableOutput:
         if self.ocr_engine is None and ocr_result is None:
             raise ValueError(
-                "One of two conditions must be met: ocr_result is not empty, or rapidocr_onnxruntime is installed."
+                "One of two conditions must be met: ocr_result is not empty, or rapidocr is installed."
             )
 
         img = self.load_img(img_content)
@@ -100,7 +98,14 @@ def __call__(
         h, w = img.shape[:2]
 
         if ocr_result is None:
-            ocr_result, _ = self.ocr_engine(img)
+            ocr_result = self.ocr_engine(img)
+            ocr_result = list(
+                zip(
+                    ocr_result.boxes,
+                    ocr_result.txts,
+                    ocr_result.scores,
+                )
+            )
         dt_boxes, rec_res = self.get_boxes_recs(ocr_result, h, w)
 
         pred_structures, cell_bboxes, _ = self.table_structure(copy.deepcopy(img))
@@ -197,18 +202,21 @@ def main(arg_list: Optional[List[str]] = None):
     args = parse_args(arg_list)
 
     try:
-        ocr_engine = importlib.import_module("rapidocr_onnxruntime").RapidOCR()
+        ocr_engine = importlib.import_module("rapidocr").RapidOCR()
     except ModuleNotFoundError as exc:
         raise ModuleNotFoundError(
-            "Please install the rapidocr_onnxruntime by pip install rapidocr_onnxruntime."
+            "Please install the rapidocr by pip install rapidocr"
         ) from exc
 
     input_args = RapidTableInput(model_type=args.model_type)
     table_engine = RapidTable(input_args)
 
     img = cv2.imread(args.img_path)
 
-    ocr_result, _ = ocr_engine(img)
+    rapid_ocr_output = ocr_engine(img)
+    ocr_result = list(
+        zip(rapid_ocr_output.boxes, rapid_ocr_output.txts, rapid_ocr_output.scores)
+    )
     table_results = table_engine(img, ocr_result)
     print(table_results.pred_html)
 
diff --git a/rapid_table/table_structure/utils.py b/rapid_table/table_structure/utils.py
@@ -31,7 +31,7 @@
     get_device,
 )
 
-from rapid_table.utils.logger import get_logger
+from rapid_table.utils import Logger
 
 
 class EP(Enum):
@@ -42,7 +42,7 @@ class EP(Enum):
 
 class OrtInferSession:
     def __init__(self, config: Dict[str, Any]):
-        self.logger = get_logger("OrtInferSession")
+        self.logger = Logger(logger_name=__name__).get_log()
 
         model_path = config.get("model_path", None)
         self._verify_model(model_path)
diff --git a/rapid_table/utils/__init__.py b/rapid_table/utils/__init__.py
@@ -1,3 +1,8 @@
 # -*- encoding: utf-8 -*-
 # @Author: SWHL
 # @Contact: liekkaskono@163.com
+from .download_model import DownloadModel
+from .load_image import LoadImage
+from .logger import Logger
+from .utils import is_url
+from .vis import VisTable
diff --git a/rapid_table/utils/download_model.py b/rapid_table/utils/download_model.py
@@ -5,15 +5,15 @@
 import requests
 from tqdm import tqdm
 
-from .logger import get_logger
-
-logger = get_logger("DownloadModel")
+from .logger import Logger
 
 PROJECT_DIR = Path(__file__).resolve().parent.parent
 DEFAULT_MODEL_DIR = PROJECT_DIR / "models"
 
 
 class DownloadModel:
+    logger = Logger(logger_name=__name__).get_log()
+
     @classmethod
     def download(
         cls,
@@ -31,11 +31,11 @@ def download(
 
         save_file_path = save_dir / save_model_name
         if save_file_path.exists():
-            logger.debug("%s already exists", save_file_path)
+            cls.logger.info("%s already exists", save_file_path)
             return str(save_file_path)
 
         try:
-            logger.info("Download %s to %s", model_full_url, save_dir)
+            cls.logger.info("Download %s to %s", model_full_url, save_dir)
             file = cls.download_as_bytes_with_progress(model_full_url, save_model_name)
             cls.save_file(save_file_path, file)
         except Exception as exc:
diff --git a/rapid_table/utils/load_image.py b/rapid_table/utils/load_image.py
@@ -0,0 +1,162 @@
+# -*- encoding: utf-8 -*-
+# @Author: SWHL
+# @Contact: liekkaskono@163.com
+from io import BytesIO
+from pathlib import Path
+from typing import Any, Union
+
+import cv2
+import numpy as np
+import requests
+from PIL import Image, UnidentifiedImageError
+
+from .utils import is_url
+
+root_dir = Path(__file__).resolve().parent
+InputType = Union[str, np.ndarray, bytes, Path, Image.Image]
+
+
+class LoadImage:
+    """Load an image from several input types as a three-channel ndarray."""
+
+    def __init__(self):
+        pass
+
+    def __call__(self, img: InputType) -> np.ndarray:
+        """Load ``img`` and normalise it to three channels.
+
+        Raises:
+            LoadImageError: If ``img`` is not one of the ``InputType`` members
+                or cannot be loaded/converted.
+        """
+        if not isinstance(img, InputType.__args__):
+            raise LoadImageError(
+                f"The img type {type(img)} does not in {InputType.__args__}"
+            )
+
+        # The caller's type decides later whether a 3-channel array is
+        # treated as RGB (decoded here) or passed through unchanged.
+        origin_img_type = type(img)
+        img = self.load_img(img)
+        img = self.convert_img(img, origin_img_type)
+        return img
+
+    def load_img(self, img: InputType) -> np.ndarray:
+        """Decode ``img`` into an ndarray without touching channel order.
+
+        Raises:
+            LoadImageError: If a local path does not exist, a path/URL cannot
+                be identified as an image, or the type is unsupported.
+        """
+        if isinstance(img, (str, Path)):
+            # Image.open is the call that raises UnidentifiedImageError, so it
+            # must sit inside the try for the error to be translated.
+            try:
+                if is_url(img):
+                    response = requests.get(img, stream=True, timeout=60)
+                    pil_img = Image.open(response.raw)
+                else:
+                    self.verify_exist(img)
+                    pil_img = Image.open(img)
+            except UnidentifiedImageError as e:
+                raise LoadImageError(f"cannot identify image file {img}") from e
+            return self.img_to_ndarray(pil_img)
+
+        if isinstance(img, bytes):
+            return self.img_to_ndarray(Image.open(BytesIO(img)))
+
+        if isinstance(img, np.ndarray):
+            return img
+
+        if isinstance(img, Image.Image):
+            return self.img_to_ndarray(img)
+
+        raise LoadImageError(f"{type(img)} is not supported!")
+
+    def img_to_ndarray(self, img: Image.Image) -> np.ndarray:
+        """Convert a PIL image to an ndarray; mode "1" is converted to "L" first."""
+        if img.mode == "1":
+            img = img.convert("L")
+        return np.array(img)
+
+    def convert_img(self, img: np.ndarray, origin_img_type: Any) -> np.ndarray:
+        """Normalise a 1/2/3/4-channel array to three channels.
+
+        Three-channel arrays are swapped RGB -> BGR only when the original
+        input was a str, Path, bytes or PIL image; ndarray input is returned
+        as given.
+
+        Raises:
+            LoadImageError: If ``img`` is not 2-D/3-D or has an unsupported
+                channel count.
+        """
+        if img.ndim == 2:
+            return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+        if img.ndim == 3:
+            channel = img.shape[2]
+            if channel == 1:
+                return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+            if channel == 2:
+                return self.cvt_two_to_three(img)
+
+            if channel == 3:
+                if issubclass(origin_img_type, (str, Path, bytes, Image.Image)):
+                    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+                return img
+
+            if channel == 4:
+                return self.cvt_four_to_three(img)
+
+            raise LoadImageError(
+                f"The channel({channel}) of the img is not in [1, 2, 3, 4]"
+            )
+
+        raise LoadImageError(f"The ndim({img.ndim}) of the img is not in [2, 3]")
+
+    @staticmethod
+    def cvt_two_to_three(img: np.ndarray) -> np.ndarray:
+        """gray + alpha → BGR"""
+        img_gray = img[..., 0]
+        img_bgr = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
+
+        img_alpha = img[..., 1]
+        not_a = cv2.bitwise_not(img_alpha)
+        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)
+
+        # Keep pixels where alpha is non-zero, then add the inverted alpha.
+        new_img = cv2.bitwise_and(img_bgr, img_bgr, mask=img_alpha)
+        new_img = cv2.add(new_img, not_a)
+        return new_img
+
+    @staticmethod
+    def cvt_four_to_three(img: np.ndarray) -> np.ndarray:
+        """RGBA → BGR"""
+        r, g, b, a = cv2.split(img)
+        new_img = cv2.merge((b, g, r))
+
+        not_a = cv2.bitwise_not(a)
+        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)
+
+        new_img = cv2.bitwise_and(new_img, new_img, mask=a)
+
+        # NOTE(review): if the masked image is entirely zero the inverted
+        # alpha is added; otherwise the masked image is inverted. Confirm
+        # the inversion branch is intended.
+        mean_color = np.mean(new_img)
+        if mean_color <= 0.0:
+            new_img = cv2.add(new_img, not_a)
+        else:
+            new_img = cv2.bitwise_not(new_img)
+        return new_img
+
+    @staticmethod
+    def verify_exist(file_path: Union[str, Path]):
+        """Raise LoadImageError when ``file_path`` does not exist."""
+        if not Path(file_path).exists():
+            raise LoadImageError(f"{file_path} does not exist.")
+
+
+class LoadImageError(Exception):
+    pass
diff --git a/rapid_table/utils/logger.py b/rapid_table/utils/logger.py
@@ -2,20 +2,39 @@
 # @Author: Jocker1212
 # @Contact: xinyijianggo@gmail.com
 import logging
-from functools import lru_cache
 
+import colorlog
 
-@lru_cache(maxsize=32)
-def get_logger(name: str) -> logging.Logger:
-    logger = logging.getLogger(name)
-    logger.setLevel(logging.DEBUG)
 
-    fmt = "%(asctime)s - %(name)s - %(levelname)s: %(message)s"
-    format_str = logging.Formatter(fmt)
+class Logger:
+    """Configure a named logger with one colourised console handler."""
+
+    def __init__(self, log_level=logging.DEBUG, logger_name=None):
+        self.logger = logging.getLogger(logger_name)
+        self.logger.setLevel(log_level)
+        self.logger.propagate = False
 
-    sh = logging.StreamHandler()
-    sh.setLevel(logging.DEBUG)
+        # logging.getLogger returns the same object for the same name, so a
+        # handler attached by an earlier Logger(...) call is reused instead
+        # of attaching a second one.
+        if self.logger.handlers:
+            return
 
-    logger.addHandler(sh)
-    sh.setFormatter(format_str)
-    return logger
+        formatter = colorlog.ColoredFormatter(
+            "%(log_color)s[%(levelname)s] %(asctime)s [RapidTable] %(filename)s:%(lineno)d: %(message)s",
+            log_colors={
+                "DEBUG": "cyan",
+                "INFO": "green",
+                "WARNING": "yellow",
+                "ERROR": "red",
+                "CRITICAL": "red,bg_white",
+            },
+        )
+        console_handler = logging.StreamHandler()
+        console_handler.setFormatter(formatter)
+        console_handler.setLevel(log_level)
+        self.logger.addHandler(console_handler)
+
+    def get_log(self):
+        """Return the configured ``logging.Logger``."""
+        return self.logger
diff --git a/rapid_table/utils/utils.py b/rapid_table/utils/utils.py
diff --git a/rapid_table/utils/vis.py b/rapid_table/utils/vis.py
diff --git a/requirements.txt b/requirements.txt
diff --git a/tests/test_main.py b/tests/test_main.py