71 changes: 52 additions & 19 deletions backends/cuda_backend.py
@@ -5,9 +5,9 @@

# DeepSeek-OCR-2 default parameters (from official config)
DEFAULT_MODEL_PATH = "deepseek-ai/DeepSeek-OCR-2"
-DEFAULT_BASE_SIZE = 1024
-DEFAULT_IMAGE_SIZE = 768
-DEFAULT_CROP_MODE = True
+DEFAULT_BASE_SIZE = int(os.environ.get("OCR_BASE_SIZE", "1024"))
+DEFAULT_IMAGE_SIZE = int(os.environ.get("OCR_IMAGE_SIZE", "768"))
+DEFAULT_CROP_MODE = os.environ.get("OCR_CROP_MODE", "true").strip().lower() in ("1", "true", "yes", "on")

class CUDABackend:
def __init__(self, model_path: str = DEFAULT_MODEL_PATH):
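Note: these three defaults are evaluated once, at import time, so overrides must already be in the environment before `backends.cuda_backend` is first imported (the compose file below takes care of this for the container). A minimal sketch of how the `OCR_CROP_MODE` truthiness parsing behaves, with illustrative values; `parse_crop_mode` is a hypothetical wrapper around the inline expression, added here only for readability:

```python
# Illustrative check of the OCR_CROP_MODE parsing introduced above; any value
# outside the truthy set ("1", "true", "yes", "on") disables crop mode.
import os

def parse_crop_mode(default: str = "true") -> bool:
    raw = os.environ.get("OCR_CROP_MODE", default)
    return raw.strip().lower() in ("1", "true", "yes", "on")

os.environ["OCR_CROP_MODE"] = " YES "   # hypothetical override; whitespace and case are normalized
assert parse_crop_mode() is True

os.environ["OCR_CROP_MODE"] = "off"     # unrecognized value: falls back to False rather than raising
assert parse_crop_mode() is False
```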
@@ -72,22 +72,55 @@ def load_model(self, source: str = "huggingface", timeout: int = 300):

def infer(self, prompt: str, image_path: str, **kwargs) -> str:
"""Run inference on CUDA"""
-        try:
-            result = self.model.infer(
-                tokenizer=self.tokenizer,
-                prompt=prompt,
-                image_file=image_path,
-                output_path='./output',
-                base_size=DEFAULT_BASE_SIZE,
-                image_size=DEFAULT_IMAGE_SIZE,
-                crop_mode=DEFAULT_CROP_MODE,
-                save_results=False,
-                eval_mode=True
-            )
-            return result if result else ""
-        except Exception as e:
-            print(f"❌ Inference failed: {e}")
-            raise
+        fallback_profiles = [
+            (DEFAULT_BASE_SIZE, DEFAULT_IMAGE_SIZE, DEFAULT_CROP_MODE),
+            (DEFAULT_BASE_SIZE, DEFAULT_IMAGE_SIZE, True),
+            (640, 448, True),
+            (576, 384, True),
+            (512, 320, True),
+        ]
+        seen = set()
+        last_error = None
+
+        for base_size, image_size, crop_mode in fallback_profiles:
+            profile = (base_size, image_size, crop_mode)
+            if profile in seen:
+                continue
+            seen.add(profile)
+
+            try:
+                print(
+                    f"🧪 Inference profile: base_size={base_size}, image_size={image_size}, crop_mode={crop_mode}"
+                )
+                result = self.model.infer(
+                    tokenizer=self.tokenizer,
+                    prompt=prompt,
+                    image_file=image_path,
+                    output_path='./output',
+                    base_size=base_size,
+                    image_size=image_size,
+                    crop_mode=crop_mode,
+                    save_results=False,
+                    eval_mode=True
+                )
+                return result if result else ""
+            except Exception as e:
+                last_error = e
+                msg = str(e)
+                # Upstream OCR-2 bug path: param_img is undefined when crop_mode=False.
+                if "param_img" in msg and not crop_mode:
+                    print(f"⚠️ Upstream param_img bug with crop_mode={crop_mode}, retrying with crop_mode=True")
+                    continue
+                if isinstance(e, torch.OutOfMemoryError) or "out of memory" in msg.lower():
+                    print(f"⚠️ CUDA OOM with profile {profile}, trying smaller profile...")
+                    if torch.cuda.is_available():
+                        torch.cuda.empty_cache()
+                    continue
+                print(f"❌ Inference failed: {e}")
+                raise
+
+        print(f"❌ Inference failed after all fallback profiles: {last_error}")
+        raise last_error

@staticmethod
def is_available() -> bool:
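The profile ladder only ever steps down in resolution, trading OCR fidelity for memory headroom, and `torch.cuda.empty_cache()` between attempts returns the failed attempt's cached blocks to the allocator before the retry. A hedged usage sketch, assuming a CUDA device and network access for the first model download; the prompt string and image path are placeholders, not a documented format:

```python
# Hypothetical driver for the fallback behavior above: a single call to
# infer() now walks the profile ladder on OOM or the upstream param_img bug.
from backends.cuda_backend import CUDABackend

if CUDABackend.is_available():
    backend = CUDABackend()   # uses DEFAULT_MODEL_PATH
    backend.load_model()      # source="huggingface", timeout=300 by default
    text = backend.infer(prompt="<image>\nExtract all text.", image_path="page.png")
    print(text or "(empty result)")
```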
20 changes: 13 additions & 7 deletions docker-compose.gpu.yml
@@ -16,14 +16,25 @@ services:
- CUDA_VISIBLE_DEVICES=0
- GPU_IDLE_TIMEOUT=${GPU_IDLE_TIMEOUT:-60}
- PYTHONUNBUFFERED=1
+      - PYTORCH_CUDA_ALLOC_CONF=${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True,max_split_size_mb:128}
+      - OCR_BASE_SIZE=${OCR_BASE_SIZE:-768}
+      - OCR_IMAGE_SIZE=${OCR_IMAGE_SIZE:-448}
+      - OCR_CROP_MODE=${OCR_CROP_MODE:-true}

volumes:
- ./models:/root/.cache/huggingface
- ./logs:/app/logs

shm_size: "8g"
restart: unless-stopped


+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8001/health"]
interval: 30s
@@ -33,10 +44,5 @@

mem_limit: 32g
memswap_limit: 32g

-    networks:
-      - ocr-network
-
-networks:
-  ocr-network:
-    driver: bridge
+    network_mode: bridge
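After `docker compose -f docker-compose.gpu.yml up`, the wiring can be smoke-tested against the healthcheck endpoint; a minimal probe, assuming the service publishes port 8001 on localhost as the healthcheck above implies (the port mapping itself is not shown in this diff):

```python
# Minimal probe of the /health endpoint used by the compose healthcheck;
# the localhost:8001 host/port mapping is an assumption.
import urllib.request

with urllib.request.urlopen("http://localhost:8001/health", timeout=5) as resp:
    print(resp.status, resp.read().decode())
```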
12 changes: 6 additions & 6 deletions web_service_gpu.py
@@ -230,19 +230,19 @@ async def ocr_endpoint(

    # Step 1: lazily load the model
    if gpu_manager:
-        model, processor = gpu_manager.get_model(load_func=load_model_func)
+        model, tokenizer = gpu_manager.get_model(load_func=load_model_func)
    else:
        # CPU mode
        from backends.cpu_backend import CPUBackend
        backend = CPUBackend()
        backend.load_model()
-        model, processor = backend.model, backend.processor
+        model, tokenizer = backend.model, backend.tokenizer

    # Step 2: inference
    from backends.cuda_backend import CUDABackend
    backend = CUDABackend()
    backend.model = model
-    backend.processor = processor
+    backend.tokenizer = tokenizer
    text = backend.infer(prompt=prompt, image_path=tmp_file)

    # Step 3: unload immediately (critical!)
@@ -368,17 +368,17 @@ async def ocr_pdf_endpoint(

    # Fetch the model (loaded only once)
    if gpu_manager:
-        model, processor = gpu_manager.get_model(load_func=load_model_func)
+        model, tokenizer = gpu_manager.get_model(load_func=load_model_func)
    else:
        from backends.cpu_backend import CPUBackend
        backend = CPUBackend()
        backend.load_model()
-        model, processor = backend.model, backend.processor
+        model, tokenizer = backend.model, backend.tokenizer

    from backends.cuda_backend import CUDABackend
    backend = CUDABackend()
    backend.model = model
-    backend.processor = processor
+    backend.tokenizer = tokenizer

    # Build the prompt
    prompt = build_prompt(prompt_type, custom_prompt, find_term)
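Both endpoints now follow the same handoff: borrow the shared `(model, tokenizer)` pair, attach it to a request-local `CUDABackend`, and infer. A condensed sketch of that flow, under the assumption that `gpu_manager` and `load_model_func` exist at module scope as they do in `web_service_gpu.py`:

```python
# Condensed sketch of the per-request flow shared by ocr_endpoint and
# ocr_pdf_endpoint after this change; gpu_manager and load_model_func are
# assumed module-level objects, as in web_service_gpu.py.
from backends.cuda_backend import CUDABackend

def run_ocr(prompt: str, image_path: str) -> str:
    model, tokenizer = gpu_manager.get_model(load_func=load_model_func)  # lazy load
    backend = CUDABackend()
    backend.model = model          # reuse the shared weights
    backend.tokenizer = tokenizer  # attribute renamed from `processor` in this PR
    return backend.infer(prompt=prompt, image_path=image_path)
```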