2024-09-23 19:11:06 +03:00
|
|
|
from enum import Enum
|
2024-06-26 10:58:55 +03:00
|
|
|
|
2024-10-21 18:03:14 +03:00
|
|
|
import yaml
|
|
|
|
from pydantic import BaseModel
|
2024-06-26 10:58:55 +03:00
|
|
|
|
2024-09-23 19:11:06 +03:00
|
|
|
|
2024-10-21 18:03:14 +03:00
|
|
|
class PdfParser(str, Enum):
    """Identifiers for the available PDF parsing backends.

    Members also subclass ``str``, so each one compares equal to its raw
    string value (for example ``PdfParser.UNSTRUCTURED == "unstructured"``)
    and can be looked up from that string via ``PdfParser("unstructured")``.
    """

    # NOTE(review): what each backend actually does is not visible in this
    # file; the comments below are inferred from the member names only.

    # Presumably the LlamaParse-based parser.
    LLAMA_PARSE = "llama_parse"

    # Presumably the parser built on the `unstructured` library.
    UNSTRUCTURED = "unstructured"

    # Presumably Megaparse's own vision-based parser.
    MEGAPARSE_VISION = "megaparse_vision"
|
|
|
|
|
|
|
|
|
|
|
|
# Base class for configuration models that can be loaded from a YAML file.
# (Written as a comment rather than a class docstring because pydantic copies
# class docstrings into the generated schema description.)
class MegaparseBaseConfig(BaseModel):
    @classmethod
    def from_yaml(cls, file_path: str):
        """Create an instance of ``cls`` from the YAML file at ``file_path``.

        The top-level YAML document must be a mapping; its keys are passed
        to the model constructor as keyword arguments. An empty (or
        comment-only) file is treated as an empty mapping, so the model is
        built from its field defaults.

        Args:
            file_path: Path of the YAML file to read.

        Returns:
            A new instance of the class ``from_yaml`` was called on.

        Raises:
            OSError: If the file cannot be opened.
            yaml.YAMLError: If the file is not valid YAML.
            TypeError: If the top-level YAML value is not a mapping.
        """
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(file_path, "r", encoding="utf-8") as stream:
            config_data = yaml.safe_load(stream)

        # safe_load returns None for an empty document; without this guard
        # ``cls(**None)`` would fail with an unhelpful TypeError.
        if config_data is None:
            config_data = {}
        elif not isinstance(config_data, dict):
            raise TypeError(
                f"Expected a mapping at the top level of {file_path!r}, "
                f"got {type(config_data).__name__}"
            )

        # Pydantic validates the loaded values against the model's fields.
        return cls(**config_data)
|
|
|
|
|
|
|
|
|
|
|
|
# Concrete Megaparse settings. Every field has a default, so the model can be
# constructed with no arguments; YAML loading comes from MegaparseBaseConfig.
class MegaparseConfig(MegaparseBaseConfig):
    # Name of the parsing strategy; "fast" unless overridden.
    # NOTE(review): the set of accepted strategy names is not visible here.
    strategy: str = "fast"

    # Optional API key, unset by default.
    # Presumably only needed when pdf_parser is PdfParser.LLAMA_PARSE; confirm.
    llama_parse_api_key: str | None = None

    # Which PDF parsing backend to use; defaults to PdfParser.UNSTRUCTURED.
    pdf_parser: PdfParser = PdfParser.UNSTRUCTURED
|