Files
bertalign/tests/chunk_test.py
2025-02-18 03:39:44 +06:00

52 lines
1.6 KiB
Python

from typing_extensions import override
import unittest
import os
from bertalign.chunk import create_aligned_chunks, TextChunk, AlignmentError
class TestChunk(unittest.TestCase):
    """Tests for ``create_aligned_chunks`` using the ``ri_4`` fixture pair.

    The fixture paths are constant, so they are defined as class attributes
    rather than in an overridden ``TestCase.__init__``. The fixture texts
    themselves are read from disk in ``setUp`` before each test.
    """

    # Directory next to this test module that holds the fixture files.
    data_dir: str = os.path.join(os.path.dirname(__file__), "data")
    source_file: str = os.path.join(data_dir, "ri_4.zh")  # Source text file
    target_file: str = os.path.join(data_dir, "ri_4.en")  # Target text file
    # Fixture contents; overwritten on the instance by setUp.
    source_text: str = ""
    target_text: str = ""

    @override
    def setUp(self) -> None:
        """Load the source and target text from the fixture files."""
        with open(self.source_file, "r", encoding="utf-8") as f:
            self.source_text = f.read()
        with open(self.target_file, "r", encoding="utf-8") as f:
            self.target_text = f.read()

    def test_create_aligned_chunks(self) -> None:
        """The fixture pair yields a non-empty list of ``TextChunk`` items."""
        chunks = create_aligned_chunks(
            self.source_text, self.target_text, max_chars=500
        )
        self.assertIsInstance(chunks, list)
        # Check non-emptiness first: the per-chunk assertions below would
        # pass vacuously on an empty list.
        self.assertGreater(len(chunks), 0)
        for chunk in chunks:
            # assertIsInstance reports the offending object on failure,
            # unlike assertTrue(all(...)).
            self.assertIsInstance(chunk, TextChunk)
            self.assertIsInstance(chunk.source_text, str)
            self.assertIsInstance(chunk.target_text, str)

    # NOTE(review): the skip reason "no" does not say why this test is
    # disabled. It is also unclear from this file whether empty source text
    # should return None (as asserted here) or raise AlignmentError, which
    # this module imports but never uses -- confirm before re-enabling.
    @unittest.skip("no")
    def test_create_aligned_chunks_empty_input(self) -> None:
        """Empty source text is expected to produce ``None`` (currently skipped)."""
        c = create_aligned_chunks("", self.target_text)
        self.assertIsNone(c)
# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    # The return value is bound to ``_`` to mark it as deliberately unused.
    _ = unittest.main()