fix: faiss stuff, api
This commit is contained in:
@@ -1,7 +1,18 @@
|
||||
from typing_extensions import override
|
||||
import unittest
|
||||
import os
|
||||
from bertalign.chunk import create_aligned_chunks, TextChunk, AlignmentError
|
||||
from typing_extensions import override
|
||||
from bertalign.aligner import Bertalign
|
||||
from bertalign.chunk import TextChunk, split_into_paragraphs
|
||||
|
||||
|
||||
def print_chunks(chunks: list[TextChunk]) -> None:
    """Dump every aligned chunk to stdout for manual inspection.

    Chunks are numbered from 1. For each chunk the source text and the
    target text are printed, each preceded by a header line that reports
    its length in characters.
    """
    # NOTE(review): the original indentation was lost in extraction; the
    # 80-dash separator is assumed to be emitted once per chunk -- confirm.
    separator = "-" * 80
    for index, chunk in enumerate(chunks, start=1):
        source = chunk.source_text
        print(
            f"\nChunk {index}:",
            f"Source text ({len(source)} chars):",
            source,
            sep="\n",
        )
        target = chunk.target_text
        print(
            f"\nTarget text ({len(target)} chars):",
            target,
            separator,
            sep="\n",
        )
|
||||
|
||||
|
||||
class TestChunk(unittest.TestCase):
|
||||
@@ -28,22 +39,27 @@ class TestChunk(unittest.TestCase):
|
||||
self.target_text = f.read()
|
||||
|
||||
def test_create_aligned_chunks(self):
    """Chunking the fixture texts yields a non-empty list of TextChunk.

    The source and target fixtures are split into paragraphs, aligned
    with ``Bertalign`` (zh -> en) and chunked; every chunk must expose
    string ``source_text`` and ``target_text`` attributes.
    """
    # The earlier create_aligned_chunks(...) call was dropped: its result
    # was overwritten by aligner.chunk() before ever being inspected.
    src_ps = split_into_paragraphs(self.source_text)
    tgt_ps = split_into_paragraphs(self.target_text)
    aligner = Bertalign(src_ps, tgt_ps, src_lang="zh", tgt_lang="en")
    # NOTE(review): 300 is presumably the per-chunk size limit -- confirm
    # against Bertalign.chunk's signature.
    chunks = aligner.chunk(300)

    self.assertIsInstance(chunks, list)
    # Check non-emptiness first: the all(...) and per-chunk assertions
    # below pass vacuously on an empty list.
    self.assertGreater(len(chunks), 0)
    self.assertTrue(all(isinstance(chunk, TextChunk) for chunk in chunks))

    for chunk in chunks:
        self.assertIsInstance(chunk.source_text, str)
        self.assertIsInstance(chunk.target_text, str)

    # Printed for eyeballing alignment quality when running the test.
    print_chunks(chunks)
|
||||
|
||||
@unittest.skip("no")
def test_create_aligned_chunks_empty_input(self):
    """Expect ``Bertalign.chunk()`` to return ``None`` (currently skipped).

    NOTE(review): despite the name, this feeds the regular non-empty
    fixtures rather than an empty source text -- confirm the intended
    input before re-enabling the test.
    """
    # The earlier create_aligned_chunks("", ...) call was dropped: its
    # result was overwritten by aligner.chunk() before being asserted on.
    src_ps = split_into_paragraphs(self.source_text)
    tgt_ps = split_into_paragraphs(self.target_text)
    aligner = Bertalign(src_ps, tgt_ps, src_lang="zh", tgt_lang="en")
    c = aligner.chunk()
    self.assertIsNone(c)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user