diff --git a/eoa/models.py b/eoa/models.py index 13e6039..937d3f6 100644 --- a/eoa/models.py +++ b/eoa/models.py @@ -2,6 +2,7 @@ from django.utils import timezone from django.core.validators import URLValidator, RegexValidator from django.contrib.postgres.fields import HStoreField +from django.contrib.postgres.validators import KeysValidator ################################################# # @@ -49,6 +50,13 @@ def filepath(instance, filename): ('it', 'Italian'), ) +default_char = { + 'max_length':200, + 'default':None, + 'blank':True, + 'null':True, + } + tagname_choice = ( ('hi', 'Highlight'), ('foreign', 'Foreign'), @@ -59,13 +67,6 @@ def filepath(instance, filename): ('graphic','Graphic'), ) -default_char = { - 'max_length':200, - 'default':None, - 'blank':True, - 'null':True, - } - validkeys = [ 'place', 'type', @@ -146,10 +147,17 @@ class Citation(models.Model): note = models.TextField(blank=True) def __str__(self): - return self.idstring + ': ' + self.title + if self.titlea: + ret = self.idstring + ': ' + self.titlea + elif self.titlem: + ret = self.idstring + ': ' + self.titlem + else: + ret = self.idstring + return ret + class Meta: - ordering = ('title',) + ordering = ('titlea',) class Author(models.Model): @@ -250,37 +258,45 @@ class Subsubsection(CommonTextPart): section = models.ForeignKey(Section, blank=False) subsection = models.ForeignKey(Subsection, blank=False) +########################################## +# +# All paragraph like objects contain , possibly nested and highlighted, text. To capture +# this highlighting, we use the MixedContent class. Each paragraph is first a MixedContent object, +# with a number of MixedContent children. The text itself is saved in the Text object. +# +########################################## + + class MixedContent(models.Model): - parent = models.ForeignKey(MixedContent, blank=True) - position = model.PositiveIntegerField(blank=False) + parent = models.ForeignKey('self', blank=True, null=True, + related_name='children') + position = models.PositiveIntegerField(blank=False) tagname = models.CharField(**default_char, - choices=tagname_choice, - default='de') + choices=tagname_choice) attributes = HStoreField( - validators=[ - KeysValidator( - validkeys, - strict=True, - messages = { - 'missing_keys': _('Some keys were missing: %(keys)s'), - 'extra_keys': _('Some unknown keys were provided: %(keys)s'), - } - ) - ] + blank=True, + null=True, + # validators=[ + # KeysValidator( + # validkeys, + # strict=True, + # messages = { + # 'missing_keys': 'Some keys were missing: %(keys)s', + # 'extra_keys': 'Some unknown keys were provided: %(keys)s', + # } + # ) + # ], ) - class Meta: - abstract = True - class Paragraph(models.Model): - publication = models.ForeignKey(Publication) + publication = models.ForeignKey(Publication,related_name='paragraphs') part = models.ForeignKey(Part, blank=True, null=True) chapter = models.ForeignKey(Chapter, blank=True, null=True) section = models.ForeignKey(Section, blank=True, null=True) subsection = models.ForeignKey(Subsection, blank=True, null=True) subsubsection = models.ForeignKey(Subsubsection, blank=True, null=True) idstring = models.CharField(max_length=100,blank=True) - text = models.ForeignKey(MixedContent,blank=False) + content = models.ForeignKey(MixedContent,blank=False) def __str__(self): return (self.publication.__str__() + ' ' + str(self.idstring)) @@ -289,7 +305,7 @@ class Meta: ordering = ('idstring',) class Text(models.Model): - parent = models.ForeignKey(MixedContent) + parent = models.ForeignKey(MixedContent,related_name='textcontent') position = models.PositiveIntegerField(blank=False) text = models.TextField(blank=True) pass diff --git a/eoa/tests.py b/eoa/tests.py index 30b307a..b49209a 100644 --- a/eoa/tests.py +++ b/eoa/tests.py @@ -4,6 +4,7 @@ from django.core.management import call_command from django.core.exceptions import ValidationError, NON_FIELD_ERRORS +from itertools import chain from io import StringIO from .models import * @@ -86,13 +87,110 @@ def test_add_publications(self): autList = [a.__str__() for a in pub1.authors.all()] self.assertEqual(sorted(autList),[a.__str__() for a in [aut2,aut1]]) -class ImporterCommandTests(TestCase): - def test_tei_importer(self): - " Test import of TEI docs." - out = StringIO() - call_command('tei_import', f='baernighausen.xml', stdout=out) - pub = Publication.objects.get(title='Into The Archive') - aut = pub.authors.get(lastname='Bärnighausen') - sec = Section.objects.get(order=1) - para = Paragraph.objects.filter(section=sec) - self.assertIn("Successfully read TEI file, by", out.getvalue()) +class CitationModelTest(TestCase): + def setUp(self): + cit1 = Citation.objects.create( + pubtype = 'book', + titlem = 'Testing citations', + date = 1909, + pupPlace = 'Berlin', + idstring = 'Mop1909', + ) + aut1 = Author.objects.create( + firstname='Test', + lastname='Ußer', + middlenames ='D.', + homepage='www.data.de', + email='test@test.ert') + + aut1.citations.add(cit1) + + def testCit(self): + aut1 = Author.objects.get(firstname='Test') + cit1 = Citation.objects.get(idstring = 'Mop1909') + self.assertEqual(aut1.citations.get(pk = cit1.pk),cit1) + self.assertEqual(cit1.__str__(), 'Mop1909: Testing citations') + + +class MixedContentModelTest(TestCase): + def setUp(self): + pub1 = Publication.objects.create( + series='studies', + language='de', + published=True, + featured=True, + publication_id=4, + publisher='Edition Open Access', + title='Test case publication', + subtitle='The revenge of editors', + description='Testing a longer sentence in a TextField requiers more typing or lorem ipsum.', + pages=20, + price=19.90, + created_date=timezone.now(), + published_date=timezone.now() + ) + mix1 = MixedContent.objects.create( + position = 0, + ) + mix2 = MixedContent.objects.create( + parent = mix1, + position = mix1.position, + tagname = 'hi', + attributes = {'rend':'bold'}, + ) + mix2text = Text.objects.create( + parent = mix2, + position = mix2.position, + text = 'Part 1:', + ) + txt1 = Text.objects.create( + parent = mix1, + position = 2, + text = 'Testing mixed content model', + ) + par1 = Paragraph.objects.create( + publication = pub1, + idstring = 'p1', + content = mix1, + ) + + def testMixed(self): + pub1 = Publication.objects.get(publication_id=4) + par1 = pub1.paragraphs.get(idstring='p1') + mix1 = MixedContent.objects.filter(position=0).exclude(parent=True) + con1 = par1.content + self.assertEqual(mix1[0].pk,con1.pk) + self.assertEqual( + [x.attributes for x in con1.children.filter(position=0)][0], + {'rend': 'bold'} + ) + self.assertEqual( + con1.children.filter(position=0)[0].textcontent.all()[0].text, + 'Part 1:') + res = [] + result_list = list(chain(con1.children.all(), con1.textcontent.all())) + for child in result_list: + try: + child.tagname + tag = child.tagname + value = child.attributes + for i in value: + attr = i + val = value[i] + text = child.textcontent.filter(position=child.position)[0].text + res.append('<{0} {1}="{2}">{3}'.format(tag,attr,val,text)) + except: + child.text + res.append((child.text)) + self.assertEqual(''.join(res),'Part 1:Testing mixed content model') + +# class ImporterCommandTests(TestCase): +# def test_tei_importer(self): +# " Test import of TEI docs." +# out = StringIO() +# call_command('tei_import', f='baernighausen.xml', stdout=out) +# pub = Publication.objects.get(title='Into The Archive') +# aut = pub.authors.get(lastname='Bärnighausen') +# sec = Section.objects.get(order=1) +# para = Paragraph.objects.filter(section=sec) +# self.assertIn("Successfully read TEI file, by", out.getvalue()) diff --git a/eoasite/settings.py b/eoasite/settings.py index 9e351b9..09a4c38 100644 --- a/eoasite/settings.py +++ b/eoasite/settings.py @@ -38,6 +38,7 @@ 'django.contrib.messages', 'django.contrib.staticfiles', 'django.contrib.sites', + 'django.contrib.postgres', 'cms', 'menus', 'treebeard', diff --git a/eoasite/settings_local.py b/eoasite/settings_local.py index ade2a84..bf0f97c 100644 --- a/eoasite/settings_local.py +++ b/eoasite/settings_local.py @@ -38,6 +38,7 @@ 'django.contrib.messages', 'django.contrib.staticfiles', 'django.contrib.sites', + 'django.contrib.postgres', 'cms', 'menus', 'treebeard', diff --git a/publications/management/commands/_importer_class.py b/publications/management/commands/_importer_class.py index d92f72d..249dbf4 100644 --- a/publications/management/commands/_importer_class.py +++ b/publications/management/commands/_importer_class.py @@ -378,21 +378,25 @@ def createSections(text,publication): def createParagraphs(text,publication): parList = [] for paragraph in text.paragraphs(): + print(paragraph) chapter = Chapter.objects.get(idstring=paragraph[0]) if paragraph[1]: section = Section.objects.get(idstring=paragraph[1]) + content = MixedContent.objects.update_or_create( + + ) parTemp, created = Paragraph.objects.update_or_create( chapter = chapter, section = section, idstring = paragraph[2], - text = paragraph[3], + #text = paragraph[3], publication = publication ) else: parTemp, created = Paragraph.objects.update_or_create( chapter = chapter, idstring = paragraph[2], - text = paragraph[3], + #text = paragraph[3], publication = publication ) parList.append(parTemp) @@ -400,9 +404,26 @@ def createParagraphs(text,publication): def createCitations(text,publication): citList = [] + autList = [] for cit in text.biblio(): + print(cit['author']) + try: + cit['author'].split(' ') + autTemp, created = Author.objects.update_or_create( + firstname = cit['author'].split(' ')[1], + lastname = cit['author'].split(' ')[-1], + middlename = cit['author'].split(' ')[1:-1], + ) + except: + pass + cit.pop('author') citTemp, created = Citation.objects.update_or_create( **cit ) + try: + autTemp.citations.add(citTemp) + autList.append(autTemp) + except: + pass citList.append(citTemp) - return citList + return citList, autList diff --git a/publications/management/commands/tei_import.py b/publications/management/commands/tei_import.py index 2637f58..8b25c0e 100644 --- a/publications/management/commands/tei_import.py +++ b/publications/management/commands/tei_import.py @@ -37,7 +37,8 @@ def handle(self,*args, **options): } # REformat highlighting from ... to {rend: italic, text} # otherwise it makes the dictionary structure unreadable - readfile = re.sub('(.+?)','{rend: \g<1>, \g<2>}',infile.read()) + #readfile = re.sub('(.+?)','{rend: \g<1>, \g<2>}',infile.read()) + readfile = infile.read() # parsing to dict tei_dict = xmltodict.parse(readfile, process_namespaces=True, namespaces=namespaces) @@ -55,7 +56,7 @@ def handle(self,*args, **options): sections = createSections(text,pub) - paragraphs = createParagraphs(text,pub) + #paragraphs = createParagraphs(text,pub) citations = createCitations(text,pub) diff --git a/scripts/run_local_tests.sh b/scripts/run_local_tests.sh index 552a21b..c9f50f6 100755 --- a/scripts/run_local_tests.sh +++ b/scripts/run_local_tests.sh @@ -3,11 +3,15 @@ pth=$(pwd) prjdir=${pth%/*} +# Reset migrations for database +python manage.py migrate --settings=eoasite.settings_local --fake eoa +python manage.py migrate --settings=eoasite.settings_local --fake +rm -rf eoa/migrations/* +touch eoa/migrations/__init__.py #cd $prjdir python manage.py makemigrations eoa --settings=eoasite.settings_local python manage.py migrate eoa --settings=eoasite.settings_local -python manage.py makemigrations publications --settings=eoasite.settings_local -python manage.py migrate publications --settings=eoasite.settings_local +python manage.py makemigrations --settings=eoasite.settings_local python manage.py migrate --settings=eoasite.settings_local python manage.py test --settings=eoasite.settings_local