Proper model relationships for many models

I am trying to build an app which will allow me to collect different types of media and use it with other apps. The models.py for this app looks like this:

from django.db import models as m
from django.utils.translation import ugettext as _
from django.contrib.auth.models import User
from eventmanager.models import Concert


########################################################################################################################
# Prototypes                                                                                                           #
########################################################################################################################
class BaseFile(m.Model):
    """Abstract base with the bookkeeping fields shared by stored files.

    Declares the file-type choice codes plus the uploader, the file size,
    the file extension and a download flag. The model is abstract, so it
    contributes its fields to subclasses instead of getting its own table.
    """
    # Single-character codes stored in ``file_type``.
    # NOTE(review): the labels use the eager ``ugettext`` imported as ``_``;
    # Django recommends the lazy variant for model-level strings - confirm.
    AUDIO, VIDEO, IMAGE, OTHER = 'a', 'v', 'i', 'o'
    file_type_choices = (
        (AUDIO, _('audio')),
        (VIDEO, _('video')),
        (IMAGE, _('image')),
        (OTHER, _('other')),
    )

    # Uploading user; set to NULL (row kept) when that user is deleted.
    added_by = m.ForeignKey(
        User,
        null=True,
        default=None,
        on_delete=m.SET_NULL,
    )
    file_type = m.CharField(max_length=1, choices=file_type_choices)
    size = m.IntegerField()  # file size (unit is not stated in this file)
    extension = m.CharField(max_length=5)  # file extension
    downloadable = m.BooleanField(default=False)

    class Meta:
        abstract = True


class File(BaseFile):
    """Abstract generic file: an uploaded file and/or an external location.

    Extends BaseFile with an ``internal`` flag, the uploaded file itself
    and host/URL fields.
    NOTE(review): ``file`` is required even when ``internal`` is False -
    confirm that externally hosted entries are meant to carry an upload.
    """
    internal = m.BooleanField()
    file = m.FileField()
    host = m.CharField(
        max_length=100,
        default='',
        blank=True,
    )
    url = m.URLField(blank=True)

    class Meta:
        abstract = True


class BaseImage(BaseFile):
    """Abstract image variant of BaseFile; adds a single ``image`` field."""
    image = m.ImageField()

    class Meta:
        abstract = True


class DataContainer(m.Model):
    """Abstract mixin carrying descriptive metadata for a media item.

    Adds a display name, a two-character media-type code and a creation
    timestamp that is filled in automatically on first save.
    """
    # Two-character codes stored in ``media_type``.
    # NOTE(review): BaseFile also defines IMAGE and OTHER with different
    # values; in classes inheriting from both, attribute lookup follows the
    # base-class order, so these two may be shadowed - confirm intent.
    TRACK, LIVERECORDING = 'tr', 'lr'
    IMAGE, FLYER = 'im', 'fl'
    MUSICVIDEO, LIVEVIDEO = 'mv', 'lv'
    OTHER = 'ot'
    media_type_choices = (
        (TRACK, _('track')),
        (LIVERECORDING, _('live recording')),
        (IMAGE, _('image')),
        (FLYER, _('flyer')),
        (MUSICVIDEO, _('music video')),
        (LIVEVIDEO, _('live video')),
        (OTHER, _('other')),
    )

    name = m.CharField(max_length=255)
    media_type = m.CharField(max_length=2, choices=media_type_choices)
    # Date of creation; set once on insert and hidden from forms.
    cr_date = m.DateTimeField(
        auto_now_add=True,
        editable=False,
        verbose_name=_('Date of Creation'),
    )

    class Meta:
        abstract = True


########################################################################################################################
# Single Items                                                                                                         #
########################################################################################################################
class Image(BaseImage, DataContainer):
    """Concrete image item: BaseImage's fields plus DataContainer's metadata.

    No caption relation is declared in this class.
    """


class Track(File, DataContainer):
    """Concrete music track: File's fields plus DataContainer's metadata."""


class Video(File, DataContainer):
    """Concrete video file: File's fields plus DataContainer's metadata."""


class Flyer(BaseImage, DataContainer):
    """A concert flyer image that belongs to one ConcertMedia collection.

    The owning collection reaches its flyers through the reverse accessor
    named ``flyer``.
    """
    # on_delete is a required argument from Django 2.0 on. CASCADE is what
    # older Django versions applied implicitly, so behaviour is unchanged.
    concertmedia = m.ForeignKey(
        'ConcertMedia',
        related_name='flyer',
        on_delete=m.CASCADE,
    )


########################################################################################################################
# Collections                                                                                                          #
########################################################################################################################
class ConcertMedia(m.Model):
    """Collection of the media that belong to one concert.

    Intended to group items such as flyer, poster, photo gallery, audio
    recording and video recording.
    """
    # Optional link to the concert; kept (as NULL) if the concert is deleted.
    concert = m.ForeignKey(
        Concert,
        default=None,
        null=True,
        on_delete=m.SET_NULL,
    )

I'm trying to figure out how to best connect different models to one another. Take for instance the ConcertMedia. It's a container that has a relationship to concert which lives in the eventmanager app. A flyer and a poster would both be inheriting from image. So basically my question is this: What is a better way to provide the concert datacontainer with the flyer and the poster?

a. Create a submodel for flyer and one for poster inheriting from image. Give both of them a foreign key relationship to concert media.
b. Create a submodel for flyer and one for poster inheriting from image. Give image a foreign key relationship to concert media.
c. Only use the image model. Give the image model foreign key relationships to all the other models, where images will be used, for instance concert media, but also user avatar, etc.
d. Only use the image model. Give the image model a GenericForeignKey. Define a relationship to image on all the models that I want to have images.

Which option do you prefer and why? Please bear in mind that I will have to add additional models to this class during initial development but also later on. So the structure should be easily extendable.
Are there any other ways to relate models to one another?

<opinion>
I get the immediate feeling that these classes are "over-specified" - they almost have a C#/Java "feel" to them.

I'd start by rethinking this structure. When I look at this, I see basically a file storage facility and some metadata around collections of those files. So my first step would be to reduce the number of classes being created, and therefore the number of tables being created.

For example, and I'm just winging this, so I'm sure there's more than one error here, but I'd probably be looking at this approach:

# The managers are defined first because the ``Files`` class body
# instantiates them; defining them afterwards raises NameError at import.
class TrackManager(m.Manager):
    """Manager whose queryset only contains rows with media_type 'TRACK'."""

    def get_queryset(self):
        return super().get_queryset().filter(media_type='TRACK')


class ImageManager(m.Manager):
    """Manager whose queryset only contains rows with media_type 'IMAGE'."""

    def get_queryset(self):
        return super().get_queryset().filter(media_type='IMAGE')


class VideoManager(m.Manager):
    """Manager whose queryset only contains rows with media_type 'VIDEO'.

    NOTE(review): added by analogy with the two managers above because
    ``Files`` references it; confirm the filter value.
    """

    def get_queryset(self):
        return super().get_queryset().filter(media_type='VIDEO')


class Files(m.Model):
    """Single table for all media, with one manager per media type.

    ``objects`` is declared first and therefore stays the default,
    unfiltered manager; the others expose pre-filtered querysets.
    NOTE(review): the filter values must match whatever codes are actually
    stored in ``media_type`` - adjust to the real choices.
    """
    # ``...`` marks field arguments and further fields left out of the sketch.
    name = m.CharField(...)
    media_type = m.CharField(...)
    ...
    objects = m.Manager()
    tracks = TrackManager()
    image = ImageManager()
    video = VideoManager()

and so on.

Take advantage of Python's and Django's features to reduce the amount of code you write. Write what is needed, not what you think you might need.
</opinion>

Ken

I get the immediate feeling that these classes are "over-specified" - they almost have a C#/Java "feel" to them.

I thought so as well. I'll do some reading up on managers. Thanks for the hint. However, I'm thinking the managers only help me to manage access to types of models. It doesn't really help me with connecting models to one another or is there an aspect that I'm overlooking?

Correct, it doesn't address the ForeignKey relationships.

However, I've generally found that once I've identified the proper model structures, the relationships become more clear.

For example:

In the context of the previous discussion, why should there be different types / models for flyers and posters? Is that a distinction that really needs to be made at the model layer? If not, then this question becomes moot.

And that's where I'm going with this. Simplify the models, only make distinctions where those distinctions need to be made, and some number of your questions just go away.

Ken

1 Like

I have to admit that I was originally taught programming at engineering school with Java.

Is that a distinction that really needs to be made at the model layer?

This question was useful to me. And then I read up a little bit on GenericForeignKey relationships. Let's say I have a class images for all the image applications in my project. So image would need to be associated with blog post entry, with concert event, and so on. The knot in my head: I was thinking about this in terms of foreign key relationships and concluded it would make sense to have a model for each application so that I don't end up with a model that has 10 foreign key fields but only ever uses one.
Now I have decided to give image a GenericForeignKey and make the type distinction a property rather than to resolve it at the model layer which leaves me with two concrete classes: file and image. I can just use those everywhere where they are needed with GenericForeignKey.

So I'm going to toss out one more possibility to you:

Superficially, it seems like it might be possible to use an image with more than one "document". In other words, what you really end up with is a many-to-many relationship between (say) blog posts and images, or where an image might be used in both a flyer and a blog post.

In this situation, this is a case where I would consider having individual many-to-many relationship tables between the "documents" and "media" classes.

Again, winging this on the fly, I could see creating a base abstract "document" class defining the many-to-many with the images class and any other common fields. The implementations of that class (blogs, flyers, posters, etc) would contain those elements specific to that type of document.

<opinion>
I'm not a huge fan of GFKs, except in a very small number of special circumstances where they're really needed. (Among other sources, this blog post expresses a lot of what I think about the topic - https://lukeplant.me.uk/blog/posts/avoid-django-genericforeignkey/)
The issues around data integrity have bitten me in the past - and so I'm more reluctant to use them now than I used to be.
</opinion>

Hm...
The article is interesting, however it has a relationship between one model that can be related to two different models. That is rather basic. My image model would be relating to way more other models. There's one thing I want in my project: I want all the media files, (music, video, images) to be piling up so that they can all automatically become part of an archive which I want to make available for users.

The image model in the final project would relate to (the name in brackets is the associated app):

  • concerts as flyer and/or poster (eventmanager)
  • blog entries as content (blog)
  • image collections, for instance photo galleries (datacontainer)
  • records as front and back cover (datacontainer)
  • profile as avatar (user_profile)
  • contact entry as avatar (contact manager)
  • band as the logo (contact manager)
  • archive entries (archive, I have no idea yet what this is gonna look like)

I suppose I could write a class ImageAttachedModel() and let all the models that will have images attached to them inherit from that. But that would mean that I would have to have a non-abstract base class from which they all inherit. So there's already two tables being queried.
But then I think that the way I want to use files and images is very comparable to the tagging app from the django documentation which is also one of the examples that the article describes as a proper use case for GFK. A tag is something you can attach to all sorts of elements on a site. Just like my files and images are additional things that are attached to all sorts of elements.
I think I'll have to think some more about this. :slightly_smiling_face:

Actually, your "ImageAttachedModel" could be an abstract base class, but either way would work.

I don't shy away from creating tables - database engines are designed to optimize join operations between tables. I'd much rather have a set of many-to-many tables relating "document" types to "images" than GFKs. Using a GFK prevents those types of optimizations from being used.

What I try to avoid is building models based on what I think the functionality is going to be rather than building them based on the entities that they're representing.

I was under the impression that less tables is always better, but struggled with applying this idea properly during design.

I think my main problem is that I lack some experience and that my project is somewhat ambitious for the level of my skill. Going back to the flyer and the poster, they represent two different entities which is why I originally decided to have them as two separate models...

A question on the many-to-many relationship defined at the base model level: Say I have one between BaseImage and BaseDocument. During runtime I would of course work with BlogPost and Image. When I use the manager, will it return BaseImage instances or will it return Image instances?

Fewer unnecessary tables are always better. Or, as one of my DB architect friends once explained it to me, focus on the commonalities, not the differences.

But do they really? (That's an honest question, I don't know your requirements well enough to make that determination.) Have you identified all the attributes for both to know what the union and intersections are of the columns needed for each?

The direct M2M link is going to return the references to the base classes. But, remember that the base classes are linked to the child classes via a one-to-one field, allowing you direct reference to the child class.

However, doing this type of multi-m2m style relationships between abstract class structures actually becomes a bit awkward in practice because the relationship doesn't track which child class is related to the parent class. You end up having to either track it as additional data in the m2m table (which can then be used in filters) or checking to see if the '_set' relationships have any entries.

Upon further reflection then, I guess I'd want to see a more detailed description of what these different "documents" and "images" need to contain in the way of information needing to be stored, before trying to create the models for them. It doesn't appear trivial under any set of conditions, but you could end up with a real mess if you're not careful.

When you look at trees for too long, you might forget that there is a forest. :slightly_smiling_face:

So I had an original design plan, but it changed a lot while trying to implement it. For instance in the beginning I thought of the multilanguage text as its own app, but now I found a simpler way to do it with JSON-fields. So after your advice with the ManyToMany-relationships I went back to the drawing board. I now have a much simpler base design pattern:

Archive might not even have to be its own app anymore. My DataContainer model got a lot bigger though. I decided instead of having separate models for File and Image, I'll just have one model with a FileField and an ImageField. That is the only part that bothers me. My DataContainer model has three groups of fields where only one group will be used at a time.
Here's what my datacontainer/models.py looks like if you want to see it. In case you notice anything that looks like bad design, feel free to point it out. :wink:

import datetime
from django.db import models as m
from django.utils.translation import ugettext as _
from django.contrib.auth.models import User
from eventmanager.models import Concert
from blog.models import BaseEntry


########################################################################################################################
# Single Item                                                                                                         #
########################################################################################################################
def archive_path(instance, filename):
    """Build the upload path for a file, relative to MEDIA_ROOT.

    The result has the form
    ``archive/<file_type>s/<YYYYmmddHHMMSS>_<filename>``, where the
    timestamp is taken from the naive local clock at call time.

    :param instance: object exposing a ``file_type`` attribute.
    :param filename: original name of the uploaded file.
    :return: the relative storage path as a string.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    folder = '{0}s'.format(instance.file_type)
    stored_name = '{0}_{1}'.format(stamp, filename)
    return '/'.join(('archive', folder, stored_name))


class BaseDataContainer(m.Model):
    """Abstract base that links its subclasses to blog entries.

    NOTE(review): if ``BaseEntry`` is itself an abstract model, it cannot
    be the target of a relation - confirm it is concrete.
    """
    # TODO relationships to Event?
    # events = m.ManyToManyField()
    blog_entries = m.ManyToManyField(
        BaseEntry,
    )

    class Meta:
        abstract = True


class DataContainer(m.Model):
    """A single archived media item, hosted internally or externally.

    The fields fall into groups of which only one is filled per row:
    an externally hosted item (``render_type``/``url``), an internally
    stored file (``file``) or an internally stored image (``image``).
    Group-specific fields are therefore optional; the common and meta
    fields keep their original requirements.
    """
    # --- type of file ---------------------------------------------------
    # Unambiguous names for the one-character ``file_type`` codes. The
    # short names IMAGE and OTHER are also used for media types further
    # down, and a second assignment in a class body silently overwrites
    # the first, so the file-type values get their own prefix.
    FILE_AUDIO = 'a'
    FILE_VIDEO = 'v'
    FILE_IMAGE = 'i'
    FILE_OTHER = 'o'
    # Kept for backward compatibility; these two names were never rebound.
    AUDIO = FILE_AUDIO
    VIDEO = FILE_VIDEO
    file_type_choices = (
        (FILE_AUDIO, _('audio')),
        (FILE_VIDEO, _('video')),
        (FILE_IMAGE, _('image')),
        (FILE_OTHER, _('other')),)
    # --- render information for the external hoster ----------------------
    YOUTUBE = 'yt'
    render_type_choices = (
        (YOUTUBE, _('YouTube')),
    )
    # --- type of media ---------------------------------------------------
    # IMAGE and OTHER keep the values they effectively always had on this
    # class ('image' and 'other').
    TRACK = 'track'
    LIVERECORDING = 'liverecording'
    IMAGE = 'image'
    FLYER = 'flyer'
    POSTER = 'poster'
    MUSICVIDEO = 'musicvideo'
    LIVEVIDEO = 'livevideo'
    OTHER = 'other'
    media_type_choices = (
        (TRACK, _('track')),
        (LIVERECORDING, _('live recording')),
        (IMAGE, _('image')),
        (FLYER, _('flyer')),
        (POSTER, _('poster')),
        (MUSICVIDEO, _('music video')),
        (LIVEVIDEO, _('live video')),
        (OTHER, _('other')),)
    # automatic properties
    cr_date = m.DateTimeField(auto_now_add=True, editable=False, verbose_name=_('Date of Creation'))  # date of creation
    # common properties
    internal = m.BooleanField()
    file_type = m.CharField(max_length=1, choices=file_type_choices)
    media_type = m.CharField(max_length=30, choices=media_type_choices)
    # external - externally hosted data (left empty for internal items)
    render_type = m.CharField(max_length=2, choices=render_type_choices, blank=True)  # a renderer for the hoster of this file
    url = m.URLField(blank=True)
    # internal (left empty for external items)
    size = m.IntegerField(null=True, blank=True)  # file size
    extension = m.CharField(max_length=5, blank=True)  # file extension
    downloadable = m.BooleanField(default=False)
    # file - internally hosted file
    file = m.FileField(upload_to=archive_path, blank=True)
    # image - internally hosted file
    image = m.ImageField(upload_to=archive_path, blank=True)
    # meta information
    name = m.CharField(max_length=255)
    added_by = m.ForeignKey(User, null=True, default=None, on_delete=m.SET_NULL)
    # blank=True alone lets forms submit an empty value that the database
    # column would then reject; null=True makes the empty value storable.
    created_on = m.DateTimeField(blank=True, null=True)  # When was this media created?
    created_by = m.CharField(max_length=255)  # Who created it?
    created_at = m.CharField(max_length=255)  # Where was it created?
    notes = m.TextField()  # Notes for internal use


########################################################################################################################
# Collections                                                                                                          #
########################################################################################################################
class DataCollection(BaseDataContainer):
    """Abstract base for collections; adds an automatic creation timestamp."""
    # Date of creation; set once on insert and hidden from forms.
    cr_date = m.DateTimeField(
        auto_now_add=True,
        editable=False,
        verbose_name=_('Date of Creation'),
    )

    class Meta:
        abstract = True


class PhotoGallery(DataCollection):
    """A named collection of DataContainer items used as photos."""
    name = m.CharField(max_length=255)
    # Reverse accessor on DataContainer is ``photo_gallery``.
    photos = m.ManyToManyField(
        DataContainer,
        related_name='photo_gallery',
    )


class ConcertMedia(DataCollection):
    """Media gathered for one concert, grouped by the role each item plays.

    Every role is its own many-to-many relation to DataContainer, each with
    a distinct ``cm_``-prefixed reverse accessor.
    """
    # Optional link to the concert; kept (as NULL) if the concert is deleted.
    concert = m.ForeignKey(
        Concert,
        on_delete=m.SET_NULL,
        null=True,
    )
    flyer = m.ManyToManyField(
        DataContainer, related_name='cm_flyer',
    )
    poster = m.ManyToManyField(
        DataContainer, related_name='cm_poster',
    )
    photo_gallery = m.ManyToManyField(
        DataContainer, related_name='cm_photo_gallery',
    )
    audio_recording = m.ManyToManyField(
        DataContainer, related_name='cm_audio_recording',
    )
    video_recording = m.ManyToManyField(
        DataContainer, related_name='cm_video_recording',
    )

Thanks for all the advice so far, it has been very useful.