Avoiding Duplicate Entries When One or More Fields Are None

Hi everyone,

I’m working on a Django model for storing addresses, and I need to enforce uniqueness across five fields: street, house_number, city, state, and zip_code. The challenge is that one or more of these fields can be None, and I want to avoid duplicates even in such cases.

Here’s my model:

class Address(models.Model):
    street = models.CharField(max_length=255, null=True, blank=True)
    house_number = models.CharField(max_length=50, null=True, blank=True)
    city = models.CharField(max_length=100, null=True, blank=True)
    state = models.CharField(max_length=2, null=True, blank=True)
    zip_code = models.CharField(max_length=10, null=True, blank=True)
    
    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=['street', 'house_number', 'city', 'state', 'zip_code'],
                name='unique_address_with_nulls',
            )
        ]

What I’m Trying to Achieve:

  • Prevent duplicate entries where all non-None fields match.
  • For example, these should be considered duplicates:
    1. street="123 Elm", house_number=None, city="Springfield", state="CA", zip_code="12345"
    2. street="123 Elm", house_number=None, city="Springfield", state="CA", zip_code="12345"

What is the best and most efficient way to achieve this?

Thank you!

If you’re using Postgres I’d suggest having a look at nulls_distinct otherwise you’ll likely have to coalese your nullable fields to a sentinel value to force collisions of nulls.

Also, you could use a functional unique constraint: Constraints reference | Django documentation | Django

class Address(models.Model):
    street = models.CharField(max_length=255, null=True, blank=True)
    house_number = models.CharField(max_length=50, null=True, blank=True)
    city = models.CharField(max_length=100, null=True, blank=True)
    state = models.CharField(max_length=2, null=True, blank=True)
    zip_code = models.CharField(max_length=10, null=True, blank=True)
    
    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=['street', 'house_number', 'city', 'state', 'zip_code'],
                name='unique_address_with_nulls',
                condition=models.Q(),  # Ensure that the constraint applies to all rows.
                deferrable=models.Deferrable.DEFERRED,  # Optional: it helps with bulk operations in case you need it
                expressions=[
                    models.functions.Coalesce('street', models.Value('')),
                    models.functions.Coalesce('house_number', models.Value('')),
                    models.functions.Coalesce('city', models.Value('')),
                    models.functions.Coalesce('state', models.Value('')),
                    models.functions.Coalesce('zip_code', models.Value(''))
                ]
            )
        ]

Thank you both charettes and anefta and apologies for the late reply. I’m on 4.2 and unlikely to upgrade to 5 soon.

I did the coalescing to nullable fields and it works. Here’s the model

class Address(models.Model):
    street = models.CharField(max_length=255, null=True, blank=True)
    house_number = models.CharField(max_length=150, null=True, blank=True)
    city = models.CharField(max_length=255, blank=True, null=True)
    state = models.CharField(max_length=2, null=True, blank=True)
    zip_code = USZipCodeField(null=True, blank=True)
    country = models.CharField(max_length=255, null=True, blank=True)
    latitude = models.FloatField(validators=[
        MinValueValidator(-90.0),
        MaxValueValidator(90.0),
    ], null=True, blank=True)
    longitude = models.FloatField(validators=[
        MinValueValidator(-180.0),
        MaxValueValidator(180.0),
    ], null=True, blank=True)
    
    class Meta:
        constraints = [
            models.UniqueConstraint(fields=['street', 'house_number', 'city', 'state', 'zip_code'], name='unique_address_new')
        ]
        verbose_name_plural = 'Adresses'
        indexes = [
            models.Index(fields=['zip_code', 'city', 'state']),
            models.Index(fields=['latitude', 'longitude']),
        ]

    def __str__(self):
        address_parts = [self.street, self.house_number, self.city, self.state, self.zip_code, self.country]
        return ', '.join(filter(None, address_parts))
    
    @property
    def full_address(self):
        return self.__str__()

    def get_coordinates(self):
        return (self.latitude, self.longitude)
    
    def clean(self):
        if self.city:
            self.city = self.city.title()
        if self.street:
            self.street = self.street.title()
        if self.country:
            self.country = self.country.title()
        if self.house_number:
            self.house_number = self.house_number.title()
            
        # Replace None with an empty string for uniqueness checks
        self.street = self.street or ''
        self.house_number = self.house_number or ''
        self.city = self.city or ''
        self.state = self.state or ''
        self.zip_code = self.zip_code or ''
        self.country = self.country or ''
            

    def save(self, *args, **kwargs):
        self.full_clean()  # Ensures clean() is called before saving
        super().save(*args, **kwargs)