Am working on a django project which allows a user to upload an excel file containing data and the system reads, cleans, saves the data in a json format and gives insihts from the saved data.
Now, i want if a user uploads an excel file containing the same columns as the columns in the db, the system should update or create new fields.
Below is my models.py
from django.db import models
class UploadedFile(models.Model):
file = models.FileField(upload_to="data-files/")
uploaded_at = models.DateTimeField(auto_now_add=True)
def __str__(self):
return f"{self.file} uploaded at {self.uploaded_at}"
class CleanedData(models.Model):
data = models.JSONField()
uploaded_file = models.ForeignKey(UploadedFile, on_delete=models.CASCADE)
created_at = models.DateTimeField(auto_now_add=True)
And below are the required views
def upload_data(request):
if request.method == "POST":
form = forms.UploadFileForm(request.POST, request.FILES)
if form.is_valid():
uploaded_file = form.save()
return redirect("process_data", pk=uploaded_file.pk)
else:
form = forms.UploadFileForm()
context = {
"form": form,
}
return render(request, "upload_data.html", context)
def process_data(request, pk):
uploaded_file = get_object_or_404(models.UploadedFile, pk=pk)
file_path = os.path.join(settings.MEDIA_ROOT, uploaded_file.file.name)
# reading the excel file
df = pd.read_excel(file_path)
# cleaning the data
df_cleaned = clean_data(df)
# saving the cleaned data to the db
save_data_to_db(df_cleaned, uploaded_file)
return render(request, "success.html")
def clean_data(df):
# replacing NaN with empty strings
df = df.fillna("")
return df
def save_data_to_db(df, uploaded_file):
cleaned_data = models.CleanedData(data=df.to_dict(orient="records"), uploaded_file=uploaded_file)
cleaned_data.save()
def visualize_data(request, pk):
cleaned_data = models.CleanedData.objects.get(pk=pk)
df = pd.DataFrame(cleaned_data.data)
# Calculate insights
training_received_counts = df['Training Received '].value_counts().to_dict()
form_of_land_access_counts = df['Form of land access'].value_counts().to_dict()
# Example visualization using Plotly
if 'Training Received ' in df.columns:
fig = px.histogram(df, x='Training Received ')
plot_div = plot(fig, output_type='div')
else:
plot_div = "Column 'Training Received ' not found in the data"
context = {
'plot_div': plot_div,
'training_received_counts': training_received_counts,
'form_of_land_access_counts': form_of_land_access_counts,
}
return render(request, 'visualization.html', context)
What am getting is that whenever a user uploads a file, its data is saved independently regardless of whether there is another data with the same columns and rows