# Load data into Azure Cosmos DB for NoSQL

This notebook demonstrates how to load the Cosmic Works JSON files from Azure Storage into an Azure Cosmos DB database using the NoSQL API.

In [1]:
import os
import json
import requests
from models import Product, ProductList, Customer, CustomerList, SalesOrder, SalesOrderList
from azure.cosmos import CosmosClient, DatabaseProxy, ContainerProxy
from dotenv import load_dotenv

## Establish a connection to the database

In [None]:
# Load settings from a local .env file (if one exists) into the process environment.
load_dotenv()
CONNECTION_STRING = os.environ.get("COSMOS_DB_CONNECTION_STRING")

# Fail early with an actionable message instead of handing None (or an empty
# string) to the Cosmos client, where the resulting error is harder to interpret.
if not CONNECTION_STRING:
    raise ValueError(
        "COSMOS_DB_CONNECTION_STRING is not set. "
        "Add it to your .env file or environment before running this notebook."
    )

# Initialize the Azure Cosmos DB client
client = CosmosClient.from_connection_string(CONNECTION_STRING)

# Create or load the cosmic_works_pv database
database_name = "cosmic_works_pv"
db = client.create_database_if_not_exists(id=database_name)

## Load products

In [3]:
# Add product data to database using upsert
# Get Cosmic Works product data from Azure Blob Storage
product_raw_data = "https://cosmosdbcosmicworks.blob.core.windows.net/cosmic-works-small/product.json"

# Without a timeout, requests can wait indefinitely on a stalled connection.
# raise_for_status() surfaces HTTP errors (4xx/5xx) here instead of letting an
# error payload reach the JSON parser or the Product model.
product_response = requests.get(product_raw_data, timeout=30)
product_response.raise_for_status()

# Validate every raw document by constructing a typed Product model from it.
product_data = ProductList(items=[Product(**data) for data in product_response.json()])

# Create or retrieve the product container, partitioned on /categoryId
product_container: ContainerProxy = db.create_container_if_not_exists(
    id="product",
    partition_key={"paths": ["/categoryId"], "kind": "Hash"},
)

# Upsert the product data to the container
for product in product_data.items:
    # by_alias=True writes the models' field aliases as the document keys.
    product_container.upsert_item(product.model_dump(by_alias=True))

## Load customers and sales raw data

In the source dataset, customer and sales order documents are stored in the same file (`customer.json`). The `type` field distinguishes between the two kinds of documents.

In [4]:
# Download the combined customer / sales order file.
customer_sales_raw_data = "https://cosmosdbcosmicworks.blob.core.windows.net/cosmic-works-small/customer.json"

# Without a timeout, requests can wait indefinitely on a stalled connection.
# raise_for_status() surfaces HTTP errors (4xx/5xx) before the body is parsed.
response = requests.get(customer_sales_raw_data, timeout=30)
response.raise_for_status()

# Override decoding: the utf-8-sig codec drops a leading byte-order mark,
# which would otherwise break JSON parsing (presumably this file starts with one).
response.encoding = 'utf-8-sig'
response_json = response.json()

# Split the documents by their "type" field.
# filter where type is customer
customers = [cust for cust in response_json if cust["type"] == "customer"]
# filter where type is salesOrder
sales_orders = [sales for sales in response_json if sales["type"] == "salesOrder"]

## Load customers

In [5]:
# Build typed Customer models from the raw customer documents.
customer_data = CustomerList(
    items=[Customer(**raw_customer) for raw_customer in customers]
)

# Fetch the "customer" container, creating it if it does not exist yet.
# Documents are partitioned on /customerId.
customer_container: ContainerProxy = db.create_container_if_not_exists(
    id="customer",
    partition_key={"paths": ["/customerId"], "kind": "Hash"},
)

# Upsert each customer into the container.
for customer in customer_data.items:
    # Serialize to a JSON string and parse it back: this works around the
    # datetime serialization issue by giving the SDK JSON-compatible values.
    customer_dict = json.loads(customer.model_dump_json(by_alias=True))
    customer_container.upsert_item(customer_dict)

## Load sales orders

In [6]:
# Build typed SalesOrder models from the raw sales order documents.
sales_order_data = SalesOrderList(
    items=[SalesOrder(**raw_order) for raw_order in sales_orders]
)

# Fetch the "salesOrder" container, creating it if it does not exist yet.
# Documents are partitioned on /customerId.
sales_order_container: ContainerProxy = db.create_container_if_not_exists(
    id="salesOrder",
    partition_key={"paths": ["/customerId"], "kind": "Hash"},
)

# Upsert each sales order into the container.
# Expect this loop to take approximately 1.5 minutes to run.
for sales_order in sales_order_data.items:
    # Serialize to a JSON string and parse it back: this works around the
    # datetime serialization issue by giving the SDK JSON-compatible values.
    sales_order_dict = json.loads(sales_order.model_dump_json(by_alias=True))
    sales_order_container.upsert_item(sales_order_dict)

## Clean up

No clean up is necessary as this data is used in subsequent labs.