..

mongodb

TODO

  • Basic queries
  • Concepts of Arrays
  • Aggregates
  • cursors
  • MapReduce
  • functions
  • innovative queries

Implementation

Loading dataset

Pasted image 20231121130843 Pasted image 20231121130941

Average rating per state

db.business.aggregate([
    {
        $group: {
            _id: "$state",
            avgStars: { $avg: "$stars" }
        }
    }
])

Pasted image 20231121131453

Number of restaurants per state

db.business.aggregate([
    {
        $group: {
            _id: "$state",
            count: { $sum: 1 }
        }
    },
    {
        $project: {
            _id: 0,
            state: "$_id",
            businessCount: "$count"
        }
    }
])

Pasted image 20231121131947

Number of distinct Restaurants

db.business.aggregate([
    {
        $group: {
            _id: null,
            distinctNameCount: { $addToSet: "$name" }
        }
    },
    {
        $project: {
            _id: 0,
            distinctNameCount: { $size: "$distinctNameCount" }
        }
    }
])

Pasted image 20231121132043

Top Rated Restaurant in each state

db.business.aggregate([
    {
        $lookup: {
            from: "business",
            let: { state: "$state", stars: "$stars" },
            pipeline: [
                {
                    $group: {
                        _id: "$state",
                        maxStars: { $max: "$stars" }
                    }
                },
                {
                    $match: {
                        $expr: {
                            $and: [
                                { $eq: ["$$state", "$_id"] },
                                { $eq: ["$$stars", "$maxStars"] }
                            ]
                        }
                    }
                }
            ],
            as: "maxStarsData"
        }
    },
    {
        $unwind: "$maxStarsData"
    },
    {
        $match: {
            $expr: {
                $and: [
                    { $eq: ["$state", "$maxStarsData._id"] },
                    { $eq: ["$stars", "$maxStarsData.maxStars"] }
                ]
            }
        }
    },
    {
        $project: {
            _id: 0,
            state: 1,
            stars: 1,
            name: 1
        }
    }
])

Pasted image 20231121132144

Restaurants with the most branches

db.business.aggregate([
    {
        $group: {
            _id: "$name",
            locations: { $addToSet: "$address" },
            count: { $sum: 1 }
        }
    },
    {
        $match: {
            count: { $gt: 1 }
        }
    },
    {
        $sort: {
            count: -1
        }
    },
    {
        $project: {
            _id: 0,
            name: "$_id",
            locations: 1
        }
    }
])

Pasted image 20231121132304

Restaurants which covers the most states

db.business.aggregate([
    {
        $group: {
            _id: "$name",
            uniqueStates: { $addToSet: "$state" },
            count: { $sum: 1 }
        }
    },
    {
        $match: {
            count: { $gt: 1 }
        }
    },
    {
        $sort: {
            count: 1
        }
    },
    {
        $project: {
            _id: 0,
            name: "$_id",
            state: { $size: "$uniqueStates" }
        }
    }
])

Pasted image 20231121132444

Number of branches per state

db.business.aggregate([
    {
        $group: {
            _id: {
                name: "$name",
                state: "$state"
            },
            addressCount: { $sum: 1 }
        }
    },
    {
        $match: {
            addressCount: { $gt: 1 }
        }
    },
    {
        $sort: {
            addressCount: 1
        }
    },
    {
        $project: {
            _id: 0,
            name: "$_id.name",
            state: "$_id.state",
            address: "$addressCount"
        }
    }
])

Pasted image 20231121132531

Complex Queries

db.user.aggregate([
    {
        $project: {
            name: 1,
            nr_friends: {
                $size: {
                    $ifNull: [
                        { $split: ["$friends", ","] },
                        []
                    ]
                }
            }
        }
    },
    {
        $sort: {
            nr_friends: -1
        }
    }
])

Pasted image 20231121135348

Correlation Coefficient of review_count and stars

db.business.aggregate([
    {
        $project: {
            stars: { $toDouble: "$stars" },
            review_count: { $toDouble: "$review_count" }
        }
    },
    {
        $group: {
            _id: null,
            review_and_stars: { $push: { stars: "$stars", review_count: "$review_count" } }
        }
    },
    {
        $project: {
            _id: 0,
            correlation: {
                $function: {
                    body: "function calculateCorrelation(a, b) {const n = a.length, m = a.reduce((s, v) => s + v, 0) / n, k = b.reduce((s, v) => s + v, 0) / n, cov = Array.from({ length: n }, (_, i) => (a[i] - m) * (b[i] - k)).reduce((s, v) => s + v, 0), var1 = a.reduce((s, v) => s + (v - m) * (v - m), 0), var2 = b.reduce((s, v) => s + (v - k) * (v - k), 0); return cov / Math.sqrt(var1 * var2);}
",
                    args: ["$review_and_stars.stars", "$review_and_stars.review_count"],
                    lang: "js"
                }
            }
        }
    }
])

Pasted image 20231121140227

Most visited restaurant

db.checkin.aggregate([
    {
        $project: {
            business_id: 1,
            _id: 0,
            nr_visited: { $size: { $split: ["$date", ","] } }
        }
    },
    {
        $lookup: {
            from: "business",
            localField: "business_id",
            foreignField: "business_id",
            as: "business_info"
        }
    },
    {
        $unwind: "$business_info"
    },
    {
        $project: {
            _id: 0,
            name: "$business_info.name",
            nr_visited: 1
        }
    }
])

Pasted image 20231121142036

Which year produced the most number of elite users

db.user.aggregate([
    {
        $project: {
            year: { $toInt: { $substr: ["$yelping_since", 0, 4] } },
            nr_elite: { $size: { $split: ["$elite", ','] } }
        }
    },
    {
        $group: {
            _id: "$year",
            total_nr_elite: { $sum: "$nr_elite" }
        }
    },
    {
        $sort: {
            _id: 1
        }
    }
])

Pasted image 20231121142433

Finding Average Star by State using MapReduce

var mapFunction = function() {
    emit(this.state, { totalStars: this.stars });
};

var reduceFunction = function(key, values) {
    var reducedObject = { count: 0, totalStars: 0 };

    values.forEach(function(value) {
        reducedObject.count += 1;
        reducedObject.totalStars += value.totalStars;
    });

    reducedObject.averageStars = reducedObject.totalStars / reducedObject.count;

    return { state: key, averageStars: reducedObject.averageStars };
};

db.business.mapReduce(
    mapFunction,
    reduceFunction,
    {
        out: { inline: 1 }
    }
);

Pasted image 20231121143353

Cursor

var cursor = db.business.find({"state": "AZ"})
while (cursor.hasNext()) { 
	var document = cursor.next();
	printjson(document);
}

Pasted image 20231121143626