..
mongodb
TODO
- Basic queries
- Concepts of Arrays
- Aggregates
- cursors
- MapReduce
- functions
- innovative queries
Implementation
Loading dataset
Average rating per state
db.business.aggregate([
{
$group: {
_id: "$state",
avgStars: { $avg: "$stars" }
}
}
])
Number of restaurants per state
db.business.aggregate([
{
$group: {
_id: "$state",
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
state: "$_id",
businessCount: "$count"
}
}
])
Number of distinct Restaurants
db.business.aggregate([
{
$group: {
_id: null,
distinctNameCount: { $addToSet: "$name" }
}
},
{
$project: {
_id: 0,
distinctNameCount: { $size: "$distinctNameCount" }
}
}
])
Top Rated Restaurant in each state
db.business.aggregate([
{
$lookup: {
from: "business",
let: { state: "$state", stars: "$stars" },
pipeline: [
{
$group: {
_id: "$state",
maxStars: { $max: "$stars" }
}
},
{
$match: {
$expr: {
$and: [
{ $eq: ["$$state", "$_id"] },
{ $eq: ["$$stars", "$maxStars"] }
]
}
}
}
],
as: "maxStarsData"
}
},
{
$unwind: "$maxStarsData"
},
{
$match: {
$expr: {
$and: [
{ $eq: ["$state", "$maxStarsData._id"] },
{ $eq: ["$stars", "$maxStarsData.maxStars"] }
]
}
}
},
{
$project: {
_id: 0,
state: 1,
stars: 1,
name: 1
}
}
])
Restaurants with the most branches
db.business.aggregate([
{
$group: {
_id: "$name",
locations: { $addToSet: "$address" },
count: { $sum: 1 }
}
},
{
$match: {
count: { $gt: 1 }
}
},
{
$sort: {
count: -1
}
},
{
$project: {
_id: 0,
name: "$_id",
locations: 1
}
}
])
Restaurants which covers the most states
db.business.aggregate([
{
$group: {
_id: "$name",
uniqueStates: { $addToSet: "$state" },
count: { $sum: 1 }
}
},
{
$match: {
count: { $gt: 1 }
}
},
{
$sort: {
count: 1
}
},
{
$project: {
_id: 0,
name: "$_id",
state: { $size: "$uniqueStates" }
}
}
])
Number of branches per state
db.business.aggregate([
{
$group: {
_id: {
name: "$name",
state: "$state"
},
addressCount: { $sum: 1 }
}
},
{
$match: {
addressCount: { $gt: 1 }
}
},
{
$sort: {
addressCount: 1
}
},
{
$project: {
_id: 0,
name: "$_id.name",
state: "$_id.state",
address: "$addressCount"
}
}
])
Complex Queries
Most popular user
db.user.aggregate([
{
$project: {
name: 1,
nr_friends: {
$size: {
$ifNull: [
{ $split: ["$friends", ","] },
[]
]
}
}
}
},
{
$sort: {
nr_friends: -1
}
}
])
Correlation Coefficient of review_count and stars
db.business.aggregate([
{
$project: {
stars: { $toDouble: "$stars" },
review_count: { $toDouble: "$review_count" }
}
},
{
$group: {
_id: null,
review_and_stars: { $push: { stars: "$stars", review_count: "$review_count" } }
}
},
{
$project: {
_id: 0,
correlation: {
$function: {
body: "function calculateCorrelation(a, b) {const n = a.length, m = a.reduce((s, v) => s + v, 0) / n, k = b.reduce((s, v) => s + v, 0) / n, cov = Array.from({ length: n }, (_, i) => (a[i] - m) * (b[i] - k)).reduce((s, v) => s + v, 0), var1 = a.reduce((s, v) => s + (v - m) * (v - m), 0), var2 = b.reduce((s, v) => s + (v - k) * (v - k), 0); return cov / Math.sqrt(var1 * var2);}
",
args: ["$review_and_stars.stars", "$review_and_stars.review_count"],
lang: "js"
}
}
}
}
])
Most visited restaurant
db.checkin.aggregate([
{
$project: {
business_id: 1,
_id: 0,
nr_visited: { $size: { $split: ["$date", ","] } }
}
},
{
$lookup: {
from: "business",
localField: "business_id",
foreignField: "business_id",
as: "business_info"
}
},
{
$unwind: "$business_info"
},
{
$project: {
_id: 0,
name: "$business_info.name",
nr_visited: 1
}
}
])
Which year produced the most number of elite users
db.user.aggregate([
{
$project: {
year: { $toInt: { $substr: ["$yelping_since", 0, 4] } },
nr_elite: { $size: { $split: ["$elite", ','] } }
}
},
{
$group: {
_id: "$year",
total_nr_elite: { $sum: "$nr_elite" }
}
},
{
$sort: {
_id: 1
}
}
])
Finding Average Star by State using MapReduce
var mapFunction = function() {
emit(this.state, { totalStars: this.stars });
};
var reduceFunction = function(key, values) {
var reducedObject = { count: 0, totalStars: 0 };
values.forEach(function(value) {
reducedObject.count += 1;
reducedObject.totalStars += value.totalStars;
});
reducedObject.averageStars = reducedObject.totalStars / reducedObject.count;
return { state: key, averageStars: reducedObject.averageStars };
};
db.business.mapReduce(
mapFunction,
reduceFunction,
{
out: { inline: 1 }
}
);
Cursor
var cursor = db.business.find({"state": "AZ"})
while (cursor.hasNext()) {
var document = cursor.next();
printjson(document);
}