How to efficiently store objects from a JSON file into MongoDB using Node.js streams?
Considering that the file size can cross a GB, here is what I have done.
First, I create a large JSON file using the following code (which outputs a ~198 MB file):
const fs = require('fs');

const arrayOfUsers = [];
for (let i = 0; i < 1e6; i += 1) {
  arrayOfUsers.push({
    firstName: 'Barb',
    lastName: 'E. Riser',
    status: 'unregistered',
    updatedOn: '2017-01-17T13:24:51.403Z',
    createdOn: '2017-01-17T13:24:51.403Z',
    googleLocation: {
      loc: {
        coordinates: [null, null],
      },
    },
  });
}

// write to file
fs.writeFile(`${__dirname}/largeUsers.json`, JSON.stringify(arrayOfUsers), (err) => {
  if (err) {
    console.log('Error occurred, exiting...');
    process.exit(-1);
  }
  console.log('Write successful, exiting...');
  process.exit(0);
});
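(As an aside, for files that really cross a GB the generator above would probably not scale, because JSON.stringify builds the entire array as one string in memory. A rough streamed alternative could look like the following; it keeps the same record shape and output path as above, and for very large outputs the writable stream's 'drain' event should also be respected.)

const fs = require('fs');

const out = fs.createWriteStream(`${__dirname}/largeUsers.json`);
out.write('[');
for (let i = 0; i < 1e6; i += 1) {
  const user = {
    firstName: 'Barb',
    lastName: 'E. Riser',
    status: 'unregistered',
    updatedOn: '2017-01-17T13:24:51.403Z',
    createdOn: '2017-01-17T13:24:51.403Z',
    googleLocation: { loc: { coordinates: [null, null] } },
  };
  // stringify one record at a time instead of the whole array
  out.write((i === 0 ? '' : ',') + JSON.stringify(user));
}
out.end(']', () => console.log('Write successful, exiting...'));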
Now I am using this same file to store its content in MongoDB:
const fs = require('fs');
const mongoose = require('mongoose');
const JSONStream = require('JSONStream');
const User = require('./models/User');

const startTime = Date.now();
const databaseURL = 'mongodb://127.0.0.1/dextra';

mongoose.connect(databaseURL);
mongoose.Promise = global.Promise;
const db = mongoose.connection;

db.on('open', () => {
  console.log('Connected to mongo server.\nImport from file to DB started...');
  const dataStreamFromFile = fs.createReadStream(`${__dirname}/largeUsers.json`);
  dataStreamFromFile.pipe(JSONStream.parse('*')).on('data', (chunk) => {
    new User(chunk).save();
  });
  dataStreamFromFile.on('end', () => {
    const timeTaken = Date.now() - startTime;
    console.log(`Import completed in ${timeTaken} millisecs, closing connection...`);
    db.close();
    process.exit(0);
  });
});

db.on('error', (err) => {
  console.error('MongoDB connection error:', err);
  process.exit(-1);
});
The issues I am facing are as follows:
- Without a transform stream, the raw chunks emitted by the read stream are not guaranteed to contain complete objects, so I cannot parse them directly.
- JSONStream parses every object in the file one at a time, and saving each document individually takes a long time.
- How should I manage the MongoDB connection with Mongoose?
- How can I decrease the latency of the import?
- How can I build batches from the parsed objects (perhaps with another transform stream) and insert, say, 500 or 1000 documents per batch? (A rough sketch of what I have in mind follows this list.)
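For the last point, this is roughly what I have in mind: pause the JSONStream parser while a batch is inserted with Mongoose's Model.insertMany (available since Mongoose 4.4) and resume it afterwards. It is only a sketch, not tested at scale, and the BATCH_SIZE of 1000 is arbitrary. Is this the right direction, or is there a better transform-stream based way?

const fs = require('fs');
const mongoose = require('mongoose');
const JSONStream = require('JSONStream');
const User = require('./models/User');

const BATCH_SIZE = 1000; // illustrative; tune per document size
const databaseURL = 'mongodb://127.0.0.1/dextra';

mongoose.connect(databaseURL);
mongoose.Promise = global.Promise;
const db = mongoose.connection;

db.on('open', () => {
  const startTime = Date.now();
  let batch = [];

  const jsonStream = fs
    .createReadStream(`${__dirname}/largeUsers.json`)
    .pipe(JSONStream.parse('*'));

  jsonStream.on('data', (doc) => {
    batch.push(doc);
    if (batch.length >= BATCH_SIZE) {
      const docs = batch;
      batch = [];
      jsonStream.pause(); // back-pressure: stop parsing while this batch is inserted
      User.insertMany(docs)
        .then(() => jsonStream.resume())
        .catch((err) => {
          console.error('Batch insert failed:', err);
          process.exit(-1);
        });
    }
  });

  jsonStream.on('end', () => {
    // flush the last, partially filled batch, then close the connection
    const flush = batch.length ? User.insertMany(batch) : Promise.resolve();
    flush
      .then(() => {
        console.log(`Import completed in ${Date.now() - startTime} millisecs`);
        db.close(() => process.exit(0));
      })
      .catch((err) => {
        console.error('Final insert failed:', err);
        process.exit(-1);
      });
  });
});

db.on('error', (err) => {
  console.error('MongoDB connection error:', err);
  process.exit(-1);
});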