Considering the file size can cross a GB, here is what I have done.
First, create a large JSON file using the following code (which outputs a ~198 MB file):
const fs = require('fs');

const arrayOfUsers = [];
for (let i = 0; i < 1e6; i += 1) {
  arrayOfUsers.push({
    firstName: 'Barb',
    lastName: 'E. Riser',
    status: 'unregistered',
    updatedOn: '2017-01-17T13:24:51.403Z',
    createdOn: '2017-01-17T13:24:51.403Z',
    googleLocation: {
      loc: {
        coordinates: [null, null],
      },
    },
  });
}

// write to file
fs.writeFile(`${__dirname}/largeUsers.json`, JSON.stringify(arrayOfUsers), (err) => {
  if (err) {
    console.log('Error occurred, exiting...');
    process.exit(-1);
  }
  console.log('Write successful, exiting...');
  process.exit(0);
});
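Note that JSON.stringify(arrayOfUsers) builds the entire ~198 MB string in memory before anything is written. That is fine here, but if the generated file needs to grow well past a GB, the array could instead be written piece by piece through a write stream. A minimal sketch under that assumption (same fields as above, backpressure ignored for brevity):

const fs = require('fs');

const out = fs.createWriteStream(`${__dirname}/largeUsers.json`);
out.write('[');
for (let i = 0; i < 1e6; i += 1) {
  const user = {
    firstName: 'Barb',
    lastName: 'E. Riser',
    status: 'unregistered',
    updatedOn: '2017-01-17T13:24:51.403Z',
    createdOn: '2017-01-17T13:24:51.403Z',
    googleLocation: { loc: { coordinates: [null, null] } },
  };
  // prefix every element except the first with a comma so the output stays valid JSON
  out.write((i === 0 ? '' : ',') + JSON.stringify(user));
}
out.write(']');
out.end(() => console.log('Write successful, exiting...'));

For a truly huge file, the return value of write() should also be checked and the loop paused until 'drain', otherwise the pending chunks still pile up in memory.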
Now I am using this same file to store its contents in MongoDB:
const fs = require('fs');
const mongoose = require('mongoose');
const JSONStream = require('JSONStream');
const User = require('./models/User');

const startTime = Date.now();
const databaseURL = 'mongodb://127.0.0.1/dextra';

mongoose.connect(databaseURL);
mongoose.Promise = global.Promise;
const db = mongoose.connection;

db.on('open', () => {
  console.log('Connected to mongo server.\nImport from file to DB started...');
  const dataStreamFromFile = fs.createReadStream(`${__dirname}/largeUsers.json`);
  dataStreamFromFile.pipe(JSONStream.parse('*')).on('data', (chunk) => {
    new User(chunk).save();
  });
  dataStreamFromFile.on('end', () => {
    const timeTaken = Date.now() - startTime;
    console.log(`Import completed in ${timeTaken} milliseconds, closing connection...`);
    db.close();
    process.exit(0);
  });
});

db.on('error', (err) => {
  console.error('MongoDB connection error:', err);
  process.exit(-1);
});
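The import requires a ./models/User model that isn't shown above. A minimal schema matching the generated documents might look like the following (the field types are my assumption, not the original model):

const mongoose = require('mongoose');

const userSchema = new mongoose.Schema({
  firstName: String,
  lastName: String,
  status: String,
  createdOn: Date,
  updatedOn: Date,
  googleLocation: {
    loc: {
      coordinates: [Number],
    },
  },
});

module.exports = mongoose.model('User', userSchema);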
The issues I am facing are as follows: every parsed document triggers an unawaited new User(chunk).save(), so roughly a million writes are in flight at once, and the file stream's end handler calls process.exit(0) before those saves have finished. So I came up with a very simple code snippet that batches the inserts instead, which is as follows:
const fs = require('fs');
const mongoose = require('mongoose');
const JSONStream = require('JSONStream');
const User = require('./src/models/User');
const config = require('./config.json');

mongoose.connect(config.MONGODB_URI, { poolSize: config.DB_POOL_SIZE });
mongoose.Promise = global.Promise;
const db = mongoose.connection;

let arrayOfUsers = [];

db.on('open', () => {
  console.log('Connected to mongo server.\n');
  process.stdout.write('Processing.');
  const dataStreamFromFile = fs.createReadStream(`${__dirname}/users_large.json`);
  dataStreamFromFile.pipe(JSONStream.parse('*')).on('data', async (userData) => {
    arrayOfUsers.push(userData);
    if (arrayOfUsers.length === config.BATCH_INSERT_VALUE) {
      dataStreamFromFile.pause();
      await User.insertMany(arrayOfUsers);
      arrayOfUsers = [];
      process.stdout.write('.');
      dataStreamFromFile.resume();
    }
  });
  dataStreamFromFile.on('end', async () => {
    await User.insertMany(arrayOfUsers); // left over data
    console.log('\nImport complete, closing connection...');
    db.close();
    process.exit(0);
  });
});

db.on('error', (err) => {
  console.error('MongoDB connection error: ', err);
  process.exit(-1);
});

Note: Inserting 50k records is also a bit slow. Will keep on trying!
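The snippet reads its settings from a ./config.json that isn't shown; the keys it uses are MONGODB_URI, DB_POOL_SIZE and BATCH_INSERT_VALUE, so a hypothetical config could be (the values here are placeholders, not the ones I actually used):

{
  "MONGODB_URI": "mongodb://127.0.0.1/dextra",
  "DB_POOL_SIZE": 10,
  "BATCH_INSERT_VALUE": 100
}

On the remaining slowness, insertMany also accepts an { ordered: false } option, which lets MongoDB keep inserting the rest of a batch even if one document fails and can be faster for bulk loads; that is the next thing I plan to try.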