Watch keynotes and sessions from MongoDB.live, our virtual developer conference.

Insert multiple text files into mongo

I am trying to insert 4 text files from a directory into MongoDB. I want all of the contents to be in a single document, with each file name as a key and that file's contents as the value. I am able to insert them as multiple documents using insert_many. Please let me know how I can insert them into a single document instead.

You may write a script that creates a .json document and then you may import this document or integrate it to a .js script.

Untested bash script (will fail on files containing single or double quotes):

# Print one JSON document whose keys are file names and whose values are the
# corresponding file contents. Redirect the output to a .json file to import it.
#
# Limitations: the contents are inserted verbatim, so a file containing
# double quotes, backslashes or embedded newlines still yields invalid JSON.
# $(cat ...) strips only the trailing newline(s) of each file.
echo "{"
printf '  "my-first-file": "%s",\n' "$(cat my-first-file)"
# No trailing comma after the last member: JSON does not allow one.
printf '  "my-2nd-file": "%s"\n' "$(cat my-2nd-file)"
echo "}"

Hi @hari_shekon,

Make sure the document's size is less than 16MB.

MongoDB Document size

Step 1 would be to create an ETL workflow process if you are going to perform this load frequently. If not, you can handle it as a one-off manual task.

You can use either $push or $addToSet to accomplish this.

$push

$addToSet

I have attached a JSON document, a JSON Schema, a MongoDB create script (demo) and a Mongoose schema for your reference.

JSON Schema

{
    "$schema": "http://json-schema.org/draft-04/schema#",
    "type": "object",
    "title": "file",
    "additionalProperties": false,
    "properties": {
        "file_load_id": {
            "type": "string",
            "pattern": "^[a-fA-F0-9]{24}$"
        },
        "files": {
            "type": "array",
            "additionalItems": true,
            "uniqueItems": false,
            "items": {
                "id": "file",
                "type": "object",
                "properties": {
                    "seq_no": {
                        "type": "number"
                    },
                    "name": {
                        "type": "string"
                    },
                    "content": {
                        "type": "string"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "seq_no"
                ]
            }
        }
    },
    "required": [
        "file_load_id"
    ]
}

Create the collection(change the fieldname if you need)

use file_db;

    db.createCollection( "file",{
    "storageEngine": {
        "wiredTiger": {}
    },
    "capped": false,
    "validator": {
        "$jsonSchema": {
            "bsonType": "object",
            "title": "file",
            "additionalProperties": false,
            "properties": {
                "_id": {
                    "bsonType": "objectId"
                },
                "file_load_id": {
                    "bsonType": "objectId"
                },
                "files": {
                    "bsonType": "array",
                    "additionalItems": true,
                    "uniqueItems": false,
                    "items": {
                        "bsonType": "object",
                        "properties": {
                            "seq_no": {
                                "bsonType": "number"
                            },
                            "name": {
                                "bsonType": "string"
                            },
                            "content": {
                                "bsonType": "string"
                            }
                        },
                        "additionalProperties": false,
                        "required": [
                            "seq_no"
                        ]
                    }
                }
            },
            "required": [
                "file_load_id"
            ]
        }
    },
    "validationLevel": "off",
    "validationAction": "warn"
});

Mongoose Script

// Sub-document schema for one entry of the "files" array;
// seq_no is the only required field.
var fileEntrySchema = new Schema({
    seq_no: { type: Number, required: true },
    name: { type: String },
    content: { type: String }
});

// Top-level schema: a required file_load_id plus the list of file entries.
var file = new Schema({
    file_load_id: { type: Schema.Types.ObjectId, required: true },
    files: [fileEntrySchema]
});

A sample document

{
    "file_load_id": ObjectId("507f1f77bcf86cd799439011"),
    "files": [
        {
            "seq_no": 1,
            "name": "file_no_1",
            "content": "Lorem"
        },
		{
            "seq_no": 2,
            "name": "file_no_1",
            "content": "Lorem"
        },
		{
            "seq_no": 3,
            "name": "file_no_1",
            "content": "Lorem"
        }
    ]
}

You can do this with mongo shell, using the script below.
To use it, connect with the mongo shell and load() a script with the following contents:

// 1. declare function, that will read files in a directory
//    and put it into the new document payload
/**
 * Builds a document payload holding every regular file found directly
 * inside a directory (subdirectories are skipped, not descended into).
 *
 * Relies on the mongo shell helpers listFiles(), cat() and ObjectId().
 *
 * @param {string} pathToDir - directory whose files should be read
 * @returns {{files: Array<{_id: ObjectId, size: number, name: string, content: string}>}}
 *   document with one entry per file
 */
function provideDocumentPayloadFromFilesInDir(pathToDir) {
  const files = listFiles(pathToDir)
    // do not descend into subdirectories
    .filter(item => item.isDirectory === false)
    .map(item => ({
      // generate _id for the file in case
      // you may need to manipulate it later
      _id: new ObjectId(),
      size: item.size,
      // listFiles() reports the bare file name as `baseName`;
      // fall back to `basename` for any caller-supplied shim using that key
      name: item.baseName !== undefined ? item.baseName : item.basename,
      // read the content with the cat() shell method (item.name is the path)
      content: cat(item.name),
    }));
  return { files };
}

// 2. create document payload with files content
//    (<path> is a placeholder: replace it with the directory path)
const doc = provideDocumentPayloadFromFilesInDir(<path>);

// 3. insert new document into collection
//    (<dbName> and <collectionName> are placeholders: replace them
//    with the target database and collection names)
const targetDb = db.getSiblingDB(<dbName>);
targetDb.getCollection(<collectionName>).insertOne(doc);

Make sure you do not load huge files, otherwise you may hit BSON Document limit of 16MB.