Skip to main content

Getting Started

🧰 Learn about individual Toolkits​

This quick-start guide shows how to install and use the Bangla Toolkit (BNTK).

Installation​

Install the required packages for Bengali natural language processing:

npm install @bntk/tokenization @bntk/stemming @bntk/pos @bntk/ner @bntk/transliteration

Example Usage​

import * as ner from "@bntk/ner";
import * as pos from "@bntk/pos";
import * as stemming from "@bntk/stemming";
import * as tokenization from "@bntk/tokenization";
import * as transliteration from "@bntk/transliteration";

// Sample input: two Bengali sentences, each terminated by a danda (।).
// The text must be stored as real UTF-8 Bengali; a mis-decoded (mojibake)
// literal would be passed to the toolkit verbatim.
const contents = `আমি বাংলা লেখার জন্য টুলিটক ব্যবহার করছি। আমার বন্ধু রহিম ঢাকায় থাকেন।`;

// ==== Tokenization ====
// Split the raw text into sentences.
const sentences = tokenization.tokenizeSentences(contents);
console.log(sentences);
// ['আমি বাংলা লেখার জন্য টুলিটক ব্যবহার করছি', 'আমার বন্ধু রহিম ঢাকায় থাকেন']

// Split the first sentence into word tokens.
const words = tokenization.tokenizeWords(sentences[0]);
console.log(words);
// ['আমি', 'বাংলা', 'লেখার', 'জন্য', 'টুলিটক', 'ব্যবহার', 'করছি']

// ==== Stemming ====
const stemmedWords = stemming.stemWords(words);
console.log(stemmedWords);
// ['আমি', 'বাংলা', 'লেখার', 'জন্য', 'টুলিটক', 'ব্যবহার', 'করছি']
// NOTE(review): the expected output above is identical to the input tokens —
// confirm against the stemmer's actual output.

// ==== POS ====
// The tagger is fed the stemmed tokens from the previous step.
const taggedWords = pos.tagWords(stemmedWords);
console.log(taggedWords);
// ['আমি/PRON', 'বাংলা/NOUN', 'লেখার/NOUN', 'জন্য/ADP', 'টুলিটক/NOUN', 'ব্যবহার/VERB', 'করছি/VERB']

// ==== NER ====
// Entity extraction runs on the second sentence (a plain string, not tokens).
const entities = ner.extractEntities(sentences[1]);
console.log(entities);
// [{type: 'PRONOUN', value: 'আমার', start: 0, end: 4}, {...}]

// ==== Transliteration ====
// Input here is romanized text; the expected output is Bengali script.
const transliterated = transliteration.transliterate("amar name apon.");
console.log(transliterated);
// 'আমার নাম আপন।'

📚 See API Reference​