/**
 * Karnataka Bank statement processor
 * (Services/BankStatementProcessor/bankStatementProcessor-KARNATAKA.js).
 *
 * The bank is registered in src/BANK.js as `KARNATAKA: "karnataka"`.
 *
 * Converts the row objects read from a bank-statement spreadsheet into a
 * `{ bank_details, transactions }` structure. Processing happens in three
 * phases:
 *   1. locate the transaction-table header row and map its columns;
 *   2. scan the rows ABOVE the header for account metadata;
 *   3. convert the rows BELOW the header into transactions.
 *
 * @param {Array<Object>} rawData - One object per spreadsheet row; keys are
 *   column identifiers, values are raw cell contents. Non-array input is
 *   treated as an empty statement.
 * @returns {{bank_details: Object, transactions: Array<Object>}} Extracted
 *   bank details (missing fields are `null`, `opening_balance` defaults to 0)
 *   and the list of transactions in statement order.
 */
function processBankStatement(rawData) {
  // ---------------------------------------------------------------------
  // Constants
  // ---------------------------------------------------------------------
  const MONTHS = {
    jan: 1, feb: 2, mar: 3, apr: 4, may: 5, jun: 6,
    jul: 7, aug: 8, sep: 9, oct: 10, nov: 11, dec: 12,
  };

  // Column-role synonyms used to map a header row. Compared against the
  // lower-cased header text with dots removed.
  const HEADER_SYNONYMS = {
    dateKey: ["date", "txn date", "tran date", "transaction date", "value date", "post date", "dt"],
    narrationKey: ["narration", "description", "details", "particulars", "remarks", "transaction details", "desc"],
    debitKey: ["debit", "dr", "withdrawal", "withdrawals", "debit amount", "withdrawal amt", "payment", "paid"],
    creditKey: ["credit", "cr", "deposit", "credit amount", "credit amt", "receipt", "received"],
    balanceKey: ["balance", "bal", "closing balance", "running balance", "available balance", "balance amt"],
    branchCodeKey: ["sol", "sol id", "branch code", "br code"],
  };

  // Tokens that make a single cell "look like" a table header.
  const HEADER_TOKENS = [
    "date", "txn date", "tran date", "transaction date", "value date", "post date",
    "narration", "description", "details", "particulars", "remarks", "transaction details",
    "debit", "dr", "withdrawal", "withdrawals", "debit amount", "withdrawal amt",
    "credit", "cr", "deposit", "credit amount", "credit amt", "receipt",
    "balance", "bal", "closing balance", "running balance", "available balance",
    "sol", "sol id", "branch code",
  ];

  const ACCOUNT_KEY_RE = /(account(\s*no\.?| number)|a\/c|ac no|a\/c no)/i;
  const HOLDER_KEY_RE = /(account holder|account holder name|holder name|customer name)\b/i;
  const OPENING_KEY_RE = /opening\s*balance|open\s*bal|balance\s*b\/f/i;
  const LABEL_WORD_RE = /(account|ifsc|micr|mobile|email|pan|nominee|customer|ckyc|statement|period|branch|bank|address|balance|opening|closing)/i;
  const SUMMARY_ROW_RE = /total|summary|closing balance|end of statement|thank/i;

  // ---------------------------------------------------------------------
  // Generic helpers
  // ---------------------------------------------------------------------

  /** True when the value is neither null/undefined nor blank after trimming. */
  function isNonEmpty(val) {
    return val !== null && val !== undefined && String(val).trim() !== "";
  }

  /** Stringifies, collapses runs of whitespace to one space and trims. */
  function normalizeStr(s) {
    return String(s || "").replace(/\s+/g, " ").trim();
  }

  /** normalizeStr + lower-casing. */
  function normalizeLower(s) {
    return normalizeStr(s).toLowerCase();
  }

  /**
   * Parses an amount cell. Accepts numbers, thousands separators, currency
   * text ("Rs", "Cr", ...), and accounting negatives ("(1.00)" or "1.00-").
   * @returns {number|null} The numeric value, or null when no number is present.
   */
  function parseNumber(val) {
    if (val === null || val === undefined) return null;
    if (typeof val === "number") return Number.isNaN(val) ? null : val;

    let s = String(val).trim();
    if (!s) return null;
    // Keep only digits, minus, comma, dot and parentheses.
    s = s.replace(/[^\d.,()\-]/g, "");

    let negative = false;
    if (/\(.*\)/.test(s)) {
      negative = true;
      s = s.replace(/[()]/g, "");
    }
    if (s.endsWith("-")) {
      negative = true;
      s = s.slice(0, -1);
    }
    s = s.replace(/,/g, "");
    if (!s || !/[0-9]/.test(s)) return null;

    const num = parseFloat(s);
    if (Number.isNaN(num)) return null;
    return negative ? -num : num;
  }

  /** Formats a Date as YYYY-MM-DD using its UTC calendar fields. */
  function formatDateYMD(d) {
    const month = String(d.getUTCMonth() + 1).padStart(2, "0");
    const day = String(d.getUTCDate()).padStart(2, "0");
    return `${d.getUTCFullYear()}-${month}-${day}`;
  }

  /**
   * Builds a YYYY-MM-DD string from calendar parts, rejecting impossible
   * dates. Date.UTC silently rolls 31 Feb over into March, so the parts are
   * compared against the constructed date.
   */
  function buildDate(y, mo, d) {
    const date = new Date(Date.UTC(y, mo - 1, d));
    if (Number.isNaN(date.getTime())) return null;
    const sameDay =
      date.getUTCFullYear() === y &&
      date.getUTCMonth() === mo - 1 &&
      date.getUTCDate() === d;
    return sameDay ? formatDateYMD(date) : null;
  }

  /** Converts an Excel serial day number (25569 = 1970-01-01) to a UTC Date. */
  function excelSerialToDate(serial) {
    const ms = Math.round((Number(serial) - 25569) * 86400 * 1000);
    const d = new Date(ms);
    return Number.isNaN(d.getTime()) ? null : d;
  }

  /** Expands a two-digit year into the 2000s; other years pass through. */
  function expandYear(y) {
    return y < 100 ? y + 2000 : y;
  }

  /**
   * Parses a date cell into YYYY-MM-DD.
   * Supported: Excel serials (> 10000), YYYY-MM-DD, DD-MM-YYYY (swapped to
   * MM-DD only when the second part cannot be a month), DD-Mon-YYYY,
   * "Mon DD, YYYY", and finally whatever `new Date(string)` accepts.
   * @returns {string|null} Normalised date or null when unparseable.
   */
  function parseDate(val) {
    if (val === null || val === undefined) return null;
    if (typeof val === "number") {
      if (val <= 10000) return null;
      const fromSerial = excelSerialToDate(val);
      return fromSerial ? formatDateYMD(fromSerial) : null;
    }

    let s = String(val).trim();
    if (!s) return null;

    // Drop a time component. Only strip "T..." when it follows a numeric
    // ISO date: a bare /T.*$/i would also cut "12-Oct-2023" down to "12-Oc".
    s = s
      .replace(/^(\d{4}[-\/.]\d{1,2}[-\/.]\d{1,2})T.*$/i, "$1")
      .replace(/\s+\d{1,2}:\d{2}(?::\d{2}(?:\.\d+)?)?\s*(?:[ap]m)?$/i, "")
      .trim();

    // YYYY-MM-DD / YYYY/MM/DD / YYYY.MM.DD
    let m = s.match(/^(\d{4})[-\/.](\d{1,2})[-\/.](\d{1,2})$/);
    if (m) {
      return buildDate(parseInt(m[1], 10), parseInt(m[2], 10), parseInt(m[3], 10));
    }

    // DD-MM-YYYY (default) or MM-DD-YYYY when only that reading is possible.
    m = s.match(/^(\d{1,2})[-\/.](\d{1,2})[-\/.](\d{2,4})$/);
    if (m) {
      let day = parseInt(m[1], 10);
      let month = parseInt(m[2], 10);
      const year = expandYear(parseInt(m[3], 10));
      if (month > 12 && day <= 12) {
        [day, month] = [month, day];
      }
      return buildDate(year, month, day);
    }

    // DD-Mon-YYYY / DD Mon YYYY
    m = s.match(/^(\d{1,2})[\-\s]([A-Za-z]{3,})[\-\s,]?(\d{2,4})$/);
    if (m) {
      const month = MONTHS[m[2].slice(0, 3).toLowerCase()];
      if (!month) return null;
      return buildDate(expandYear(parseInt(m[3], 10)), month, parseInt(m[1], 10));
    }

    // Mon DD YYYY / Mon DD, YYYY
    m = s.match(/^([A-Za-z]{3,})\s+(\d{1,2}),?\s+(\d{2,4})$/);
    if (m) {
      const month = MONTHS[m[1].slice(0, 3).toLowerCase()];
      if (!month) return null;
      return buildDate(expandYear(parseInt(m[3], 10)), month, parseInt(m[2], 10));
    }

    // Last resort: engine-defined parsing; read back the local calendar day.
    const parsed = new Date(s);
    if (Number.isNaN(parsed.getTime())) return null;
    return formatDateYMD(
      new Date(Date.UTC(parsed.getFullYear(), parsed.getMonth(), parsed.getDate()))
    );
  }

  /** Returns the first IFSC-shaped token (4 letters, "0", 6 alphanumerics), upper-cased. */
  function extractIFSC(text) {
    const m = String(text || "").match(/[A-Za-z]{4}0[A-Za-z0-9]{6}/);
    return m ? m[0].toUpperCase() : null;
  }

  // ---------------------------------------------------------------------
  // Header detection
  // ---------------------------------------------------------------------

  /**
   * Tests a lower-cased header label against one synonym. Tokens of up to
   * three characters ("dr", "cr", "bal", "dt", "sol") must appear as a
   * standalone word; plain substring matching made "cr" match "description"
   * and "dr" match "address".
   */
  function matchesHeaderToken(label, token) {
    if (label === token) return true;
    if (token.length > 3) return label.includes(token);
    return new RegExp(`(^|[^a-z0-9])${token}([^a-z0-9]|$)`).test(label);
  }

  /** True when a single cell value reads like a table-header caption. */
  function looksLikeHeaderValue(val) {
    const label = normalizeLower(val);
    return HEADER_TOKENS.some((token) => matchesHeaderToken(label, token));
  }

  /** True when at least two cells of the row read like header captions. */
  function rowLooksLikeHeader(row) {
    let score = 0;
    for (const cell of Object.values(row || {})) {
      if (isNonEmpty(cell) && looksLikeHeaderValue(cell)) score += 1;
    }
    return score >= 2;
  }

  /**
   * Maps a candidate header row to column keys.
   * @returns {Object|null} `{dateKey, narrationKey, debitKey, creditKey,
   *   balanceKey, branchCodeKey}` (unmatched roles are null), or null when
   *   the row lacks a date column, an amount column distinct from the date
   *   column, or both a narration and a balance column.
   */
  function mapHeaderRow(row) {
    const mapping = {
      dateKey: null,
      narrationKey: null,
      debitKey: null,
      creditKey: null,
      balanceKey: null,
      branchCodeKey: null,
    };
    const roles = Object.keys(HEADER_SYNONYMS);

    for (const key of Object.keys(row || {})) {
      const cell = row[key];
      if (!isNonEmpty(cell)) continue;
      const label = normalizeLower(cell).replace(/\./g, "").replace(/\s+/g, " ").trim();
      for (const role of roles) {
        if (mapping[role] !== null) continue; // first matching column wins
        if (HEADER_SYNONYMS[role].some((token) => matchesHeaderToken(label, token))) {
          mapping[role] = key;
        }
      }
    }

    const hasDate = mapping.dateKey !== null;
    // A real table keeps amounts in their own column; this rejects a single
    // free-text metadata cell that happens to mention several header words.
    const hasAmount = [mapping.debitKey, mapping.creditKey].some(
      (key) => key !== null && key !== mapping.dateKey
    );
    const hasNarrationOrBalance = mapping.narrationKey !== null || mapping.balanceKey !== null;
    return hasDate && hasAmount && hasNarrationOrBalance ? mapping : null;
  }

  // ---------------------------------------------------------------------
  // Metadata helpers
  // ---------------------------------------------------------------------

  /** Splits "Label : Value" / "Label - Value" / "Label :- Value" cells. */
  function extractKeyValueFromCell(cellText) {
    const m = normalizeStr(cellText).match(/^\s*([^:–\-]+?)\s*[:\-–]+\s*(.+)\s*$/);
    return m ? { key: normalizeStr(m[1]), value: normalizeStr(m[2]) } : null;
  }

  /** Returns the first run of 9-18 digits after removing whitespace. */
  function cleanAccountNo(val) {
    if (!val) return null;
    const m = String(val).replace(/\s+/g, "").match(/(\d{9,18})/);
    return m ? m[1] : null;
  }

  /** Extracts a city from "<NAME> CITY..." or "<NAME>-<6 digit pincode>". */
  function extractCityFromText(text) {
    const s = String(text || "");
    let m = s.match(/([A-Za-z ]+)\s+CITY\b/i);
    if (m) return normalizeStr(m[1]).toUpperCase();
    m = s.match(/^([A-Za-z ]+)[\s-]+(\d{6})$/);
    if (m) return normalizeStr(m[1]).toUpperCase();
    return null;
  }

  /**
   * Guesses the counterparty from a narration: the first "/", "|" or "-"
   * separated part that contains a letter, is at least 3 characters long and
   * is not a generic banking word; falls back to a UPI handle.
   * @returns {{from: (string|null), to: (string|null)}} `from` is filled for
   *   deposits, `to` for withdrawals.
   */
  function extractCounterparty(desc, type) {
    if (!desc) return { from: null, to: null };
    const text = String(desc);
    const upiMatch = text.match(/[a-z0-9.\-_]+@[a-z]+/i);
    const blacklist = new Set([
      "upi", "p2m", "p2a", "neft", "imps", "rtgs", "bank", "ltd", "limited", "ref",
      "payment", "pay", "to", "from", "transfer", "trx", "txn", "id", "axis bank",
      "hdfc bank", "icici bank", "sbi", "bank ltd", "bank limited",
    ]);

    const parts = text.split(/[\/|-]/).map((part) => normalizeStr(part)).filter(Boolean);
    const named = parts.find(
      (part) =>
        !blacklist.has(part.toLowerCase()) &&
        !/^\d+$/.test(part) &&
        part.length >= 3 &&
        /[A-Za-z]/.test(part)
    );
    const party = named || (upiMatch ? upiMatch[0] : null);

    if (type === "deposit") return { from: party, to: null };
    if (type === "withdrawal") return { from: null, to: party };
    return { from: null, to: null };
  }

  /** Rounds to two decimals. */
  function round2(n) {
    return Math.round((n + Number.EPSILON) * 100) / 100;
  }

  // ---------------------------------------------------------------------
  // Main flow
  // ---------------------------------------------------------------------
  const result = {
    bank_details: {
      bank_name: null,
      opening_balance: 0,
      ifsc: null,
      address: null,
      city: null,
      account_no: null,
      account_holder_name: null,
      branch_name: null,
      branch_code: null,
    },
    transactions: [],
  };
  const details = result.bank_details;
  const rows = Array.isArray(rawData) ? rawData : [];
  const addressLines = [];

  // 1) Locate the transaction-table header: the first row that maps cleanly.
  let headerIndex = -1;
  let headerMap = null;
  for (let i = 0; i < rows.length; i++) {
    const mapped = mapHeaderRow(rows[i]);
    if (mapped) {
      headerIndex = i;
      headerMap = mapped;
      break;
    }
  }

  /** Applies one "Label : Value" pair to the bank details. */
  function applyLabelledField(key, value) {
    if (!details.account_no && ACCOUNT_KEY_RE.test(key)) {
      const accountNo = cleanAccountNo(value);
      if (accountNo) details.account_no = accountNo;
    }
    if (!details.bank_name && /bank\s*name/i.test(key)) {
      details.bank_name = normalizeStr(value);
    }
    if (!details.account_holder_name && HOLDER_KEY_RE.test(key)) {
      details.account_holder_name = normalizeStr(value);
    }
    if (!details.branch_name && /branch\s*name/i.test(key)) {
      details.branch_name = normalizeStr(value);
    }
    if (!details.branch_code && /branch\s*code|br\.?\s*code/i.test(key)) {
      details.branch_code = normalizeStr(value);
    }
    if (OPENING_KEY_RE.test(key)) {
      const opening = parseNumber(value);
      if (opening !== null) details.opening_balance = opening;
    }
    if (/address/i.test(key) && normalizeStr(value)) {
      addressLines.push(normalizeStr(value));
    }
    if (!details.ifsc && /ifsc/i.test(key)) {
      const ifsc = extractIFSC(value);
      if (ifsc) details.ifsc = ifsc;
    }
    // Plain "Name : XYZ" (but not "Bank Name" / "Joint Holder").
    if (!details.account_holder_name) {
      const keyNorm = normalizeLower(key);
      if (/^name\b/.test(keyNorm) && !/bank\s*name/.test(keyNorm) && !/joint\s*holder/.test(keyNorm)) {
        const holder = normalizeStr(value);
        if (holder && holder.length >= 2) details.account_holder_name = holder;
      }
    }
  }

  // 2) Bank details: scan at most 40 rows and never past the header row, so
  //    transaction cells cannot leak into the address or bank name.
  const detailRowLimit = Math.min(40, headerIndex >= 0 ? headerIndex : rows.length);
  for (let i = 0; i < detailRowLimit; i++) {
    const row = rows[i] || {};
    const rowIsHeaderLike = rowLooksLikeHeader(row);

    for (const rawVal of Object.values(row)) {
      if (!isNonEmpty(rawVal)) continue;
      const val = String(rawVal);

      if (!details.ifsc) {
        const ifsc = extractIFSC(val);
        if (ifsc) details.ifsc = ifsc;
      }

      const kv = extractKeyValueFromCell(val);
      if (kv) {
        applyLabelledField(kv.key, kv.value);
      } else if (!details.account_no && /account\s*no/i.test(val)) {
        // Unlabelled form such as "Account No 1234567890".
        const accountNo = cleanAccountNo(val);
        if (accountNo) details.account_no = accountNo;
      }

      // Free text that is not an obvious "label: value" line is treated as
      // a possible address line / city source.
      const looksLikeLabel = /:|-/.test(val) && LABEL_WORD_RE.test(val);
      if (!looksLikeLabel) {
        const cityMaybe = extractCityFromText(val);
        if (cityMaybe && !details.city) details.city = cityMaybe;
        if (val.length <= 60 && !rowIsHeaderLike) {
          addressLines.push(normalizeStr(val));
        }
      }

      // "... BANK [LTD|LIMITED]" heading within the first 15 rows.
      if (!details.bank_name && i < 15 && !rowIsHeaderLike) {
        const m = val.match(/\b([A-Z ]+BANK(?:\s+(?:LTD|LIMITED))?)\b/i);
        if (m) details.bank_name = normalizeStr(m[1]).toUpperCase();
      }
    }
  }

  // Address: de-duplicate case-insensitively, keep first-seen order.
  if (addressLines.length > 0) {
    const seen = new Set();
    const unique = addressLines.filter((line) => {
      const key = line.toLowerCase();
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
    details.address = unique.join(", ");
    if (!details.city) details.city = extractCityFromText(details.address);
  }

  // IFSC fallback: first IFSC-shaped token anywhere in the statement.
  if (!details.ifsc) {
    for (const row of rows) {
      const hit = Object.values(row || {}).filter(isNonEmpty).map(extractIFSC).find(Boolean);
      if (hit) {
        details.ifsc = hit;
        break;
      }
    }
  }

  // Branch code from the SOL / branch-code column of the first data row.
  if (headerMap && headerMap.branchCodeKey !== null && rows[headerIndex + 1]) {
    const solVal = rows[headerIndex + 1][headerMap.branchCodeKey];
    if (isNonEmpty(solVal) && !details.branch_code) {
      details.branch_code = normalizeStr(solVal);
    }
  }

  // 3) Transactions: every row below the header.
  if (headerMap) {
    const cellAt = (row, key) => (key !== null ? row[key] : null);
    let voucher = 1;

    for (let i = headerIndex + 1; i < rows.length; i++) {
      const row = rows[i] || {};
      const dateVal = cellAt(row, headerMap.dateKey);
      const narrVal = cellAt(row, headerMap.narrationKey);
      const debitVal = cellAt(row, headerMap.debitKey);
      const creditVal = cellAt(row, headerMap.creditKey);
      const balanceVal = cellAt(row, headerMap.balanceKey);

      const hasAny = [dateVal, narrVal, debitVal, creditVal, balanceVal].some(isNonEmpty);
      if (!hasAny) continue;

      const date = parseDate(dateVal);

      // Totals / footer rows carry no transaction date. Rows WITH a valid
      // date are kept even if the narration says "thank you" or "total".
      if (!date) {
        const rowText = Object.values(row)
          .map((v) => String(v || ""))
          .join(" ")
          .toLowerCase();
        if (SUMMARY_ROW_RE.test(rowText)) continue;
      }

      const debitNum = parseNumber(debitVal);
      const creditNum = parseNumber(creditVal);
      const balanceNum = parseNumber(balanceVal);
      if (debitNum === null && creditNum === null && !date) continue;

      // A non-zero debit takes priority when both columns are filled.
      let type = null;
      let amount = null;
      if (debitNum !== null && Math.abs(debitNum) > 0) {
        type = "withdrawal";
        amount = Math.abs(debitNum);
      } else if (creditNum !== null && Math.abs(creditNum) > 0) {
        type = "deposit";
        amount = Math.abs(creditNum);
      }
      if (type === null) continue;

      const desc = isNonEmpty(narrVal) ? normalizeStr(narrVal) : "";
      const counterparty = extractCounterparty(desc, type);

      result.transactions.push({
        date: date || null,
        voucher_number: voucher++,
        amount: amount,
        desc: desc,
        from: counterparty.from,
        to: counterparty.to,
        type: type,
        balance: balanceNum !== null ? balanceNum : null,
      });
    }
  }

  // Derive the opening balance from the first transaction when the
  // statement did not state one (0 is the "not provided" default).
  if ((details.opening_balance === 0 || details.opening_balance === null) && result.transactions.length > 0) {
    const first = result.transactions[0];
    if (typeof first.balance === "number") {
      const opening =
        first.type === "withdrawal" ? first.balance + first.amount : first.balance - first.amount;
      details.opening_balance = round2(opening);
    }
  }

  return result;
}

// Guarded so the file loads as CommonJS (the project's `require` consumers)
// and also evaluates without error where `module` is not defined.
if (typeof module !== "undefined" && module.exports) {
  module.exports = processBankStatement;
}