Document Grounding with RAG in SAP AI Core
In Part 3, we built sophisticated AI pipelines. But our AI still "hallucinates" - it generates plausible-sounding responses that might not reflect your company's actual products, policies, or procedures.
Retrieval-Augmented Generation (RAG) solves this by grounding AI responses in real documents from your knowledge base.
This post is part of a series:
- Getting Started with SAP AI Core and the SAP AI SDK in CAP
- Leveraging LLM Models and Deployments in SAP AI Core
- Orchestrating AI Workflows with SAP AI Core
- Document Grounding with RAG in SAP AI Core (this post)
- Production-Ready AI Applications with SAP AI Core
What are we building?
We'll enhance our Support Ticket System to:
- Upload and index support documentation
- Search relevant docs when processing tickets
- Generate responses grounded in actual documentation
- Cite sources for transparency
Understanding RAG
RAG works in three phases:
| Phase | Description |
|---|---|
| Index | Convert documents into vector embeddings and store them |
| Retrieve | Find relevant document chunks based on the query |
| Generate | Use retrieved context to generate accurate responses |
Query → [Embed] → [Vector Search] → [Retrieved Docs] → [LLM + Context] → Response
Step 1: Setting Up Document Grounding
SAP AI Core provides document grounding through the Orchestration service. First, install the required packages:
npm install @sap-ai-sdk/orchestration @sap-ai-sdk/document-grounding
The @sap-ai-sdk/orchestration package handles grounding within chat completions, while @sap-ai-sdk/document-grounding provides direct access to vector storage and retrieval APIs.
Resource Group Configuration
Important: Your resource group needs the label document-grounding: true to use the grounding service. Set this in SAP AI Launchpad when creating or editing the resource group.
Create /srv/lib/document-grounding.js:
const { VectorApi, RetrievalApi } = require('@sap-ai-sdk/document-grounding');
/**
 * Thin client around the SAP AI SDK document-grounding APIs (VectorApi and
 * RetrievalApi). Every request is sent with the `AI-Resource-Group` header
 * set to the resource group given at construction time.
 */
class KnowledgeBaseClient {
  /**
   * @param {string} [resourceGroup='default'] - Resource group sent with every request.
   */
  constructor(resourceGroup = 'default') {
    this.resourceGroup = resourceGroup;
  }

  /**
   * Upload a document to the vector store as one single chunk.
   *
   * @param {string} collectionId - Target collection.
   * @param {object} document
   * @param {string} document.id - Identifier stored as `id` metadata.
   * @param {string} document.title - Title stored as `title` metadata.
   * @param {string} document.content - Full text, uploaded as a single chunk.
   * @param {object} [document.metadata] - Extra key/value pairs; values are stringified.
   * @returns {Promise<*>} The response of the create-documents request.
   * @throws Rethrows any error raised by the request after logging it.
   */
  async uploadDocument(collectionId, document) {
    const { id, title, content, metadata } = document;

    try {
      const response = await VectorApi.createDocuments(
        collectionId,
        {
          documents: [
            {
              metadata: [
                { key: 'title', value: title },
                { key: 'id', value: id },
                ...Object.entries(metadata || {}).map(([key, value]) => ({
                  key,
                  value: String(value)
                }))
              ],
              // NOTE(review): title/id/category are attached at document level
              // only, while _parseSearchResults reads chunk-level metadata.
              // Confirm the service propagates document metadata to chunks;
              // otherwise search results will not carry title/category.
              chunks: [
                {
                  content: content,
                  metadata: []
                }
              ]
            }
          ]
        },
        { 'AI-Resource-Group': this.resourceGroup }
      ).execute();

      console.log(`Document uploaded: ${id}`);
      return response;
    } catch (error) {
      console.error(`Error uploading document ${id}:`, error);
      throw error;
    }
  }

  /**
   * Search for relevant document chunks using the Retrieval API.
   *
   * @param {string} query - Free-text search query.
   * @param {object} [options]
   * @param {number} [options.maxResults=5] - Maximum number of chunks to return (0 falls back to 5).
   * @param {object} [options.filters] - Optional filter settings.
   * @param {string[]} [options.filters.repositories=['*']] - Data repositories to search.
   * @param {string} [options.filters.type='vector'] - Data repository type.
   * @returns {Promise<{content: *, score: *, metadata: object}[]>} Parsed chunks.
   * @throws Rethrows any error raised by the request after logging it.
   */
  async search(query, options = {}) {
    // `filters` is read as an object below, so the fallback must be an object
    // as well (the previous `[]` fallback only worked by accident).
    const filters = options.filters || {};
    const maxResults = options.maxResults || 5;

    try {
      const response = await RetrievalApi.search(
        {
          query,
          filters: [
            {
              id: 'search-filter',
              searchConfiguration: {
                maxChunkCount: maxResults
              },
              dataRepositories: filters.repositories || ['*'],
              dataRepositoryType: filters.type || 'vector'
            }
          ]
        },
        { 'AI-Resource-Group': this.resourceGroup }
      ).execute();

      return this._parseSearchResults(response);
    } catch (error) {
      console.error('Error searching documents:', error);
      throw error;
    }
  }

  /**
   * Parse a retrieval response into a flat list of chunks.
   *
   * Only the first entry of `response.results` is read; `search` sends exactly
   * one filter. Each chunk's key/value metadata list is folded into a plain
   * object, and chunks without metadata get an empty object.
   *
   * @param {object} [response] - Raw retrieval response.
   * @returns {{content: *, score: *, metadata: object}[]} Empty array when there is nothing to parse.
   */
  _parseSearchResults(response) {
    const chunks = response?.results?.[0]?.chunks;
    if (!Array.isArray(chunks)) {
      return [];
    }

    return chunks.map((chunk) => ({
      content: chunk.content,
      score: chunk.score,
      metadata: Array.isArray(chunk.metadata)
        ? Object.fromEntries(chunk.metadata.map(({ key, value }) => [key, value]))
        : {}
    }));
  }

  /**
   * Delete a document from the collection.
   *
   * @param {string} collectionId - Collection that holds the document.
   * @param {string} documentId - Document to remove.
   * @returns {Promise<void>}
   * @throws Rethrows any error raised by the request after logging it.
   */
  async deleteDocument(collectionId, documentId) {
    try {
      await VectorApi.deleteDocumentById(
        collectionId,
        documentId,
        { 'AI-Resource-Group': this.resourceGroup }
      ).execute();

      console.log(`Document deleted: ${documentId}`);
    } catch (error) {
      console.error(`Error deleting document ${documentId}:`, error);
      throw error;
    }
  }

  /**
   * Create a new collection that uses the `text-embedding-ada-002` embedding model.
   *
   * @param {string} collectionId - Identifier passed in the request body.
   * @returns {Promise<*>} The response of the create-collection request.
   * @throws Rethrows any error raised by the request after logging it.
   */
  async createCollection(collectionId) {
    try {
      const response = await VectorApi.createCollection(
        {
          collectionId,
          embeddingModel: {
            name: 'text-embedding-ada-002'
          }
        },
        { 'AI-Resource-Group': this.resourceGroup }
      ).execute();

      console.log(`Collection created: ${collectionId}`);
      return response;
    } catch (error) {
      console.error(`Error creating collection ${collectionId}:`, error);
      throw error;
    }
  }
}
module.exports = KnowledgeBaseClient;
Step 2: Creating the Document Upload Service
Add a service to manage documentation. Update /srv/knowledge-service.cds:
service KnowledgeService @(path: '/knowledge') {
// Tracking record for each document that is indexed in the vector store.
entity Documents {
  key ID: UUID;
  collectionID: String;
  title: String;
  category: String;
  content: LargeString;
  createdAt: Timestamp;
}

// Stores the document and indexes it; returns the generated document ID.
// `content` is LargeString so the action accepts the same text size that
// Documents.content can hold.
action uploadDocument(
  title: String,
  category: String,
  content: LargeString
) returns UUID;

// Searches the knowledge base and returns the matching chunks.
// `content` is LargeString for consistency with Documents.content.
action searchDocuments(
  query: String,
  maxResults: Integer
) returns array of {
  content: LargeString;
  score: Double;
  title: String;
  category: String;
};

// Creates the vector collection and returns its ID.
action initializeCollection() returns String;
}
Implement the handler in /srv/knowledge-service.js:
const cds = require('@sap/cds');
const KnowledgeBaseClient = require('./lib/document-grounding');
module.exports = class KnowledgeService extends cds.ApplicationService {
async init() {
const { Documents } = this.entities;
const kbClient = new KnowledgeBaseClient('default');
const COLLECTION_ID = 'support-docs';
// Initialize collection
this.on('initializeCollection', async () => {
await kbClient.createCollection(COLLECTION_ID);
return COLLECTION_ID;
});
// Upload document to knowledge base
this.on('uploadDocument', async (req) => {
const { title, category, content } = req.data;
const id = cds.utils.uuid();
try {
// Store in database for tracking
await INSERT.into(Documents).entries({
ID: id,
collectionID: COLLECTION_ID,
title,
category,
content,
createdAt: new Date()
});
// Index in vector store
await kbClient.uploadDocument(COLLECTION_ID, {
id,
title,
content,
metadata: { category }
});
return id;
} catch (error) {
console.error('Error uploading document:', error);
throw error;
}
});
// Search documents
this.on('searchDocuments', async (req) => {
const { query, maxResults } = req.data;
try {
const results = await kbClient.search(query, {
maxResults: maxResults || 5
});
return results.map(r => ({
content: r.content,
score: r.score,
title: r.metadata?.title || 'Untitled',
category: r.metadata?.category || 'General'
}));
} catch (error) {
console.error('Error searching documents:', error);
throw error;
}
});
await super.init();
}
};
Step 3: Integrating RAG into the Orchestration Pipeline
Now let's add grounding to our ticket processing. Update /srv/lib/ticket-pipeline.js:
const {
OrchestrationClient,
buildDocumentGroundingConfig
} = require('@sap-ai-sdk/orchestration');
class RAGTicketPipeline {
constructor(resourceGroup = 'default') {
this.resourceGroup = resourceGroup;
}
/**
* Generate a grounded response using RAG
*/
async generateGroundedResponse(ticket, dataRepositories = ['*']) {
const orchestrationClient = new OrchestrationClient(
{
promptTemplating: {
model: {
name: 'gpt-4o',
params: {
max_tokens: 1000,
temperature: 0.3 // Lower for factual responses
}
}
},
grounding: buildDocumentGroundingConfig({
placeholders: {
input: ['groundingRequest'],
output: 'groundingOutput'
},
filters: [
{
id: 'support-docs-filter',
dataRepositories: dataRepositories,
dataRepositoryType: 'vector'
}
],
// Include metadata in grounding results for citation
metadata_params: ['title', 'category']
})
},
{
resourceGroup: this.resourceGroup
}
);
try {
const response = await orchestrationClient.chatCompletion({
messages: [
{
role: 'system',
content: `You are a customer support agent.
IMPORTANT: Base your response ONLY on the provided documentation context.
If the documentation doesn't contain relevant information, say so honestly.
Always cite the source document when providing information.
Context from our knowledge base:
{{?groundingOutput}}`
},
{
role: 'user',
content: `Customer Ticket:
Subject: {{?subject}}
Description: {{?description}}
Question to answer: {{?groundingRequest}}
Provide a helpful response based on our documentation.`
}
],
placeholderValues: {
subject: ticket.subject,
description: ticket.description,
groundingRequest: `${ticket.subject} ${ticket.description}`
}
});
return {
content: response.getContent(),
usage: response.getTokenUsage(),
finishReason: response.getFinishReason()
};
} catch (error) {
console.error('Error generating grounded response:', error);
throw error;
}
}
/**
* Generate a response with source citations
*/
async generateWithCitations(ticket, dataRepositories = ['*']) {
const KnowledgeBaseClient = require('./document-grounding');
const kbClient = new KnowledgeBaseClient(this.resourceGroup);
// First, retrieve relevant documents
const searchResults = await kbClient.search(
`${ticket.subject} ${ticket.description}`,
{ maxResults: 5, filters: { repositories: dataRepositories } }
);
// Generate response with grounding
const response = await this.generateGroundedResponse(ticket, dataRepositories);
// Format citations from search results
const citations = searchResults.map((result, idx) => ({
reference: `[${idx + 1}]`,
title: result.metadata?.title || 'Untitled',
category: result.metadata?.category || 'General',
relevanceScore: result.score
}));
// Append citation list to response
let citedResponse = response.content;
if (citations.length > 0) {
citedResponse += '\n\n---\n**Sources:**\n';
citations.forEach(c => {
citedResponse += `${c.reference} ${c.title} (${c.category}) - Relevance: ${(c.relevanceScore * 100).toFixed(1)}%\n`;
});
}
return {
content: citedResponse,
citations,
usage: response.usage
};
}
}
module.exports = RAGTicketPipeline;
The key changes:
- Uses buildDocumentGroundingConfig() to configure grounding
- Specifies placeholders for input/output parameter binding
- Sets metadata_params to include document metadata for citations
- Configures data repository filters to search specific collections
Step 4: Uploading Support Documentation
Let's create a script to upload sample documentation. Create /scripts/upload-docs.js:
const cds = require('@sap/cds');
/**
 * Sample support documentation uploaded by this script.
 * Each entry has a `title`, a `category` and markdown `content`; the whole
 * entry object is passed to the service's `uploadDocument` action.
 */
const SUPPORT_DOCS = [
{
title: 'Password Reset Guide',
category: 'Account Access',
content: `# Password Reset Procedure
## Self-Service Reset
1. Go to login page and click "Forgot Password"
2. Enter your registered email address
3. Check your email for reset link (valid for 24 hours)
4. Click the link and enter new password
5. Password must be 8+ characters with uppercase, lowercase, number, and special character
## If Reset Email Not Received
- Check spam/junk folder
- Verify email address is correct
- Contact support if issue persists
## Account Lockout
After 5 failed login attempts, account is locked for 30 minutes.
Contact support for immediate unlock if urgent.`
},
{
title: 'Billing FAQ',
category: 'Billing',
content: `# Billing Questions
## Payment Methods
We accept: Visa, Mastercard, American Express, PayPal, Bank Transfer
## Invoice Schedule
- Monthly plans: Billed on subscription start date
- Annual plans: Billed annually with 15% discount
## Refund Policy
- Cancel within 14 days for full refund
- After 14 days, prorated refund for annual plans
- No refund for monthly plans mid-cycle
## Failed Payments
- 3-day grace period after failed payment
- Service suspended after 3 failed attempts
- Contact support to update payment method`
},
{
title: 'API Rate Limits',
category: 'Technical',
content: `# API Rate Limiting
## Default Limits
- Free tier: 100 requests/minute
- Pro tier: 1,000 requests/minute
- Enterprise: Custom limits
## Rate Limit Headers
Check these response headers:
- X-RateLimit-Limit: Your limit
- X-RateLimit-Remaining: Requests left
- X-RateLimit-Reset: Reset timestamp
## Handling Rate Limits
When you receive 429 Too Many Requests:
1. Check Retry-After header
2. Implement exponential backoff
3. Consider upgrading plan for higher limits
## Best Practices
- Cache responses where possible
- Use webhooks instead of polling
- Batch requests when supported`
}
];
/**
 * Connects to KnowledgeService, initializes the collection and uploads every
 * entry of SUPPORT_DOCS one after another.
 *
 * A failure to initialize the collection is logged and ignored (it may
 * already exist). A failed upload is logged and does not stop the remaining
 * uploads. The final log line reflects the real outcome rather than always
 * claiming success.
 *
 * @returns {Promise<{uploaded: number, failed: number}>} Upload counts.
 */
async function uploadDocs() {
  const srv = await cds.connect.to('KnowledgeService');

  // Initialize collection first
  try {
    await srv.initializeCollection();
    console.log('Collection initialized');
  } catch (error) {
    console.log('Collection may already exist:', error.message);
  }

  // Upload documents sequentially so the log output stays in document order.
  let uploaded = 0;
  const failedTitles = [];
  for (const doc of SUPPORT_DOCS) {
    try {
      const id = await srv.uploadDocument(doc);
      console.log(`Uploaded: ${doc.title} (${id})`);
      uploaded += 1;
    } catch (error) {
      console.error(`Error uploading ${doc.title}:`, error.message);
      failedTitles.push(doc.title);
    }
  }

  if (failedTitles.length === 0) {
    console.log('All documents uploaded');
  } else {
    console.error(
      `Uploaded ${uploaded} of ${SUPPORT_DOCS.length} documents; failed: ${failedTitles.join(', ')}`
    );
  }

  return { uploaded, failed: failedTitles.length };
}
uploadDocs().catch(console.error);
Run with:
cds repl
> .run scripts/upload-docs.js
Step 5: Testing Grounded Responses
Update /test/requests.http:
### Initialize collection
POST http://localhost:4004/knowledge/initializeCollection
Content-Type: application/json
### Upload a document
POST http://localhost:4004/knowledge/uploadDocument
Content-Type: application/json
{
"title": "Premium Add-on Features",
"category": "Billing",
"content": "# Premium Add-on Service ($49.99/month)\n\nIncludes:\n- Priority support (4-hour response time)\n- Advanced analytics dashboard\n- Custom integrations\n- Dedicated account manager\n\nTo cancel: Go to Settings > Subscriptions > Manage Add-ons"
}
### Search documents
POST http://localhost:4004/knowledge/searchDocuments
Content-Type: application/json
{
"query": "premium add-on charge invoice",
"maxResults": 3
}
### Create ticket about billing
POST http://localhost:4004/api/Tickets
Content-Type: application/json
{
"subject": "Unknown charge on invoice",
"description": "I see a $49.99 charge for 'Premium Add-on Service' on my invoice. I don't remember signing up for this. What is it and how do I cancel?"
}
### Process with RAG
POST http://localhost:4004/api/processTicketWithRAG
Content-Type: application/json
{
"ticketId": "YOUR-TICKET-ID"
}
Step 6: Advanced Grounding Configurations
Using SAP Help Portal as Data Source
SAP AI Core supports help.sap.com as a built-in grounding source:
grounding: buildDocumentGroundingConfig({
placeholders: {
input: ['groundingRequest'],
output: 'groundingOutput'
},
filters: [
{
id: 'sap-help-filter',
dataRepositoryType: 'help.sap.com',
dataRepositories: [] // Not needed for help.sap.com
}
]
})
Mixing Multiple Data Sources
Combine custom documents with SAP Help Portal:
grounding: buildDocumentGroundingConfig({
placeholders: {
input: ['groundingRequest'],
output: 'groundingOutput'
},
filters: [
{
id: 'custom-docs',
dataRepositoryType: 'vector',
dataRepositories: ['support-docs']
},
{
id: 'sap-help',
dataRepositoryType: 'help.sap.com',
dataRepositories: []
}
]
})
Step 7: Document Chunking Strategies
For large documents, implement smart chunking:
/**
 * Helpers for splitting long documents into smaller pieces before indexing.
 */
class DocumentChunker {
  /**
   * Split a document into fixed-size, overlapping character windows.
   *
   * The last window ends exactly at the end of the content; no further
   * window is emitted after it, because it would only repeat text the
   * previous window already covers.
   *
   * @param {string} content - Text to split.
   * @param {object} [options]
   * @param {number} [options.chunkSize=500] - Maximum characters per chunk (0 falls back to 500).
   * @param {number} [options.overlap=50] - Characters shared by consecutive chunks; 0 disables overlap.
   * @returns {{content: string, startIndex: number, endIndex: number}[]} Chunks in document order.
   * @throws {RangeError} If chunkSize is not positive, or overlap is negative
   *   or not smaller than chunkSize (the window would never advance).
   */
  chunkDocument(content, options = {}) {
    const chunkSize = options.chunkSize || 500;
    // `??` rather than `||` so an explicit overlap of 0 is honoured.
    const overlap = options.overlap ?? 50;

    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
    }
    if (!(overlap >= 0 && overlap < chunkSize)) {
      throw new RangeError(
        `overlap (${overlap}) must be >= 0 and smaller than chunkSize (${chunkSize})`
      );
    }

    const step = chunkSize - overlap;
    const chunks = [];

    for (let start = 0; start < content.length; start += step) {
      const end = Math.min(start + chunkSize, content.length);
      chunks.push({
        content: content.slice(start, end),
        startIndex: start,
        endIndex: end
      });
      if (end === content.length) {
        break;
      }
    }

    return chunks;
  }

  /**
   * Split by semantic boundaries (markdown sections).
   *
   * The text is split at every level 1-3 markdown header that follows a line
   * break; the header markers are consumed by the split. Sections whose
   * trimmed length is 50 characters or less are dropped.
   *
   * @param {string} content - Markdown text.
   * @returns {string[]} The remaining sections, untrimmed.
   */
  chunkBySections(content) {
    // Split by markdown headers
    const sections = content.split(/\n#{1,3}\s+/);
    return sections.filter((s) => s.trim().length > 50);
  }

  /**
   * Smart chunking that keeps paragraphs intact.
   *
   * Paragraphs (separated by one or more blank lines) are packed greedily
   * into chunks whose joined length, separators included, stays within
   * `maxChunkSize`. A single paragraph longer than `maxChunkSize` is kept
   * whole as its own chunk. Empty chunks are never returned.
   *
   * @param {string} content - Text to split.
   * @param {number} [maxChunkSize=500] - Target maximum characters per chunk.
   * @returns {string[]} Trimmed chunks in document order.
   */
  smartChunk(content, maxChunkSize = 500) {
    const separator = '\n\n';
    const chunks = [];
    let currentChunk = '';

    const flush = () => {
      const trimmed = currentChunk.trim();
      if (trimmed) {
        chunks.push(trimmed);
      }
    };

    for (const para of content.split(/\n\n+/)) {
      // Leading/trailing blank lines in the content produce empty pieces.
      if (para === '') {
        continue;
      }
      if (currentChunk === '') {
        currentChunk = para;
        continue;
      }
      // Count the separator too, so the joined chunk cannot exceed the limit.
      if (currentChunk.length + separator.length + para.length > maxChunkSize) {
        flush();
        currentChunk = para;
      } else {
        currentChunk += separator + para;
      }
    }

    flush();
    return chunks;
  }
}
module.exports = DocumentChunker;
Recap
In this post, we implemented RAG to ground AI responses in documentation:
- Document indexing: Upload and store documents in SAP HANA Vector Engine
- Semantic search: Find relevant documentation using vector embeddings
- Grounded generation: Generate responses based on retrieved context
- Source citations: Track and display information sources
- Resource configuration: Set up resource groups with grounding enabled
Our Support Ticket System now provides accurate, documentation-backed responses with proper source attribution.
Next Steps
In Part 5: Production-Ready AI Applications, we'll cover:
- Deploying to Cloud Foundry
- Security and authentication best practices
- Monitoring and observability
- Cost optimization strategies
- CI/CD pipelines for AI applications
- Performance tuning and caching
- Error handling and resilience patterns
