AI-powered semantic search plugin that enhances AtoM search capabilities through thesaurus-based query expansion, vector embeddings, and integration with external knowledge sources (WordNet/Datamuse, Wikidata). Provides intelligent synonym matching, multilingual support, and Elasticsearch integration for improved search relevance in GLAM institutions.
namespace AtomFramework\Services\SemanticSearch;

class ThesaurusService
{
    // Term management
    public function addTerm(string $term, string $source, string $language, array $options): ?int
    public function getTerm(int $id): ?object
    public function findTerm(string $term, ?string $source, string $language): ?object
    public function searchTerms(string $query, int $limit = 20): array
    public function normalizeTerm(string $term): string

    // Synonym management
    public function addSynonym(int $termId, string $synonymText, string $source, string $type, float $weight): ?int
    public function getSynonyms(int $termId, ?string $type, ?float $minWeight, int $limit): array
    public function getSynonymsForText(string $term, string $language = 'en'): array

    // Query expansion
    public function expandQuery(string $query, string $language = 'en'): array

    // Elasticsearch export
    public function exportToElasticsearch(?string $outputPath = null): string
    public function getElasticsearchConfig(): array

    // Local import
    public function importLocalSynonyms(?string $domain = null): array

    // Settings
    public function getSetting(string $key, $default = null)
    public function setSetting(string $key, $value): bool

    // Statistics
    public function getStats(): array
}
class SemanticSearchService
{
    // Search with expansion
    public function search(string $query, array $options = []): array
    public function buildElasticsearchQuery(string $query, ?array $expansion, array $options): array

    // Expansion info
    public function getExpansionInfo(string $query, string $language = 'en'): array

    // Suggestions
    public function getSuggestions(string $prefix, int $limit = 10): array
    public function getDidYouMean(string $query): array

    // Configuration
    public function isEnabled(): bool
    public function enable(): void
    public function disable(): void

    // Analytics
    public function getPopularSearches(int $limit = 20, ?string $period = null): array
    public function getExpansionStats(): array
}
class EmbeddingService
{
    // Embedding models
    public const MODEL_NOMIC = 'nomic-embed-text';
    public const MODEL_MXBAI = 'mxbai-embed-large';
    public const MODEL_ALL_MINILM = 'all-minilm';

    // Availability
    public function isAvailable(): bool
    public function getAvailableModels(): array

    // Embedding generation
    public function getEmbedding(string $text, ?string $model = null): ?array
    public function getEmbeddings(array $texts, ?string $model = null): array

    // Term embeddings
    public function generateTermEmbedding(int $termId, ?string $model = null): bool
    public function getTermEmbedding(int $termId, ?string $model = null): ?array
    public function generateAllEmbeddings(?string $model = null): array

    // Similarity search
    public function cosineSimilarity(array $a, array $b): float
    public function findSimilarTerms(string $query, int $limit = 10, float $minSimilarity = 0.7): array
    public function findRelatedTerms(int $termId, int $limit = 10): array

    // Statistics
    public function getStats(): array
}
class WordNetSyncService
{
    // Domain sync methods
    public function syncArchivalTerms(): array       // ~150 terms
    public function syncLibraryTerms(): array        // ~55 terms
    public function syncMuseumTerms(): array         // ~65 terms
    public function syncGeneralTerms(): array        // ~300 terms
    public function syncSouthAfricanTerms(): array   // ~120 terms
    public function syncHistoricalTerms(): array     // ~40 terms
    public function syncAllDomains(): array          // All 730+ terms

    // Custom sync
    public function syncTerms(array $terms, string $domain): array
    public function syncCustomTerms(array $terms, string $domain): array
    public function syncDomain(string $domain, int $limit = 0): array

    // Datamuse API
    public function fetchSynonyms(string $word): array
    public function fetchRelatedWords(string $word): array
    public function fetchDefinitions(string $word): array
    public function fetchSoundsLike(string $word): array
    public function fetchSpelledLike(string $word): array
}
class WikidataSyncService
{
    // Sync operations
    public function syncHeritageTerms(): array
    public function syncSouthAfricanTerms(): array
    public function syncArchivalTerms(int $limit = 0): array
    public function syncClassAndSubclasses(string $qid, string $domain): array

    // SPARQL queries
    public function fetchItem(string $qid): ?array
    public function fetchSubclasses(string $parentQid, int $limit = null): array
    public function fetchArchiveTerms(): array
    public function fetchSouthAfricanHeritage(): array
}
Term "archive"
           |
           v
+----------------------+
| Get term + definition|
| "archive: a place    |
|  where historical    |
|  records are kept"   |
+----------------------+
           |
           v
+----------------------+
| Ollama API Request   |
| POST /api/embeddings |
| model: nomic-embed   |
| prompt: text         |
+----------------------+
           |
           v
+----------------------+
| Response             |
| [0.012, -0.089, ...] |
| (768 dimensions)     |
+----------------------+
           |
           v
+----------------------+
| Store in DB          |
| ahg_thesaurus_       |
| embedding table      |
+----------------------+
# Full sync (all tasks)
php bin/semantic-search-cron.php all

# Individual tasks
php bin/semantic-search-cron.php sync-wordnet
php bin/semantic-search-cron.php sync-wikidata
php bin/semantic-search-cron.php update-embeddings
php bin/semantic-search-cron.php export-es
php bin/semantic-search-cron.php cleanup

# Options
--domain=archival   # Filter by domain
--limit=500         # Limit terms processed
--force             # Force sync even if recent
--dry-run           # Show what would happen
--quiet             # Suppress output