The keywords package provides SQL keyword recognition, categorization, and multi-dialect support. It enables the tokenizer and parser to correctly identify and classify SQL keywords across PostgreSQL, MySQL, SQL Server, Oracle, and SQLite dialects.
- Multi-Dialect Support: PostgreSQL, MySQL, SQL Server, Oracle, SQLite
- Keyword Categorization: Reserved, DML, compound, window functions
- Compound Keywords: GROUP BY, ORDER BY, LEFT JOIN, etc.
- Case-Insensitive: Recognizes keywords in any case
- Extensible: Support for adding custom keywords
- Thread-Safe: All operations are safe for concurrent use
Main keyword registry:
type Keywords struct {
dialect SQLDialect
// Internal keyword maps
}

Supported SQL dialects:
type SQLDialect int
const (
PostgreSQL SQLDialect = iota
MySQL
SQLServer
Oracle
SQLite
Generic // SQL-99 standard keywords
)

Keyword classification:
type KeywordCategory int
const (
CategoryReserved KeywordCategory = iota
CategoryDML
CategoryDDL
CategoryFunction
CategoryOperator
CategoryDataType
)

package main
import (
"fmt"

"github.com/ajitpratap0/GoSQLX/pkg/sql/keywords"
)
func main() {
// Create keyword registry for PostgreSQL
kw := keywords.New(keywords.PostgreSQL)
// Check if word is a keyword
if kw.IsKeyword("SELECT") {
fmt.Println("SELECT is a keyword")
}
// Check if reserved
if kw.IsReserved("TABLE") {
fmt.Println("TABLE is reserved")
}
// Get keyword info
keyword := kw.GetKeyword("JOIN")
fmt.Printf("Type: %s, Category: %d\n", keyword.TokenType, keyword.Category)
}

kw := keywords.New(keywords.Generic)
// Check compound keywords
if kw.IsCompoundKeyword("GROUP", "BY") {
fmt.Println("GROUP BY is a compound keyword")
}
// Get compound keyword type
tokenType := kw.GetCompoundKeywordType("ORDER", "BY")
fmt.Printf("ORDER BY token type: %s\n", tokenType)

// PostgreSQL-specific
pgKw := keywords.New(keywords.PostgreSQL)
if pgKw.IsKeyword("ILIKE") {
fmt.Println("ILIKE is PostgreSQL-specific")
}
// MySQL-specific
myKw := keywords.New(keywords.MySQL)
if myKw.IsKeyword("UNSIGNED") {
fmt.Println("UNSIGNED is MySQL-specific")
}
// SQLite-specific
sqliteKw := keywords.New(keywords.SQLite)
if sqliteKw.IsKeyword("AUTOINCREMENT") {
fmt.Println("AUTOINCREMENT is SQLite-specific")
}

Core SQL statement keywords:
SELECT, FROM, WHERE, INSERT, UPDATE, DELETE, CREATE, ALTER, DROP,
JOIN, INNER, LEFT, RIGHT, OUTER, FULL, CROSS, NATURAL,
GROUP, ORDER, HAVING, UNION, EXCEPT, INTERSECT,
WITH, RECURSIVE, AS, ON, USING,
WINDOW, PARTITION, OVER, ROWS, RANGE, etc.
Data manipulation modifiers:
DISTINCT, ALL, FETCH, FIRST, NEXT, LAST, ONLY,
WITH TIES, NULLS, LIMIT, OFFSET, etc.
Multi-word keywords recognized as single tokens:
GROUP BY, ORDER BY,
LEFT JOIN, RIGHT JOIN, FULL JOIN, CROSS JOIN, NATURAL JOIN,
INNER JOIN, LEFT OUTER JOIN, RIGHT OUTER JOIN, FULL OUTER JOIN,
UNION ALL, WITH TIES, NULLS FIRST, NULLS LAST, etc.
Window function names and modifiers:
ROW_NUMBER, RANK, DENSE_RANK, NTILE, PERCENT_RANK, CUME_DIST,
LAG, LEAD, FIRST_VALUE, LAST_VALUE, NTH_VALUE,
ROWS BETWEEN, RANGE BETWEEN, UNBOUNDED PRECEDING, CURRENT ROW, etc.
pgKeywords := []string{
"MATERIALIZED", // Materialized views
"ILIKE", // Case-insensitive LIKE
"SIMILAR", // SIMILAR TO operator
"FREEZE", // VACUUM FREEZE
"ANALYSE", "ANALYZE", // Statistics gathering
"CONCURRENTLY", // Concurrent operations
"REINDEX", // Index rebuilding
"TOAST", // TOAST storage
"NOWAIT", // Lock timeout
"RECURSIVE", // Recursive CTEs
"RETURNING", // RETURNING clause
}

mysqlKeywords := []string{
"BINARY", // Binary collation
"CHAR", "VARCHAR", // Character types
"DATETIME", // DateTime type
"DECIMAL", // Decimal type
"UNSIGNED", // Unsigned modifier
"ZEROFILL", // Zero-fill display
"FORCE", // Force index
"IGNORE", // Ignore errors
"INDEX", "KEY", // Index keywords
"KILL", // Kill query
"OPTION", // Query options
"PURGE", // Purge logs
"READ", "WRITE", // Lock types
"STATUS", // Show status
"VARIABLES", // Show variables
}

sqliteKeywords := []string{
"ABORT", // Transaction abort
"ACTION", // Foreign key action
"AFTER", // Trigger timing
"ATTACH", // Attach database
"AUTOINCREMENT", // Auto-increment
"CONFLICT", // Conflict resolution
"DATABASE", // Database keyword
"DETACH", // Detach database
"EXCLUSIVE", // Exclusive lock
"INDEXED", // Index hints
"INSTEAD", // INSTEAD OF trigger
"PLAN", // Query plan
"QUERY", // Query keyword
"RAISE", // Raise error
"REPLACE", // Replace operation
"TEMP", "TEMPORARY", // Temporary objects
"VACUUM", // Database vacuum
"VIRTUAL", // Virtual tables
}

Create a keyword registry for a specific dialect:
func New(dialect SQLDialect) *Keywords

Check if a word is a SQL keyword:
func (k *Keywords) IsKeyword(word string) bool

Check if a keyword is reserved:
func (k *Keywords) IsReserved(word string) bool

Get detailed keyword information:
func (k *Keywords) GetKeyword(word string) *Keyword

Get the token type for a keyword:
func (k *Keywords) GetTokenType(word string) string

Check if two words form a compound keyword:
func (k *Keywords) IsCompoundKeyword(word1, word2 string) bool

Get the token type for a compound keyword:
func (k *Keywords) GetCompoundKeywordType(word1, word2 string) string

Add a custom keyword (for extensions):
func (k *Keywords) AddKeyword(word string, tokenType string, category KeywordCategory)

The keywords package is used by the tokenizer to identify SQL keywords:
// In tokenizer
kw := keywords.New(keywords.PostgreSQL)
// Check if identifier is actually a keyword
if kw.IsKeyword(identifierText) {
tokenType = kw.GetTokenType(identifierText)
} else {
tokenType = "IDENTIFIER"
}

The parser uses keyword information for syntax validation:
// Check if next token is a specific keyword
if p.currentToken.Type == "GROUP" {
// Expecting "BY" for GROUP BY
if p.peekToken.Type == "BY" {
// Parse GROUP BY clause
}
}

All keyword matching is case-insensitive:
kw := keywords.New(keywords.Generic)
kw.IsKeyword("SELECT") // true
kw.IsKeyword("select") // true
kw.IsKeyword("Select") // true
kw.IsKeyword("SeLeCt") // true

- Lookup Time: O(1) hash map lookups
- Memory: Pre-allocated keyword maps
- Thread-Safe: No synchronization overhead for reads
- Cache-Friendly: Keywords stored in contiguous memory
func ValidateIdentifier(name string) error {
kw := keywords.New(keywords.PostgreSQL)
if kw.IsReserved(name) {
return fmt.Errorf("%s is a reserved keyword", name)
}
return nil
}

func FormatKeyword(word string, style string) string {
kw := keywords.New(keywords.Generic)
if !kw.IsKeyword(word) {
return word // Not a keyword, return as-is
}
switch style {
case "upper":
return strings.ToUpper(word)
case "lower":
return strings.ToLower(word)
case "title":
return strings.Title(strings.ToLower(word)) // NOTE: strings.Title is deprecated since Go 1.18; prefer golang.org/x/text/cases in new code
default:
return word
}
}

func HighlightSQL(sql string) string {
kw := keywords.New(keywords.Generic)
words := strings.Fields(sql)
for i, word := range words {
if kw.IsKeyword(word) {
words[i] = fmt.Sprintf("<keyword>%s</keyword>", word)
}
}
return strings.Join(words, " ")
}

Run keyword tests:
# All tests
go test -v ./pkg/sql/keywords/
# With race detection
go test -race ./pkg/sql/keywords/
# Specific dialects
go test -v -run TestPostgreSQLKeywords ./pkg/sql/keywords/
go test -v -run TestMySQLKeywords ./pkg/sql/keywords/
go test -v -run TestCompoundKeywords ./pkg/sql/keywords/

// GOOD: Create once at package level
var globalKeywords = keywords.New(keywords.PostgreSQL)
func IsKeyword(word string) bool {
return globalKeywords.IsKeyword(word)
}
// BAD: Creating repeatedly
func IsKeyword(word string) bool {
kw := keywords.New(keywords.PostgreSQL) // Wasteful
return kw.IsKeyword(word)
}

// Match your database
pgKeywords := keywords.New(keywords.PostgreSQL) // For PostgreSQL
myKeywords := keywords.New(keywords.MySQL) // For MySQL
genericKeywords := keywords.New(keywords.Generic) // For SQL-99 standard

func ValidateTableName(name string) error {
kw := keywords.New(keywords.PostgreSQL)
if kw.IsReserved(name) {
return fmt.Errorf("'%s' is a reserved keyword and cannot be used as a table name", name)
}
return nil
}

- tokenizer: Uses keywords for token classification
- parser: Uses keywords for syntax validation
- models: Token type definitions
- v1.5.0: Added NULLS FIRST/LAST keywords
- v1.4.0: Expanded PostgreSQL operator support
- v1.3.0: Window function keywords
- v1.2.0: CTE and set operation keywords
- v1.0.0: Core keyword system with multi-dialect support