Skip to content

Commit cd894f8

Browse files
authored
feat(spanner/spansql): Add support for tokenlist and create search index (#11522)
Adds support for parsing `tokenlist` and `create search index` in spanner/spansql. Fixes https://togithub.com/googleapis/google-cloud-go/issues/11466
1 parent 7cd2512 commit cd894f8

File tree

6 files changed

+648
-8
lines changed

6 files changed

+648
-8
lines changed

spanner/spansql/keywords.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,11 @@ var keywords = map[string]bool{
127127

128128
// funcs is the set of reserved keywords that are functions.
129129
// https://cloud.google.com/spanner/docs/functions-and-operators
130-
var funcs = make(map[string]bool)
131-
var funcArgParsers = make(map[string]func(*parser) (Expr, *parseError))
132-
var aggregateFuncs = make(map[string]bool)
130+
var (
131+
funcs = make(map[string]bool)
132+
funcArgParsers = make(map[string]func(*parser) (Expr, *parseError))
133+
aggregateFuncs = make(map[string]bool)
134+
)
133135

134136
func init() {
135137
for _, f := range funcNames {
@@ -153,6 +155,11 @@ func init() {
153155
// Special case of SEQUENCE arg for GET_NEXT_SEQUENCE_VALUE, GET_INTERNAL_SEQUENCE_STATE
154156
funcArgParsers["GET_NEXT_SEQUENCE_VALUE"] = sequenceArgParser
155157
funcArgParsers["GET_INTERNAL_SEQUENCE_STATE"] = sequenceArgParser
158+
// Special case for tokenization, which uses `[, key => value]` definitions
159+
funcArgParsers["TOKENIZE_FULLTEXT"] = tokenDefinitionArgParser
160+
funcArgParsers["TOKENIZE_NGRAMS"] = tokenDefinitionArgParser
161+
funcArgParsers["TOKENIZE_NUMBER"] = tokenDefinitionArgParser
162+
funcArgParsers["TOKENIZE_SUBSTRING"] = tokenDefinitionArgParser
156163
}
157164

158165
var funcNames = []string{
@@ -234,6 +241,15 @@ var funcNames = []string{
234241
"TRIM",
235242
"UPPER",
236243

244+
// Token functions.
245+
"TOKEN",
246+
"TOKENIZE_BOOL",
247+
"TOKENIZE_FULLTEXT",
248+
"TOKENIZE_NGRAMS",
249+
"TOKENIZE_NUMBER",
250+
"TOKENIZE_SUBSTRING",
251+
"TOKENLIST_CONCAT",
252+
237253
// Array functions.
238254
"ARRAY",
239255
"ARRAY_CONCAT",

spanner/spansql/parser.go

Lines changed: 258 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,24 @@ func (p *parser) sniff(want ...string) bool {
938938
return true
939939
}
940940

941+
// sniff reports whether the next N+skip tokens are as specified.
942+
func (p *parser) sniff_ahead(skip int, want ...string) bool {
943+
// Store current parser state and restore on the way out.
944+
orig := *p
945+
defer func() { *p = orig }()
946+
947+
for i := 0; i < skip; i++ {
948+
p.next()
949+
}
950+
951+
for _, w := range want {
952+
if !p.next().caseEqual(w) {
953+
return false
954+
}
955+
}
956+
return true
957+
}
958+
941959
// sniffTokenType reports whether the next token type is as specified.
942960
func (p *parser) sniffTokenType(want tokenType) bool {
943961
orig := *p
@@ -980,7 +998,7 @@ func (p *parser) parseDDLStmt() (DDLStmt, *parseError) {
980998

981999
/*
9821000
statement:
983-
{ create_database | create_table | create_index | alter_table | drop_table | rename_table | drop_index | create_change_stream | alter_change_stream | drop_change_stream }
1001+
{ create_database | create_table | create_index | create_search_index | alter_table | drop_table | rename_table | drop_index | create_change_stream | alter_change_stream | drop_change_stream }
9841002
*/
9851003

9861004
// TODO: support create_database
@@ -991,6 +1009,9 @@ func (p *parser) parseDDLStmt() (DDLStmt, *parseError) {
9911009
} else if p.sniff("CREATE", "INDEX") || p.sniff("CREATE", "UNIQUE", "INDEX") || p.sniff("CREATE", "NULL_FILTERED", "INDEX") || p.sniff("CREATE", "UNIQUE", "NULL_FILTERED", "INDEX") {
9921010
ci, err := p.parseCreateIndex()
9931011
return ci, err
1012+
} else if p.sniff("CREATE", "SEARCH", "INDEX") {
1013+
ci, err := p.parseCreateSearchIndex()
1014+
return ci, err
9941015
} else if p.sniff("CREATE", "VIEW") || p.sniff("CREATE", "OR", "REPLACE", "VIEW") {
9951016
cv, err := p.parseCreateView()
9961017
return cv, err
@@ -1361,6 +1382,144 @@ func (p *parser) parseCreateIndex() (*CreateIndex, *parseError) {
13611382
return ci, nil
13621383
}
13631384

1385+
func (p *parser) parseCreateSearchIndex() (*CreateSearchIndex, *parseError) {
1386+
debugf("parseCreateSearchIndex: %v", p)
1387+
1388+
/*
1389+
CREATE SEARCH INDEX index_name
1390+
ON table_name ( token_column_list )
1391+
[ storing_clause ] [ partition_clause ]
1392+
[ orderby_clause ] [ where_clause ]
1393+
[ interleave_clause ] [ options_clause ]
1394+
1395+
where index_name is:
1396+
{a—z|A—Z}[{a—z|A—Z|0—9|_}+]
1397+
1398+
and token_column_list is:
1399+
column_name [, ...]
1400+
1401+
and storing_clause is:
1402+
STORING ( column_name [, ...] )
1403+
1404+
and partition_clause is:
1405+
PARTITION BY column_name [, ...]
1406+
1407+
and orderby_clause is:
1408+
ORDER BY column_name [ {ASC | DESC} ] [, column_name [ {ASC | DESC} ]]
1409+
1410+
and where_clause is:
1411+
WHERE column_name IS NOT NULL [AND ...]
1412+
1413+
and interleave_clause is:
1414+
, INTERLEAVE IN table_name
1415+
1416+
and options_clause is:
1417+
OPTIONS ( option_name=option_value [, ...] )
1418+
1419+
*/
1420+
1421+
if err := p.expect("CREATE"); err != nil {
1422+
return nil, err
1423+
}
1424+
pos := p.Pos()
1425+
if err := p.expect("SEARCH", "INDEX"); err != nil {
1426+
return nil, err
1427+
}
1428+
1429+
// Parse the index name
1430+
iname, err := p.parseTableOrIndexOrColumnName()
1431+
if err != nil {
1432+
return nil, err
1433+
}
1434+
1435+
// Parse the table name
1436+
if err := p.expect("ON"); err != nil {
1437+
return nil, err
1438+
}
1439+
tname, err := p.parseTableOrIndexOrColumnName()
1440+
if err != nil {
1441+
return nil, err
1442+
}
1443+
ci := &CreateSearchIndex{
1444+
Name: iname,
1445+
Table: tname,
1446+
Position: pos,
1447+
}
1448+
ci.Columns, err = p.parseKeyPartList()
1449+
if err != nil {
1450+
return nil, err
1451+
}
1452+
1453+
if p.eat("STORING") {
1454+
ci.Storing, err = p.parseColumnNameList()
1455+
if err != nil {
1456+
return nil, err
1457+
}
1458+
}
1459+
1460+
if p.eat("PARTITION", "BY") {
1461+
ci.PartitionBy, err = p.parseIDList()
1462+
if err != nil {
1463+
return nil, err
1464+
}
1465+
}
1466+
1467+
if p.eat("ORDER", "BY") {
1468+
for {
1469+
o, err := p.parseOrder()
1470+
if err != nil {
1471+
return nil, err
1472+
}
1473+
ci.OrderBy = append(ci.OrderBy, o)
1474+
1475+
if p.sniff(",", "INTERLEAVE", "IN") {
1476+
break
1477+
}
1478+
1479+
if !p.eat(",") {
1480+
break
1481+
}
1482+
1483+
}
1484+
}
1485+
1486+
if p.eat("WHERE") {
1487+
for {
1488+
name, err := p.parseTableOrIndexOrColumnName()
1489+
if err != nil {
1490+
return nil, err
1491+
}
1492+
if err := p.expect("IS", "NOT", "NULL"); err != nil {
1493+
return nil, err
1494+
}
1495+
ci.WhereIsNotNull = append(ci.WhereIsNotNull, name)
1496+
1497+
if !p.sniff("AND") {
1498+
break
1499+
}
1500+
if err := p.expect("AND"); err != nil {
1501+
return nil, err
1502+
}
1503+
}
1504+
}
1505+
1506+
if p.eat(",", "INTERLEAVE", "IN") {
1507+
ci.Interleave, err = p.parseTableOrIndexOrColumnName()
1508+
if err != nil {
1509+
return nil, err
1510+
}
1511+
}
1512+
1513+
if p.sniff("OPTIONS") {
1514+
ci.Options, err = p.parseSearchIndexOptions()
1515+
if err != nil {
1516+
return nil, err
1517+
}
1518+
}
1519+
1520+
return ci, nil
1521+
}
1522+
13641523
func (p *parser) parseCreateView() (*CreateView, *parseError) {
13651524
debugf("parseCreateView: %v", p)
13661525

@@ -1564,6 +1723,7 @@ func (p *parser) parseRevokeRole() (*RevokeRole, *parseError) {
15641723

15651724
return r, nil
15661725
}
1726+
15671727
func (p *parser) parseGrantOrRevokeRoleList(end string) ([]ID, *parseError) {
15681728
var roleList []ID
15691729
f := func(p *parser) *parseError {
@@ -1626,6 +1786,7 @@ func (p *parser) parsePrivileges() ([]Privilege, *parseError) {
16261786
}
16271787
return privs, nil
16281788
}
1789+
16291790
func (p *parser) parseAlterTable() (*AlterTable, *parseError) {
16301791
debugf("parseAlterTable: %v", p)
16311792

@@ -2054,7 +2215,7 @@ func (p *parser) parseColumnDef() (ColumnDef, *parseError) {
20542215

20552216
/*
20562217
column_def:
2057-
column_name {scalar_type | array_type} [NOT NULL] [{DEFAULT ( expression ) | AS ( expression ) STORED}] [options_def]
2218+
column_name {scalar_type | array_type} [NOT NULL] [{DEFAULT ( expression ) | AS ( expression ) {STORED | HIDDEN}}] [options_def]
20582219
*/
20592220

20602221
name, err := p.parseTableOrIndexOrColumnName()
@@ -2091,8 +2252,12 @@ func (p *parser) parseColumnDef() (ColumnDef, *parseError) {
20912252
if err := p.expect(")"); err != nil {
20922253
return ColumnDef{}, err
20932254
}
2094-
if err := p.expect("STORED"); err != nil {
2095-
return ColumnDef{}, err
2255+
if p.eat("HIDDEN") {
2256+
cd.Hidden = true
2257+
} else {
2258+
if err := p.expect("STORED"); err != nil {
2259+
return ColumnDef{}, err
2260+
}
20962261
}
20972262
}
20982263

@@ -2163,6 +2328,70 @@ func (p *parser) parseColumnAlteration() (ColumnAlteration, *parseError) {
21632328
return sct, nil
21642329
}
21652330

2331+
func (p *parser) parseSearchIndexOptions() (SearchIndexOptions, *parseError) {
2332+
debugf("parseSearchIndexOptions: %v", p)
2333+
/*
2334+
options_def:
2335+
OPTIONS (sort_order_sharding = { true | false }, disable_automatic_uid_column = { true | false })
2336+
*/
2337+
2338+
if err := p.expect("OPTIONS"); err != nil {
2339+
return SearchIndexOptions{}, err
2340+
}
2341+
if err := p.expect("("); err != nil {
2342+
return SearchIndexOptions{}, err
2343+
}
2344+
2345+
// TODO: Figure out if column options are case insensitive.
2346+
// We ignore case for the key (because it is easier) but not the value.
2347+
var opts SearchIndexOptions
2348+
for {
2349+
if p.eat("sort_order_sharding", "=") {
2350+
tok := p.next()
2351+
if tok.err != nil {
2352+
return SearchIndexOptions{}, tok.err
2353+
}
2354+
sortOrderSharding := new(bool)
2355+
switch tok.value {
2356+
case "true":
2357+
*sortOrderSharding = true
2358+
case "false":
2359+
*sortOrderSharding = false
2360+
default:
2361+
return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value)
2362+
}
2363+
opts.SortOrderSharding = sortOrderSharding
2364+
} else if p.eat("disable_automatic_uid_column", "=") {
2365+
tok := p.next()
2366+
if tok.err != nil {
2367+
return SearchIndexOptions{}, tok.err
2368+
}
2369+
disableAutomaticUIDColumn := new(bool)
2370+
switch tok.value {
2371+
case "true":
2372+
*disableAutomaticUIDColumn = true
2373+
case "false":
2374+
*disableAutomaticUIDColumn = false
2375+
default:
2376+
return SearchIndexOptions{}, p.errorf("got %q, want true or false", tok.value)
2377+
}
2378+
opts.DisableAutomaticUIDColumn = disableAutomaticUIDColumn
2379+
}
2380+
if p.sniff(")") {
2381+
break
2382+
}
2383+
if !p.eat(",") {
2384+
return SearchIndexOptions{}, p.errorf("missing ',' in options list")
2385+
}
2386+
}
2387+
2388+
if err := p.expect(")"); err != nil {
2389+
return SearchIndexOptions{}, err
2390+
}
2391+
2392+
return opts, nil
2393+
}
2394+
21662395
func (p *parser) parseColumnOptions() (ColumnOptions, *parseError) {
21672396
debugf("parseColumnOptions: %v", p)
21682397
/*
@@ -2891,6 +3120,7 @@ func (p *parser) parseCreateSequence() (*CreateSequence, *parseError) {
28913120

28923121
return cs, nil
28933122
}
3123+
28943124
func (p *parser) parseCreateProtoBundle() (*CreateProtoBundle, *parseError) {
28953125
debugf("parseCreateProtoBundle: %v", p)
28963126

@@ -3107,6 +3337,7 @@ var baseTypes = map[string]TypeBase{
31073337
"JSON": JSON,
31083338
"PROTO": Proto, // for use in CAST
31093339
"ENUM": Enum, // for use in CAST
3340+
"TOKENLIST": Tokenlist,
31103341
}
31113342

31123343
func (p *parser) parseBaseType() (Type, *parseError) {
@@ -3919,6 +4150,29 @@ var sequenceArgParser = func(p *parser) (Expr, *parseError) {
39194150
return p.parseExpr()
39204151
}
39214152

4153+
var tokenDefinitionArgParser = func(p *parser) (Expr, *parseError) {
4154+
if p.sniff_ahead(1, "=", ">") {
4155+
tok := p.next()
4156+
if tok.err != nil {
4157+
return DefinitionExpr{}, tok.err
4158+
}
4159+
definition := tok.value
4160+
if err := p.expect("=", ">"); err != nil {
4161+
return DefinitionExpr{}, err
4162+
}
4163+
value, err := p.parseExpr()
4164+
if err != nil {
4165+
return DefinitionExpr{}, err
4166+
}
4167+
return DefinitionExpr{
4168+
Key: definition,
4169+
Value: value,
4170+
}, nil
4171+
} else {
4172+
return p.parseExpr()
4173+
}
4174+
}
4175+
39224176
func (p *parser) parseAggregateFunc() (Func, *parseError) {
39234177
tok := p.next()
39244178
if tok.err != nil {

0 commit comments

Comments
 (0)