From 5c372def69b5d16f86acc82e6dbeb2010e61b587 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Sat, 14 Mar 2026 08:04:38 +0800 Subject: [PATCH 1/6] feat: introduce go binding --- .github/workflows/ci.yml | 6 +- bindings/go/Makefile | 61 ++++++++ bindings/go/catalog.go | 112 ++++++++++++++ bindings/go/embed_darwin_amd64.go | 31 ++++ bindings/go/embed_darwin_arm64.go | 31 ++++ bindings/go/embed_linux_amd64.go | 31 ++++ bindings/go/embed_linux_arm64.go | 31 ++++ bindings/go/error.go | 84 +++++++++++ bindings/go/ffi.go | 104 +++++++++++++ bindings/go/go.mod | 10 ++ bindings/go/go.sum | 8 + bindings/go/identifier.go | 88 +++++++++++ bindings/go/lib.go | 78 ++++++++++ bindings/go/paimon.go | 72 +++++++++ bindings/go/plan.go | 51 +++++++ bindings/go/read_builder.go | 107 +++++++++++++ bindings/go/table.go | 79 ++++++++++ bindings/go/table_read.go | 171 +++++++++++++++++++++ bindings/go/table_scan.go | 79 ++++++++++ bindings/go/tests/go.mod | 27 ++++ bindings/go/tests/go.sum | 57 +++++++ bindings/go/tests/paimon_test.go | 166 ++++++++++++++++++++ bindings/go/types.go | 243 ++++++++++++++++++++++++++++++ bindings/go/util_unix.go | 52 +++++++ 24 files changed, 1778 insertions(+), 1 deletion(-) create mode 100644 bindings/go/Makefile create mode 100644 bindings/go/catalog.go create mode 100644 bindings/go/embed_darwin_amd64.go create mode 100644 bindings/go/embed_darwin_arm64.go create mode 100644 bindings/go/embed_linux_amd64.go create mode 100644 bindings/go/embed_linux_arm64.go create mode 100644 bindings/go/error.go create mode 100644 bindings/go/ffi.go create mode 100644 bindings/go/go.mod create mode 100644 bindings/go/go.sum create mode 100644 bindings/go/identifier.go create mode 100644 bindings/go/lib.go create mode 100644 bindings/go/paimon.go create mode 100644 bindings/go/plan.go create mode 100644 bindings/go/read_builder.go create mode 100644 bindings/go/table.go create mode 100644 bindings/go/table_read.go create mode 100644 bindings/go/table_scan.go create mode 
100644 bindings/go/tests/go.mod create mode 100644 bindings/go/tests/go.sum create mode 100644 bindings/go/tests/paimon_test.go create mode 100644 bindings/go/types.go create mode 100644 bindings/go/util_unix.go diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 42128d6..73f47c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -90,12 +90,16 @@ jobs: - name: Start Docker containers run: make docker-up - - name: Integration Test + - name: Rust Integration Test run: cargo test -p paimon-integration-tests --all-targets env: RUST_LOG: DEBUG RUST_BACKTRACE: full + - name: Go Integration Test + working-directory: bindings/go + run: make test + - name: Stop Docker containers if: always() run: make docker-down diff --git a/bindings/go/Makefile b/bindings/go/Makefile new file mode 100644 index 0000000..b60762a --- /dev/null +++ b/bindings/go/Makefile @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +RUST_ROOT := $(shell cd ../.. 
&& pwd) +TARGET_DIR := $(RUST_ROOT)/target +GO_DIR := $(shell pwd) + +# Detect platform +UNAME_S := $(shell uname -s) +UNAME_M := $(shell uname -m) + +ifeq ($(UNAME_S),Darwin) + LIB_EXT := dylib + OS := darwin +else + LIB_EXT := so + OS := linux +endif + +ifeq ($(UNAME_M),arm64) + ARCH := arm64 +else ifeq ($(UNAME_M),aarch64) + ARCH := arm64 +else + ARCH := amd64 +endif + +LIB_NAME := libpaimon_c.$(OS).$(ARCH).$(LIB_EXT) + +.PHONY: build test clean + +# Build the Rust shared library, compress with zstd, and place in Go package dir. +build: + cd $(RUST_ROOT) && cargo build -p paimon-c --release + zstd -19 -f $(TARGET_DIR)/release/libpaimon_c.$(LIB_EXT) -o $(GO_DIR)/$(LIB_NAME).zst + +# Run Go integration tests. +# Requires test data: run 'make docker-up' from the repo root first. +# CGO is needed by arrow-go's cdata package used in tests. +PAIMON_TEST_WAREHOUSE ?= /tmp/paimon-warehouse + +test: build + cd $(GO_DIR)/tests && PAIMON_TEST_WAREHOUSE=$(PAIMON_TEST_WAREHOUSE) go test -v ./... + +clean: + cd $(RUST_ROOT) && cargo clean + rm -f $(GO_DIR)/libpaimon_c.*.zst diff --git a/bindings/go/catalog.go b/bindings/go/catalog.go new file mode 100644 index 0000000..8da8e6a --- /dev/null +++ b/bindings/go/catalog.go @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// Catalog wraps a paimon FileSystemCatalog.
+//
+// A Catalog owns a native handle; release it with Close.
+type Catalog struct {
+	ctx   context.Context
+	inner *paimonCatalog // native handle; nil once Close has run
+}
+
+// NewFileSystemCatalog creates a new FileSystemCatalog for the given warehouse path.
+// The returned Catalog must be released with Close.
+func (p *Paimon) NewFileSystemCatalog(warehouse string) (*Catalog, error) {
+	createFn := ffiCatalogNew.symbol(p.ctx)
+	inner, err := createFn(warehouse)
+	if err != nil {
+		return nil, err
+	}
+	return &Catalog{ctx: p.ctx, inner: inner}, nil
+}
+
+// Close releases the catalog resources.
+//
+// Close is a no-op on a nil receiver and on a catalog that was already
+// closed, so pairing a deferred Close with an explicit one cannot hand the
+// same native pointer to paimon_catalog_free twice.
+func (c *Catalog) Close() {
+	if c == nil || c.inner == nil {
+		return
+	}
+	ffiCatalogFree.symbol(c.ctx)(c.inner)
+	c.inner = nil
+}
+
+// GetTable retrieves a table from the catalog using the given identifier.
+//
+// It returns an *Error with CodeInvalidInput when the catalog or the
+// identifier is nil or already closed, instead of dereferencing a nil
+// pointer or passing a null handle to the native library.
+func (c *Catalog) GetTable(id *Identifier) (*Table, error) {
+	if c == nil || c.inner == nil {
+		return nil, &Error{code: CodeInvalidInput, message: "catalog is nil or closed"}
+	}
+	if id == nil || id.inner == nil {
+		return nil, &Error{code: CodeInvalidInput, message: "identifier is nil or closed"}
+	}
+	getFn := ffiCatalogGetTable.symbol(c.ctx)
+	inner, err := getFn(c.inner, id.inner)
+	if err != nil {
+		return nil, err
+	}
+	return &Table{ctx: c.ctx, inner: inner}, nil
+}
+
+// ffiCatalogNew binds paimon_catalog_new: one pointer argument (the warehouse
+// path as a byte pointer) and a resultCatalogNew return value.
+var ffiCatalogNew = newFFI(ffiOpts{
+	sym:    "paimon_catalog_new",
+	rType:  &typeResultCatalogNew,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(ctx context.Context, ffiCall ffiCall) func(warehouse string) (*paimonCatalog, error) {
+	return func(warehouse string) (*paimonCatalog, error) {
+		byteWarehouse, err := BytePtrFromString(warehouse)
+		if err != nil {
+			return nil, err
+		}
+		var result resultCatalogNew
+		// ffiCall is given the address of the result slot and the address
+		// of each argument value (here: the address of the byte pointer).
+		ffiCall(
+			unsafe.Pointer(&result),
+			unsafe.Pointer(&byteWarehouse),
+		)
+		if result.error != nil {
+			return nil, parseError(ctx, result.error)
+		}
+		return result.catalog, nil
+	}
+})
+
+// ffiCatalogFree binds paimon_catalog_free, which takes the catalog handle
+// and returns nothing.
+var ffiCatalogFree = newFFI(ffiOpts{
+	sym:    "paimon_catalog_free",
+	rType:  &ffi.TypeVoid,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(_ context.Context, ffiCall ffiCall) func(catalog *paimonCatalog) {
+	return func(catalog *paimonCatalog) {
+		ffiCall(
+			nil,
+			unsafe.Pointer(&catalog),
+		)
+	}
+})
+
+// ffiCatalogGetTable binds paimon_catalog_get_table: catalog handle and
+// identifier handle in, resultGetTable out.
+var ffiCatalogGetTable = newFFI(ffiOpts{
+	sym:    "paimon_catalog_get_table",
+	rType:  &typeResultGetTable,
+	aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer},
+}, func(ctx context.Context, ffiCall ffiCall) func(catalog *paimonCatalog, id *paimonIdentifier) (*paimonTable, error) {
+	return func(catalog *paimonCatalog, id *paimonIdentifier) (*paimonTable, error) {
+		var result resultGetTable
+		ffiCall(
+			unsafe.Pointer(&result),
+			unsafe.Pointer(&catalog),
+			unsafe.Pointer(&id),
+		)
+		if result.error != nil {
+			return nil, parseError(ctx, result.error)
+		}
+		return result.table, nil
+	}
+})
diff --git a/bindings/go/embed_darwin_amd64.go b/bindings/go/embed_darwin_amd64.go
new file mode 100644
index 0000000..304816e
--- /dev/null
+++ b/bindings/go/embed_darwin_amd64.go
@@ -0,0 +1,31 @@
+//go:build darwin && amd64
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import _ "embed"
+
+// libPaimonZst is the zstd-compressed paimon-c shared library embedded for
+// darwin/amd64 builds; loadEmbeddedLib decompresses it at runtime.
+//
+//go:embed libpaimon_c.darwin.amd64.dylib.zst
+var libPaimonZst []byte
+
+// tempFilePattern returns the os.CreateTemp name pattern used for the
+// extracted library on this platform.
+func tempFilePattern() string {
+	return "libpaimon_c*.dylib"
+}
diff --git a/bindings/go/embed_darwin_arm64.go b/bindings/go/embed_darwin_arm64.go
new file mode 100644
index 0000000..304b3a0
--- /dev/null
+++ b/bindings/go/embed_darwin_arm64.go
@@ -0,0 +1,31 @@
+//go:build darwin && arm64
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import _ "embed"
+
+// libPaimonZst is the zstd-compressed paimon-c shared library embedded for
+// darwin/arm64 builds; loadEmbeddedLib decompresses it at runtime.
+//
+//go:embed libpaimon_c.darwin.arm64.dylib.zst
+var libPaimonZst []byte
+
+// tempFilePattern returns the os.CreateTemp name pattern used for the
+// extracted library on this platform.
+func tempFilePattern() string {
+	return "libpaimon_c*.dylib"
+}
diff --git a/bindings/go/embed_linux_amd64.go b/bindings/go/embed_linux_amd64.go
new file mode 100644
index 0000000..1f07c3f
--- /dev/null
+++ b/bindings/go/embed_linux_amd64.go
@@ -0,0 +1,31 @@
+//go:build linux && amd64
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import _ "embed"
+
+// libPaimonZst is the zstd-compressed paimon-c shared library embedded for
+// linux/amd64 builds; loadEmbeddedLib decompresses it at runtime.
+//
+//go:embed libpaimon_c.linux.amd64.so.zst
+var libPaimonZst []byte
+
+// tempFilePattern returns the os.CreateTemp name pattern used for the
+// extracted library on this platform.
+func tempFilePattern() string {
+	return "libpaimon_c*.so"
+}
diff --git a/bindings/go/embed_linux_arm64.go b/bindings/go/embed_linux_arm64.go
new file mode 100644
index 0000000..16bcc99
--- /dev/null
+++ b/bindings/go/embed_linux_arm64.go
@@ -0,0 +1,31 @@
+//go:build linux && arm64
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import _ "embed"
+
+// libPaimonZst is the zstd-compressed paimon-c shared library embedded for
+// linux/arm64 builds; loadEmbeddedLib decompresses it at runtime.
+//
+//go:embed libpaimon_c.linux.arm64.so.zst
+var libPaimonZst []byte
+
+// tempFilePattern returns the os.CreateTemp name pattern used for the
+// extracted library on this platform.
+func tempFilePattern() string {
+	return "libpaimon_c*.so"
+}
diff --git a/bindings/go/error.go b/bindings/go/error.go
new file mode 100644
index 0000000..710ed83
--- /dev/null
+++ b/bindings/go/error.go
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"fmt"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// ErrorCode represents categories of errors from paimon.
+type ErrorCode int32
+
+// Error codes carried by Error.
+// NOTE(review): the numeric values presumably mirror the paimon-c error
+// enum — confirm against the C header before renumbering.
+const (
+	CodeUnexpected   ErrorCode = 0
+	CodeUnsupported  ErrorCode = 1
+	CodeNotFound     ErrorCode = 2
+	CodeAlreadyExist ErrorCode = 3
+	CodeInvalidInput ErrorCode = 4
+	CodeIoError      ErrorCode = 5
+)
+
+// parseError converts a native paimonError into a Go *Error and releases
+// the native value. It returns nil when err is nil.
+func parseError(ctx context.Context, err *paimonError) error {
+	if err == nil {
+		return nil
+	}
+	// The deferred free runs after the return expression below has been
+	// evaluated, i.e. after code and message were converted to Go values.
+	defer ffiErrorFree.symbol(ctx)(err)
+	return &Error{
+		code:    ErrorCode(err.code),
+		message: string(parseBytes(err.message)),
+	}
+}
+
+// Error represents a paimon error with code and message.
+type Error struct {
+	code    ErrorCode // error category
+	message string    // message text taken from the native error
+}
+
+// Error implements the error interface, formatting the value as
+// "paimon error(<code>): <message>".
+func (e *Error) Error() string {
+	return fmt.Sprintf("paimon error(%d): %s", e.code, e.message)
+}
+
+// Code returns the error code.
+func (e *Error) Code() ErrorCode {
+	return e.code
+}
+
+// Message returns the error message.
+func (e *Error) Message() string {
+	return e.message
+}
+
+// ffiErrorFree binds paimon_error_free, which takes the native error
+// pointer and returns nothing.
+var ffiErrorFree = newFFI(ffiOpts{
+	sym:    "paimon_error_free",
+	rType:  &ffi.TypeVoid,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(_ context.Context, ffiCall ffiCall) func(e *paimonError) {
+	return func(e *paimonError) {
+		// The argument slot holds the address of the pointer value.
+		ffiCall(
+			nil,
+			unsafe.Pointer(&e),
+		)
+	}
+})
diff --git a/bindings/go/ffi.go b/bindings/go/ffi.go
new file mode 100644
index 0000000..dbcdeb8
--- /dev/null
+++ b/bindings/go/ffi.go
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"errors"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// ffiOpts describes one foreign function: its symbol name plus the libffi
+// return and argument types.
+type ffiOpts struct {
+	sym    contextKey
+	rType  *ffi.Type
+	aTypes []*ffi.Type
+}
+
+// ffiCall invokes a prepared foreign function. rValue points at the slot
+// that receives the return value (nil for void); each aValue points at one
+// argument value.
+type ffiCall func(rValue unsafe.Pointer, aValues ...unsafe.Pointer)
+
+// contextKey is the context key type under which bound functions are
+// stored; the key text is the symbol name.
+type contextKey string
+
+// String returns the symbol name.
+func (c contextKey) String() string {
+	return string(c)
+}
+
+// contextWithFFI binds one symbol from lib and returns a context carrying it.
+type contextWithFFI func(ctx context.Context, lib uintptr) (context.Context, error)
+
+// FFI is a generic type-safe wrapper for a foreign function.
+type FFI[T any] struct {
+	opts     ffiOpts
+	withFunc func(ctx context.Context, ffiCall ffiCall) T
+}
+
+// newFFI declares a foreign function and registers it in withFFIs so that
+// newContext binds it when a library is loaded.
+func newFFI[T any](opts ffiOpts, withFunc func(ctx context.Context, ffiCall ffiCall) T) *FFI[T] {
+	f := &FFI[T]{
+		opts:     opts,
+		withFunc: withFunc,
+	}
+	withFFIs = append(withFFIs, f.withFFI)
+	return f
+}
+
+// symbol returns the bound Go function stored in ctx.
+// It panics (failed type assertion) if ctx does not carry this symbol,
+// i.e. if ctx was not produced by newContext.
+func (f *FFI[T]) symbol(ctx context.Context) T {
+	return ctx.Value(f.opts.sym).(T)
+}
+
+// withFFI prepares the call interface for the symbol, resolves it in lib and
+// returns a child context that carries the resulting Go function.
+func (f *FFI[T]) withFFI(ctx context.Context, lib uintptr) (context.Context, error) {
+	var cif ffi.Cif
+	if status := ffi.PrepCif(
+		&cif,
+		ffi.DefaultAbi,
+		uint32(len(f.opts.aTypes)),
+		f.opts.rType,
+		f.opts.aTypes...,
+	); status != ffi.OK {
+		return nil, errors.New(status.String())
+	}
+	fn, err := GetProcAddress(lib, f.opts.sym.String())
+	if err != nil {
+		return nil, err
+	}
+	val := f.withFunc(ctx, func(rValue unsafe.Pointer, aValues ...unsafe.Pointer) {
+		ffi.Call(&cif, fn, rValue, aValues...)
+	})
+	return context.WithValue(ctx, f.opts.sym, val), nil
+}
+
+// withFFIs lists the binders of every function declared through newFFI.
+var withFFIs []contextWithFFI
+
+// newContext loads the shared library at path and binds every registered
+// foreign function into a fresh context.
+//
+// On success cancel unloads the library. On failure ctx and cancel are nil
+// and the library, if it was loaded, has already been unloaded again, so a
+// missing symbol does not leak the library handle.
+func newContext(path string) (ctx context.Context, cancel context.CancelFunc, err error) {
+	lib, err := LoadLibrary(path)
+	if err != nil {
+		return nil, nil, err
+	}
+	ctx = context.Background()
+	for _, withFFI := range withFFIs {
+		if ctx, err = withFFI(ctx, lib); err != nil {
+			// Best effort: the binding error is the one worth reporting.
+			_ = FreeLibrary(lib)
+			return nil, nil, err
+		}
+	}
+	cancel = func() {
+		_ = FreeLibrary(lib)
+	}
+	return ctx, cancel, nil
+}
diff --git a/bindings/go/go.mod b/bindings/go/go.mod
new file mode 100644
index 0000000..ddeeed9
--- /dev/null
+++ b/bindings/go/go.mod
@@ -0,0 +1,10 @@
+module github.com/apache/paimon-rust/bindings/go
+
+go 1.22.4
+
+require (
+	github.com/ebitengine/purego v0.8.4
+	github.com/jupiterrider/ffi v0.5.0
+	github.com/klauspost/compress v1.17.9
+	golang.org/x/sys v0.24.0
+)
diff --git a/bindings/go/go.sum b/bindings/go/go.sum
new file mode 100644
index 0000000..a3373ec
--- /dev/null
+++ b/bindings/go/go.sum
@@ -0,0 +1,8 @@
+github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw=
+github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ=
+github.com/jupiterrider/ffi v0.5.0 h1:j2nSgpabbV1JOwgP4Kn449sJUHq3cVLAZVBoOYn44V8=
+github.com/jupiterrider/ffi v0.5.0/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo=
+github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
+github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
+golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg=
+golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
diff --git a/bindings/go/identifier.go b/bindings/go/identifier.go
new file mode 100644
index 0000000..8018b12
--- /dev/null
+++ b/bindings/go/identifier.go
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// Identifier identifies a table by database and object name.
+type Identifier struct {
+	ctx   context.Context   // context carrying the bound FFI functions
+	inner *paimonIdentifier // native identifier handle
+}
+
+// NewIdentifier creates a new Identifier with the given database and object name.
+// The returned Identifier holds a native handle; release it with Close.
+func (p *Paimon) NewIdentifier(database, object string) (*Identifier, error) {
+	createFn := ffiIdentifierNew.symbol(p.ctx)
+	inner, err := createFn(database, object)
+	if err != nil {
+		return nil, err
+	}
+	return &Identifier{ctx: p.ctx, inner: inner}, nil
+}
+
+// Close releases the identifier resources.
+//
+// Close is a no-op on a nil receiver and on an identifier that was already
+// closed, so the native pointer is handed to paimon_identifier_free once.
+func (id *Identifier) Close() {
+	if id == nil || id.inner == nil {
+		return
+	}
+	ffiIdentifierFree.symbol(id.ctx)(id.inner)
+	id.inner = nil
+}
+
+// ffiIdentifierNew binds paimon_identifier_new: two pointer arguments (the
+// database and object names as byte pointers) and a resultIdentifierNew
+// return value.
+var ffiIdentifierNew = newFFI(ffiOpts{
+	sym:    "paimon_identifier_new",
+	rType:  &typeResultIdentifierNew,
+	aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer},
+}, func(ctx context.Context, ffiCall ffiCall) func(database, object string) (*paimonIdentifier, error) {
+	return func(database, object string) (*paimonIdentifier, error) {
+		byteDB, err := BytePtrFromString(database)
+		if err != nil {
+			return nil, err
+		}
+		byteObj, err := BytePtrFromString(object)
+		if err != nil {
+			return nil, err
+		}
+		var result resultIdentifierNew
+		// Each argument slot holds the address of the pointer value.
+		ffiCall(
+			unsafe.Pointer(&result),
+			unsafe.Pointer(&byteDB),
+			unsafe.Pointer(&byteObj),
+		)
+		if result.error != nil {
+			return nil, parseError(ctx, result.error)
+		}
+		return result.identifier, nil
+	}
+})
+
+// ffiIdentifierFree binds paimon_identifier_free, which takes the identifier
+// handle and returns nothing.
+var ffiIdentifierFree = newFFI(ffiOpts{
+	sym:    "paimon_identifier_free",
+	rType:  &ffi.TypeVoid,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(_ context.Context, ffiCall ffiCall) func(id *paimonIdentifier) {
+	return func(id *paimonIdentifier) {
+		ffiCall(
+			nil,
+			unsafe.Pointer(&id),
+		)
+	}
+})
diff --git a/bindings/go/lib.go b/bindings/go/lib.go
new file mode 100644
index 0000000..1100ec9
--- /dev/null
+++ b/bindings/go/lib.go
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"bytes"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+
+	"github.com/klauspost/compress/zstd"
+)
+
+// State of the one-time extraction of the embedded library.
+var (
+	libOnce sync.Once
+	libPath string // path of the extracted library; set on success
+	libErr  error  // extraction failure, reported on every call
+)
+
+// loadEmbeddedLib decompresses the embedded shared library and writes it
+// to a temp file. Called once via sync.Once.
+//
+// The outcome of the first call (libPath or libErr) is reused by every
+// later call.
+func loadEmbeddedLib() error {
+	libOnce.Do(func() {
+		data, err := decompressLib(libPaimonZst)
+		if err != nil {
+			libErr = fmt.Errorf("paimon: failed to decompress embedded library: %w", err)
+			return
+		}
+		libPath, err = writeTempExec(tempFilePattern(), data)
+		if err != nil {
+			libErr = fmt.Errorf("paimon: failed to write temp library: %w", err)
+			return
+		}
+	})
+	return libErr
+}
+
+// decompressLib returns the zstd-decompressed contents of raw.
+func decompressLib(raw []byte) ([]byte, error) {
+	decoder, err := zstd.NewReader(bytes.NewReader(raw))
+	if err != nil {
+		return nil, err
+	}
+	defer decoder.Close()
+	return io.ReadAll(decoder)
+}
+
+// writeTempExec writes binary to a new temporary file whose name follows
+// pattern (see os.CreateTemp) and returns the file's path.
+//
+// The file is made readable, writable and executable for the owner only
+// (0700): a world-writable library in a shared temp directory could be
+// replaced by another local user before it is loaded. If writing, chmod or
+// close fails, the partially written file is removed and the first error
+// is returned.
+func writeTempExec(pattern string, binary []byte) (string, error) {
+	f, err := os.CreateTemp("", pattern)
+	if err != nil {
+		return "", err
+	}
+	name := f.Name()
+	_, err = f.Write(binary)
+	if err == nil {
+		err = f.Chmod(0o700)
+	}
+	// Always close; a close error matters because it can report a failed
+	// flush, but it must not mask an earlier write/chmod error.
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		_ = os.Remove(name) // best effort cleanup of the unusable file
+		return "", err
+	}
+	return name, nil
+}
diff --git a/bindings/go/paimon.go b/bindings/go/paimon.go
new file mode 100644
index 0000000..4caa133
--- /dev/null
+++ b/bindings/go/paimon.go
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Package paimon provides a Go binding for Apache Paimon Rust.
+//
+// This binding uses purego and libffi to call into the paimon-c shared library
+// without requiring CGO. The pre-built shared library is embedded in the
+// package and automatically loaded at runtime — no manual build step needed.
+//
+// Basic usage:
+//
+//	p, err := paimon.Open()
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	defer p.Close()
+//
+//	catalog, err := p.NewFileSystemCatalog("/path/to/warehouse")
+//	...
+package paimon
+
+import (
+	"context"
+)
+
+// Paimon is the entry point for all paimon operations.
+// Create one with Open() or OpenLibrary().
+type Paimon struct {
+	ctx    context.Context    // carries the bound FFI functions
+	cancel context.CancelFunc // unloads the shared library
+}
+
+// Open loads the embedded paimon-c shared library and returns a Paimon instance.
+// The library is decompressed from the embedded binary on first call and
+// cached for subsequent calls.
+//
+// Each call loads the extracted library through OpenLibrary and returns a
+// new Paimon value; only the extraction itself is shared between calls.
+func Open() (*Paimon, error) {
+	if err := loadEmbeddedLib(); err != nil {
+		return nil, err
+	}
+	return OpenLibrary(libPath)
+}
+
+// OpenLibrary loads a paimon-c shared library from an explicit filesystem path.
+// Use this for development when working with a locally built library.
+//
+// It returns the load or symbol-binding error unchanged when the library
+// cannot be used.
+func OpenLibrary(path string) (*Paimon, error) {
+	ctx, cancel, err := newContext(path)
+	if err != nil {
+		return nil, err
+	}
+	return &Paimon{ctx: ctx, cancel: cancel}, nil
+}
+
+// Close releases the shared library resources.
+//
+// Close is a no-op on a nil receiver, on a zero Paimon and on one that was
+// already closed, so the library is unloaded at most once per instance.
+func (p *Paimon) Close() {
+	if p == nil || p.cancel == nil {
+		return
+	}
+	p.cancel()
+	p.cancel = nil
+}
diff --git a/bindings/go/plan.go b/bindings/go/plan.go
new file mode 100644
index 0000000..5182ae5
--- /dev/null
+++ b/bindings/go/plan.go
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// Plan holds the scan result containing data splits to read.
+type Plan struct {
+	ctx   context.Context
+	inner *paimonPlan
+}
+
+// Close releases the plan resources.
+//
+// Close is a no-op on a nil receiver and on a plan that was already closed,
+// so the native pointer is handed to paimon_plan_free once.
+func (p *Plan) Close() {
+	if p == nil || p.inner == nil {
+		return
+	}
+	ffiPlanFree.symbol(p.ctx)(p.inner)
+	p.inner = nil
+}
+
+// ffiPlanFree binds paimon_plan_free, which takes the plan handle and
+// returns nothing.
+var ffiPlanFree = newFFI(ffiOpts{
+	sym:    "paimon_plan_free",
+	rType:  &ffi.TypeVoid,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(_ context.Context, ffiCall ffiCall) func(plan *paimonPlan) {
+	return func(plan *paimonPlan) {
+		ffiCall(
+			nil,
+			unsafe.Pointer(&plan),
+		)
+	}
+})
diff --git a/bindings/go/read_builder.go b/bindings/go/read_builder.go
new file mode 100644
index 0000000..8e5cea8
--- /dev/null
+++ b/bindings/go/read_builder.go
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// ReadBuilder creates TableScan and TableRead instances.
+type ReadBuilder struct {
+	ctx   context.Context
+	inner *paimonReadBuilder // native handle; nil once Close has run
+}
+
+// Close releases the read builder resources.
+//
+// Close is a no-op on a nil receiver and on a builder that was already
+// closed, so the native pointer is handed to paimon_read_builder_free once.
+func (rb *ReadBuilder) Close() {
+	if rb == nil || rb.inner == nil {
+		return
+	}
+	ffiReadBuilderFree.symbol(rb.ctx)(rb.inner)
+	rb.inner = nil
+}
+
+// NewScan creates a TableScan for planning which data files to read.
+func (rb *ReadBuilder) NewScan() (*TableScan, error) {
+	createFn := ffiReadBuilderNewScan.symbol(rb.ctx)
+	inner, err := createFn(rb.inner)
+	if err != nil {
+		return nil, err
+	}
+	return &TableScan{ctx: rb.ctx, inner: inner}, nil
+}
+
+// NewRead creates a TableRead for reading data from splits.
+// The returned TableRead holds its own native handle.
+func (rb *ReadBuilder) NewRead() (*TableRead, error) {
+	createFn := ffiReadBuilderNewRead.symbol(rb.ctx)
+	inner, err := createFn(rb.inner)
+	if err != nil {
+		return nil, err
+	}
+	return &TableRead{ctx: rb.ctx, inner: inner}, nil
+}
+
+// ffiReadBuilderFree binds paimon_read_builder_free, which takes the read
+// builder handle and returns nothing.
+var ffiReadBuilderFree = newFFI(ffiOpts{
+	sym:    "paimon_read_builder_free",
+	rType:  &ffi.TypeVoid,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(_ context.Context, ffiCall ffiCall) func(rb *paimonReadBuilder) {
+	return func(rb *paimonReadBuilder) {
+		ffiCall(
+			nil,
+			unsafe.Pointer(&rb),
+		)
+	}
+})
+
+// ffiReadBuilderNewScan binds paimon_read_builder_new_scan: read builder
+// handle in, resultTableScan out.
+var ffiReadBuilderNewScan = newFFI(ffiOpts{
+	sym:    "paimon_read_builder_new_scan",
+	rType:  &typeResultTableScan,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(ctx context.Context, ffiCall ffiCall) func(rb *paimonReadBuilder) (*paimonTableScan, error) {
+	return func(rb *paimonReadBuilder) (*paimonTableScan, error) {
+		var result resultTableScan
+		// The argument slot holds the address of the pointer value.
+		ffiCall(
+			unsafe.Pointer(&result),
+			unsafe.Pointer(&rb),
+		)
+		if result.error != nil {
+			return nil, parseError(ctx, result.error)
+		}
+		return result.scan, nil
+	}
+})
+
+// ffiReadBuilderNewRead binds paimon_read_builder_new_read: read builder
+// handle in, resultNewRead out.
+var ffiReadBuilderNewRead = newFFI(ffiOpts{
+	sym:    "paimon_read_builder_new_read",
+	rType:  &typeResultNewRead,
+	aTypes: []*ffi.Type{&ffi.TypePointer},
+}, func(ctx context.Context, ffiCall ffiCall) func(rb *paimonReadBuilder) (*paimonTableRead, error) {
+	return func(rb *paimonReadBuilder) (*paimonTableRead, error) {
+		var result resultNewRead
+		ffiCall(
+			unsafe.Pointer(&result),
+			unsafe.Pointer(&rb),
+		)
+		if result.error != nil {
+			return nil, parseError(ctx, result.error)
+		}
+		return result.read, nil
+	}
+})
diff --git a/bindings/go/table.go b/bindings/go/table.go
new file mode 100644
index
0000000..df6eeef
--- /dev/null
+++ b/bindings/go/table.go
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package paimon
+
+import (
+	"context"
+	"unsafe"
+
+	"github.com/jupiterrider/ffi"
+)
+
+// Table represents a paimon table.
+//
+// A Table owns a native handle; release it with Close.
+type Table struct {
+	ctx   context.Context
+	inner *paimonTable // native handle; nil once Close has run
+}
+
+// Close releases the table resources.
+//
+// Close is a no-op on a nil receiver and on a table that was already
+// closed, so the native pointer is handed to paimon_table_free once.
+func (t *Table) Close() {
+	if t == nil || t.inner == nil {
+		return
+	}
+	ffiTableFree.symbol(t.ctx)(t.inner)
+	t.inner = nil
+}
+
+// NewReadBuilder creates a ReadBuilder for this table.
+func (t *Table) NewReadBuilder() *ReadBuilder { + createFn := ffiTableNewReadBuilder.symbol(t.ctx) + inner, err := createFn(t.inner) + if err != nil { + panic("paimon: NewReadBuilder called on invalid table: " + err.Error()) + } + return &ReadBuilder{ctx: t.ctx, inner: inner} +} + +var ffiTableFree = newFFI(ffiOpts{ + sym: "paimon_table_free", + rType: &ffi.TypeVoid, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(_ context.Context, ffiCall ffiCall) func(table *paimonTable) { + return func(table *paimonTable) { + ffiCall( + nil, + unsafe.Pointer(&table), + ) + } +}) + +var ffiTableNewReadBuilder = newFFI(ffiOpts{ + sym: "paimon_table_new_read_builder", + rType: &typeResultReadBuilder, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(ctx context.Context, ffiCall ffiCall) func(table *paimonTable) (*paimonReadBuilder, error) { + return func(table *paimonTable) (*paimonReadBuilder, error) { + var result resultReadBuilder + ffiCall( + unsafe.Pointer(&result), + unsafe.Pointer(&table), + ) + if result.error != nil { + return nil, parseError(ctx, result.error) + } + return result.readBuilder, nil + } +}) diff --git a/bindings/go/table_read.go b/bindings/go/table_read.go new file mode 100644 index 0000000..f335328 --- /dev/null +++ b/bindings/go/table_read.go @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package paimon + +import ( + "context" + "runtime" + "unsafe" + + "github.com/jupiterrider/ffi" +) + +// TableRead reads data from a table given a plan of splits. +type TableRead struct { + ctx context.Context + inner *paimonTableRead +} + +// Close releases the table read resources. +func (tr *TableRead) Close() { + ffiTableReadFree.symbol(tr.ctx)(tr.inner) +} + +// RecordBatchReader iterates over Arrow record batches one at a time via +// the Arrow C Data Interface (zero-copy). Call Next to advance and Close +// when done. +// +// reader, _ := read.ToArrow(plan) +// defer reader.Close() +// for { +// batch, err := reader.Next() +// if batch == nil { break } +// record, _ := cdata.ImportCRecordBatch( +// (*cdata.CArrowArray)(batch.Array), +// (*cdata.CArrowSchema)(batch.Schema), +// ) +// // use record ... +// record.Release() +// } +type RecordBatchReader struct { + ctx context.Context + inner *paimonRecordBatchReader +} + +// Next returns the next ArrowBatch, or (nil, nil) when iteration is complete. +func (r *RecordBatchReader) Next() (*ArrowBatch, error) { + array, schema, err := ffiRecordBatchReaderNext.symbol(r.ctx)(r.inner) + if err != nil { + return nil, err + } + if array == nil && schema == nil { + return nil, nil + } + ab := &ArrowBatch{ctx: r.ctx, Array: array, Schema: schema} + runtime.SetFinalizer(ab, (*ArrowBatch).free) + return ab, nil +} + +// Close releases the underlying C record batch reader. 
+func (r *RecordBatchReader) Close() { + ffiRecordBatchReaderFree.symbol(r.ctx)(r.inner) +} + +// ToArrow creates a RecordBatchReader that lazily reads Arrow record batches +// from the given plan via the Arrow C Data Interface (zero-copy). +// +// The caller must call Close on the returned reader when done. +func (tr *TableRead) ToArrow(plan *Plan) (*RecordBatchReader, error) { + reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, plan.inner) + if err != nil { + return nil, err + } + return &RecordBatchReader{ctx: tr.ctx, inner: reader}, nil +} + +var ffiTableReadFree = newFFI(ffiOpts{ + sym: "paimon_table_read_free", + rType: &ffi.TypeVoid, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(_ context.Context, ffiCall ffiCall) func(read *paimonTableRead) { + return func(read *paimonTableRead) { + ffiCall( + nil, + unsafe.Pointer(&read), + ) + } +}) + +var ffiTableReadToArrow = newFFI(ffiOpts{ + sym: "paimon_table_read_to_arrow", + rType: &typeResultRecordBatchReader, + aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer}, +}, func(ctx context.Context, ffiCall ffiCall) func(read *paimonTableRead, plan *paimonPlan) (*paimonRecordBatchReader, error) { + return func(read *paimonTableRead, plan *paimonPlan) (*paimonRecordBatchReader, error) { + var result resultRecordBatchReader + ffiCall( + unsafe.Pointer(&result), + unsafe.Pointer(&read), + unsafe.Pointer(&plan), + ) + if result.error != nil { + return nil, parseError(ctx, result.error) + } + return result.reader, nil + } +}) + +var ffiRecordBatchReaderNext = newFFI(ffiOpts{ + sym: "paimon_record_batch_reader_next", + rType: &typeResultNextBatch, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(ctx context.Context, ffiCall ffiCall) func(reader *paimonRecordBatchReader) (unsafe.Pointer, unsafe.Pointer, error) { + return func(reader *paimonRecordBatchReader) (unsafe.Pointer, unsafe.Pointer, error) { + var result resultNextBatch + ffiCall( + unsafe.Pointer(&result), + unsafe.Pointer(&reader), + ) + if 
result.error != nil { + return nil, nil, parseError(ctx, result.error) + } + return result.array, result.schema, nil + } +}) + +var ffiRecordBatchReaderFree = newFFI(ffiOpts{ + sym: "paimon_record_batch_reader_free", + rType: &ffi.TypeVoid, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(_ context.Context, ffiCall ffiCall) func(reader *paimonRecordBatchReader) { + return func(reader *paimonRecordBatchReader) { + ffiCall( + nil, + unsafe.Pointer(&reader), + ) + } +}) + +var ffiArrowBatchFree = newFFI(ffiOpts{ + sym: "paimon_arrow_batch_free", + rType: &ffi.TypeVoid, + aTypes: []*ffi.Type{&typeArrowBatch}, +}, func(_ context.Context, ffiCall ffiCall) func(array unsafe.Pointer, schema unsafe.Pointer) { + return func(array unsafe.Pointer, schema unsafe.Pointer) { + batch := struct { + array unsafe.Pointer + schema unsafe.Pointer + }{array: array, schema: schema} + ffiCall( + nil, + unsafe.Pointer(&batch), + ) + } +}) diff --git a/bindings/go/table_scan.go b/bindings/go/table_scan.go new file mode 100644 index 0000000..c2f0155 --- /dev/null +++ b/bindings/go/table_scan.go @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package paimon + +import ( + "context" + "unsafe" + + "github.com/jupiterrider/ffi" +) + +// TableScan scans a table and produces a Plan containing data splits. +type TableScan struct { + ctx context.Context + inner *paimonTableScan +} + +// Close releases the table scan resources. +func (ts *TableScan) Close() { + ffiTableScanFree.symbol(ts.ctx)(ts.inner) +} + +// Plan executes the scan and returns a Plan containing data splits to read. +func (ts *TableScan) Plan() (*Plan, error) { + planFn := ffiTableScanPlan.symbol(ts.ctx) + inner, err := planFn(ts.inner) + if err != nil { + return nil, err + } + return &Plan{ctx: ts.ctx, inner: inner}, nil +} + +var ffiTableScanFree = newFFI(ffiOpts{ + sym: "paimon_table_scan_free", + rType: &ffi.TypeVoid, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(_ context.Context, ffiCall ffiCall) func(scan *paimonTableScan) { + return func(scan *paimonTableScan) { + ffiCall( + nil, + unsafe.Pointer(&scan), + ) + } +}) + +var ffiTableScanPlan = newFFI(ffiOpts{ + sym: "paimon_table_scan_plan", + rType: &typeResultPlan, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(ctx context.Context, ffiCall ffiCall) func(scan *paimonTableScan) (*paimonPlan, error) { + return func(scan *paimonTableScan) (*paimonPlan, error) { + var result resultPlan + ffiCall( + unsafe.Pointer(&result), + unsafe.Pointer(&scan), + ) + if result.error != nil { + return nil, parseError(ctx, result.error) + } + return result.plan, nil + } +}) diff --git a/bindings/go/tests/go.mod b/bindings/go/tests/go.mod new file mode 100644 index 0000000..a9b0380 --- /dev/null +++ b/bindings/go/tests/go.mod @@ -0,0 +1,27 @@ +module paimon_test + +go 1.22.4 + +require ( + github.com/apache/arrow-go/v18 v18.0.0 + github.com/apache/paimon-rust/bindings/go v0.0.0 +) + +require ( + github.com/ebitengine/purego v0.8.4 // indirect + github.com/goccy/go-json v0.10.3 // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/jupiterrider/ffi v0.5.0 // 
indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/tools v0.26.0 // indirect + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect +) + +replace github.com/apache/paimon-rust/bindings/go => ../ diff --git a/bindings/go/tests/go.sum b/bindings/go/tests/go.sum new file mode 100644 index 0000000..cb52020 --- /dev/null +++ b/bindings/go/tests/go.sum @@ -0,0 +1,57 @@ +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/apache/arrow-go/v18 v18.0.0 h1:1dBDaSbH3LtulTyOVYaBCHO3yVRwjV+TZaqn3g6V7ZM= +github.com/apache/arrow-go/v18 v18.0.0/go.mod h1:t6+cWRSmKgdQ6HsxisQjok+jBpKGhRDiqcf3p0p/F+A= +github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= +github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw= +github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers 
v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jupiterrider/ffi v0.5.0 h1:j2nSgpabbV1JOwgP4Kn449sJUHq3cVLAZVBoOYn44V8= +github.com/jupiterrider/ffi v0.5.0/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod 
h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/bindings/go/tests/paimon_test.go b/bindings/go/tests/paimon_test.go new file mode 100644 index 0000000..f944bf0 --- /dev/null +++ b/bindings/go/tests/paimon_test.go 
@@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package paimon_test + +import ( + "os" + "sort" + "testing" + + "github.com/apache/arrow-go/v18/arrow/array" + "github.com/apache/arrow-go/v18/arrow/cdata" + paimon "github.com/apache/paimon-rust/bindings/go" +) + +// TestReadLogTable reads the test table and verifies the data matches expected values. 
+// +// The table was populated by Docker provisioning with: +// +// (1, 'alice'), (2, 'bob'), (3, 'carol') +func TestReadLogTable(t *testing.T) { + warehouse := os.Getenv("PAIMON_TEST_WAREHOUSE") + if warehouse == "" { + warehouse = "/Users/yuxia/Projects/rust-projects/paimon-rust/dev/paimon-warehouse" + } + + if _, err := os.Stat(warehouse); os.IsNotExist(err) { + t.Skipf("Skipping: warehouse %s does not exist (run 'make docker-up' first)", warehouse) + } + + p, err := paimon.Open() + if err != nil { + t.Fatalf("Failed to open paimon: %v", err) + } + defer p.Close() + + catalog, err := p.NewFileSystemCatalog(warehouse) + if err != nil { + t.Fatalf("Failed to create catalog: %v", err) + } + defer catalog.Close() + + identifier, err := p.NewIdentifier("default", "simple_log_table") + if err != nil { + t.Fatalf("Failed to create identifier: %v", err) + } + defer identifier.Close() + + table, err := catalog.GetTable(identifier) + if err != nil { + t.Fatalf("Failed to get table: %v", err) + } + defer table.Close() + + readBuilder := table.NewReadBuilder() + defer readBuilder.Close() + + scan, err := readBuilder.NewScan() + if err != nil { + t.Fatalf("Failed to create scan: %v", err) + } + defer scan.Close() + + plan, err := scan.Plan() + if err != nil { + t.Fatalf("Failed to plan: %v", err) + } + defer plan.Close() + + read, err := readBuilder.NewRead() + if err != nil { + t.Fatalf("Failed to create read: %v", err) + } + defer read.Close() + + reader, err := read.ToArrow(plan) + if err != nil { + t.Fatalf("Failed to read arrow: %v", err) + } + defer reader.Close() + + // Import Arrow batches via C Data Interface and collect rows. + // Strings are copied before Release because arrow-go's String.Value() + // returns zero-copy references into the Arrow buffer. 
+ type row struct { + id int32 + name string + } + var rows []row + batchIdx := 0 + for { + batch, err := reader.Next() + if err != nil { + t.Fatalf("Batch %d: failed to read next batch: %v", batchIdx, err) + } + if batch == nil { + break + } + + record, err := cdata.ImportCRecordBatch( + (*cdata.CArrowArray)(batch.Array), + (*cdata.CArrowSchema)(batch.Schema), + ) + if err != nil { + t.Fatalf("Batch %d: failed to import record batch: %v", batchIdx, err) + } + + idIdx := record.Schema().FieldIndices("id") + nameIdx := record.Schema().FieldIndices("name") + if len(idIdx) == 0 || len(nameIdx) == 0 { + t.Fatalf("Batch %d: missing expected columns (id, name) in schema: %s", batchIdx, record.Schema()) + } + + idCol := record.Column(idIdx[0]).(*array.Int32) + nameCol := record.Column(nameIdx[0]).(*array.String) + + for j := 0; j < int(record.NumRows()); j++ { + rows = append(rows, row{ + id: idCol.Value(j), + name: string([]byte(nameCol.Value(j))), + }) + } + record.Release() + batchIdx++ + } + + if len(rows) == 0 { + t.Fatal("Expected at least one row, got 0") + } + + sort.Slice(rows, func(i, j int) bool { + return rows[i].id < rows[j].id + }) + + expected := []row{ + {1, "alice"}, + {2, "bob"}, + {3, "carol"}, + } + + if len(rows) != len(expected) { + t.Fatalf("Expected %d rows, got %d: %v", len(expected), len(rows), rows) + } + + for i, exp := range expected { + if rows[i] != exp { + t.Errorf("Row %d: expected %v, got %v", i, exp, rows[i]) + } + } +} diff --git a/bindings/go/types.go b/bindings/go/types.go new file mode 100644 index 0000000..afe77a6 --- /dev/null +++ b/bindings/go/types.go @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package paimon + +import ( + "context" + "unsafe" + + "github.com/jupiterrider/ffi" +) + +// FFI type definitions mirroring C repr structs from paimon-c. +var ( + // paimon_bytes { data: *mut u8, len: usize } + typeBytes = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_error { code: i32, message: paimon_bytes } + typeError = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypeSint32, + &typeBytes, + nil, + }[0], + } + + // Result types: { value, *error } + // paimon_result_catalog_new { catalog: paimon_catalog, error: *paimon_error } + typeResultCatalogNew = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_get_table { table: paimon_table, error: *paimon_error } + typeResultGetTable = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_identifier_new { identifier: paimon_identifier, error: *paimon_error } + typeResultIdentifierNew = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_new_read { read: paimon_table_read, error: *paimon_error } + typeResultNewRead = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + 
&ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_read_builder { read_builder: paimon_read_builder, error: *paimon_error } + typeResultReadBuilder = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_table_scan { scan: paimon_table_scan, error: *paimon_error } + typeResultTableScan = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_plan { plan: paimon_plan, error: *paimon_error } + typeResultPlan = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_record_batch_reader { reader: *paimon_record_batch_reader, error: *paimon_error } + typeResultRecordBatchReader = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_arrow_batch { array: *c_void, schema: *c_void } + typeArrowBatch = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, + &ffi.TypePointer, + nil, + }[0], + } + + // paimon_result_next_batch { batch: paimon_arrow_batch, error: *paimon_error } + typeResultNextBatch = ffi.Type{ + Type: ffi.Struct, + Elements: &[]*ffi.Type{ + &ffi.TypePointer, // batch.array + &ffi.TypePointer, // batch.schema + &ffi.TypePointer, // error + nil, + }[0], + } +) + +// Go mirror structs for C types. 
+ +type paimonBytes struct { + data *byte + len uintptr +} + +type paimonError struct { + code int32 + message paimonBytes +} + +// Opaque pointer wrappers +type paimonCatalog struct{} +type paimonIdentifier struct{} +type paimonTable struct{} +type paimonReadBuilder struct{} +type paimonTableScan struct{} +type paimonTableRead struct{} +type paimonPlan struct{} +type paimonRecordBatchReader struct{} + +// Result types matching the C repr structs +type resultCatalogNew struct { + catalog *paimonCatalog + error *paimonError +} + +type resultGetTable struct { + table *paimonTable + error *paimonError +} + +type resultIdentifierNew struct { + identifier *paimonIdentifier + error *paimonError +} + +type resultNewRead struct { + read *paimonTableRead + error *paimonError +} + +type resultReadBuilder struct { + readBuilder *paimonReadBuilder + error *paimonError +} + +type resultTableScan struct { + scan *paimonTableScan + error *paimonError +} + +type resultPlan struct { + plan *paimonPlan + error *paimonError +} + +type resultRecordBatchReader struct { + reader *paimonRecordBatchReader + error *paimonError +} + +// ArrowBatch holds a single Arrow record batch via the Arrow C Data Interface. +// Array and Schema are pointers to heap-allocated ArrowArray and ArrowSchema structs. +// The container structs are freed automatically by the GC. 
+type ArrowBatch struct { + ctx context.Context + Array unsafe.Pointer + Schema unsafe.Pointer +} + +func (b *ArrowBatch) free() { + ffiArrowBatchFree.symbol(b.ctx)(b.Array, b.Schema) +} + +type resultNextBatch struct { + array unsafe.Pointer + schema unsafe.Pointer + error *paimonError +} + +func parseBytes(b paimonBytes) []byte { + if b.len == 0 { + return nil + } + data := make([]byte, b.len) + copy(data, unsafe.Slice(b.data, b.len)) + return data +} diff --git a/bindings/go/util_unix.go b/bindings/go/util_unix.go new file mode 100644 index 0000000..abc074b --- /dev/null +++ b/bindings/go/util_unix.go @@ -0,0 +1,52 @@ +//go:build !windows + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package paimon + +import ( + "github.com/ebitengine/purego" + "golang.org/x/sys/unix" +) + +func BytePtrFromString(s string) (*byte, error) { + if s == "" { + return new(byte), nil + } + return unix.BytePtrFromString(s) +} + +func LoadLibrary(path string) (uintptr, error) { + return purego.Dlopen(path, purego.RTLD_LAZY|purego.RTLD_GLOBAL) +} + +func FreeLibrary(handle uintptr) error { + if handle == 0 { + return nil + } + return purego.Dlclose(handle) +} + +func GetProcAddress(handle uintptr, name string) (uintptr, error) { + if handle == 0 { + return 0, nil + } + return purego.Dlsym(handle, name) +} From 2a2428c009c709eb246f79fa36bc0623032fa5c6 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Sat, 14 Mar 2026 17:12:21 +0800 Subject: [PATCH 2/6] refine code --- bindings/go/catalog.go | 8 +++- bindings/go/ffi.go | 33 +++++++++++++--- bindings/go/go.mod | 22 +++++++++-- bindings/go/go.sum | 65 ++++++++++++++++++++++++++++---- bindings/go/identifier.go | 5 ++- bindings/go/paimon.go | 18 ++++++--- bindings/go/plan.go | 2 + bindings/go/read_builder.go | 8 +++- bindings/go/table.go | 9 +++-- bindings/go/table_read.go | 58 +++++++++++++++++++--------- bindings/go/table_scan.go | 5 ++- bindings/go/tests/go.mod | 4 +- bindings/go/tests/go.sum | 8 ++-- bindings/go/tests/paimon_test.go | 25 ++++++------ bindings/go/types.go | 25 +++++++----- 15 files changed, 216 insertions(+), 79 deletions(-) diff --git a/bindings/go/catalog.go b/bindings/go/catalog.go index 8da8e6a..51514fd 100644 --- a/bindings/go/catalog.go +++ b/bindings/go/catalog.go @@ -29,6 +29,7 @@ import ( // Catalog wraps a paimon FileSystemCatalog. 
type Catalog struct { ctx context.Context + lib *libRef inner *paimonCatalog } @@ -39,12 +40,14 @@ func (p *Paimon) NewFileSystemCatalog(warehouse string) (*Catalog, error) { if err != nil { return nil, err } - return &Catalog{ctx: p.ctx, inner: inner}, nil + p.lib.acquire() + return &Catalog{ctx: p.ctx, lib: p.lib, inner: inner}, nil } // Close releases the catalog resources. func (c *Catalog) Close() { ffiCatalogFree.symbol(c.ctx)(c.inner) + c.lib.release() } // GetTable retrieves a table from the catalog using the given identifier. @@ -54,7 +57,8 @@ func (c *Catalog) GetTable(id *Identifier) (*Table, error) { if err != nil { return nil, err } - return &Table{ctx: c.ctx, inner: inner}, nil + c.lib.acquire() + return &Table{ctx: c.ctx, lib: c.lib, inner: inner}, nil } var ffiCatalogNew = newFFI(ffiOpts{ diff --git a/bindings/go/ffi.go b/bindings/go/ffi.go index dbcdeb8..e37b0a5 100644 --- a/bindings/go/ffi.go +++ b/bindings/go/ffi.go @@ -22,11 +22,34 @@ package paimon import ( "context" "errors" + "sync/atomic" "unsafe" "github.com/jupiterrider/ffi" ) +// libRef is an atomic reference counter for the loaded shared library. +// Every object that may call FFI (including during Close/Release) must +// hold a reference. The library is freed only when the count drops to zero. 
+type libRef struct { + count atomic.Int32 + lib uintptr +} + +func newLibRef(lib uintptr) *libRef { + r := &libRef{lib: lib} + r.count.Store(1) + return r +} + +func (r *libRef) acquire() { r.count.Add(1) } + +func (r *libRef) release() { + if r.count.Add(-1) == 0 { + _ = FreeLibrary(r.lib) + } +} + type ffiOpts struct { sym contextKey rType *ffi.Type @@ -85,20 +108,18 @@ func (f *FFI[T]) withFFI(ctx context.Context, lib uintptr) (context.Context, err var withFFIs []contextWithFFI -func newContext(path string) (ctx context.Context, cancel context.CancelFunc, err error) { - lib, err := LoadLibrary(path) +func newContext(path string) (ctx context.Context, lib *libRef, err error) { + handle, err := LoadLibrary(path) if err != nil { return } ctx = context.Background() for _, withFFI := range withFFIs { - ctx, err = withFFI(ctx, lib) + ctx, err = withFFI(ctx, handle) if err != nil { return } } - cancel = func() { - _ = FreeLibrary(lib) - } + lib = newLibRef(handle) return } diff --git a/bindings/go/go.mod b/bindings/go/go.mod index ddeeed9..31cc85a 100644 --- a/bindings/go/go.mod +++ b/bindings/go/go.mod @@ -3,8 +3,22 @@ module github.com/apache/paimon-rust/bindings/go go 1.22.4 require ( - github.com/ebitengine/purego v0.8.4 - github.com/jupiterrider/ffi v0.5.0 - github.com/klauspost/compress v1.17.9 - golang.org/x/sys v0.24.0 + github.com/apache/arrow-go/v18 v18.0.0 + github.com/ebitengine/purego v0.10.0 + github.com/jupiterrider/ffi v0.6.0 + github.com/klauspost/compress v1.17.11 + golang.org/x/sys v0.26.0 +) + +require ( + github.com/goccy/go-json v0.10.3 // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/tools v0.26.0 // indirect + 
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect ) diff --git a/bindings/go/go.sum b/bindings/go/go.sum index a3373ec..e605378 100644 --- a/bindings/go/go.sum +++ b/bindings/go/go.sum @@ -1,8 +1,57 @@ -github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw= -github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= -github.com/jupiterrider/ffi v0.5.0 h1:j2nSgpabbV1JOwgP4Kn449sJUHq3cVLAZVBoOYn44V8= -github.com/jupiterrider/ffi v0.5.0/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= -golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= -golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/apache/arrow-go/v18 v18.0.0 h1:1dBDaSbH3LtulTyOVYaBCHO3yVRwjV+TZaqn3g6V7ZM= +github.com/apache/arrow-go/v18 v18.0.0/go.mod h1:t6+cWRSmKgdQ6HsxisQjok+jBpKGhRDiqcf3p0p/F+A= +github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= +github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= +github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/snappy v0.0.4 
h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jupiterrider/ffi v0.6.0 h1:UX378KcZvH5c8qgLi9KL/bL82SZTHdRspZ+jj7bvBng= +github.com/jupiterrider/ffi v0.6.0/go.mod h1:PqZ5Go6X9by8CIXgfprxfMPYmn8oT5m2O7AA56s64bY= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod 
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff 
--git a/bindings/go/identifier.go b/bindings/go/identifier.go index 8018b12..d5ef1bfb 100644 --- a/bindings/go/identifier.go +++ b/bindings/go/identifier.go @@ -29,6 +29,7 @@ import ( // Identifier identifies a table by database and object name. type Identifier struct { ctx context.Context + lib *libRef inner *paimonIdentifier } @@ -39,12 +40,14 @@ func (p *Paimon) NewIdentifier(database, object string) (*Identifier, error) { if err != nil { return nil, err } - return &Identifier{ctx: p.ctx, inner: inner}, nil + p.lib.acquire() + return &Identifier{ctx: p.ctx, lib: p.lib, inner: inner}, nil } // Close releases the identifier resources. func (id *Identifier) Close() { ffiIdentifierFree.symbol(id.ctx)(id.inner) + id.lib.release() } var ffiIdentifierNew = newFFI(ffiOpts{ diff --git a/bindings/go/paimon.go b/bindings/go/paimon.go index 4caa133..1c34742 100644 --- a/bindings/go/paimon.go +++ b/bindings/go/paimon.go @@ -41,9 +41,14 @@ import ( // Paimon is the entry point for all paimon operations. // Create one with Open() or OpenLibrary(). +// +// Paimon must outlive all objects derived from it (Catalog, Table, etc.), +// or those objects must be closed first. The underlying shared library is +// reference-counted and will not be unloaded until all derived objects +// are closed. type Paimon struct { - ctx context.Context - cancel context.CancelFunc + ctx context.Context + lib *libRef } // Open loads the embedded paimon-c shared library and returns a Paimon instance. @@ -59,14 +64,15 @@ func Open() (*Paimon, error) { // OpenLibrary loads a paimon-c shared library from an explicit filesystem path. // Use this for development when working with a locally built library. func OpenLibrary(path string) (*Paimon, error) { - ctx, cancel, err := newContext(path) + ctx, lib, err := newContext(path) if err != nil { return nil, err } - return &Paimon{ctx: ctx, cancel: cancel}, nil + return &Paimon{ctx: ctx, lib: lib}, nil } -// Close releases the shared library resources. 
+// Close releases this Paimon instance's reference to the shared library. +// The library is unloaded once all derived objects are also closed. func (p *Paimon) Close() { - p.cancel() + p.lib.release() } diff --git a/bindings/go/plan.go b/bindings/go/plan.go index 5182ae5..986f67d 100644 --- a/bindings/go/plan.go +++ b/bindings/go/plan.go @@ -29,12 +29,14 @@ import ( // Plan holds the scan result containing data splits to read. type Plan struct { ctx context.Context + lib *libRef inner *paimonPlan } // Close releases the plan resources. func (p *Plan) Close() { ffiPlanFree.symbol(p.ctx)(p.inner) + p.lib.release() } var ffiPlanFree = newFFI(ffiOpts{ diff --git a/bindings/go/read_builder.go b/bindings/go/read_builder.go index 8e5cea8..4bc6582 100644 --- a/bindings/go/read_builder.go +++ b/bindings/go/read_builder.go @@ -29,12 +29,14 @@ import ( // ReadBuilder creates TableScan and TableRead instances. type ReadBuilder struct { ctx context.Context + lib *libRef inner *paimonReadBuilder } // Close releases the read builder resources. func (rb *ReadBuilder) Close() { ffiReadBuilderFree.symbol(rb.ctx)(rb.inner) + rb.lib.release() } // NewScan creates a TableScan for planning which data files to read. @@ -44,7 +46,8 @@ func (rb *ReadBuilder) NewScan() (*TableScan, error) { if err != nil { return nil, err } - return &TableScan{ctx: rb.ctx, inner: inner}, nil + rb.lib.acquire() + return &TableScan{ctx: rb.ctx, lib: rb.lib, inner: inner}, nil } // NewRead creates a TableRead for reading data from splits. 
@@ -54,7 +57,8 @@ func (rb *ReadBuilder) NewRead() (*TableRead, error) { if err != nil { return nil, err } - return &TableRead{ctx: rb.ctx, inner: inner}, nil + rb.lib.acquire() + return &TableRead{ctx: rb.ctx, lib: rb.lib, inner: inner}, nil } var ffiReadBuilderFree = newFFI(ffiOpts{ diff --git a/bindings/go/table.go b/bindings/go/table.go index df6eeef..b6d49c5 100644 --- a/bindings/go/table.go +++ b/bindings/go/table.go @@ -29,22 +29,25 @@ import ( // Table represents a paimon table. type Table struct { ctx context.Context + lib *libRef inner *paimonTable } // Close releases the table resources. func (t *Table) Close() { ffiTableFree.symbol(t.ctx)(t.inner) + t.lib.release() } // NewReadBuilder creates a ReadBuilder for this table. -func (t *Table) NewReadBuilder() *ReadBuilder { +func (t *Table) NewReadBuilder() (*ReadBuilder, error) { createFn := ffiTableNewReadBuilder.symbol(t.ctx) inner, err := createFn(t.inner) if err != nil { - panic("paimon: NewReadBuilder called on invalid table: " + err.Error()) + return nil, err } - return &ReadBuilder{ctx: t.ctx, inner: inner} + t.lib.acquire() + return &ReadBuilder{ctx: t.ctx, lib: t.lib, inner: inner}, nil } var ffiTableFree = newFFI(ffiOpts{ diff --git a/bindings/go/table_read.go b/bindings/go/table_read.go index f335328..e6ff1a9 100644 --- a/bindings/go/table_read.go +++ b/bindings/go/table_read.go @@ -21,73 +21,97 @@ package paimon import ( "context" + "io" "runtime" "unsafe" + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/cdata" "github.com/jupiterrider/ffi" ) // TableRead reads data from a table given a plan of splits. type TableRead struct { ctx context.Context + lib *libRef inner *paimonTableRead } // Close releases the table read resources. func (tr *TableRead) Close() { ffiTableReadFree.symbol(tr.ctx)(tr.inner) + tr.lib.release() } // RecordBatchReader iterates over Arrow record batches one at a time via -// the Arrow C Data Interface (zero-copy). 
Call Next to advance and Close -// when done. +// the Arrow C Data Interface (zero-copy). Call NextRecord to advance and +// Close when done. // -// reader, _ := read.ToArrow(plan) +// reader, _ := read.ToRecordBatchReader(plan) // defer reader.Close() // for { -// batch, err := reader.Next() -// if batch == nil { break } -// record, _ := cdata.ImportCRecordBatch( -// (*cdata.CArrowArray)(batch.Array), -// (*cdata.CArrowSchema)(batch.Schema), -// ) +// record, err := reader.NextRecord() +// if err != nil { break } // io.EOF at end // // use record ... // record.Release() // } type RecordBatchReader struct { ctx context.Context + lib *libRef inner *paimonRecordBatchReader } -// Next returns the next ArrowBatch, or (nil, nil) when iteration is complete. -func (r *RecordBatchReader) Next() (*ArrowBatch, error) { +// NextRecord returns the next Arrow record, or io.EOF when iteration is +// complete. The underlying C batch is imported via the Arrow C Data Interface +// and released automatically — the caller only needs to call Release on the +// returned arrow.Record when done. +func (r *RecordBatchReader) NextRecord() (arrow.Record, error) { + batch, err := r.next() + if err != nil { + return nil, err + } + record, err := cdata.ImportCRecordBatch( + (*cdata.CArrowArray)(batch.array), + (*cdata.CArrowSchema)(batch.schema), + ) + batch.release() + if err != nil { + return nil, err + } + return record, nil +} + +func (r *RecordBatchReader) next() (*arrowBatch, error) { array, schema, err := ffiRecordBatchReaderNext.symbol(r.ctx)(r.inner) if err != nil { return nil, err } if array == nil && schema == nil { - return nil, nil + return nil, io.EOF } - ab := &ArrowBatch{ctx: r.ctx, Array: array, Schema: schema} - runtime.SetFinalizer(ab, (*ArrowBatch).free) + r.lib.acquire() + ab := &arrowBatch{ctx: r.ctx, lib: r.lib, array: array, schema: schema} + runtime.SetFinalizer(ab, (*arrowBatch).release) return ab, nil } // Close releases the underlying C record batch reader. 
func (r *RecordBatchReader) Close() { ffiRecordBatchReaderFree.symbol(r.ctx)(r.inner) + r.lib.release() } -// ToArrow creates a RecordBatchReader that lazily reads Arrow record batches +// ToRecordBatchReader creates a RecordBatchReader that lazily reads Arrow record batches // from the given plan via the Arrow C Data Interface (zero-copy). // // The caller must call Close on the returned reader when done. -func (tr *TableRead) ToArrow(plan *Plan) (*RecordBatchReader, error) { +func (tr *TableRead) ToRecordBatchReader(plan *Plan) (*RecordBatchReader, error) { reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, plan.inner) if err != nil { return nil, err } - return &RecordBatchReader{ctx: tr.ctx, inner: reader}, nil + tr.lib.acquire() + return &RecordBatchReader{ctx: tr.ctx, lib: tr.lib, inner: reader}, nil } var ffiTableReadFree = newFFI(ffiOpts{ diff --git a/bindings/go/table_scan.go b/bindings/go/table_scan.go index c2f0155..96e7750 100644 --- a/bindings/go/table_scan.go +++ b/bindings/go/table_scan.go @@ -29,12 +29,14 @@ import ( // TableScan scans a table and produces a Plan containing data splits. type TableScan struct { ctx context.Context + lib *libRef inner *paimonTableScan } // Close releases the table scan resources. func (ts *TableScan) Close() { ffiTableScanFree.symbol(ts.ctx)(ts.inner) + ts.lib.release() } // Plan executes the scan and returns a Plan containing data splits to read. 
@@ -44,7 +46,8 @@ func (ts *TableScan) Plan() (*Plan, error) { if err != nil { return nil, err } - return &Plan{ctx: ts.ctx, inner: inner}, nil + ts.lib.acquire() + return &Plan{ctx: ts.ctx, lib: ts.lib, inner: inner}, nil } var ffiTableScanFree = newFFI(ffiOpts{ diff --git a/bindings/go/tests/go.mod b/bindings/go/tests/go.mod index a9b0380..d2d340a 100644 --- a/bindings/go/tests/go.mod +++ b/bindings/go/tests/go.mod @@ -8,10 +8,10 @@ require ( ) require ( - github.com/ebitengine/purego v0.8.4 // indirect + github.com/ebitengine/purego v0.10.0 // indirect github.com/goccy/go-json v0.10.3 // indirect github.com/google/flatbuffers v24.3.25+incompatible // indirect - github.com/jupiterrider/ffi v0.5.0 // indirect + github.com/jupiterrider/ffi v0.6.0 // indirect github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect diff --git a/bindings/go/tests/go.sum b/bindings/go/tests/go.sum index cb52020..e605378 100644 --- a/bindings/go/tests/go.sum +++ b/bindings/go/tests/go.sum @@ -6,8 +6,8 @@ github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/ebitengine/purego v0.8.4 h1:CF7LEKg5FFOsASUj0+QwaXf8Ht6TlFxg09+S9wz0omw= -github.com/ebitengine/purego v0.8.4/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= +github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/golang/snappy v0.0.4 
h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= @@ -16,8 +16,8 @@ github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81A github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/jupiterrider/ffi v0.5.0 h1:j2nSgpabbV1JOwgP4Kn449sJUHq3cVLAZVBoOYn44V8= -github.com/jupiterrider/ffi v0.5.0/go.mod h1:x7xdNKo8h0AmLuXfswDUBxUsd2OqUP4ekC8sCnsmbvo= +github.com/jupiterrider/ffi v0.6.0 h1:UX378KcZvH5c8qgLi9KL/bL82SZTHdRspZ+jj7bvBng= +github.com/jupiterrider/ffi v0.6.0/go.mod h1:PqZ5Go6X9by8CIXgfprxfMPYmn8oT5m2O7AA56s64bY= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= diff --git a/bindings/go/tests/paimon_test.go b/bindings/go/tests/paimon_test.go index f944bf0..c8025ac 100644 --- a/bindings/go/tests/paimon_test.go +++ b/bindings/go/tests/paimon_test.go @@ -20,12 +20,13 @@ package paimon_test import ( + "errors" + "io" "os" "sort" "testing" "github.com/apache/arrow-go/v18/arrow/array" - "github.com/apache/arrow-go/v18/arrow/cdata" paimon "github.com/apache/paimon-rust/bindings/go" ) @@ -68,7 +69,10 @@ func TestReadLogTable(t *testing.T) { } defer table.Close() - readBuilder := table.NewReadBuilder() + readBuilder, err := table.NewReadBuilder() + if err != nil { + t.Fatalf("Failed to create read builder: %v", err) + } defer readBuilder.Close() scan, err := readBuilder.NewScan() @@ -89,7 +93,7 @@ func TestReadLogTable(t *testing.T) { } defer read.Close() - reader, err := read.ToArrow(plan) + reader, err := read.ToRecordBatchReader(plan) if err != nil { t.Fatalf("Failed to read arrow: %v", err) } @@ -105,25 +109,18 @@ func 
TestReadLogTable(t *testing.T) { var rows []row batchIdx := 0 for { - batch, err := reader.Next() - if err != nil { - t.Fatalf("Batch %d: failed to read next batch: %v", batchIdx, err) - } - if batch == nil { + record, err := reader.NextRecord() + if errors.Is(err, io.EOF) { break } - - record, err := cdata.ImportCRecordBatch( - (*cdata.CArrowArray)(batch.Array), - (*cdata.CArrowSchema)(batch.Schema), - ) if err != nil { - t.Fatalf("Batch %d: failed to import record batch: %v", batchIdx, err) + t.Fatalf("Batch %d: failed to read next record: %v", batchIdx, err) } idIdx := record.Schema().FieldIndices("id") nameIdx := record.Schema().FieldIndices("name") if len(idIdx) == 0 || len(nameIdx) == 0 { + record.Release() t.Fatalf("Batch %d: missing expected columns (id, name) in schema: %s", batchIdx, record.Schema()) } diff --git a/bindings/go/types.go b/bindings/go/types.go index afe77a6..04ce696 100644 --- a/bindings/go/types.go +++ b/bindings/go/types.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "runtime" "unsafe" "github.com/jupiterrider/ffi" @@ -214,17 +215,23 @@ type resultRecordBatchReader struct { error *paimonError } -// ArrowBatch holds a single Arrow record batch via the Arrow C Data Interface. -// Array and Schema are pointers to heap-allocated ArrowArray and ArrowSchema structs. -// The container structs are freed automatically by the GC. -type ArrowBatch struct { - ctx context.Context - Array unsafe.Pointer - Schema unsafe.Pointer +// arrowBatch holds a single Arrow record batch via the Arrow C Data Interface. 
+type arrowBatch struct { + ctx context.Context + lib *libRef + array unsafe.Pointer + schema unsafe.Pointer + released bool } -func (b *ArrowBatch) free() { - ffiArrowBatchFree.symbol(b.ctx)(b.Array, b.Schema) +func (b *arrowBatch) release() { + if b.released { + return + } + b.released = true + runtime.SetFinalizer(b, nil) + ffiArrowBatchFree.symbol(b.ctx)(b.array, b.schema) + b.lib.release() } type resultNextBatch struct { From 676345861c0366edf5a28a09441eb0d6106a37a3 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Sat, 14 Mar 2026 17:59:00 +0800 Subject: [PATCH 3/6] refine code again --- bindings/c/src/table.rs | 25 ++++++++++- bindings/go/Makefile | 2 +- bindings/go/catalog.go | 16 ++++--- bindings/go/identifier.go | 16 ++++--- bindings/go/paimon.go | 20 ++++++--- bindings/go/plan.go | 53 ++++++++++++++++++++--- bindings/go/read_builder.go | 16 ++++--- bindings/go/table.go | 16 ++++--- bindings/go/table_read.go | 72 ++++++++++++++++++++++++-------- bindings/go/table_scan.go | 16 ++++--- bindings/go/tests/paimon_test.go | 2 +- 11 files changed, 191 insertions(+), 63 deletions(-) diff --git a/bindings/c/src/table.rs b/bindings/c/src/table.rs index 7f645c9..7892757 100644 --- a/bindings/c/src/table.rs +++ b/bindings/c/src/table.rs @@ -205,6 +205,19 @@ pub unsafe extern "C" fn paimon_plan_free(plan: *mut paimon_plan) { } } +/// Return the number of data splits in a plan. +/// +/// # Safety +/// `plan` must be a valid pointer from `paimon_table_scan_plan`, or null (returns 0). +#[no_mangle] +pub unsafe extern "C" fn paimon_plan_num_splits(plan: *const paimon_plan) -> usize { + if plan.is_null() { + return 0; + } + let plan_ref = &*((*plan).inner as *const Plan); + plan_ref.splits().len() +} + // ======================= TableRead =============================== /// Free a paimon_table_read. @@ -222,12 +235,18 @@ pub unsafe extern "C" fn paimon_table_read_free(read: *mut paimon_table_read) { /// via `paimon_record_batch_reader_next`. 
This avoids loading all batches /// into memory at once. /// +/// `offset` and `length` select a contiguous sub-range of splits from the +/// plan. The range is clamped to the available splits (out-of-range values +/// are silently adjusted). +/// /// # Safety /// `read` and `plan` must be valid pointers from previous paimon C calls, or null (returns error). #[no_mangle] pub unsafe extern "C" fn paimon_table_read_to_arrow( read: *const paimon_table_read, plan: *const paimon_plan, + offset: usize, + length: usize, ) -> paimon_result_record_batch_reader { if let Err(e) = check_non_null(read, "read") { return paimon_result_record_batch_reader { @@ -244,10 +263,14 @@ pub unsafe extern "C" fn paimon_table_read_to_arrow( let table = &*((*read).inner as *const Table); let plan_ref = &*((*plan).inner as *const Plan); + let all_splits = plan_ref.splits(); + let start = offset.min(all_splits.len()); + let end = (offset.saturating_add(length)).min(all_splits.len()); + let selected = &all_splits[start..end]; let rb = table.new_read_builder(); match rb.new_read() { - Ok(table_read) => match table_read.to_arrow(plan_ref.splits()) { + Ok(table_read) => match table_read.to_arrow(selected) { Ok(stream) => { let reader = Box::new(stream); let wrapper = Box::new(paimon_record_batch_reader { diff --git a/bindings/go/Makefile b/bindings/go/Makefile index b60762a..82c2b11 100644 --- a/bindings/go/Makefile +++ b/bindings/go/Makefile @@ -50,7 +50,7 @@ build: # Run Go integration tests. # Requires test data: run 'make docker-up' from the repo root first. -# CGO is needed by arrow-go's cdata package used in tests. +# CGO is needed by arrow-go's cdata package (imported by the main paimon package). 
PAIMON_TEST_WAREHOUSE ?= /tmp/paimon-warehouse test: build diff --git a/bindings/go/catalog.go b/bindings/go/catalog.go index 51514fd..2adbf4a 100644 --- a/bindings/go/catalog.go +++ b/bindings/go/catalog.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "sync" "unsafe" "github.com/jupiterrider/ffi" @@ -28,9 +29,10 @@ import ( // Catalog wraps a paimon FileSystemCatalog. type Catalog struct { - ctx context.Context - lib *libRef - inner *paimonCatalog + ctx context.Context + lib *libRef + inner *paimonCatalog + closeOnce sync.Once } // NewFileSystemCatalog creates a new FileSystemCatalog for the given warehouse path. @@ -44,10 +46,12 @@ func (p *Paimon) NewFileSystemCatalog(warehouse string) (*Catalog, error) { return &Catalog{ctx: p.ctx, lib: p.lib, inner: inner}, nil } -// Close releases the catalog resources. +// Close releases the catalog resources. Safe to call multiple times. func (c *Catalog) Close() { - ffiCatalogFree.symbol(c.ctx)(c.inner) - c.lib.release() + c.closeOnce.Do(func() { + ffiCatalogFree.symbol(c.ctx)(c.inner) + c.lib.release() + }) } // GetTable retrieves a table from the catalog using the given identifier. diff --git a/bindings/go/identifier.go b/bindings/go/identifier.go index d5ef1bfb..98c6bc8 100644 --- a/bindings/go/identifier.go +++ b/bindings/go/identifier.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "sync" "unsafe" "github.com/jupiterrider/ffi" @@ -28,9 +29,10 @@ import ( // Identifier identifies a table by database and object name. type Identifier struct { - ctx context.Context - lib *libRef - inner *paimonIdentifier + ctx context.Context + lib *libRef + inner *paimonIdentifier + closeOnce sync.Once } // NewIdentifier creates a new Identifier with the given database and object name. @@ -44,10 +46,12 @@ func (p *Paimon) NewIdentifier(database, object string) (*Identifier, error) { return &Identifier{ctx: p.ctx, lib: p.lib, inner: inner}, nil } -// Close releases the identifier resources. 
+// Close releases the identifier resources. Safe to call multiple times. func (id *Identifier) Close() { - ffiIdentifierFree.symbol(id.ctx)(id.inner) - id.lib.release() + id.closeOnce.Do(func() { + ffiIdentifierFree.symbol(id.ctx)(id.inner) + id.lib.release() + }) } var ffiIdentifierNew = newFFI(ffiOpts{ diff --git a/bindings/go/paimon.go b/bindings/go/paimon.go index 1c34742..199ae2b 100644 --- a/bindings/go/paimon.go +++ b/bindings/go/paimon.go @@ -19,9 +19,12 @@ // Package paimon provides a Go binding for Apache Paimon Rust. // -// This binding uses purego and libffi to call into the paimon-c shared library -// without requiring CGO. The pre-built shared library is embedded in the -// package and automatically loaded at runtime — no manual build step needed. +// This binding uses purego and libffi to call into the paimon-c shared library. +// The pre-built shared library is embedded in the package and automatically +// loaded at runtime — no manual build step needed. +// +// Note: reading Arrow record batches (via RecordBatchReader.NextRecord) +// requires CGO because it imports the arrow-go cdata package. // // Basic usage: // @@ -37,6 +40,7 @@ package paimon import ( "context" + "sync" ) // Paimon is the entry point for all paimon operations. @@ -47,8 +51,9 @@ import ( // reference-counted and will not be unloaded until all derived objects // are closed. type Paimon struct { - ctx context.Context - lib *libRef + ctx context.Context + lib *libRef + closeOnce sync.Once } // Open loads the embedded paimon-c shared library and returns a Paimon instance. @@ -73,6 +78,9 @@ func OpenLibrary(path string) (*Paimon, error) { // Close releases this Paimon instance's reference to the shared library. // The library is unloaded once all derived objects are also closed. +// Close is safe to call multiple times. 
func (p *Paimon) Close() { - p.lib.release() + p.closeOnce.Do(func() { + p.lib.release() + }) } diff --git a/bindings/go/plan.go b/bindings/go/plan.go index 986f67d..8ee37a2 100644 --- a/bindings/go/plan.go +++ b/bindings/go/plan.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "sync" "unsafe" "github.com/jupiterrider/ffi" @@ -28,15 +29,40 @@ import ( // Plan holds the scan result containing data splits to read. type Plan struct { - ctx context.Context - lib *libRef - inner *paimonPlan + ctx context.Context + lib *libRef + inner *paimonPlan + closeOnce sync.Once } -// Close releases the plan resources. +// Close releases the plan resources. Safe to call multiple times. func (p *Plan) Close() { - ffiPlanFree.symbol(p.ctx)(p.inner) - p.lib.release() + p.closeOnce.Do(func() { + ffiPlanFree.symbol(p.ctx)(p.inner) + p.lib.release() + }) +} + +// DataSplit references a single data split inside a Plan. +// The parent Plan must remain open while the DataSplit is in use. +type DataSplit struct { + plan *Plan + index int +} + +// NumSplits returns the number of data splits in the plan. +func (p *Plan) NumSplits() int { + return ffiPlanNumSplits.symbol(p.ctx)(p.inner) +} + +// Splits returns all data splits in the plan. 
+func (p *Plan) Splits() []DataSplit { + n := p.NumSplits() + splits := make([]DataSplit, n) + for i := range splits { + splits[i] = DataSplit{plan: p, index: i} + } + return splits } var ffiPlanFree = newFFI(ffiOpts{ @@ -51,3 +77,18 @@ var ffiPlanFree = newFFI(ffiOpts{ ) } }) + +var ffiPlanNumSplits = newFFI(ffiOpts{ + sym: "paimon_plan_num_splits", + rType: &ffi.TypePointer, // usize == pointer-sized on 64-bit + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(_ context.Context, ffiCall ffiCall) func(plan *paimonPlan) int { + return func(plan *paimonPlan) int { + var count uintptr + ffiCall( + unsafe.Pointer(&count), + unsafe.Pointer(&plan), + ) + return int(count) + } +}) diff --git a/bindings/go/read_builder.go b/bindings/go/read_builder.go index 4bc6582..a956325 100644 --- a/bindings/go/read_builder.go +++ b/bindings/go/read_builder.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "sync" "unsafe" "github.com/jupiterrider/ffi" @@ -28,15 +29,18 @@ import ( // ReadBuilder creates TableScan and TableRead instances. type ReadBuilder struct { - ctx context.Context - lib *libRef - inner *paimonReadBuilder + ctx context.Context + lib *libRef + inner *paimonReadBuilder + closeOnce sync.Once } -// Close releases the read builder resources. +// Close releases the read builder resources. Safe to call multiple times. func (rb *ReadBuilder) Close() { - ffiReadBuilderFree.symbol(rb.ctx)(rb.inner) - rb.lib.release() + rb.closeOnce.Do(func() { + ffiReadBuilderFree.symbol(rb.ctx)(rb.inner) + rb.lib.release() + }) } // NewScan creates a TableScan for planning which data files to read. diff --git a/bindings/go/table.go b/bindings/go/table.go index b6d49c5..6f99b32 100644 --- a/bindings/go/table.go +++ b/bindings/go/table.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "sync" "unsafe" "github.com/jupiterrider/ffi" @@ -28,15 +29,18 @@ import ( // Table represents a paimon table. 
type Table struct { - ctx context.Context - lib *libRef - inner *paimonTable + ctx context.Context + lib *libRef + inner *paimonTable + closeOnce sync.Once } -// Close releases the table resources. +// Close releases the table resources. Safe to call multiple times. func (t *Table) Close() { - ffiTableFree.symbol(t.ctx)(t.inner) - t.lib.release() + t.closeOnce.Do(func() { + ffiTableFree.symbol(t.ctx)(t.inner) + t.lib.release() + }) } // NewReadBuilder creates a ReadBuilder for this table. diff --git a/bindings/go/table_read.go b/bindings/go/table_read.go index e6ff1a9..f762c87 100644 --- a/bindings/go/table_read.go +++ b/bindings/go/table_read.go @@ -21,8 +21,10 @@ package paimon import ( "context" + "errors" "io" "runtime" + "sync" "unsafe" "github.com/apache/arrow-go/v18/arrow" @@ -32,15 +34,18 @@ import ( // TableRead reads data from a table given a plan of splits. type TableRead struct { - ctx context.Context - lib *libRef - inner *paimonTableRead + ctx context.Context + lib *libRef + inner *paimonTableRead + closeOnce sync.Once } -// Close releases the table read resources. +// Close releases the table read resources. Safe to call multiple times. func (tr *TableRead) Close() { - ffiTableReadFree.symbol(tr.ctx)(tr.inner) - tr.lib.release() + tr.closeOnce.Do(func() { + ffiTableReadFree.symbol(tr.ctx)(tr.inner) + tr.lib.release() + }) } // RecordBatchReader iterates over Arrow record batches one at a time via @@ -56,9 +61,10 @@ func (tr *TableRead) Close() { // record.Release() // } type RecordBatchReader struct { - ctx context.Context - lib *libRef - inner *paimonRecordBatchReader + ctx context.Context + lib *libRef + inner *paimonRecordBatchReader + closeOnce sync.Once } // NextRecord returns the next Arrow record, or io.EOF when iteration is @@ -95,18 +101,46 @@ func (r *RecordBatchReader) next() (*arrowBatch, error) { return ab, nil } -// Close releases the underlying C record batch reader. +// Close releases the underlying C record batch reader. 
Safe to call multiple times. func (r *RecordBatchReader) Close() { - ffiRecordBatchReaderFree.symbol(r.ctx)(r.inner) - r.lib.release() + r.closeOnce.Do(func() { + ffiRecordBatchReaderFree.symbol(r.ctx)(r.inner) + r.lib.release() + }) } // ToRecordBatchReader creates a RecordBatchReader that lazily reads Arrow record batches -// from the given plan via the Arrow C Data Interface (zero-copy). +// from the given splits via the Arrow C Data Interface (zero-copy). +// +// All splits must originate from the same Plan and form a contiguous index +// range (as returned by Plan.Splits or a contiguous sub-slice of it). +// The parent Plan must remain open while the reader is in use. // // The caller must call Close on the returned reader when done. -func (tr *TableRead) ToRecordBatchReader(plan *Plan) (*RecordBatchReader, error) { - reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, plan.inner) +func (tr *TableRead) ToRecordBatchReader(splits []DataSplit) (*RecordBatchReader, error) { + if len(splits) == 0 { + return nil, errors.New("paimon: no splits provided") + } + plan := splits[0].plan + minIdx := splits[0].index + maxIdx := splits[0].index + for _, s := range splits[1:] { + if s.plan != plan { + return nil, errors.New("paimon: all splits must be from the same plan") + } + if s.index < minIdx { + minIdx = s.index + } + if s.index > maxIdx { + maxIdx = s.index + } + } + if maxIdx-minIdx+1 != len(splits) { + return nil, errors.New("paimon: splits must be contiguous") + } + offset := uintptr(minIdx) + length := uintptr(len(splits)) + reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, plan.inner, offset, length) if err != nil { return nil, err } @@ -130,14 +164,16 @@ var ffiTableReadFree = newFFI(ffiOpts{ var ffiTableReadToArrow = newFFI(ffiOpts{ sym: "paimon_table_read_to_arrow", rType: &typeResultRecordBatchReader, - aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer}, -}, func(ctx context.Context, ffiCall ffiCall) func(read *paimonTableRead, plan 
*paimonPlan) (*paimonRecordBatchReader, error) { - return func(read *paimonTableRead, plan *paimonPlan) (*paimonRecordBatchReader, error) { + aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer, &ffi.TypePointer}, +}, func(ctx context.Context, ffiCall ffiCall) func(read *paimonTableRead, plan *paimonPlan, offset uintptr, length uintptr) (*paimonRecordBatchReader, error) { + return func(read *paimonTableRead, plan *paimonPlan, offset uintptr, length uintptr) (*paimonRecordBatchReader, error) { var result resultRecordBatchReader ffiCall( unsafe.Pointer(&result), unsafe.Pointer(&read), unsafe.Pointer(&plan), + unsafe.Pointer(&offset), + unsafe.Pointer(&length), ) if result.error != nil { return nil, parseError(ctx, result.error) diff --git a/bindings/go/table_scan.go b/bindings/go/table_scan.go index 96e7750..88622da 100644 --- a/bindings/go/table_scan.go +++ b/bindings/go/table_scan.go @@ -21,6 +21,7 @@ package paimon import ( "context" + "sync" "unsafe" "github.com/jupiterrider/ffi" @@ -28,15 +29,18 @@ import ( // TableScan scans a table and produces a Plan containing data splits. type TableScan struct { - ctx context.Context - lib *libRef - inner *paimonTableScan + ctx context.Context + lib *libRef + inner *paimonTableScan + closeOnce sync.Once } -// Close releases the table scan resources. +// Close releases the table scan resources. Safe to call multiple times. func (ts *TableScan) Close() { - ffiTableScanFree.symbol(ts.ctx)(ts.inner) - ts.lib.release() + ts.closeOnce.Do(func() { + ffiTableScanFree.symbol(ts.ctx)(ts.inner) + ts.lib.release() + }) } // Plan executes the scan and returns a Plan containing data splits to read. 
diff --git a/bindings/go/tests/paimon_test.go b/bindings/go/tests/paimon_test.go index c8025ac..ed2efca 100644 --- a/bindings/go/tests/paimon_test.go +++ b/bindings/go/tests/paimon_test.go @@ -93,7 +93,7 @@ func TestReadLogTable(t *testing.T) { } defer read.Close() - reader, err := read.ToRecordBatchReader(plan) + reader, err := read.ToRecordBatchReader(plan.Splits()) if err != nil { t.Fatalf("Failed to read arrow: %v", err) } From 3f43cd18d54ca982596fc838eecdea905c689c49 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Sat, 14 Mar 2026 18:12:35 +0800 Subject: [PATCH 4/6] refine code again again --- bindings/go/paimon.go | 4 +-- bindings/go/plan.go | 65 +++++++++++++++++++++++++++++++++------ bindings/go/table_read.go | 22 +++++++------ bindings/go/table_scan.go | 2 +- 4 files changed, 70 insertions(+), 23 deletions(-) diff --git a/bindings/go/paimon.go b/bindings/go/paimon.go index 199ae2b..c273347 100644 --- a/bindings/go/paimon.go +++ b/bindings/go/paimon.go @@ -23,8 +23,8 @@ // The pre-built shared library is embedded in the package and automatically // loaded at runtime — no manual build step needed. // -// Note: reading Arrow record batches (via RecordBatchReader.NextRecord) -// requires CGO because it imports the arrow-go cdata package. +// This package requires CGO because it imports the arrow-go cdata package +// for Arrow C Data Interface support. // // Basic usage: // diff --git a/bindings/go/plan.go b/bindings/go/plan.go index 8ee37a2..7e523f1 100644 --- a/bindings/go/plan.go +++ b/bindings/go/plan.go @@ -21,46 +21,91 @@ package paimon import ( "context" + "runtime" "sync" + "sync/atomic" "unsafe" "github.com/jupiterrider/ffi" ) +// planHandle wraps the C plan pointer with reference counting. +// The C plan is freed when the last reference is released. 
+type planHandle struct { + ctx context.Context + lib *libRef + inner *paimonPlan + refs atomic.Int32 +} + +func newPlanHandle(ctx context.Context, lib *libRef, inner *paimonPlan) *planHandle { + h := &planHandle{ctx: ctx, lib: lib, inner: inner} + h.refs.Store(1) // initial ref for Plan + return h +} + +func (h *planHandle) acquire() { h.refs.Add(1) } + +func (h *planHandle) release() { + if h.refs.Add(-1) == 0 { + ffiPlanFree.symbol(h.ctx)(h.inner) + h.lib.release() + } +} + +// splitSet ties DataSplit values to a planHandle via a GC finalizer. +// When all DataSplits (and the slice backing them) become unreachable, +// the GC collects the splitSet and its finalizer releases the planHandle ref. +type splitSet struct { + handle *planHandle +} + +func newSplitSet(h *planHandle) *splitSet { + h.acquire() + s := &splitSet{handle: h} + runtime.SetFinalizer(s, (*splitSet).release) + return s +} + +func (s *splitSet) release() { + runtime.SetFinalizer(s, nil) + s.handle.release() +} + // Plan holds the scan result containing data splits to read. type Plan struct { - ctx context.Context - lib *libRef - inner *paimonPlan + handle *planHandle closeOnce sync.Once } // Close releases the plan resources. Safe to call multiple times. +// DataSplits obtained from Splits() remain valid after Close. func (p *Plan) Close() { p.closeOnce.Do(func() { - ffiPlanFree.symbol(p.ctx)(p.inner) - p.lib.release() + p.handle.release() }) } -// DataSplit references a single data split inside a Plan. -// The parent Plan must remain open while the DataSplit is in use. +// DataSplit identifies a single data split within a Plan. +// DataSplits keep the underlying plan data alive even after Plan.Close() +// is called, so they are safe to use independently. type DataSplit struct { - plan *Plan + set *splitSet index int } // NumSplits returns the number of data splits in the plan. 
func (p *Plan) NumSplits() int { - return ffiPlanNumSplits.symbol(p.ctx)(p.inner) + return ffiPlanNumSplits.symbol(p.handle.ctx)(p.handle.inner) } // Splits returns all data splits in the plan. func (p *Plan) Splits() []DataSplit { n := p.NumSplits() + set := newSplitSet(p.handle) splits := make([]DataSplit, n) for i := range splits { - splits[i] = DataSplit{plan: p, index: i} + splits[i] = DataSplit{set: set, index: i} } return splits } diff --git a/bindings/go/table_read.go b/bindings/go/table_read.go index f762c87..690dbd8 100644 --- a/bindings/go/table_read.go +++ b/bindings/go/table_read.go @@ -52,7 +52,7 @@ func (tr *TableRead) Close() { // the Arrow C Data Interface (zero-copy). Call NextRecord to advance and // Close when done. // -// reader, _ := read.ToRecordBatchReader(plan) +// reader, _ := read.ToRecordBatchReader(plan.Splits()) // defer reader.Close() // for { // record, err := reader.NextRecord() @@ -109,23 +109,25 @@ func (r *RecordBatchReader) Close() { }) } -// ToRecordBatchReader creates a RecordBatchReader that lazily reads Arrow record batches -// from the given splits via the Arrow C Data Interface (zero-copy). +// ToRecordBatchReader creates a RecordBatchReader that lazily reads Arrow +// record batches from the given splits via the Arrow C Data Interface +// (zero-copy). // -// All splits must originate from the same Plan and form a contiguous index -// range (as returned by Plan.Splits or a contiguous sub-slice of it). -// The parent Plan must remain open while the reader is in use. +// The splits select a contiguous index range within a plan. Order of +// elements in the slice does not matter; the range is determined by the +// minimum and maximum indices. Duplicate or non-contiguous indices return +// an error. All splits must originate from the same Plan. // // The caller must call Close on the returned reader when done. 
func (tr *TableRead) ToRecordBatchReader(splits []DataSplit) (*RecordBatchReader, error) { if len(splits) == 0 { return nil, errors.New("paimon: no splits provided") } - plan := splits[0].plan + handle := splits[0].set.handle minIdx := splits[0].index maxIdx := splits[0].index for _, s := range splits[1:] { - if s.plan != plan { + if s.set.handle != handle { return nil, errors.New("paimon: all splits must be from the same plan") } if s.index < minIdx { @@ -136,11 +138,11 @@ func (tr *TableRead) ToRecordBatchReader(splits []DataSplit) (*RecordBatchReader } } if maxIdx-minIdx+1 != len(splits) { - return nil, errors.New("paimon: splits must be contiguous") + return nil, errors.New("paimon: splits must be contiguous (no gaps or duplicates)") } offset := uintptr(minIdx) length := uintptr(len(splits)) - reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, plan.inner, offset, length) + reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, handle.inner, offset, length) if err != nil { return nil, err } diff --git a/bindings/go/table_scan.go b/bindings/go/table_scan.go index 88622da..f8e922f 100644 --- a/bindings/go/table_scan.go +++ b/bindings/go/table_scan.go @@ -51,7 +51,7 @@ func (ts *TableScan) Plan() (*Plan, error) { return nil, err } ts.lib.acquire() - return &Plan{ctx: ts.ctx, lib: ts.lib, inner: inner}, nil + return &Plan{handle: newPlanHandle(ts.ctx, ts.lib, inner)}, nil } var ffiTableScanFree = newFFI(ffiOpts{ From c5f412458975b3eb0a86b9864a44067497e2b934 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Sat, 14 Mar 2026 19:15:02 +0800 Subject: [PATCH 5/6] refine api --- bindings/go/catalog.go | 65 ++++++++++++-- bindings/go/ffi.go | 6 +- bindings/go/identifier.go | 72 +--------------- bindings/go/paimon.go | 65 +++++--------- bindings/go/plan.go | 70 +++++++-------- bindings/go/table_read.go | 142 +++++++++++++++++-------------- bindings/go/tests/paimon_test.go | 37 ++++---- bindings/go/util_unix.go | 8 +- 8 files changed, 217 insertions(+), 248 
deletions(-) diff --git a/bindings/go/catalog.go b/bindings/go/catalog.go index 2adbf4a..fbe708a 100644 --- a/bindings/go/catalog.go +++ b/bindings/go/catalog.go @@ -36,14 +36,18 @@ type Catalog struct { } // NewFileSystemCatalog creates a new FileSystemCatalog for the given warehouse path. -func (p *Paimon) NewFileSystemCatalog(warehouse string) (*Catalog, error) { - createFn := ffiCatalogNew.symbol(p.ctx) +func NewFileSystemCatalog(warehouse string) (*Catalog, error) { + ctx, lib, err := ensureLoaded() + if err != nil { + return nil, err + } + createFn := ffiCatalogNew.symbol(ctx) inner, err := createFn(warehouse) if err != nil { return nil, err } - p.lib.acquire() - return &Catalog{ctx: p.ctx, lib: p.lib, inner: inner}, nil + lib.acquire() + return &Catalog{ctx: ctx, lib: lib, inner: inner}, nil } // Close releases the catalog resources. Safe to call multiple times. @@ -55,9 +59,16 @@ func (c *Catalog) Close() { } // GetTable retrieves a table from the catalog using the given identifier. 
-func (c *Catalog) GetTable(id *Identifier) (*Table, error) { +func (c *Catalog) GetTable(id Identifier) (*Table, error) { + createIdFn := ffiIdentifierNew.symbol(c.ctx) + cID, err := createIdFn(id.database, id.object) + if err != nil { + return nil, err + } + defer ffiIdentifierFree.symbol(c.ctx)(cID) + getFn := ffiCatalogGetTable.symbol(c.ctx) - inner, err := getFn(c.inner, id.inner) + inner, err := getFn(c.inner, cID) if err != nil { return nil, err } @@ -71,7 +82,7 @@ var ffiCatalogNew = newFFI(ffiOpts{ aTypes: []*ffi.Type{&ffi.TypePointer}, }, func(ctx context.Context, ffiCall ffiCall) func(warehouse string) (*paimonCatalog, error) { return func(warehouse string) (*paimonCatalog, error) { - byteWarehouse, err := BytePtrFromString(warehouse) + byteWarehouse, err := bytePtrFromString(warehouse) if err != nil { return nil, err } @@ -100,6 +111,46 @@ var ffiCatalogFree = newFFI(ffiOpts{ } }) +var ffiIdentifierNew = newFFI(ffiOpts{ + sym: "paimon_identifier_new", + rType: &typeResultIdentifierNew, + aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer}, +}, func(ctx context.Context, ffiCall ffiCall) func(database, object string) (*paimonIdentifier, error) { + return func(database, object string) (*paimonIdentifier, error) { + byteDB, err := bytePtrFromString(database) + if err != nil { + return nil, err + } + byteObj, err := bytePtrFromString(object) + if err != nil { + return nil, err + } + var result resultIdentifierNew + ffiCall( + unsafe.Pointer(&result), + unsafe.Pointer(&byteDB), + unsafe.Pointer(&byteObj), + ) + if result.error != nil { + return nil, parseError(ctx, result.error) + } + return result.identifier, nil + } +}) + +var ffiIdentifierFree = newFFI(ffiOpts{ + sym: "paimon_identifier_free", + rType: &ffi.TypeVoid, + aTypes: []*ffi.Type{&ffi.TypePointer}, +}, func(_ context.Context, ffiCall ffiCall) func(id *paimonIdentifier) { + return func(id *paimonIdentifier) { + ffiCall( + nil, + unsafe.Pointer(&id), + ) + } +}) + var ffiCatalogGetTable = 
newFFI(ffiOpts{ sym: "paimon_catalog_get_table", rType: &typeResultGetTable, diff --git a/bindings/go/ffi.go b/bindings/go/ffi.go index e37b0a5..317980c 100644 --- a/bindings/go/ffi.go +++ b/bindings/go/ffi.go @@ -46,7 +46,7 @@ func (r *libRef) acquire() { r.count.Add(1) } func (r *libRef) release() { if r.count.Add(-1) == 0 { - _ = FreeLibrary(r.lib) + _ = freeLibrary(r.lib) } } @@ -96,7 +96,7 @@ func (f *FFI[T]) withFFI(ctx context.Context, lib uintptr) (context.Context, err ); status != ffi.OK { return nil, errors.New(status.String()) } - fn, err := GetProcAddress(lib, f.opts.sym.String()) + fn, err := getProcAddress(lib, f.opts.sym.String()) if err != nil { return nil, err } @@ -109,7 +109,7 @@ func (f *FFI[T]) withFFI(ctx context.Context, lib uintptr) (context.Context, err var withFFIs []contextWithFFI func newContext(path string) (ctx context.Context, lib *libRef, err error) { - handle, err := LoadLibrary(path) + handle, err := loadLibrary(path) if err != nil { return } diff --git a/bindings/go/identifier.go b/bindings/go/identifier.go index 98c6bc8..3ed647d 100644 --- a/bindings/go/identifier.go +++ b/bindings/go/identifier.go @@ -19,77 +19,13 @@ package paimon -import ( - "context" - "sync" - "unsafe" - - "github.com/jupiterrider/ffi" -) - // Identifier identifies a table by database and object name. type Identifier struct { - ctx context.Context - lib *libRef - inner *paimonIdentifier - closeOnce sync.Once + database string + object string } // NewIdentifier creates a new Identifier with the given database and object name. -func (p *Paimon) NewIdentifier(database, object string) (*Identifier, error) { - createFn := ffiIdentifierNew.symbol(p.ctx) - inner, err := createFn(database, object) - if err != nil { - return nil, err - } - p.lib.acquire() - return &Identifier{ctx: p.ctx, lib: p.lib, inner: inner}, nil -} - -// Close releases the identifier resources. Safe to call multiple times. 
-func (id *Identifier) Close() { - id.closeOnce.Do(func() { - ffiIdentifierFree.symbol(id.ctx)(id.inner) - id.lib.release() - }) +func NewIdentifier(database, object string) Identifier { + return Identifier{database: database, object: object} } - -var ffiIdentifierNew = newFFI(ffiOpts{ - sym: "paimon_identifier_new", - rType: &typeResultIdentifierNew, - aTypes: []*ffi.Type{&ffi.TypePointer, &ffi.TypePointer}, -}, func(ctx context.Context, ffiCall ffiCall) func(database, object string) (*paimonIdentifier, error) { - return func(database, object string) (*paimonIdentifier, error) { - byteDB, err := BytePtrFromString(database) - if err != nil { - return nil, err - } - byteObj, err := BytePtrFromString(object) - if err != nil { - return nil, err - } - var result resultIdentifierNew - ffiCall( - unsafe.Pointer(&result), - unsafe.Pointer(&byteDB), - unsafe.Pointer(&byteObj), - ) - if result.error != nil { - return nil, parseError(ctx, result.error) - } - return result.identifier, nil - } -}) - -var ffiIdentifierFree = newFFI(ffiOpts{ - sym: "paimon_identifier_free", - rType: &ffi.TypeVoid, - aTypes: []*ffi.Type{&ffi.TypePointer}, -}, func(_ context.Context, ffiCall ffiCall) func(id *paimonIdentifier) { - return func(id *paimonIdentifier) { - ffiCall( - nil, - unsafe.Pointer(&id), - ) - } -}) diff --git a/bindings/go/paimon.go b/bindings/go/paimon.go index c273347..ac9ac27 100644 --- a/bindings/go/paimon.go +++ b/bindings/go/paimon.go @@ -28,14 +28,13 @@ // // Basic usage: // -// p, err := paimon.Open() -// if err != nil { -// log.Fatal(err) -// } -// defer p.Close() +// catalog, err := paimon.NewFileSystemCatalog("/path/to/warehouse") +// if err != nil { log.Fatal(err) } +// defer catalog.Close() // -// catalog, err := p.NewFileSystemCatalog("/path/to/warehouse") -// ... 
+// table, err := catalog.GetTable(paimon.NewIdentifier("default", "my_table")) +// if err != nil { log.Fatal(err) } +// defer table.Close() package paimon import ( @@ -43,44 +42,20 @@ import ( "sync" ) -// Paimon is the entry point for all paimon operations. -// Create one with Open() or OpenLibrary(). -// -// Paimon must outlive all objects derived from it (Catalog, Table, etc.), -// or those objects must be closed first. The underlying shared library is -// reference-counted and will not be unloaded until all derived objects -// are closed. -type Paimon struct { - ctx context.Context - lib *libRef - closeOnce sync.Once -} - -// Open loads the embedded paimon-c shared library and returns a Paimon instance. -// The library is decompressed from the embedded binary on first call and -// cached for subsequent calls. -func Open() (*Paimon, error) { - if err := loadEmbeddedLib(); err != nil { - return nil, err - } - return OpenLibrary(libPath) -} - -// OpenLibrary loads a paimon-c shared library from an explicit filesystem path. -// Use this for development when working with a locally built library. -func OpenLibrary(path string) (*Paimon, error) { - ctx, lib, err := newContext(path) - if err != nil { - return nil, err - } - return &Paimon{ctx: ctx, lib: lib}, nil -} +var ( + globalOnce sync.Once + globalCtx context.Context + globalLib *libRef + globalErr error +) -// Close releases this Paimon instance's reference to the shared library. -// The library is unloaded once all derived objects are also closed. -// Close is safe to call multiple times. 
-func (p *Paimon) Close() { - p.closeOnce.Do(func() { - p.lib.release() +func ensureLoaded() (context.Context, *libRef, error) { + globalOnce.Do(func() { + if err := loadEmbeddedLib(); err != nil { + globalErr = err + return + } + globalCtx, globalLib, globalErr = newContext(libPath) }) + return globalCtx, globalLib, globalErr } diff --git a/bindings/go/plan.go b/bindings/go/plan.go index 7e523f1..a183921 100644 --- a/bindings/go/plan.go +++ b/bindings/go/plan.go @@ -29,6 +29,38 @@ import ( "github.com/jupiterrider/ffi" ) +// Plan holds the result of a table scan, containing data splits to read. +type Plan struct { + handle *planHandle + closeOnce sync.Once +} + +// Close releases the plan resources. Safe to call multiple times. +// DataSplits obtained from Splits() remain valid after Close. +func (p *Plan) Close() { + p.closeOnce.Do(func() { + p.handle.release() + }) +} + +// NumSplits returns the number of data splits in this plan. +func (p *Plan) NumSplits() int { + return ffiPlanNumSplits.symbol(p.handle.ctx)(p.handle.inner) +} + +// Splits returns all data splits in this plan. The returned DataSplits +// keep the underlying plan data alive via GC-attached reference counting, +// so they remain valid even after Plan.Close() is called. +func (p *Plan) Splits() []DataSplit { + n := p.NumSplits() + set := newSplitSet(p.handle) + splits := make([]DataSplit, n) + for i := 0; i < n; i++ { + splits[i] = DataSplit{set: set, index: i} + } + return splits +} + // planHandle wraps the C plan pointer with reference counting. // The C plan is freed when the last reference is released. 
type planHandle struct { @@ -40,7 +72,7 @@ type planHandle struct { func newPlanHandle(ctx context.Context, lib *libRef, inner *paimonPlan) *planHandle { h := &planHandle{ctx: ctx, lib: lib, inner: inner} - h.refs.Store(1) // initial ref for Plan + h.refs.Store(1) // initial ref for the creator return h } @@ -72,44 +104,14 @@ func (s *splitSet) release() { s.handle.release() } -// Plan holds the scan result containing data splits to read. -type Plan struct { - handle *planHandle - closeOnce sync.Once -} - -// Close releases the plan resources. Safe to call multiple times. -// DataSplits obtained from Splits() remain valid after Close. -func (p *Plan) Close() { - p.closeOnce.Do(func() { - p.handle.release() - }) -} - -// DataSplit identifies a single data split within a Plan. -// DataSplits keep the underlying plan data alive even after Plan.Close() -// is called, so they are safe to use independently. +// DataSplit identifies a single data split within a plan. +// DataSplits keep the underlying plan data alive via GC-attached +// reference counting, so they are safe to use independently. type DataSplit struct { set *splitSet index int } -// NumSplits returns the number of data splits in the plan. -func (p *Plan) NumSplits() int { - return ffiPlanNumSplits.symbol(p.handle.ctx)(p.handle.inner) -} - -// Splits returns all data splits in the plan. 
-func (p *Plan) Splits() []DataSplit { - n := p.NumSplits() - set := newSplitSet(p.handle) - splits := make([]DataSplit, n) - for i := range splits { - splits[i] = DataSplit{set: set, index: i} - } - return splits -} - var ffiPlanFree = newFFI(ffiOpts{ sym: "paimon_plan_free", rType: &ffi.TypeVoid, diff --git a/bindings/go/table_read.go b/bindings/go/table_read.go index 690dbd8..afa1667 100644 --- a/bindings/go/table_read.go +++ b/bindings/go/table_read.go @@ -24,6 +24,7 @@ import ( "errors" "io" "runtime" + "sort" "sync" "unsafe" @@ -32,7 +33,7 @@ import ( "github.com/jupiterrider/ffi" ) -// TableRead reads data from a table given a plan of splits. +// TableRead reads data from splits produced by a TableScan. type TableRead struct { ctx context.Context lib *libRef @@ -48,22 +49,66 @@ func (tr *TableRead) Close() { }) } +// NewRecordBatchReader creates a RecordBatchReader that iterates over Arrow +// record batches for the given data splits. The splits can be non-contiguous +// and in any order. All splits must originate from the same Plan. +func (tr *TableRead) NewRecordBatchReader(splits []DataSplit) (*RecordBatchReader, error) { + if len(splits) == 0 { + return nil, errors.New("paimon: splits must not be empty") + } + + // All splits must share the same plan handle. + handle := splits[0].set.handle + indices := make([]int, len(splits)) + for i, s := range splits { + if s.set.handle != handle { + return nil, errors.New("paimon: all splits must originate from the same Plan") + } + indices[i] = s.index + } + sort.Ints(indices) + + // Group sorted indices into contiguous ranges. + type spanRange struct{ offset, length int } + ranges := []spanRange{{offset: indices[0], length: 1}} + for _, idx := range indices[1:] { + last := &ranges[len(ranges)-1] + if idx == last.offset+last.length { + last.length++ + } else { + ranges = append(ranges, spanRange{offset: idx, length: 1}) + } + } + + // Create one C reader per contiguous range. 
+ createFn := ffiTableReadToArrow.symbol(tr.ctx) + readers := make([]*paimonRecordBatchReader, 0, len(ranges)) + for _, r := range ranges { + inner, err := createFn(tr.inner, handle.inner, uintptr(r.offset), uintptr(r.length)) + if err != nil { + // Free already-created readers on error. + freeFn := ffiRecordBatchReaderFree.symbol(tr.ctx) + for _, rd := range readers { + freeFn(rd) + tr.lib.release() + } + return nil, err + } + tr.lib.acquire() + readers = append(readers, inner) + } + + return &RecordBatchReader{ctx: tr.ctx, lib: tr.lib, readers: readers}, nil +} + // RecordBatchReader iterates over Arrow record batches one at a time via // the Arrow C Data Interface (zero-copy). Call NextRecord to advance and // Close when done. -// -// reader, _ := read.ToRecordBatchReader(plan.Splits()) -// defer reader.Close() -// for { -// record, err := reader.NextRecord() -// if err != nil { break } // io.EOF at end -// // use record ... -// record.Release() -// } type RecordBatchReader struct { ctx context.Context lib *libRef - inner *paimonRecordBatchReader + readers []*paimonRecordBatchReader + current int closeOnce sync.Once } @@ -88,66 +133,33 @@ func (r *RecordBatchReader) NextRecord() (arrow.Record, error) { } func (r *RecordBatchReader) next() (*arrowBatch, error) { - array, schema, err := ffiRecordBatchReaderNext.symbol(r.ctx)(r.inner) - if err != nil { - return nil, err - } - if array == nil && schema == nil { - return nil, io.EOF + nextFn := ffiRecordBatchReaderNext.symbol(r.ctx) + for r.current < len(r.readers) { + array, schema, err := nextFn(r.readers[r.current]) + if err != nil { + return nil, err + } + if array == nil && schema == nil { + r.current++ + continue + } + r.lib.acquire() + ab := &arrowBatch{ctx: r.ctx, lib: r.lib, array: array, schema: schema} + runtime.SetFinalizer(ab, (*arrowBatch).release) + return ab, nil } - r.lib.acquire() - ab := &arrowBatch{ctx: r.ctx, lib: r.lib, array: array, schema: schema} - runtime.SetFinalizer(ab, 
(*arrowBatch).release) - return ab, nil + return nil, io.EOF } -// Close releases the underlying C record batch reader. Safe to call multiple times. +// Close releases the underlying C record batch readers. Safe to call multiple times. func (r *RecordBatchReader) Close() { r.closeOnce.Do(func() { - ffiRecordBatchReaderFree.symbol(r.ctx)(r.inner) - r.lib.release() - }) -} - -// ToRecordBatchReader creates a RecordBatchReader that lazily reads Arrow -// record batches from the given splits via the Arrow C Data Interface -// (zero-copy). -// -// The splits select a contiguous index range within a plan. Order of -// elements in the slice does not matter; the range is determined by the -// minimum and maximum indices. Duplicate or non-contiguous indices return -// an error. All splits must originate from the same Plan. -// -// The caller must call Close on the returned reader when done. -func (tr *TableRead) ToRecordBatchReader(splits []DataSplit) (*RecordBatchReader, error) { - if len(splits) == 0 { - return nil, errors.New("paimon: no splits provided") - } - handle := splits[0].set.handle - minIdx := splits[0].index - maxIdx := splits[0].index - for _, s := range splits[1:] { - if s.set.handle != handle { - return nil, errors.New("paimon: all splits must be from the same plan") - } - if s.index < minIdx { - minIdx = s.index - } - if s.index > maxIdx { - maxIdx = s.index + freeFn := ffiRecordBatchReaderFree.symbol(r.ctx) + for _, rd := range r.readers { + freeFn(rd) + r.lib.release() } - } - if maxIdx-minIdx+1 != len(splits) { - return nil, errors.New("paimon: splits must be contiguous (no gaps or duplicates)") - } - offset := uintptr(minIdx) - length := uintptr(len(splits)) - reader, err := ffiTableReadToArrow.symbol(tr.ctx)(tr.inner, handle.inner, offset, length) - if err != nil { - return nil, err - } - tr.lib.acquire() - return &RecordBatchReader{ctx: tr.ctx, lib: tr.lib, inner: reader}, nil + }) } var ffiTableReadFree = newFFI(ffiOpts{ diff --git 
a/bindings/go/tests/paimon_test.go b/bindings/go/tests/paimon_test.go index ed2efca..28e3fc2 100644 --- a/bindings/go/tests/paimon_test.go +++ b/bindings/go/tests/paimon_test.go @@ -38,44 +38,32 @@ import ( func TestReadLogTable(t *testing.T) { warehouse := os.Getenv("PAIMON_TEST_WAREHOUSE") if warehouse == "" { - warehouse = "/Users/yuxia/Projects/rust-projects/paimon-rust/dev/paimon-warehouse" + warehouse = "/tmp/paimon-warehouse" } if _, err := os.Stat(warehouse); os.IsNotExist(err) { t.Skipf("Skipping: warehouse %s does not exist (run 'make docker-up' first)", warehouse) } - p, err := paimon.Open() - if err != nil { - t.Fatalf("Failed to open paimon: %v", err) - } - defer p.Close() - - catalog, err := p.NewFileSystemCatalog(warehouse) + catalog, err := paimon.NewFileSystemCatalog(warehouse) if err != nil { t.Fatalf("Failed to create catalog: %v", err) } defer catalog.Close() - identifier, err := p.NewIdentifier("default", "simple_log_table") - if err != nil { - t.Fatalf("Failed to create identifier: %v", err) - } - defer identifier.Close() - - table, err := catalog.GetTable(identifier) + table, err := catalog.GetTable(paimon.NewIdentifier("default", "simple_log_table")) if err != nil { t.Fatalf("Failed to get table: %v", err) } defer table.Close() - readBuilder, err := table.NewReadBuilder() + rb, err := table.NewReadBuilder() if err != nil { t.Fatalf("Failed to create read builder: %v", err) } - defer readBuilder.Close() + defer rb.Close() - scan, err := readBuilder.NewScan() + scan, err := rb.NewScan() if err != nil { t.Fatalf("Failed to create scan: %v", err) } @@ -87,15 +75,20 @@ func TestReadLogTable(t *testing.T) { } defer plan.Close() - read, err := readBuilder.NewRead() + splits := plan.Splits() + if len(splits) == 0 { + t.Fatal("Expected at least one split") + } + + read, err := rb.NewRead() if err != nil { - t.Fatalf("Failed to create read: %v", err) + t.Fatalf("Failed to create table read: %v", err) } defer read.Close() - reader, err := 
read.ToRecordBatchReader(plan.Splits()) + reader, err := read.NewRecordBatchReader(splits) if err != nil { - t.Fatalf("Failed to read arrow: %v", err) + t.Fatalf("Failed to create record batch reader: %v", err) } defer reader.Close() diff --git a/bindings/go/util_unix.go b/bindings/go/util_unix.go index abc074b..16b97dc 100644 --- a/bindings/go/util_unix.go +++ b/bindings/go/util_unix.go @@ -26,25 +26,25 @@ import ( "golang.org/x/sys/unix" ) -func BytePtrFromString(s string) (*byte, error) { +func bytePtrFromString(s string) (*byte, error) { if s == "" { return new(byte), nil } return unix.BytePtrFromString(s) } -func LoadLibrary(path string) (uintptr, error) { +func loadLibrary(path string) (uintptr, error) { return purego.Dlopen(path, purego.RTLD_LAZY|purego.RTLD_GLOBAL) } -func FreeLibrary(handle uintptr) error { +func freeLibrary(handle uintptr) error { if handle == 0 { return nil } return purego.Dlclose(handle) } -func GetProcAddress(handle uintptr, name string) (uintptr, error) { +func getProcAddress(handle uintptr, name string) (uintptr, error) { if handle == 0 { return 0, nil } From 46a0607c18ce8670258eca7e4e85ba977d9d97b1 Mon Sep 17 00:00:00 2001 From: luoyuxia Date: Sun, 15 Mar 2026 10:20:44 +0800 Subject: [PATCH 6/6] fix codex review comment --- bindings/go/catalog.go | 4 ++++ bindings/go/error.go | 4 ++++ bindings/go/ffi.go | 7 +++++++ bindings/go/lib.go | 9 +++++++-- bindings/go/plan.go | 7 +++++++ bindings/go/read_builder.go | 7 +++++++ bindings/go/table.go | 4 ++++ bindings/go/table_read.go | 8 ++++++++ bindings/go/table_scan.go | 4 ++++ bindings/go/types.go | 20 -------------------- bindings/go/util_unix.go | 2 +- 11 files changed, 53 insertions(+), 23 deletions(-) diff --git a/bindings/go/catalog.go b/bindings/go/catalog.go index fbe708a..b82035d 100644 --- a/bindings/go/catalog.go +++ b/bindings/go/catalog.go @@ -54,12 +54,16 @@ func NewFileSystemCatalog(warehouse string) (*Catalog, error) { func (c *Catalog) Close() { c.closeOnce.Do(func() { 
ffiCatalogFree.symbol(c.ctx)(c.inner) + c.inner = nil c.lib.release() }) } // GetTable retrieves a table from the catalog using the given identifier. func (c *Catalog) GetTable(id Identifier) (*Table, error) { + if c.inner == nil { + return nil, ErrClosed + } createIdFn := ffiIdentifierNew.symbol(c.ctx) cID, err := createIdFn(id.database, id.object) if err != nil { diff --git a/bindings/go/error.go b/bindings/go/error.go index 710ed83..feb6ee9 100644 --- a/bindings/go/error.go +++ b/bindings/go/error.go @@ -21,12 +21,16 @@ package paimon import ( "context" + "errors" "fmt" "unsafe" "github.com/jupiterrider/ffi" ) +// ErrClosed is returned when an operation is attempted on a closed resource. +var ErrClosed = errors.New("paimon: use of closed resource") + // ErrorCode represents categories of errors from paimon. type ErrorCode int32 diff --git a/bindings/go/ffi.go b/bindings/go/ffi.go index 317980c..cab2b54 100644 --- a/bindings/go/ffi.go +++ b/bindings/go/ffi.go @@ -22,6 +22,7 @@ package paimon import ( "context" "errors" + "os" "sync/atomic" "unsafe" @@ -117,9 +118,15 @@ func newContext(path string) (ctx context.Context, lib *libRef, err error) { for _, withFFI := range withFFIs { ctx, err = withFFI(ctx, handle) if err != nil { + _ = freeLibrary(handle) return } } + if removeErr := os.Remove(path); removeErr != nil && !errors.Is(removeErr, os.ErrNotExist) { + _ = freeLibrary(handle) + err = removeErr + return + } lib = newLibRef(handle) return } diff --git a/bindings/go/lib.go b/bindings/go/lib.go index 1100ec9..57e6389 100644 --- a/bindings/go/lib.go +++ b/bindings/go/lib.go @@ -62,16 +62,21 @@ func decompressLib(raw []byte) ([]byte, error) { return io.ReadAll(decoder) } -func writeTempExec(pattern string, binary []byte) (string, error) { +func writeTempExec(pattern string, binary []byte) (path string, err error) { f, err := os.CreateTemp("", pattern) if err != nil { return "", err } defer f.Close() + defer func() { + if err != nil { + os.Remove(f.Name()) + } + 
}() if _, err = f.Write(binary); err != nil { return "", err } - if err = f.Chmod(os.ModePerm); err != nil { + if err = f.Chmod(0o700); err != nil { return "", err } return f.Name(), nil diff --git a/bindings/go/plan.go b/bindings/go/plan.go index a183921..218c631 100644 --- a/bindings/go/plan.go +++ b/bindings/go/plan.go @@ -40,11 +40,15 @@ type Plan struct { func (p *Plan) Close() { p.closeOnce.Do(func() { p.handle.release() + p.handle = nil }) } // NumSplits returns the number of data splits in this plan. func (p *Plan) NumSplits() int { + if p.handle == nil { + panic("paimon: NumSplits called on closed Plan") + } return ffiPlanNumSplits.symbol(p.handle.ctx)(p.handle.inner) } @@ -52,6 +56,9 @@ func (p *Plan) NumSplits() int { // keep the underlying plan data alive via GC-attached reference counting, // so they remain valid even after Plan.Close() is called. func (p *Plan) Splits() []DataSplit { + if p.handle == nil { + panic("paimon: Splits called on closed Plan") + } n := p.NumSplits() set := newSplitSet(p.handle) splits := make([]DataSplit, n) diff --git a/bindings/go/read_builder.go b/bindings/go/read_builder.go index a956325..20b7a0d 100644 --- a/bindings/go/read_builder.go +++ b/bindings/go/read_builder.go @@ -39,12 +39,16 @@ type ReadBuilder struct { func (rb *ReadBuilder) Close() { rb.closeOnce.Do(func() { ffiReadBuilderFree.symbol(rb.ctx)(rb.inner) + rb.inner = nil rb.lib.release() }) } // NewScan creates a TableScan for planning which data files to read. func (rb *ReadBuilder) NewScan() (*TableScan, error) { + if rb.inner == nil { + return nil, ErrClosed + } createFn := ffiReadBuilderNewScan.symbol(rb.ctx) inner, err := createFn(rb.inner) if err != nil { @@ -56,6 +60,9 @@ func (rb *ReadBuilder) NewScan() (*TableScan, error) { // NewRead creates a TableRead for reading data from splits. 
func (rb *ReadBuilder) NewRead() (*TableRead, error) { + if rb.inner == nil { + return nil, ErrClosed + } createFn := ffiReadBuilderNewRead.symbol(rb.ctx) inner, err := createFn(rb.inner) if err != nil { diff --git a/bindings/go/table.go b/bindings/go/table.go index 6f99b32..17ff7af 100644 --- a/bindings/go/table.go +++ b/bindings/go/table.go @@ -39,12 +39,16 @@ type Table struct { func (t *Table) Close() { t.closeOnce.Do(func() { ffiTableFree.symbol(t.ctx)(t.inner) + t.inner = nil t.lib.release() }) } // NewReadBuilder creates a ReadBuilder for this table. func (t *Table) NewReadBuilder() (*ReadBuilder, error) { + if t.inner == nil { + return nil, ErrClosed + } createFn := ffiTableNewReadBuilder.symbol(t.ctx) inner, err := createFn(t.inner) if err != nil { diff --git a/bindings/go/table_read.go b/bindings/go/table_read.go index afa1667..111eaec 100644 --- a/bindings/go/table_read.go +++ b/bindings/go/table_read.go @@ -45,6 +45,7 @@ type TableRead struct { func (tr *TableRead) Close() { tr.closeOnce.Do(func() { ffiTableReadFree.symbol(tr.ctx)(tr.inner) + tr.inner = nil tr.lib.release() }) } @@ -53,6 +54,9 @@ func (tr *TableRead) Close() { // record batches for the given data splits. The splits can be non-contiguous // and in any order. All splits must originate from the same Plan. func (tr *TableRead) NewRecordBatchReader(splits []DataSplit) (*RecordBatchReader, error) { + if tr.inner == nil { + return nil, ErrClosed + } if len(splits) == 0 { return nil, errors.New("paimon: splits must not be empty") } @@ -117,6 +121,9 @@ type RecordBatchReader struct { // and released automatically — the caller only needs to call Release on the // returned arrow.Record when done. 
func (r *RecordBatchReader) NextRecord() (arrow.Record, error) { + if r.readers == nil { + return nil, ErrClosed + } batch, err := r.next() if err != nil { return nil, err @@ -159,6 +166,7 @@ func (r *RecordBatchReader) Close() { freeFn(rd) r.lib.release() } + r.readers = nil }) } diff --git a/bindings/go/table_scan.go b/bindings/go/table_scan.go index f8e922f..9c87214 100644 --- a/bindings/go/table_scan.go +++ b/bindings/go/table_scan.go @@ -39,12 +39,16 @@ type TableScan struct { func (ts *TableScan) Close() { ts.closeOnce.Do(func() { ffiTableScanFree.symbol(ts.ctx)(ts.inner) + ts.inner = nil ts.lib.release() }) } // Plan executes the scan and returns a Plan containing data splits to read. func (ts *TableScan) Plan() (*Plan, error) { + if ts.inner == nil { + return nil, ErrClosed + } planFn := ffiTableScanPlan.symbol(ts.ctx) inner, err := planFn(ts.inner) if err != nil { diff --git a/bindings/go/types.go b/bindings/go/types.go index 04ce696..fdc8990 100644 --- a/bindings/go/types.go +++ b/bindings/go/types.go @@ -29,26 +29,6 @@ import ( // FFI type definitions mirroring C repr structs from paimon-c. 
var ( - // paimon_bytes { data: *mut u8, len: usize } - typeBytes = ffi.Type{ - Type: ffi.Struct, - Elements: &[]*ffi.Type{ - &ffi.TypePointer, - &ffi.TypePointer, - nil, - }[0], - } - - // paimon_error { code: i32, message: paimon_bytes } - typeError = ffi.Type{ - Type: ffi.Struct, - Elements: &[]*ffi.Type{ - &ffi.TypeSint32, - &typeBytes, - nil, - }[0], - } - // Result types: { value, *error } // paimon_result_catalog_new { catalog: paimon_catalog, error: *paimon_error } typeResultCatalogNew = ffi.Type{ diff --git a/bindings/go/util_unix.go b/bindings/go/util_unix.go index 16b97dc..ec3c63a 100644 --- a/bindings/go/util_unix.go +++ b/bindings/go/util_unix.go @@ -34,7 +34,7 @@ func bytePtrFromString(s string) (*byte, error) { } func loadLibrary(path string) (uintptr, error) { - return purego.Dlopen(path, purego.RTLD_LAZY|purego.RTLD_GLOBAL) + return purego.Dlopen(path, purego.RTLD_LAZY|purego.RTLD_LOCAL) } func freeLibrary(handle uintptr) error {