diff --git a/.env.example b/.env.example index be4a166..d732d11 100644 --- a/.env.example +++ b/.env.example @@ -8,11 +8,17 @@ MINIO_SECRET_KEY= MINIO_SECURE= GOOGLE_CREDS_PATH= -IMAGE_EMBEDDING_PROJECTNAME= +EMBEDDING_PROJECTNAME= SERVICE_VERSION= +RELEASE_VERSION= TELEGRAM_TOKEN= +TELEGRAM_WEBHOOK_EXTERNALURL= POSTGRES_USER= POSTGRES_PASSWORD= -POSTGRES_DB= \ No newline at end of file +POSTGRES_DB= + +FFMPEG_BINARY= +FFMPEG_CPULIMIT= +FFMPEG_THREADSLIMIT= \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ed8b0b4..70e6d3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -58,7 +58,7 @@ jobs: host: ${{ vars.SSH_HOST }} username: ${{ vars.SSH_USERNAME }} key: ${{ secrets.SSH_PRIVATE_KEY }} - envs: ELASTIC_USERNAME,ELASTIC_PASSWORD,ELASTIC_JAVA_OPTS,MINIO_ENDPOINT,MINIO_ACCESS_KEY,MINIO_SECRET_KEY,MINIO_SECURE,IMAGE_EMBEDDING_PROJECTNAME,GOOGLE_CREDS_PATH,REPO_URL,DEPLOY_PATH,RELEASE_VERSION,TELEGRAM_TOKEN,POSTGRES_DB,POSTGRES_PASSWORD,TAG + envs: ELASTIC_CLOUDID,ELASTIC_APIKEY,METADATA_ELASTIC_INDEX,TAG_ELASTIC_INDEX,MINIO_ENDPOINT,MINIO_ACCESS_KEY,MINIO_SECRET_KEY,MINIO_SECURE,EMBEDDING_PROJECTNAME,GOOGLE_CREDS_PATH,REPO_URL,DEPLOY_PATH,RELEASE_VERSION,TELEGRAM_TOKEN,POSTGRES_DB,POSTGRES_PASSWORD,TAG,TELEGRAM_WEBHOOK_EXTERNALURL,FFMPEG_CPULIMIT,MEDIA_STORAGE_BUCKET,TEMP_STORAGE_BUCKET script: | if [ ! 
-d "$DEPLOY_PATH/.git" ]; then git clone "$REPO_URL" "$DEPLOY_PATH" @@ -81,14 +81,17 @@ jobs: docker compose pull docker compose -p memelo up -d --wait --wait-timeout 300 env: - ELASTIC_USERNAME: ${{ vars.ELASTIC_USERNAME }} - ELASTIC_PASSWORD: ${{ secrets.ELASTIC_PASSWORD }} - ELASTIC_JAVA_OPTS: ${{ vars.ELASTIC_JAVA_OPTS }} + ELASTIC_CLOUDID: ${{ secrets.ELASTIC_CLOUDID }} + ELASTIC_APIKEY: ${{ secrets.ELASTIC_APIKEY }} + METADATA_ELASTIC_INDEX: ${{vars.METADATA_ELASTIC_INDEX}} + TAG_ELASTIC_INDEX: ${{vars.TAG_ELASTIC_INDEX}} + MINIO_ENDPOINT: ${{ vars.MINIO_ENDPOINT }} MINIO_ACCESS_KEY: ${{ vars.MINIO_ACCESS_KEY }} MINIO_SECRET_KEY: ${{ secrets.MINIO_SECRET_KEY }} MINIO_SECURE: ${{ vars.MINIO_SECURE }} - IMAGE_EMBEDDING_PROJECTNAME: ${{ vars.IMAGE_EMBEDDING_PROJECTNAME }} + + EMBEDDING_PROJECTNAME: ${{ vars.EMBEDDING_PROJECTNAME }} GOOGLE_CREDS_PATH: ${{ vars.GOOGLE_CREDS_PATH }} TELEGRAM_TOKEN: ${{secrets.TELEGRAM_TOKEN}} POSTGRES_DB: ${{vars.POSTGRES_DB}} @@ -98,6 +101,10 @@ jobs: DEPLOY_PATH: ${{ vars.DEPLOY_REPO_PATH }} RELEASE_VERSION: dev-${{ github.sha }} TAG: ${{ github.sha }} + TELEGRAM_WEBHOOK_EXTERNALURL: ${{vars.TELEGRAM_WEBHOOK_EXTERNALURL}} + FFMPEG_CPULIMIT: ${{vars.FFMPEG_CPULIMIT}} + MEDIA_STORAGE_BUCKET: ${{vars.MEDIA_STORAGE_BUCKET}} + TEMP_STORAGE_BUCKET: ${{vars.TEMP_STORAGE_BUCKET}} - name: Tests uses: robherley/go-test-action@v0 @@ -174,7 +181,7 @@ jobs: host: ${{ vars.SSH_HOST }} username: ${{ vars.SSH_USERNAME }} key: ${{ secrets.SSH_PRIVATE_KEY }} - envs: ELASTIC_USERNAME,ELASTIC_PASSWORD,ELASTIC_JAVA_OPTS,MINIO_ENDPOINT,MINIO_ACCESS_KEY,MINIO_SECRET_KEY,MINIO_SECURE,IMAGE_EMBEDDING_PROJECTNAME,GOOGLE_CREDS_PATH,REPO_URL,DEPLOY_PATH,RELEASE_VERSION,TELEGRAM_TOKEN,POSTGRES_DB,POSTGRES_PASSWORD,TAG + envs: 
ELASTIC_CLOUDID,ELASTIC_APIKEY,METADATA_ELASTIC_INDEX,TAG_ELASTIC_INDEX,MINIO_ENDPOINT,MINIO_ACCESS_KEY,MINIO_SECRET_KEY,MINIO_SECURE,EMBEDDING_PROJECTNAME,GOOGLE_CREDS_PATH,REPO_URL,DEPLOY_PATH,RELEASE_VERSION,TELEGRAM_TOKEN,POSTGRES_DB,POSTGRES_PASSWORD,TAG,TELEGRAM_WEBHOOK_EXTERNALURL,FFMPEG_CPULIMIT,MEDIA_STORAGE_BUCKET,TEMP_STORAGE_BUCKET script: | if [ ! -d "$DEPLOY_PATH/.git" ]; then git clone "$REPO_URL" "$DEPLOY_PATH" @@ -197,14 +204,16 @@ jobs: docker compose pull docker compose -p memelo up -d --wait --wait-timeout 300 env: - ELASTIC_USERNAME: ${{ vars.ELASTIC_USERNAME }} - ELASTIC_PASSWORD: ${{ secrets.ELASTIC_PASSWORD }} - ELASTIC_JAVA_OPTS: ${{ vars.ELASTIC_JAVA_OPTS }} + ELASTIC_CLOUDID: ${{ secrets.ELASTIC_CLOUDID }} + ELASTIC_APIKEY: ${{ secrets.ELASTIC_APIKEY }} + METADATA_ELASTIC_INDEX: ${{vars.METADATA_ELASTIC_INDEX}} + TAG_ELASTIC_INDEX: ${{vars.TAG_ELASTIC_INDEX}} + MINIO_ENDPOINT: ${{ vars.MINIO_ENDPOINT }} MINIO_ACCESS_KEY: ${{ vars.MINIO_ACCESS_KEY }} MINIO_SECRET_KEY: ${{ secrets.MINIO_SECRET_KEY }} MINIO_SECURE: ${{ vars.MINIO_SECURE }} - IMAGE_EMBEDDING_PROJECTNAME: ${{ vars.IMAGE_EMBEDDING_PROJECTNAME }} + EMBEDDING_PROJECTNAME: ${{ vars.EMBEDDING_PROJECTNAME }} GOOGLE_CREDS_PATH: ${{ vars.GOOGLE_CREDS_PATH }} TELEGRAM_TOKEN: ${{secrets.TELEGRAM_TOKEN}} POSTGRES_DB: ${{vars.POSTGRES_DB}} @@ -214,6 +223,10 @@ jobs: DEPLOY_PATH: ${{ vars.DEPLOY_REPO_PATH }} RELEASE_VERSION: ${{ github.ref_name }} TAG: ${{ github.sha }} + TELEGRAM_WEBHOOK_EXTERNALURL: ${{vars.TELEGRAM_WEBHOOK_EXTERNALURL}} + FFMPEG_CPULIMIT: ${{vars.FFMPEG_CPULIMIT}} + MEDIA_STORAGE_BUCKET: ${{vars.MEDIA_STORAGE_BUCKET}} + TEMP_STORAGE_BUCKET: ${{vars.TEMP_STORAGE_BUCKET}} - name: Tests uses: robherley/go-test-action@v0 diff --git a/Dockerfile-storage-service b/Dockerfile-storage-service index 2e3be4e..828edc7 100644 --- a/Dockerfile-storage-service +++ b/Dockerfile-storage-service @@ -1,4 +1,4 @@ -FROM golang:1.24.10-alpine AS builder +FROM golang:1.25.0-alpine AS builder 
WORKDIR /app RUN apk add --update --no-cache bash curl \ @@ -15,8 +15,7 @@ RUN go build -a -installsuffix cgo -o output.bin FROM alpine:latest WORKDIR /app -RUN apk add curl -RUN apk add vips vips-poppler +RUN apk add curl vips vips-poppler ffmpeg cpulimit COPY --from=builder /app/storage-service/output.bin storage-service WORKDIR /app/config diff --git a/Dockerfile-telegram-service b/Dockerfile-telegram-service index 50b25fa..d493185 100644 --- a/Dockerfile-telegram-service +++ b/Dockerfile-telegram-service @@ -1,4 +1,4 @@ -FROM golang:1.24.10-alpine AS builder +FROM golang:1.25.0-alpine AS builder WORKDIR /app RUN apk add --update --no-cache bash curl \ diff --git a/README.md b/README.md index 1223c94..fcffb2d 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ Integration services ──(gRPC)─► storage-service **Start dependencies:** ```sh - docker compose up elasticsearch minio -d + docker compose up elasticsearch minio -d ``` This starts Elasticsearch (`:9200`), MinIO (`:9000`). 
diff --git a/common/config/InitConfig.go b/common/config/InitConfig.go index 5442e73..d0a2fd8 100644 --- a/common/config/InitConfig.go +++ b/common/config/InitConfig.go @@ -57,3 +57,11 @@ type ServerConfig struct { type LoggingConfig struct { Level string } + +type MediaStorageConfig struct { + Endpoint string + AccessKey string + SecretKey string + Bucket string + Secure bool +} diff --git a/common/go.mod b/common/go.mod index 832b773..97cc897 100644 --- a/common/go.mod +++ b/common/go.mod @@ -1,21 +1,29 @@ module github.com/weoses/memelo/common -go 1.24.10 +go 1.25.0 require ( - github.com/google/uuid v1.5.0 + github.com/google/uuid v1.6.0 github.com/joho/godotenv v1.5.1 github.com/spf13/viper v1.19.0 ) require ( + github.com/dustin/go-humanize v1.0.1 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/go-ini/ini v1.67.0 // indirect + github.com/goccy/go-json v0.10.4 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/magiconair/properties v1.8.7 // indirect + github.com/minio/md5-simd v1.1.2 // indirect + github.com/minio/minio-go/v7 v7.0.84 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/rogpeppe/go-internal v1.13.1 // indirect + github.com/rs/xid v1.6.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect @@ -25,7 +33,9 @@ require ( github.com/stretchr/testify v1.11.1 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.uber.org/multierr v1.10.0 // indirect + golang.org/x/crypto v0.31.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect + golang.org/x/net v0.33.0 // indirect golang.org/x/sys v0.38.0 // indirect golang.org/x/text v0.31.0 // indirect 
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect diff --git a/common/go.sum b/common/go.sum index 802b954..b44101f 100644 --- a/common/go.sum +++ b/common/go.sum @@ -2,18 +2,31 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= +github.com/goccy/go-json v0.10.4 h1:JSwxQzIqKfmFX1swYPpUThQZp/Ka4wzJdK0LWVytLPM= +github.com/goccy/go-json v0.10.4/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU= github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/hashicorp/hcl v1.0.0 
h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= +github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -23,6 +36,10 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v7 v7.0.84 h1:D1HVmAF8JF8Bpi6IU4V9vIEj+8pc+xU88EWMs2yed0E= +github.com/minio/minio-go/v7 v7.0.84/go.mod h1:57YXpvc5l3rjPdhqNrDsvVlY0qPI6UTk1bflAe+9doY= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= @@ -32,6 +49,8 @@ 
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= @@ -60,8 +79,12 @@ github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8 github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= diff --git a/common/helper/util.go b/common/helper/util.go index 198e570..b8ba727 
100644 --- a/common/helper/util.go +++ b/common/helper/util.go @@ -3,8 +3,31 @@ package helper import ( "crypto/md5" "encoding/hex" + "io" ) +type ErrLogger interface { + Error(msg string, args ...any) +} + +func QuietClose(c io.Closer, logger ErrLogger) { + if c == nil { + return + } + if err := c.Close(); err != nil { + logger.Error("failed to close", "error", err) + } +} + +func QuietCloseAll[T io.Closer](c []T, logger ErrLogger) { + if c == nil { + return + } + for _, c := range c { + QuietClose(c, logger) + } +} + func Addr[T any](v T) *T { return &v } func DefaultString(item *string) string { @@ -21,6 +44,17 @@ func TransformSlice[F any, T any](from []F, to []T, transformer func(F) T) []T { return to } +func TransformSliceErr[F any, T any](from []F, to []T, transformer func(F) (T, error)) ([]T, error) { + var err error + for i, f := range from { + to[i], err = transformer(f) + if err != nil { + return nil, err + } + } + return to, nil +} + func CalcHash(base64Image string) string { hasher := md5.New() hasher.Write([]byte(base64Image)) diff --git a/common/service/TmpDataService.go b/common/service/TmpDataService.go new file mode 100644 index 0000000..f81e934 --- /dev/null +++ b/common/service/TmpDataService.go @@ -0,0 +1,65 @@ +package service + +import ( + "context" + "fmt" + "io" + + "github.com/google/uuid" + "github.com/weoses/memelo/common/storage" + "github.com/weoses/memelo/common/temp" +) + +type TmpDataService interface { + ByBytes(ctx context.Context, data []byte) (temp.S3BackedData, error) + ByReader(ctx context.Context, reader io.Reader) (temp.S3BackedData, error) + + WrapData(context.Context, temp.Data) (temp.S3BackedData, error) + WrapS3Path(context.Context, string) (temp.S3BackedData, error) +} + +type TmpDataServiceImpl struct { + ops storage.S3OperationsAdapter +} + +func (s *TmpDataServiceImpl) ByBytes(ctx context.Context, data []byte) (temp.S3BackedData, error) { + return s.WrapData(ctx, temp.DataBytes(data)) +} + +func (s 
*TmpDataServiceImpl) ByReader(ctx context.Context, reader io.Reader) (temp.S3BackedData, error) { + data, err := temp.DataTemp(reader) + if err != nil { + return nil, err + } + return s.WrapData(ctx, data) +} + +func (s *TmpDataServiceImpl) WrapData(ctx context.Context, data temp.Data) (temp.S3BackedData, error) { + return temp.NewS3BackedDataFromLocal( + data, + s.ops.IsGs(), + func(ctx context.Context, d temp.Data) (string, error) { + path := uuid.NewString() + if err := s.ops.Save(ctx, path, d, storage.WithContentType("application/octet-stream")); err != nil { + return "", fmt.Errorf("upload failed: %w", err) + } + return path, nil + }, + s.ops.GetUrl, + s.ops.Delete, + ), nil +} + +func (s *TmpDataServiceImpl) WrapS3Path(ctx context.Context, path string) (temp.S3BackedData, error) { + return temp.NewS3BackedDataFromPath( + path, + s.ops.IsGs(), + s.ops.Read, + s.ops.GetUrl, + s.ops.Delete, + ), nil +} + +func NewTmpDataS3Service(ops storage.S3OperationsAdapter) (TmpDataService, error) { + return &TmpDataServiceImpl{ops: ops}, nil +} diff --git a/common/storage/S3OperationsAdapter.go b/common/storage/S3OperationsAdapter.go new file mode 100644 index 0000000..7acdb7b --- /dev/null +++ b/common/storage/S3OperationsAdapter.go @@ -0,0 +1,188 @@ +package storage + +import ( + "context" + "fmt" + "log/slog" + "net/url" + "strings" + "time" + + "github.com/minio/minio-go/v7" + "github.com/minio/minio-go/v7/pkg/credentials" + "github.com/weoses/memelo/common/temp" + + "github.com/weoses/memelo/common/config" + "github.com/weoses/memelo/common/helper" +) + +type SaveOptions func(options *SaveOptionsParameter) + +type S3OperationsAdapter interface { + Save(ctx context.Context, path string, data temp.Data, options ...SaveOptions) error + Read(ctx context.Context, path string) (temp.Data, error) + GetUrl(ctx context.Context, path string) (string, error) + GetPresignedUrl(ctx context.Context, path string) (string, error) + Delete(ctx context.Context, path string) error + 
IsGs() bool +} + +type SaveOptionsParameter struct { + Expires *time.Time + ContentType *string +} + +func WithExpires(expires time.Time) SaveOptions { + return func(options *SaveOptionsParameter) { + options.Expires = &expires + } +} + +func WithContentType(contentType string) SaveOptions { + return func(options *SaveOptionsParameter) { + options.ContentType = &contentType + } +} + +type S3OperationsAdapterService struct { + client minio.Client + slogger *slog.Logger + + BucketName string + Endpoint string + Secure bool +} + +func NewS3OperationsAdapter(cfg *config.MediaStorageConfig) (S3OperationsAdapter, error) { + minioClient, err := minio.New(cfg.Endpoint, &minio.Options{ + Creds: credentials.NewStaticV4(cfg.AccessKey, cfg.SecretKey, ""), + Secure: cfg.Secure, + }) + if err != nil { + return nil, fmt.Errorf("failed to create service client: %w", err) + } + + exists, err := minioClient.BucketExists(context.Background(), cfg.Bucket) + if err != nil { + return nil, fmt.Errorf("failed to check if bucket exists: %w", err) + } + + if !exists { + err = minioClient.MakeBucket(context.Background(), cfg.Bucket, minio.MakeBucketOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create service bucket: %w", err) + } + } + + return &S3OperationsAdapterService{ + client: *minioClient, + BucketName: cfg.Bucket, + Endpoint: cfg.Endpoint, + Secure: cfg.Secure, + slogger: slog.With("service", "S3OperationsAdapterService", "endpoint", cfg.Endpoint, "bucket", cfg.Bucket), + }, nil +} + +func (m *S3OperationsAdapterService) IsGs() bool { + return strings.Contains(m.Endpoint, "googleapis.com") +} + +func (m *S3OperationsAdapterService) Save(ctx context.Context, path string, data temp.Data, options ...SaveOptions) error { + size, err := data.Size() + if err != nil { + return fmt.Errorf("failed to get size of data: %w", err) + } + + dataReader, err := data.Reader() + if err != nil { + return fmt.Errorf("failed to get temp reader: %w", err) + } + defer 
helper.QuietClose(dataReader, m.slogger) + + parameters := SaveOptionsParameter{} + for _, option := range options { + option(¶meters) + } + + putOptions := minio.PutObjectOptions{} + if parameters.Expires != nil { + putOptions.Expires = *parameters.Expires + } + + if parameters.ContentType != nil { + putOptions.ContentType = *parameters.ContentType + } + + m.slogger.InfoContext(ctx, "service PutObject", "object", path) + _, err = m.client.PutObject( + ctx, + m.BucketName, + path, + dataReader, + size, + putOptions, + ) + if err != nil { + m.slogger.ErrorContext(ctx, "service PutObject failed", "object", path, "error", err) + return fmt.Errorf("PutObject failed for %s: %w", path, err) + } + m.slogger.DebugContext(ctx, "service PutObject ok", "object", path) + return nil +} + +func (m *S3OperationsAdapterService) Read(ctx context.Context, path string) (temp.Data, error) { + m.slogger.InfoContext(ctx, "service GetObject", "object", path) + obj, err := m.client.GetObject(ctx, m.BucketName, path, minio.GetObjectOptions{}) + if err != nil { + m.slogger.ErrorContext(ctx, "service GetObject failed", "object", path, "error", err) + return nil, err + } + defer helper.QuietClose(obj, m.slogger) + + d, err := temp.DataTemp(obj) + if err != nil { + m.slogger.ErrorContext(ctx, "service GetObject DataTemp failed", "object", path, "error", err) + return nil, err + } + + m.slogger.DebugContext(ctx, "service GetObject ok", "object", path) + return d, nil +} + +func (m *S3OperationsAdapterService) GetPresignedUrl(ctx context.Context, path string) (string, error) { + m.slogger.InfoContext(ctx, "service PresignedGetObject", "object", path) + u, err := m.client.PresignedGetObject( + ctx, + m.BucketName, + path, + time.Hour*5, + url.Values{}, + ) + if err != nil { + m.slogger.ErrorContext(ctx, "service PresignedGetObject failed", "object", path, "error", err) + return "", err + } + m.slogger.DebugContext(ctx, "service PresignedGetObject ok", "object", path) + return u.String(), nil +} + 
+func (m *S3OperationsAdapterService) GetUrl(ctx context.Context, path string) (string, error) { + scheme := "http" + if m.Secure { + scheme = "https" + } + u := fmt.Sprintf("%s://%s/%s/%s", scheme, m.Endpoint, m.BucketName, path) + m.slogger.DebugContext(ctx, "service GetUrl", "object", path, "url", u) + return u, nil +} + +func (m *S3OperationsAdapterService) Delete(ctx context.Context, path string) error { + m.slogger.InfoContext(ctx, "service RemoveObject", "object", path) + err := m.client.RemoveObject(ctx, m.BucketName, path, minio.RemoveObjectOptions{}) + if err != nil { + m.slogger.ErrorContext(ctx, "service RemoveObject failed", "object", path, "error", err) + return err + } + m.slogger.DebugContext(ctx, "service RemoveObject ok", "object", path) + return nil +} diff --git a/common/temp/ByteBasedData.go b/common/temp/ByteBasedData.go new file mode 100644 index 0000000..3d5e141 --- /dev/null +++ b/common/temp/ByteBasedData.go @@ -0,0 +1,29 @@ +package temp + +import ( + "bytes" + "io" + "slices" +) + +type byteBasedData struct { + data []byte +} + +func (m *byteBasedData) Reader() (io.ReadCloser, error) { + return io.NopCloser(bytes.NewReader(m.data)), nil +} + +func (m *byteBasedData) Size() (int64, error) { + return int64(len(m.data)), nil +} + +func (m *byteBasedData) ReadAll() ([]byte, error) { + return slices.Clone(m.data), nil +} + +func (m *byteBasedData) Close() error { return nil } + +func DataBytes(data []byte) Data { + return &byteBasedData{data: data} +} diff --git a/common/temp/Data.go b/common/temp/Data.go new file mode 100644 index 0000000..dd02fdf --- /dev/null +++ b/common/temp/Data.go @@ -0,0 +1,22 @@ +package temp + +import ( + "context" + "io" +) + +const MaxInmemSize = 1 * 1024 * 1024 + +type Data interface { + io.Closer + Size() (int64, error) + Reader() (io.ReadCloser, error) + ReadAll() ([]byte, error) +} + +type S3BackedData interface { + Data + IsGsSupported() bool + GetS3Path(ctx context.Context) (string, error) + GetS3Url(ctx 
context.Context) (string, error) +} diff --git a/common/temp/S3WrappedData.go b/common/temp/S3WrappedData.go new file mode 100644 index 0000000..a178a68 --- /dev/null +++ b/common/temp/S3WrappedData.go @@ -0,0 +1,173 @@ +package temp + +import ( + "context" + "errors" + "io" + "log/slog" + "sync" + "time" +) + +type UploadResult struct { + Url string + Path string +} + +type s3wrappedData struct { + physical Data + once sync.Once + s3path string + upload func(ctx context.Context, data Data) (string, error) + download func(ctx context.Context, s3path string) (Data, error) + delete func(ctx context.Context, s3path string) error + url func(ctx context.Context, s3path string) (string, error) + gsSupported bool + closed bool + slogger *slog.Logger + mu sync.Mutex +} + +func (m *s3wrappedData) IsGsSupported() bool { + return m.gsSupported +} + +func (m *s3wrappedData) Size() (int64, error) { + err := m.resolveDownload() + if err != nil { + return 0, err + } + + return m.physical.Size() +} + +func (m *s3wrappedData) Reader() (io.ReadCloser, error) { + err := m.resolveDownload() + if err != nil { + return nil, err + } + + return m.physical.Reader() +} + +func (m *s3wrappedData) ReadAll() ([]byte, error) { + reader, err := m.Reader() + if err != nil { + return nil, err + } + return io.ReadAll(reader) +} + +func (m *s3wrappedData) GetS3Path(ctx context.Context) (string, error) { + err := m.resolveUpload(ctx) + if err != nil { + return "", err + } + return m.s3path, err +} + +func (m *s3wrappedData) GetS3Url(ctx context.Context) (string, error) { + err := m.resolveUpload(ctx) + if err != nil { + return "", err + } + return m.url(ctx, m.s3path) +} + +func (m *s3wrappedData) Close() error { + if m.closed { + return nil + } + + closeCtx := context.Background() + ctx, cancel := context.WithTimeout(closeCtx, 10*time.Second) + + var errS3Delete error + var errDataClose error + defer cancel() + + if m.s3path != "" { + errS3Delete = m.delete(ctx, m.s3path) + } + if m.physical != nil 
{ + errDataClose = m.physical.Close() + } + m.closed = true + return errors.Join(errDataClose, errS3Delete) +} + +func (m *s3wrappedData) resolveDownload() error { + if m.physical == nil { + m.mu.Lock() + defer m.mu.Unlock() + + var err error + m.once.Do(func() { + m.slogger.Info("s3wrappedData: downloaded object from s3", "uri", m.s3path) + m.physical, err = m.download(context.Background(), m.s3path) + }) + if err != nil { + return err + } + } + if m.physical == nil { + return errors.New("s3wrappedData: downloaded object from s3 doesn't exist") + } + + return nil +} + +func (m *s3wrappedData) resolveUpload(ctx context.Context) error { + var err error + if m.s3path == "" { + m.mu.Lock() + defer m.mu.Unlock() + + m.once.Do(func() { + m.s3path, err = m.upload(ctx, m.physical) + m.slogger.InfoContext(ctx, "s3wrappedData: uploaded object to s3", "uri", m.s3path) + }) + if err != nil { + return err + } + } + + if m.s3path == "" { + return errors.New("s3wrappedData: uploaded object from s3 doesn't exist") + } + return nil +} + +func NewS3BackedDataFromLocal( + physical Data, + gsSupported bool, + upload func(ctx context.Context, data Data) (string, error), + url func(ctx context.Context, s3path string) (string, error), + delete func(ctx context.Context, s3path string) error, +) S3BackedData { + return &s3wrappedData{ + physical: physical, + gsSupported: gsSupported, + upload: upload, + delete: delete, + url: url, + slogger: slog.With(slog.String("component", "s3wrappedData")), + } +} + +func NewS3BackedDataFromPath( + s3path string, + gsSupported bool, + download func(ctx context.Context, s3path string) (Data, error), + url func(ctx context.Context, s3path string) (string, error), + delete func(ctx context.Context, s3path string) error, +) S3BackedData { + return &s3wrappedData{ + s3path: s3path, + gsSupported: gsSupported, + download: download, + url: url, + delete: delete, + slogger: slog.With(slog.String("component", "s3wrappedData")), + } +} diff --git 
a/common/temp/TmpFileBasedData.go b/common/temp/TmpFileBasedData.go new file mode 100644 index 0000000..d617089 --- /dev/null +++ b/common/temp/TmpFileBasedData.go @@ -0,0 +1,72 @@ +package temp + +import ( + "errors" + "fmt" + "io" + "os" +) + +type tempBasedData struct { + path string + closed bool +} + +func (m *tempBasedData) Size() (int64, error) { + info, err := os.Stat(m.path) + if err != nil { + return 0, err + } + return info.Size(), nil +} + +func (m *tempBasedData) Reader() (io.ReadCloser, error) { + return os.Open(m.path) +} + +func (m *tempBasedData) ReadAll() ([]byte, error) { + return os.ReadFile(m.path) +} + +func (m *tempBasedData) Close() error { + if m.closed { + return nil + } + m.closed = true + return os.Remove(m.path) +} + +func DataTemp(r io.Reader) (Data, error) { + buf := make([]byte, MaxInmemSize) + n, err := io.ReadFull(r, buf) + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return &byteBasedData{data: buf[:n]}, nil + } + if err != nil { + return nil, fmt.Errorf("error reading temp: %w", err) + } + + // Hit the limit — spill to temp file + temp, err := os.CreateTemp("", "melo-tmp-file-*") + if err != nil { + return nil, fmt.Errorf("error creating temp file: %w", err) + } + name := temp.Name() + + if _, err = temp.Write(buf); err != nil { + _ = temp.Close() + _ = os.Remove(name) + return nil, fmt.Errorf("error writing buffer to temp file: %w", err) + } + if _, err = io.Copy(temp, r); err != nil { + _ = temp.Close() + _ = os.Remove(name) + return nil, fmt.Errorf("error writing rest to temp file: %w", err) + } + if err = temp.Close(); err != nil { + _ = os.Remove(name) + return nil, fmt.Errorf("error closing temp file: %w", err) + } + + return &tempBasedData{path: name}, nil +} diff --git a/docker-compose.yml b/docker-compose.yml index 23d8f2c..04b64e8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,67 +1,47 @@ services: - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:8.16.2 - 
environment: - ES_JAVA_OPTS: ${ELASTIC_JAVA_OPTS:--Xmx1g} - bootstrap.memory_lock: true - discovery.type: single-node - xpack.security.enabled: true - xpack.security.enrollment.enabled: false - ELASTIC_USERNAME: ${ELASTIC_USERNAME:-elastic} - ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-elastic} - ports: - - "127.0.0.1:9200:9200" - networks: - - env - healthcheck: - test: [ "CMD", "curl", "-f", "http://localhost:9200", "-u", "${ELASTIC_USERNAME:-elastic}:${ELASTIC_PASSWORD:-elastic}" ] - interval: 30s - timeout: 10s - retries: 5 - start_period: 90s - volumes: - - "elastic_data:/usr/share/elasticsearch/data" - - minio: - image: minio/minio:latest - environment: - MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minio123} - MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minio123} - ports: - - "127.0.0.1:9000:9000" - command: server /data - healthcheck: - test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ] - interval: 30s - timeout: 10s - retries: 3 - start_period: 10s - networks: - - env - volumes: - - "minio_data:/data" - storage-service: build: dockerfile: Dockerfile-storage-service image: ghcr.io/weoses/storage-service:${RELEASE_VERSION:-latest} environment: - METADATA_STORAGE_ELASTIC_ADDRESSES: http://elasticsearch:9200 - METADATA_STORAGE_ELASTIC_USERNAME: ${ELASTIC_USERNAME:-elastic} - METADATA_STORAGE_ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-elastic} +# METADATA_DB_ELASTIC_ADDRESSES: http://elasticsearch:9200 +# METADATA_DB_ELASTIC_USERNAME: ${ELASTIC_USERNAME:-elastic} +# METADATA_DB_ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-elastic} + +# TAG_DB_ELASTIC_ADDRESSES: http://elasticsearch:9200 +# TAG_DB_ELASTIC_USERNAME: ${ELASTIC_USERNAME:-elastic} +# TAG_DB_ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-elastic} + + METADATA_DB_ELASTIC_CLOUDID: ${ELASTIC_CLOUDID} + METADATA_DB_ELASTIC_APIKEY: ${ELASTIC_APIKEY} + METADATA_DB_INDEX: ${METADATA_ELASTIC_INDEX} + + TAG_DB_ELASTIC_CLOUDID: ${ELASTIC_CLOUDID} + TAG_DB_ELASTIC_APIKEY: ${ELASTIC_APIKEY} + TAG_DB_INDEX: ${TAG_ELASTIC_INDEX} - 
TAG_STORAGE_ELASTIC_ADDRESSES: http://elasticsearch:9200 - TAG_STORAGE_ELASTIC_USERNAME: ${ELASTIC_USERNAME:-elastic} - TAG_STORAGE_ELASTIC_PASSWORD: ${ELASTIC_PASSWORD:-elastic} - IMAGE_STORAGE_S3_ENDPOINT: ${MINIO_ENDPOINT:-minio:9000} - IMAGE_STORAGE_S3_ACCESSKEY: ${MINIO_ACCESS_KEY:-minio123} - IMAGE_STORAGE_S3_SECRETKEY: ${MINIO_SECRET_KEY:-minio123} - IMAGE_STORAGE_S3_SECURE: ${MINIO_SECURE:-false} + MEDIA_STORAGE_ENDPOINT: ${MINIO_ENDPOINT:-service:9000} + MEDIA_STORAGE_ACCESSKEY: ${MINIO_ACCESS_KEY:-minio123} + MEDIA_STORAGE_SECRETKEY: ${MINIO_SECRET_KEY:-minio123} + MEDIA_STORAGE_SECURE: ${MINIO_SECURE:-false} + MEDIA_STORAGE_BUCKET: ${MEDIA_STORAGE_BUCKET} - IMAGE_EMBEDDING_PROJECTNAME: ${IMAGE_EMBEDDING_PROJECTNAME} + TEMP_STORAGE_ENDPOINT: ${MINIO_ENDPOINT:-service:9000} + TEMP_STORAGE_ACCESSKEY: ${MINIO_ACCESS_KEY:-minio123} + TEMP_STORAGE_SECRETKEY: ${MINIO_SECRET_KEY:-minio123} + TEMP_STORAGE_SECURE: ${MINIO_SECURE:-false} + TEMP_STORAGE_BUCKET: ${TEMP_STORAGE_BUCKET} + + MEDIA_EMBEDDING_PROJECTNAME: ${EMBEDDING_PROJECTNAME} + AUDIO_STT_PROJECTNAME: ${EMBEDDING_PROJECTNAME} GOOGLE_APPLICATION_CREDENTIALS: /app/creds/creds.json + + FFMPEG_BINARY: ${FFMPEG_BINARY} + FFMPEG_CPULIMIT: ${FFMPEG_CPULIMIT} + FFMPEG_THREADSLIMIT: ${FFMPEG_THREADSLIMIT} + restart: unless-stopped ports: - "127.0.0.1:7001:7001" @@ -70,17 +50,12 @@ services: - "${GOOGLE_CREDS_PATH:-$HOME/.config/gcloud/application_default_credentials.json}:/app/creds/creds.json" networks: - env - depends_on: - elasticsearch: - condition: service_healthy - minio: - condition: service_healthy healthcheck: test: [ "CMD", "curl", "-f", "http://localhost:7001/health" ] interval: 30s timeout: 10s - retries: 3 - start_period: 10s + retries: 6 + start_period: 20s postgres: image: postgres:16-alpine @@ -93,9 +68,9 @@ services: networks: - env volumes: - - "postgres_data:/var/lib/postgresql/data" + - "postgres_data:/var/lib/postgresql/temp" healthcheck: - test: ["CMD-SHELL", "pg_isready -U 
${POSTGRES_USER:-memelo} -d ${POSTGRES_DB:-telegram}"] + test: [ "CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-memelo} -d ${POSTGRES_DB:-telegram}" ] interval: 10s timeout: 5s retries: 5 @@ -106,9 +81,17 @@ services: image: ghcr.io/weoses/telegram-service:${RELEASE_VERSION:-latest} environment: TELEGRAM_TOKEN: ${TELEGRAM_TOKEN} + WEBHOOK_EXTERNALURL: ${TELEGRAM_WEBHOOK_EXTERNALURL} STORAGE_SERVICE_URI: http://storage-service:7001 POSTGRES_DSN: postgres://${POSTGRES_USER:-memelo}:${POSTGRES_PASSWORD:-memelo}@postgres:5432/${POSTGRES_DB:-telegram}?sslmode=disable + TEMP_STORAGE_ENDPOINT: ${MINIO_ENDPOINT:-service:9000} + TEMP_STORAGE_ACCESSKEY: ${MINIO_ACCESS_KEY:-minio123} + TEMP_STORAGE_SECRETKEY: ${MINIO_SECRET_KEY:-minio123} + TEMP_STORAGE_SECURE: ${MINIO_SECURE:-false} + TEMP_STORAGE_BUCKET: ${TEMP_STORAGE_BUCKET} restart: unless-stopped + ports: + - "127.0.0.1:7002:7002" volumes: - "./telegram-service/config.yaml:/app/config/config.yaml" networks: @@ -123,6 +106,5 @@ networks: env: volumes: - elastic_data: minio_data: postgres_data: \ No newline at end of file diff --git a/gen/go.mod b/gen/go.mod index 4d2c4e0..d9db912 100644 --- a/gen/go.mod +++ b/gen/go.mod @@ -1,6 +1,7 @@ module github.com/weoses/memelo/gen -go 1.24.10 +go 1.25.0 + require ( connectrpc.com/connect v1.19.1 // indirect google.golang.org/protobuf v1.36.11 // indirect diff --git a/gen/proto/v1/common.pb.go b/gen/proto/v1/common.pb.go index ccab630..a421887 100644 --- a/gen/proto/v1/common.pb.go +++ b/gen/proto/v1/common.pb.go @@ -24,8 +24,8 @@ const ( type ImageDto struct { state protoimpl.MessageState `protogen:"open.v1"` Url string `protobuf:"bytes,1,opt,name=url,proto3" json:"url,omitempty"` - Width int32 `protobuf:"varint,2,opt,name=width,proto3" json:"width,omitempty"` - Height int32 `protobuf:"varint,3,opt,name=height,proto3" json:"height,omitempty"` + ImageWidth *int32 `protobuf:"varint,2,opt,name=image_width,json=imageWidth,proto3,oneof" json:"image_width,omitempty"` + ImageHeight *int32 
`protobuf:"varint,3,opt,name=image_height,json=imageHeight,proto3,oneof" json:"image_height,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -67,16 +67,16 @@ func (x *ImageDto) GetUrl() string { return "" } -func (x *ImageDto) GetWidth() int32 { - if x != nil { - return x.Width +func (x *ImageDto) GetImageWidth() int32 { + if x != nil && x.ImageWidth != nil { + return *x.ImageWidth } return 0 } -func (x *ImageDto) GetHeight() int32 { - if x != nil { - return x.Height +func (x *ImageDto) GetImageHeight() int32 { + if x != nil && x.ImageHeight != nil { + return *x.ImageHeight } return 0 } @@ -86,7 +86,9 @@ type MemeDto struct { Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` OcrResult string `protobuf:"bytes,2,opt,name=ocr_result,json=ocrResult,proto3" json:"ocr_result,omitempty"` ImageThumbnail *ImageDto `protobuf:"bytes,3,opt,name=image_thumbnail,json=imageThumbnail,proto3" json:"image_thumbnail,omitempty"` - ImageOriginal *ImageDto `protobuf:"bytes,4,opt,name=image_original,json=imageOriginal,proto3" json:"image_original,omitempty"` + MediaOriginal *ImageDto `protobuf:"bytes,4,opt,name=media_original,json=mediaOriginal,proto3" json:"media_original,omitempty"` + Tags []string `protobuf:"bytes,5,rep,name=tags,proto3" json:"tags,omitempty"` + Type string `protobuf:"bytes,6,opt,name=type,proto3" json:"type,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -142,13 +144,27 @@ func (x *MemeDto) GetImageThumbnail() *ImageDto { return nil } -func (x *MemeDto) GetImageOriginal() *ImageDto { +func (x *MemeDto) GetMediaOriginal() *ImageDto { + if x != nil { + return x.MediaOriginal + } + return nil +} + +func (x *MemeDto) GetTags() []string { if x != nil { - return x.ImageOriginal + return x.Tags } return nil } +func (x *MemeDto) GetType() string { + if x != nil { + return x.Type + } + return "" +} + type DeleteAllRequest struct { state protoimpl.MessageState `protogen:"open.v1"` 
AccountId string `protobuf:"bytes,1,opt,name=account_id,json=accountId,proto3" json:"account_id,omitempty"` @@ -233,17 +249,22 @@ var File_proto_v1_common_proto protoreflect.FileDescriptor const file_proto_v1_common_proto_rawDesc = "" + "\n" + - "\x15proto/v1/common.proto\x12\x0fproto.memelo.v1\"J\n" + + "\x15proto/v1/common.proto\x12\x0fproto.memelo.v1\"\x8b\x01\n" + "\bImageDto\x12\x10\n" + - "\x03url\x18\x01 \x01(\tR\x03url\x12\x14\n" + - "\x05width\x18\x02 \x01(\x05R\x05width\x12\x16\n" + - "\x06height\x18\x03 \x01(\x05R\x06height\"\xbe\x01\n" + + "\x03url\x18\x01 \x01(\tR\x03url\x12$\n" + + "\vimage_width\x18\x02 \x01(\x05H\x00R\n" + + "imageWidth\x88\x01\x01\x12&\n" + + "\fimage_height\x18\x03 \x01(\x05H\x01R\vimageHeight\x88\x01\x01B\x0e\n" + + "\f_image_widthB\x0f\n" + + "\r_image_height\"\xe6\x01\n" + "\aMemeDto\x12\x0e\n" + "\x02id\x18\x01 \x01(\tR\x02id\x12\x1d\n" + "\n" + "ocr_result\x18\x02 \x01(\tR\tocrResult\x12B\n" + "\x0fimage_thumbnail\x18\x03 \x01(\v2\x19.proto.memelo.v1.ImageDtoR\x0eimageThumbnail\x12@\n" + - "\x0eimage_original\x18\x04 \x01(\v2\x19.proto.memelo.v1.ImageDtoR\rimageOriginal\"1\n" + + "\x0emedia_original\x18\x04 \x01(\v2\x19.proto.memelo.v1.ImageDtoR\rmediaOriginal\x12\x12\n" + + "\x04tags\x18\x05 \x03(\tR\x04tags\x12\x12\n" + + "\x04type\x18\x06 \x01(\tR\x04type\"1\n" + "\x10DeleteAllRequest\x12\x1d\n" + "\n" + "account_id\x18\x01 \x01(\tR\taccountId\"\x13\n" + @@ -271,7 +292,7 @@ var file_proto_v1_common_proto_goTypes = []any{ } var file_proto_v1_common_proto_depIdxs = []int32{ 0, // 0: proto.memelo.v1.MemeDto.image_thumbnail:type_name -> proto.memelo.v1.ImageDto - 0, // 1: proto.memelo.v1.MemeDto.image_original:type_name -> proto.memelo.v1.ImageDto + 0, // 1: proto.memelo.v1.MemeDto.media_original:type_name -> proto.memelo.v1.ImageDto 2, // [2:2] is the sub-list for method output_type 2, // [2:2] is the sub-list for method input_type 2, // [2:2] is the sub-list for extension type_name @@ -284,6 +305,7 @@ func 
file_proto_v1_common_proto_init() { if File_proto_v1_common_proto != nil { return } + file_proto_v1_common_proto_msgTypes[0].OneofWrappers = []any{} type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/gen/proto/v1/export_service.pb.go b/gen/proto/v1/export_service.pb.go index f5579ca..8e0b928 100644 --- a/gen/proto/v1/export_service.pb.go +++ b/gen/proto/v1/export_service.pb.go @@ -75,7 +75,7 @@ func (x *ExportRequest) GetId() string { type ExportImageDto struct { state protoimpl.MessageState `protogen:"open.v1"` - Data []byte `protobuf:"bytes,1,opt,name=data,proto3" json:"data,omitempty"` + Data []byte `protobuf:"bytes,1,opt,name=temp,proto3" json:"temp,omitempty"` Width int32 `protobuf:"varint,2,opt,name=width,proto3" json:"width,omitempty"` Height int32 `protobuf:"varint,3,opt,name=height,proto3" json:"height,omitempty"` unknownFields protoimpl.UnknownFields @@ -136,7 +136,10 @@ func (x *ExportImageDto) GetHeight() int32 { type ExportImageEmbedding struct { state protoimpl.MessageState `protogen:"open.v1"` Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"` - Data []float32 `protobuf:"fixed32,2,rep,packed,name=data,proto3" json:"data,omitempty"` + Data []float32 `protobuf:"fixed32,2,rep,packed,name=temp,proto3" json:"temp,omitempty"` + TimeStart int32 `protobuf:"varint,3,opt,name=time_start,json=timeStart,proto3" json:"time_start,omitempty"` + TimeEnd int32 `protobuf:"varint,4,opt,name=time_end,json=timeEnd,proto3" json:"time_end,omitempty"` + Type string `protobuf:"bytes,5,opt,name=type,proto3" json:"type,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -185,12 +188,33 @@ func (x *ExportImageEmbedding) GetData() []float32 { return nil } +func (x *ExportImageEmbedding) GetTimeStart() int32 { + if x != nil { + return x.TimeStart + } + return 0 +} + +func (x *ExportImageEmbedding) GetTimeEnd() int32 { + if x != nil { + return x.TimeEnd + } + return 0 +} + +func (x 
*ExportImageEmbedding) GetType() string { + if x != nil { + return x.Type + } + return "" +} + type ExportResponseChunk struct { - state protoimpl.MessageState `protogen:"open.v1"` - OcrResult string `protobuf:"bytes,1,opt,name=ocr_result,json=ocrResult,proto3" json:"ocr_result,omitempty"` - Embedding *ExportImageEmbedding `protobuf:"bytes,2,opt,name=embedding,proto3" json:"embedding,omitempty"` - OriginalImage *ExportImageDto `protobuf:"bytes,3,opt,name=original_image,json=originalImage,proto3" json:"original_image,omitempty"` - ThumbnailImage *ExportImageDto `protobuf:"bytes,4,opt,name=thumbnail_image,json=thumbnailImage,proto3" json:"thumbnail_image,omitempty"` + state protoimpl.MessageState `protogen:"open.v1"` + OcrResult string `protobuf:"bytes,1,opt,name=ocr_result,json=ocrResult,proto3" json:"ocr_result,omitempty"` + Embedding []*ExportImageEmbedding `protobuf:"bytes,2,rep,name=embedding,proto3" json:"embedding,omitempty"` + OriginalImage *ExportImageDto `protobuf:"bytes,3,opt,name=original_image,json=originalImage,proto3" json:"original_image,omitempty"` + ThumbnailImage *ExportImageDto `protobuf:"bytes,4,opt,name=thumbnail_image,json=thumbnailImage,proto3" json:"thumbnail_image,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -232,7 +256,7 @@ func (x *ExportResponseChunk) GetOcrResult() string { return "" } -func (x *ExportResponseChunk) GetEmbedding() *ExportImageEmbedding { +func (x *ExportResponseChunk) GetEmbedding() []*ExportImageEmbedding { if x != nil { return x.Embedding } @@ -267,14 +291,18 @@ const file_proto_v1_export_service_proto_rawDesc = "" + "\x0eExportImageDto\x12\x12\n" + "\x04data\x18\x01 \x01(\fR\x04data\x12\x14\n" + "\x05width\x18\x02 \x01(\x05R\x05width\x12\x16\n" + - "\x06height\x18\x03 \x01(\x05R\x06height\"@\n" + + "\x06height\x18\x03 \x01(\x05R\x06height\"\x8e\x01\n" + "\x14ExportImageEmbedding\x12\x14\n" + "\x05model\x18\x01 \x01(\tR\x05model\x12\x12\n" + - "\x04data\x18\x02 
\x03(\x02R\x04data\"\x8b\x02\n" + + "\x04data\x18\x02 \x03(\x02R\x04data\x12\x1d\n" + + "\n" + + "time_start\x18\x03 \x01(\x05R\ttimeStart\x12\x19\n" + + "\btime_end\x18\x04 \x01(\x05R\atimeEnd\x12\x12\n" + + "\x04type\x18\x05 \x01(\tR\x04type\"\x8b\x02\n" + "\x13ExportResponseChunk\x12\x1d\n" + "\n" + "ocr_result\x18\x01 \x01(\tR\tocrResult\x12C\n" + - "\tembedding\x18\x02 \x01(\v2%.proto.memelo.v1.ExportImageEmbeddingR\tembedding\x12F\n" + + "\tembedding\x18\x02 \x03(\v2%.proto.memelo.v1.ExportImageEmbeddingR\tembedding\x12F\n" + "\x0eoriginal_image\x18\x03 \x01(\v2\x1f.proto.memelo.v1.ExportImageDtoR\roriginalImage\x12H\n" + "\x0fthumbnail_image\x18\x04 \x01(\v2\x1f.proto.memelo.v1.ExportImageDtoR\x0ethumbnailImage2g\n" + "\rExportService\x12V\n" + diff --git a/gen/proto/v1/search_service.pb.go b/gen/proto/v1/search_service.pb.go index 6e46b4b..470856c 100644 --- a/gen/proto/v1/search_service.pb.go +++ b/gen/proto/v1/search_service.pb.go @@ -71,6 +71,58 @@ func (CreateMemeStatus) EnumDescriptor() ([]byte, []int) { return file_proto_v1_search_service_proto_rawDescGZIP(), []int{0} } +type MediaDataDto struct { + state protoimpl.MessageState `protogen:"open.v1"` + S3Path *string `protobuf:"bytes,1,opt,name=s3_path,json=s3Path,proto3,oneof" json:"s3_path,omitempty"` + Data []byte `protobuf:"bytes,2,opt,name=temp,proto3,oneof" json:"temp,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *MediaDataDto) Reset() { + *x = MediaDataDto{} + mi := &file_proto_v1_search_service_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *MediaDataDto) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MediaDataDto) ProtoMessage() {} + +func (x *MediaDataDto) ProtoReflect() protoreflect.Message { + mi := &file_proto_v1_search_service_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil 
{ + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MediaDataDto.ProtoReflect.Descriptor instead. +func (*MediaDataDto) Descriptor() ([]byte, []int) { + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{0} +} + +func (x *MediaDataDto) GetS3Path() string { + if x != nil && x.S3Path != nil { + return *x.S3Path + } + return "" +} + +func (x *MediaDataDto) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + type SearchMemeRequest struct { state protoimpl.MessageState `protogen:"open.v1"` AccountId string `protobuf:"bytes,1,opt,name=account_id,json=accountId,proto3" json:"account_id,omitempty"` @@ -83,7 +135,7 @@ type SearchMemeRequest struct { func (x *SearchMemeRequest) Reset() { *x = SearchMemeRequest{} - mi := &file_proto_v1_search_service_proto_msgTypes[0] + mi := &file_proto_v1_search_service_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -95,7 +147,7 @@ func (x *SearchMemeRequest) String() string { func (*SearchMemeRequest) ProtoMessage() {} func (x *SearchMemeRequest) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[0] + mi := &file_proto_v1_search_service_proto_msgTypes[1] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -108,7 +160,7 @@ func (x *SearchMemeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use SearchMemeRequest.ProtoReflect.Descriptor instead. 
func (*SearchMemeRequest) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{0} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{1} } func (x *SearchMemeRequest) GetAccountId() string { @@ -149,7 +201,7 @@ type SearchMemeResponse struct { func (x *SearchMemeResponse) Reset() { *x = SearchMemeResponse{} - mi := &file_proto_v1_search_service_proto_msgTypes[1] + mi := &file_proto_v1_search_service_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -161,7 +213,7 @@ func (x *SearchMemeResponse) String() string { func (*SearchMemeResponse) ProtoMessage() {} func (x *SearchMemeResponse) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[1] + mi := &file_proto_v1_search_service_proto_msgTypes[2] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -174,7 +226,7 @@ func (x *SearchMemeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use SearchMemeResponse.ProtoReflect.Descriptor instead. 
func (*SearchMemeResponse) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{1} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{2} } func (x *SearchMemeResponse) GetResults() []*MemeDto { @@ -194,14 +246,15 @@ func (x *SearchMemeResponse) GetSearcherName() string { type CreateMemeRequest struct { state protoimpl.MessageState `protogen:"open.v1"` AccountId string `protobuf:"bytes,1,opt,name=account_id,json=accountId,proto3" json:"account_id,omitempty"` - RawImage []byte `protobuf:"bytes,2,opt,name=raw_image,json=rawImage,proto3" json:"raw_image,omitempty"` + Image *MediaDataDto `protobuf:"bytes,2,opt,name=image,proto3,oneof" json:"image,omitempty"` + Video *MediaDataDto `protobuf:"bytes,3,opt,name=video,proto3,oneof" json:"video,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } func (x *CreateMemeRequest) Reset() { *x = CreateMemeRequest{} - mi := &file_proto_v1_search_service_proto_msgTypes[2] + mi := &file_proto_v1_search_service_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -213,7 +266,7 @@ func (x *CreateMemeRequest) String() string { func (*CreateMemeRequest) ProtoMessage() {} func (x *CreateMemeRequest) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[2] + mi := &file_proto_v1_search_service_proto_msgTypes[3] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -226,7 +279,7 @@ func (x *CreateMemeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use CreateMemeRequest.ProtoReflect.Descriptor instead. 
func (*CreateMemeRequest) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{2} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{3} } func (x *CreateMemeRequest) GetAccountId() string { @@ -236,9 +289,16 @@ func (x *CreateMemeRequest) GetAccountId() string { return "" } -func (x *CreateMemeRequest) GetRawImage() []byte { +func (x *CreateMemeRequest) GetImage() *MediaDataDto { if x != nil { - return x.RawImage + return x.Image + } + return nil +} + +func (x *CreateMemeRequest) GetVideo() *MediaDataDto { + if x != nil { + return x.Video } return nil } @@ -253,7 +313,7 @@ type CreateMemeResponse struct { func (x *CreateMemeResponse) Reset() { *x = CreateMemeResponse{} - mi := &file_proto_v1_search_service_proto_msgTypes[3] + mi := &file_proto_v1_search_service_proto_msgTypes[4] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -265,7 +325,7 @@ func (x *CreateMemeResponse) String() string { func (*CreateMemeResponse) ProtoMessage() {} func (x *CreateMemeResponse) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[3] + mi := &file_proto_v1_search_service_proto_msgTypes[4] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -278,7 +338,7 @@ func (x *CreateMemeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use CreateMemeResponse.ProtoReflect.Descriptor instead. 
func (*CreateMemeResponse) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{3} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{4} } func (x *CreateMemeResponse) GetResult() *MemeDto { @@ -304,7 +364,7 @@ type GetMemeRequest struct { func (x *GetMemeRequest) Reset() { *x = GetMemeRequest{} - mi := &file_proto_v1_search_service_proto_msgTypes[4] + mi := &file_proto_v1_search_service_proto_msgTypes[5] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -316,7 +376,7 @@ func (x *GetMemeRequest) String() string { func (*GetMemeRequest) ProtoMessage() {} func (x *GetMemeRequest) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[4] + mi := &file_proto_v1_search_service_proto_msgTypes[5] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -329,7 +389,7 @@ func (x *GetMemeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use GetMemeRequest.ProtoReflect.Descriptor instead. 
func (*GetMemeRequest) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{4} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{5} } func (x *GetMemeRequest) GetId() string { @@ -348,7 +408,7 @@ type GetMemeResponse struct { func (x *GetMemeResponse) Reset() { *x = GetMemeResponse{} - mi := &file_proto_v1_search_service_proto_msgTypes[5] + mi := &file_proto_v1_search_service_proto_msgTypes[6] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -360,7 +420,7 @@ func (x *GetMemeResponse) String() string { func (*GetMemeResponse) ProtoMessage() {} func (x *GetMemeResponse) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[5] + mi := &file_proto_v1_search_service_proto_msgTypes[6] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -373,7 +433,7 @@ func (x *GetMemeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use GetMemeResponse.ProtoReflect.Descriptor instead. 
func (*GetMemeResponse) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{5} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{6} } func (x *GetMemeResponse) GetResult() *MemeDto { @@ -393,7 +453,7 @@ type DeleteMemeRequest struct { func (x *DeleteMemeRequest) Reset() { *x = DeleteMemeRequest{} - mi := &file_proto_v1_search_service_proto_msgTypes[6] + mi := &file_proto_v1_search_service_proto_msgTypes[7] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -405,7 +465,7 @@ func (x *DeleteMemeRequest) String() string { func (*DeleteMemeRequest) ProtoMessage() {} func (x *DeleteMemeRequest) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[6] + mi := &file_proto_v1_search_service_proto_msgTypes[7] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -418,7 +478,7 @@ func (x *DeleteMemeRequest) ProtoReflect() protoreflect.Message { // Deprecated: Use DeleteMemeRequest.ProtoReflect.Descriptor instead. 
func (*DeleteMemeRequest) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{6} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{7} } func (x *DeleteMemeRequest) GetAccountId() string { @@ -444,7 +504,7 @@ type DeleteMemeResponse struct { func (x *DeleteMemeResponse) Reset() { *x = DeleteMemeResponse{} - mi := &file_proto_v1_search_service_proto_msgTypes[7] + mi := &file_proto_v1_search_service_proto_msgTypes[8] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } @@ -456,7 +516,7 @@ func (x *DeleteMemeResponse) String() string { func (*DeleteMemeResponse) ProtoMessage() {} func (x *DeleteMemeResponse) ProtoReflect() protoreflect.Message { - mi := &file_proto_v1_search_service_proto_msgTypes[7] + mi := &file_proto_v1_search_service_proto_msgTypes[8] if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -469,7 +529,7 @@ func (x *DeleteMemeResponse) ProtoReflect() protoreflect.Message { // Deprecated: Use DeleteMemeResponse.ProtoReflect.Descriptor instead. 
func (*DeleteMemeResponse) Descriptor() ([]byte, []int) { - return file_proto_v1_search_service_proto_rawDescGZIP(), []int{7} + return file_proto_v1_search_service_proto_rawDescGZIP(), []int{8} } func (x *DeleteMemeResponse) GetId() string { @@ -483,7 +543,13 @@ var File_proto_v1_search_service_proto protoreflect.FileDescriptor const file_proto_v1_search_service_proto_rawDesc = "" + "\n" + - "\x1dproto/v1/search_service.proto\x12\x0fproto.memelo.v1\x1a\x15proto/v1/common.proto\"\xb3\x01\n" + + "\x1dproto/v1/search_service.proto\x12\x0fproto.memelo.v1\x1a\x15proto/v1/common.proto\"Z\n" + + "\fMediaDataDto\x12\x1c\n" + + "\as3_path\x18\x01 \x01(\tH\x00R\x06s3Path\x88\x01\x01\x12\x17\n" + + "\x04data\x18\x02 \x01(\fH\x01R\x04data\x88\x01\x01B\n" + + "\n" + + "\b_s3_pathB\a\n" + + "\x05_data\"\xb3\x01\n" + "\x11SearchMemeRequest\x12\x1d\n" + "\n" + "account_id\x18\x01 \x01(\tR\taccountId\x12\x14\n" + @@ -495,11 +561,14 @@ const file_proto_v1_search_service_proto_rawDesc = "" + "_page_size\"m\n" + "\x12SearchMemeResponse\x122\n" + "\aresults\x18\x01 \x03(\v2\x18.proto.memelo.v1.MemeDtoR\aresults\x12#\n" + - "\rsearcher_name\x18\x02 \x01(\tR\fsearcherName\"O\n" + + "\rsearcher_name\x18\x02 \x01(\tR\fsearcherName\"\xba\x01\n" + "\x11CreateMemeRequest\x12\x1d\n" + "\n" + - "account_id\x18\x01 \x01(\tR\taccountId\x12\x1b\n" + - "\traw_image\x18\x02 \x01(\fR\brawImage\"\x81\x01\n" + + "account_id\x18\x01 \x01(\tR\taccountId\x128\n" + + "\x05image\x18\x02 \x01(\v2\x1d.proto.memelo.v1.MediaDataDtoH\x00R\x05image\x88\x01\x01\x128\n" + + "\x05video\x18\x03 \x01(\v2\x1d.proto.memelo.v1.MediaDataDtoH\x01R\x05video\x88\x01\x01B\b\n" + + "\x06_imageB\b\n" + + "\x06_video\"\x81\x01\n" + "\x12CreateMemeResponse\x120\n" + "\x06result\x18\x01 \x01(\v2\x18.proto.memelo.v1.MemeDtoR\x06result\x129\n" + "\x06status\x18\x02 \x01(\x0e2!.proto.memelo.v1.CreateMemeStatusR\x06status\" \n" + @@ -542,41 +611,44 @@ func file_proto_v1_search_service_proto_rawDescGZIP() []byte { } var 
file_proto_v1_search_service_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_proto_v1_search_service_proto_msgTypes = make([]protoimpl.MessageInfo, 8) +var file_proto_v1_search_service_proto_msgTypes = make([]protoimpl.MessageInfo, 9) var file_proto_v1_search_service_proto_goTypes = []any{ (CreateMemeStatus)(0), // 0: proto.memelo.v1.CreateMemeStatus - (*SearchMemeRequest)(nil), // 1: proto.memelo.v1.SearchMemeRequest - (*SearchMemeResponse)(nil), // 2: proto.memelo.v1.SearchMemeResponse - (*CreateMemeRequest)(nil), // 3: proto.memelo.v1.CreateMemeRequest - (*CreateMemeResponse)(nil), // 4: proto.memelo.v1.CreateMemeResponse - (*GetMemeRequest)(nil), // 5: proto.memelo.v1.GetMemeRequest - (*GetMemeResponse)(nil), // 6: proto.memelo.v1.GetMemeResponse - (*DeleteMemeRequest)(nil), // 7: proto.memelo.v1.DeleteMemeRequest - (*DeleteMemeResponse)(nil), // 8: proto.memelo.v1.DeleteMemeResponse - (*MemeDto)(nil), // 9: proto.memelo.v1.MemeDto - (*DeleteAllRequest)(nil), // 10: proto.memelo.v1.DeleteAllRequest - (*DeleteAllResponse)(nil), // 11: proto.memelo.v1.DeleteAllResponse + (*MediaDataDto)(nil), // 1: proto.memelo.v1.MediaDataDto + (*SearchMemeRequest)(nil), // 2: proto.memelo.v1.SearchMemeRequest + (*SearchMemeResponse)(nil), // 3: proto.memelo.v1.SearchMemeResponse + (*CreateMemeRequest)(nil), // 4: proto.memelo.v1.CreateMemeRequest + (*CreateMemeResponse)(nil), // 5: proto.memelo.v1.CreateMemeResponse + (*GetMemeRequest)(nil), // 6: proto.memelo.v1.GetMemeRequest + (*GetMemeResponse)(nil), // 7: proto.memelo.v1.GetMemeResponse + (*DeleteMemeRequest)(nil), // 8: proto.memelo.v1.DeleteMemeRequest + (*DeleteMemeResponse)(nil), // 9: proto.memelo.v1.DeleteMemeResponse + (*MemeDto)(nil), // 10: proto.memelo.v1.MemeDto + (*DeleteAllRequest)(nil), // 11: proto.memelo.v1.DeleteAllRequest + (*DeleteAllResponse)(nil), // 12: proto.memelo.v1.DeleteAllResponse } var file_proto_v1_search_service_proto_depIdxs = []int32{ - 9, // 0: 
proto.memelo.v1.SearchMemeResponse.results:type_name -> proto.memelo.v1.MemeDto - 9, // 1: proto.memelo.v1.CreateMemeResponse.result:type_name -> proto.memelo.v1.MemeDto - 0, // 2: proto.memelo.v1.CreateMemeResponse.status:type_name -> proto.memelo.v1.CreateMemeStatus - 9, // 3: proto.memelo.v1.GetMemeResponse.result:type_name -> proto.memelo.v1.MemeDto - 1, // 4: proto.memelo.v1.SearchService.SearchMeme:input_type -> proto.memelo.v1.SearchMemeRequest - 3, // 5: proto.memelo.v1.SearchService.CreateMeme:input_type -> proto.memelo.v1.CreateMemeRequest - 5, // 6: proto.memelo.v1.SearchService.GetMeme:input_type -> proto.memelo.v1.GetMemeRequest - 7, // 7: proto.memelo.v1.SearchService.DeleteMeme:input_type -> proto.memelo.v1.DeleteMemeRequest - 10, // 8: proto.memelo.v1.SearchService.DeleteAll:input_type -> proto.memelo.v1.DeleteAllRequest - 2, // 9: proto.memelo.v1.SearchService.SearchMeme:output_type -> proto.memelo.v1.SearchMemeResponse - 4, // 10: proto.memelo.v1.SearchService.CreateMeme:output_type -> proto.memelo.v1.CreateMemeResponse - 6, // 11: proto.memelo.v1.SearchService.GetMeme:output_type -> proto.memelo.v1.GetMemeResponse - 8, // 12: proto.memelo.v1.SearchService.DeleteMeme:output_type -> proto.memelo.v1.DeleteMemeResponse - 11, // 13: proto.memelo.v1.SearchService.DeleteAll:output_type -> proto.memelo.v1.DeleteAllResponse - 9, // [9:14] is the sub-list for method output_type - 4, // [4:9] is the sub-list for method input_type - 4, // [4:4] is the sub-list for extension type_name - 4, // [4:4] is the sub-list for extension extendee - 0, // [0:4] is the sub-list for field type_name + 10, // 0: proto.memelo.v1.SearchMemeResponse.results:type_name -> proto.memelo.v1.MemeDto + 1, // 1: proto.memelo.v1.CreateMemeRequest.image:type_name -> proto.memelo.v1.MediaDataDto + 1, // 2: proto.memelo.v1.CreateMemeRequest.video:type_name -> proto.memelo.v1.MediaDataDto + 10, // 3: proto.memelo.v1.CreateMemeResponse.result:type_name -> proto.memelo.v1.MemeDto + 0, // 4: 
proto.memelo.v1.CreateMemeResponse.status:type_name -> proto.memelo.v1.CreateMemeStatus + 10, // 5: proto.memelo.v1.GetMemeResponse.result:type_name -> proto.memelo.v1.MemeDto + 2, // 6: proto.memelo.v1.SearchService.SearchMeme:input_type -> proto.memelo.v1.SearchMemeRequest + 4, // 7: proto.memelo.v1.SearchService.CreateMeme:input_type -> proto.memelo.v1.CreateMemeRequest + 6, // 8: proto.memelo.v1.SearchService.GetMeme:input_type -> proto.memelo.v1.GetMemeRequest + 8, // 9: proto.memelo.v1.SearchService.DeleteMeme:input_type -> proto.memelo.v1.DeleteMemeRequest + 11, // 10: proto.memelo.v1.SearchService.DeleteAll:input_type -> proto.memelo.v1.DeleteAllRequest + 3, // 11: proto.memelo.v1.SearchService.SearchMeme:output_type -> proto.memelo.v1.SearchMemeResponse + 5, // 12: proto.memelo.v1.SearchService.CreateMeme:output_type -> proto.memelo.v1.CreateMemeResponse + 7, // 13: proto.memelo.v1.SearchService.GetMeme:output_type -> proto.memelo.v1.GetMemeResponse + 9, // 14: proto.memelo.v1.SearchService.DeleteMeme:output_type -> proto.memelo.v1.DeleteMemeResponse + 12, // 15: proto.memelo.v1.SearchService.DeleteAll:output_type -> proto.memelo.v1.DeleteAllResponse + 11, // [11:16] is the sub-list for method output_type + 6, // [6:11] is the sub-list for method input_type + 6, // [6:6] is the sub-list for extension type_name + 6, // [6:6] is the sub-list for extension extendee + 0, // [0:6] is the sub-list for field type_name } func init() { file_proto_v1_search_service_proto_init() } @@ -586,13 +658,15 @@ func file_proto_v1_search_service_proto_init() { } file_proto_v1_common_proto_init() file_proto_v1_search_service_proto_msgTypes[0].OneofWrappers = []any{} + file_proto_v1_search_service_proto_msgTypes[1].OneofWrappers = []any{} + file_proto_v1_search_service_proto_msgTypes[3].OneofWrappers = []any{} type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: 
unsafe.Slice(unsafe.StringData(file_proto_v1_search_service_proto_rawDesc), len(file_proto_v1_search_service_proto_rawDesc)), NumEnums: 1, - NumMessages: 8, + NumMessages: 9, NumExtensions: 0, NumServices: 1, }, diff --git a/proto/v1/common.proto b/proto/v1/common.proto index 0d336b0..f7f0b0a 100644 --- a/proto/v1/common.proto +++ b/proto/v1/common.proto @@ -1,21 +1,23 @@ syntax = "proto3"; -option go_package="github.com/weoses/memelo/gen/proto/v1"; +option go_package = "github.com/weoses/memelo/gen/proto/v1"; package proto.memelo.v1; message ImageDto { string url = 1; - int32 width = 2 ; - int32 height = 3; + optional int32 image_width = 2; + optional int32 image_height = 3; } message MemeDto { string id = 1; string ocr_result = 2; ImageDto image_thumbnail = 3; - ImageDto image_original = 4; + ImageDto media_original = 4; + repeated string tags = 5; + string type = 6; } message DeleteAllRequest { diff --git a/proto/v1/export_service.proto b/proto/v1/export_service.proto index 6054167..889e651 100644 --- a/proto/v1/export_service.proto +++ b/proto/v1/export_service.proto @@ -18,13 +18,16 @@ message ExportImageDto { message ExportImageEmbedding { string model = 1; repeated float data = 2; + int32 time_start = 3; + int32 time_end = 4; + string type = 5; } message ExportResponseChunk { - string ocr_result = 1; - ExportImageEmbedding embedding = 2; - ExportImageDto original_image = 3; - ExportImageDto thumbnail_image = 4; + string ocr_result = 1; + repeated ExportImageEmbedding embedding = 2; + ExportImageDto original_image = 3; + ExportImageDto thumbnail_image = 4; } diff --git a/proto/v1/search_service.proto b/proto/v1/search_service.proto index c8b070b..4c3ff74 100644 --- a/proto/v1/search_service.proto +++ b/proto/v1/search_service.proto @@ -11,6 +11,11 @@ enum CreateMemeStatus { STATUS_DUPLICATE = 2; } +message MediaDataDto { + optional string s3_path = 1; + optional bytes data = 2; +} + message SearchMemeRequest { string account_id = 1; string query = 2; @@ -25,7 
+30,8 @@ message SearchMemeResponse { message CreateMemeRequest { string account_id = 1; - bytes raw_image = 2; + optional MediaDataDto image = 2; + optional MediaDataDto video = 3; } message CreateMemeResponse { diff --git a/requests.http b/requests.http index 6a5c14f..4118cef 100644 --- a/requests.http +++ b/requests.http @@ -1,3 +1,28 @@ + +### GET index (img) +GET http://localhost:9200/melo-image-metadata +Accept: application/json +Authorization: basic ZWxhc3RpYzplbGFzdGlj + + +### Search index (img) +GET http://localhost:9200/melo-image-metadata/_search +Accept: application/json +Authorization: basic ZWxhc3RpYzplbGFzdGlj + + +### GET index (tags) +GET http://localhost:9200/melo-tags +Accept: application/json +Authorization: basic ZWxhc3RpYzplbGFzdGlj + + +### Search index (tags) +GET http://localhost:9200/melo-tags/_search +Accept: application/json +Authorization: basic ZWxhc3RpYzplbGFzdGlj + + ### Search by text query GRPC localhost:7001/proto.memelo.v1.SearchService/SearchMeme @@ -54,3 +79,10 @@ GRPC localhost:7001/proto.memelo.v1.RecomputeService/RecomputeOcrData { } + +### +GRPC localhost:7001/proto.memelo.v1.SearchService/DeleteAll + +{ +"account_id": "00000000-0000-0000-0000-000000000000" +} \ No newline at end of file diff --git a/storage-service/.env.example b/storage-service/.env.example index ce3f28a..453d6c0 100644 --- a/storage-service/.env.example +++ b/storage-service/.env.example @@ -1,13 +1,24 @@ -IMAGE_EMBEDDING_PROJECTNAME= +MEDIA_EMBEDDING_PROJECTNAME= +AUDIO_STT_PROJECTNAME= -IMAGE_STORAGE_S3_ENDPOINT=localhost:9000 -IMAGE_STORAGE_S3_ACCESSKEY=minio123 -IMAGE_STORAGE_S3_SECRETKEY=minio123 +MEDIA_STORAGE_ENDPOINT=localhost:9000 +MEDIA_STORAGE_ACCESSKEY=minio123 +MEDIA_STORAGE_SECRETKEY=minio123 +MEDIA_STORAGE_BUCKET= -METADATA_STORAGE_ELASTIC_ADDRESSES=http://localhost:9200 -METADATA_STORAGE_ELASTIC_USERNAME=elastic -METADATA_STORAGE_ELASTIC_PASSWORD=elastic +TEMP_STORAGE_ENDPOINT=localhost:9000 +TEMP_STORAGE_ACCESSKEY=minio123 
+TEMP_STORAGE_SECRETKEY=minio123 +TEMP_STORAGE_BUCKET= -TAG_STORAGE_ELASTIC_ADDRESSES=http://localhost:9200 -TAG_STORAGE_ELASTIC_USERNAME=elastic -TAG_STORAGE_ELASTIC_PASSWORD=elastic \ No newline at end of file +METADATA_DB_ELASTIC_ADDRESSES=http://localhost:9200 +METADATA_DB_ELASTIC_USERNAME=elastic +METADATA_DB_ELASTIC_PASSWORD=elastic + +TAG_DB_ELASTIC_ADDRESSES=http://localhost:9200 +TAG_DB_ELASTIC_USERNAME=elastic +TAG_DB_ELASTIC_PASSWORD=elastic + +FFMPEG_BINARY= +FFMPEG_CPULIMIT= +FFMPEG_THREADSLIMIT= \ No newline at end of file diff --git a/storage-service/api/ExportGrpcApi.go b/storage-service/api/ExportGrpcApi.go index b1d536f..3f2bc09 100644 --- a/storage-service/api/ExportGrpcApi.go +++ b/storage-service/api/ExportGrpcApi.go @@ -5,7 +5,7 @@ import ( "fmt" "log/slog" - connect "connectrpc.com/connect" + "connectrpc.com/connect" "github.com/google/uuid" "github.com/weoses/memelo/common/helper" v1 "github.com/weoses/memelo/gen/proto/v1" @@ -49,10 +49,14 @@ func (e ExportServiceApi) ExportImages(ctx context.Context, request *v1.ExportRe }, } - if item.Metadata.EmbeddingV1 != nil { - chunk.Embedding = &v1.ExportImageEmbedding{ - Data: *item.Metadata.EmbeddingV1.Data, - Model: item.Metadata.EmbeddingV1.Model, + chunk.Embedding = make([]*v1.ExportImageEmbedding, len(item.Metadata.EmbeddingList)) + for i, emb := range item.Metadata.EmbeddingList { + chunk.Embedding[i] = &v1.ExportImageEmbedding{ + Data: emb.Data, + Model: emb.Model, + Type: emb.Type, + TimeStart: int32(emb.TimeStart), + TimeEnd: int32(emb.TimeEnd), } } diff --git a/storage-service/api/RecomputeGrpcApi.go b/storage-service/api/RecomputeGrpcApi.go index 7e66bdc..a858a52 100644 --- a/storage-service/api/RecomputeGrpcApi.go +++ b/storage-service/api/RecomputeGrpcApi.go @@ -5,7 +5,7 @@ import ( "fmt" "log/slog" - connect "connectrpc.com/connect" + "connectrpc.com/connect" "github.com/weoses/memelo/common/helper" v1 "github.com/weoses/memelo/gen/proto/v1" 
"github.com/weoses/memelo/gen/proto/v1/v1connect" diff --git a/storage-service/api/SearchGrpcApi.go b/storage-service/api/SearchGrpcApi.go index 90d4f5d..9b001f4 100644 --- a/storage-service/api/SearchGrpcApi.go +++ b/storage-service/api/SearchGrpcApi.go @@ -9,15 +9,20 @@ import ( "connectrpc.com/connect" "github.com/google/uuid" "github.com/weoses/memelo/common/helper" + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/common/temp" + v1 "github.com/weoses/memelo/gen/proto/v1" "github.com/weoses/memelo/gen/proto/v1/v1connect" + "github.com/weoses/memelo/storage-service/entity" "github.com/weoses/memelo/storage-service/key" "github.com/weoses/memelo/storage-service/service" ) type SearchServiceApi struct { - crud service.MemeCrudService - slogger *slog.Logger + crud service.MemeCrudService + dataService commonservice.TmpDataService + slogger *slog.Logger } func (api *SearchServiceApi) DeleteAll(ctx context.Context, request *v1.DeleteAllRequest) (*v1.DeleteAllResponse, error) { @@ -69,19 +74,21 @@ func (api *SearchServiceApi) metadataToMemeDto(urls *service.MetadataWithUrls) * dto := &v1.MemeDto{ Id: urls.Metadata.ImageId.String(), OcrResult: urls.Metadata.Result, - ImageOriginal: &v1.ImageDto{ + MediaOriginal: &v1.ImageDto{ Url: urls.UrlOriginal, }, ImageThumbnail: &v1.ImageDto{ - Url: urls.UrlThumb, - Width: int32(urls.Metadata.ThumbSize.Width), - Height: int32(urls.Metadata.ThumbSize.Height), + Url: urls.UrlThumb, + ImageWidth: helper.Addr(int32(urls.Metadata.ThumbSize.Width)), + ImageHeight: helper.Addr(int32(urls.Metadata.ThumbSize.Height)), }, + Tags: urls.Metadata.Tags, + Type: string(urls.Metadata.Type), } if urls.Metadata.ImageSize != nil { - dto.ImageOriginal.Width = int32(urls.Metadata.ImageSize.Width) - dto.ImageOriginal.Height = int32(urls.Metadata.ImageSize.Height) + dto.MediaOriginal.ImageWidth = helper.Addr(int32(urls.Metadata.ImageSize.Width)) + dto.MediaOriginal.ImageHeight = 
helper.Addr(int32(urls.Metadata.ImageSize.Height)) } return dto @@ -140,8 +147,28 @@ func (api *SearchServiceApi) CreateMeme(ctx context.Context, req *v1.CreateMemeR return nil, fmt.Errorf("error parsing AccountId: %w", err) } - meme, err := api.crud.CreateMeme(ctx, accountIdUuid, req.RawImage) - if err != nil { + var meme *service.CreateResult + var data temp.S3BackedData + var metadataType entity.MetadataType + if req.GetImage() != nil { + metadataType = entity.ImageMetadataType + data, err = api.toData(ctx, req.GetImage()) + if err != nil { + return nil, fmt.Errorf("error reading image: %w", err) + } + } else if req.GetVideo() != nil { + metadataType = entity.VideoMetadataType + data, err = api.toData(ctx, req.GetVideo()) + if err != nil { + return nil, fmt.Errorf("error reading video: %w", err) + } + } + + defer helper.QuietClose(data, api.slogger) + meme, err = api.crud.CreateMeme(ctx, accountIdUuid, metadataType, data) + + defer helper.QuietClose(data, api.slogger) + if err != nil || meme == nil { api.slogger.ErrorContext(ctx, "CreateMeme error", "err", err) return nil, err } @@ -160,9 +187,31 @@ func (api *SearchServiceApi) GetMeme(context.Context, *v1.GetMemeRequest) (*v1.G return nil, connect.NewError(connect.CodeUnimplemented, errors.New("proto.memelo.v1.SearchService.GetMeme is not implemented")) } -func NewSearchServiceApi(crud service.MemeCrudService) v1connect.SearchServiceHandler { +func (api *SearchServiceApi) toData(ctx context.Context, media *v1.MediaDataDto) (temp.S3BackedData, error) { + if media.GetS3Path() != "" { + result, err := api.dataService.WrapS3Path(ctx, media.GetS3Path()) + if err != nil { + return nil, fmt.Errorf("failed to create backed temp by s3 path: %w", err) + } + return result, nil + } + + if media.GetData() != nil { + data, err := api.dataService.ByBytes(ctx, media.GetData()) + if err != nil { + return nil, fmt.Errorf("failed to get data from bytes: %w", err) + } + return data, nil + } + + return nil, errors.New("media temp 
is empty") +} + +func NewSearchServiceApi(crud service.MemeCrudService, dataService commonservice.TmpDataService) v1connect.SearchServiceHandler { return &SearchServiceApi{ - crud: crud, + crud: crud, + dataService: dataService, + slogger: slog.With("service", "SearchServiceApi"), } } diff --git a/storage-service/conf/config.go b/storage-service/conf/config.go index d34f345..e419c01 100644 --- a/storage-service/conf/config.go +++ b/storage-service/conf/config.go @@ -1,33 +1,34 @@ package conf import ( + "fmt" + "time" + elasticsearch8 "github.com/elastic/go-elasticsearch/v8" "github.com/spf13/viper" + commonconfig "github.com/weoses/memelo/common/config" ) -type MetadataStorageConfig struct { - Elastic *elasticsearch8.Config - Index string - EmbeddingV1Dimensions int - EmbeddingMatchTreshold float64 +type CommonEmbeddingsConfig struct { + Dimensions int + VideoEmbeddingIntervalSec int } -type ElasticTagConfig struct { - Elastic *elasticsearch8.Config - Index string - EmbeddingV1Dimensions int +type SearchConfig struct { + SemanticDuplicateThreshold float64 + SemanticTextSearchThreshold float64 + Fuzziness string } -type ImageS3StorageConfig struct { - Endpoint string - AccessKey string - SecretKey string - Bucket string - Secure bool +type MetadataDbConfig struct { + Elastic *elasticsearch8.Config + Index string + EmbeddingMatchTreshold float64 } -type ImageStorageConfig struct { - S3 *ImageS3StorageConfig +type TagDbConfig struct { + Elastic *elasticsearch8.Config + Index string } type ImageConverterConfig struct { @@ -35,11 +36,10 @@ type ImageConverterConfig struct { ThumbSize int } -type ImageEmbeddingConfig struct { +type MediaEmbedderConfig struct { ApiEndpoint string ApiLocation string ProjectName string - Dimension int Model string } @@ -47,38 +47,42 @@ type ImageOcrConfig struct { ApiEndpoint string } -func NewImageConverterConfig() (*ImageConverterConfig, error) { - conf := new(ImageConverterConfig) - err := viper.UnmarshalKey("image-converter", conf) - 
return conf, err -} - -func NewImageEmbeddingConfig() (*ImageEmbeddingConfig, error) { - conf := new(ImageEmbeddingConfig) - err := viper.UnmarshalKey("image-embedding", conf) - return conf, err -} - -func NewMetadataStorageConfig() (*MetadataStorageConfig, error) { - conf := &MetadataStorageConfig{} - err := viper.UnmarshalKey("metadata-storage", conf) - return conf, err +type AudioSttConfig struct { + ApiEndpoint string + ApiLocation string + ProjectName string + Recognizer string + Model string + LanguageCodes []string } -func NewImageStorageConfig() (*ImageStorageConfig, error) { - conf := &ImageStorageConfig{} - err := viper.UnmarshalKey("image-storage", conf) - return conf, err +type FfmpegConfig struct { + Binary string + CpuLimit int + ThreadsLimit int } -func NewImageOcrConfig() (*ImageOcrConfig, error) { - conf := &ImageOcrConfig{} - err := viper.UnmarshalKey("image-ocr", conf) - return conf, err +type Config struct { + Server *commonconfig.ServerConfig `mapstructure:"server"` + Log *commonconfig.LoggingConfig `mapstructure:"log"` + Search *SearchConfig `mapstructure:"search"` + Embeddings *CommonEmbeddingsConfig `mapstructure:"embeddings"` + MediaStorage *commonconfig.MediaStorageConfig `mapstructure:"media-storage"` + TempStorage *commonconfig.MediaStorageConfig `mapstructure:"temp-storage"` + TempStorageExpiry time.Duration `mapstructure:"temp-storage-expiry"` + MetadataDb *MetadataDbConfig `mapstructure:"metadata-db"` + TagDb *TagDbConfig `mapstructure:"tag-db"` + MediaEmbedding *MediaEmbedderConfig `mapstructure:"media-embedding"` + ImageConverter *ImageConverterConfig `mapstructure:"image-converter"` + ImageOcr *ImageOcrConfig `mapstructure:"image-ocr"` + AudioStt *AudioSttConfig `mapstructure:"audio-stt"` + Ffmpeg *FfmpegConfig `mapstructure:"ffmpeg"` } -func NewElasticTagConfig() (*ElasticTagConfig, error) { - conf := &ElasticTagConfig{} - err := viper.UnmarshalKey("tag-storage", conf) - return conf, err +func NewConfig() (*Config, error) { + cfg 
:= &Config{} + if err := viper.Unmarshal(cfg); err != nil { + return nil, fmt.Errorf("error reading config: %w", err) + } + return cfg, nil } diff --git a/storage-service/config.yaml b/storage-service/config.yaml index 0637037..7a8fbe7 100644 --- a/storage-service/config.yaml +++ b/storage-service/config.yaml @@ -4,40 +4,73 @@ log: server: ListenAddress: :7001 -image-storage: - S3: - Endpoint: - AccessKey: - SecretKey: - Bucket: melo-images - Secure: false - -metadata-storage: +embeddings: + Dimensions: 1408 + VideoEmbeddingIntervalSec: 10 + +search: + SemanticDuplicateThreshold: 0.955 + SemanticTextSearchThreshold: 0.5 + Fuzziness: "AUTO:4,8" + + +media-storage: + Endpoint: + AccessKey: + SecretKey: + Bucket: melo-images + Secure: false + +temp-storage: + Endpoint: + AccessKey: + SecretKey: + Bucket: melo-temp + Secure: false + +temp-storage-expiry: 24h + +metadata-db: Elastic: Addresses: - Username: - Password: - Index: melo-image-metadata - EmbeddingV1Dimensions: 1408 - EmbeddingMatchTreshold: 0.955 + Username: + Password: + CloudID: + ApiKey: + Index: melo-media-metadata -tag-storage: +tag-db: Elastic: Addresses: Username: Password: + CloudID: + ApiKey: Index: melo-tags - EmbeddingV1Dimensions: 1408 -image-embedding: +media-embedding: ApiEndpoint: us-central1-aiplatform.googleapis.com:443 ApiLocation: us-central1 ProjectName: Model: multimodalembedding@001 - Dimension: 1408 + +audio-stt: + ApiEndpoint: us-speech.googleapis.com:443 + ApiLocation: us + ProjectName: + Recognizer: "_" + Model: chirp_3 + LanguageCodes: + - 'en-US' + - 'ru-RU' image-converter: ThumbSize: 360 image-ocr: - ApiEndpoint: vision.googleapis.com:443 \ No newline at end of file + ApiEndpoint: vision.googleapis.com:443 + +ffmpeg: + Binary: ffmpeg + CpuLimit: 0 + ThreadsLimit: 0 \ No newline at end of file diff --git a/storage-service/entity/ElasticImageMetaData.go b/storage-service/entity/ElasticImageMetaData.go deleted file mode 100644 index 9574d7f..0000000 --- 
a/storage-service/entity/ElasticImageMetaData.go +++ /dev/null @@ -1,28 +0,0 @@ -package entity - -import "github.com/google/uuid" - -type ElasticSizes struct { - Width int `validator:"required"` - Height int `validator:"required"` -} - -type ElasticImageMetaData struct { - ImageId uuid.UUID `validator:"required"` - S3Id uuid.UUID `validator:"required"` - AccountId uuid.UUID `validator:"required"` - Hash string - HashV2 []string - Result string - ThumbSize *ElasticSizes `validator:"required"` - ImageSize *ElasticSizes - Created int64 `validator:"required"` - Updated int64 - EmbeddingV1 *ElasticEmbeddingV1 `validator:"required"` - Tags []string -} - -type ElasticEmbeddingV1 struct { - Data *[]float32 `validator:"required"` - Model string `validator:"required"` -} diff --git a/storage-service/entity/ElasticMetaData.go b/storage-service/entity/ElasticMetaData.go new file mode 100644 index 0000000..1a5c95e --- /dev/null +++ b/storage-service/entity/ElasticMetaData.go @@ -0,0 +1,26 @@ +package entity + +import "github.com/google/uuid" + +type MetadataType string + +const ImageMetadataType MetadataType = "image" +const VideoMetadataType MetadataType = "video" + +type ElasticImageMetaData struct { + ImageId uuid.UUID `validator:"required"` + S3Id uuid.UUID `validator:"required"` + AccountId uuid.UUID `validator:"required"` + Hash string + Result string + ThumbSize *Sizes `validator:"required"` + ImageSize *Sizes + Created int64 `validator:"required"` + Updated int64 + + EmbeddingList []EmbeddingItem + + Tags []string + + Type MetadataType `validator:"required"` +} diff --git a/storage-service/entity/ElasticTag.go b/storage-service/entity/ElasticTag.go index 0da234d..499bcb9 100644 --- a/storage-service/entity/ElasticTag.go +++ b/storage-service/entity/ElasticTag.go @@ -7,7 +7,8 @@ type ElasticTag struct { AccountId uuid.UUID Tag string Description string - EmbeddingV1 *ElasticEmbeddingV1 - Created int64 - Updated int64 + + Embedding *EmbeddingItem + Created int64 + 
Updated int64 } diff --git a/storage-service/entity/Nested.go b/storage-service/entity/Nested.go new file mode 100644 index 0000000..2bad885 --- /dev/null +++ b/storage-service/entity/Nested.go @@ -0,0 +1,21 @@ +package entity + +const ( + EmbeddingTypeAudio = "audio" + EmbeddingTypeVideo = "video" + EmbeddingTypeImage = "image" + EmbeddingTypeText = "text" +) + +type EmbeddingItem struct { + Data []float32 `validator:"required"` + Model string `validator:"required"` + TimeStart int + TimeEnd int + Type string `validator:"required"` +} + +type Sizes struct { + Width int `validator:"required"` + Height int `validator:"required"` +} diff --git a/storage-service/go.mod b/storage-service/go.mod index 279658c..7a8cffa 100644 --- a/storage-service/go.mod +++ b/storage-service/go.mod @@ -1,11 +1,13 @@ module github.com/weoses/memelo/storage-service -go 1.24.10 +go 1.25.0 require ( cloud.google.com/go/aiplatform v1.117.0 + cloud.google.com/go/speech v1.30.0 cloud.google.com/go/vision v1.2.0 connectrpc.com/connect v1.19.1 + github.com/agnivade/levenshtein v1.2.1 github.com/google/uuid v1.6.0 github.com/h2non/bimg v1.1.9 github.com/pkg/errors v0.9.1 diff --git a/storage-service/go.sum b/storage-service/go.sum index 29d2c59..cbc3e6c 100644 --- a/storage-service/go.sum +++ b/storage-service/go.sum @@ -55,6 +55,8 @@ cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2k cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= +cloud.google.com/go/speech v1.30.0 h1:R+KGIbRMrj8jA4U6Qea8hqCMsAEdg576ShNsmRr4gcQ= +cloud.google.com/go/speech v1.30.0/go.mod h1:F2+NJujR8uzDLd6bwy5kgtVycxvEq06nzvzz5eQ/gMo= cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= cloud.google.com/go/storage v1.5.0/go.mod 
h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= @@ -70,7 +72,11 @@ dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/agnivade/levenshtein v1.2.1 h1:EHBY3UOn1gwdy/VbFwgo4cxecRznFk7fKWN1KOX7eoM= +github.com/agnivade/levenshtein v1.2.1/go.mod h1:QVVI16kDrtSuwcpd0p1+xMC6Z/VfhtCyDIjcwga4/DU= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= +github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q= +github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -94,6 +100,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54 h1:SG7nF6SRlWhcT7cNTs5R6Hk4V2lcmLz2NsG2VnInyNo= +github.com/dgryski/trifles v0.0.0-20230903005119-f50d829f2e54/go.mod h1:if7Fbed8SFyPtHLHbg49SI7NAdJiC5WIA09pe59rfAA= 
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/elastic/elastic-transport-go/v8 v8.6.0 h1:Y2S/FBjx1LlCv5m6pWAF2kDJAHoSjSRSJCApolgfthA= diff --git a/storage-service/main.go b/storage-service/main.go index aac70af..37685ed 100644 --- a/storage-service/main.go +++ b/storage-service/main.go @@ -6,6 +6,7 @@ import ( "log/slog" "net" "net/http" + "time" "github.com/go-playground/validator/v10" "github.com/weoses/memelo/common/config" @@ -13,6 +14,7 @@ import ( "github.com/weoses/memelo/storage-service/api" "github.com/weoses/memelo/storage-service/conf" "github.com/weoses/memelo/storage-service/ocr" + "github.com/weoses/memelo/storage-service/ocr/ffmpeg" "github.com/weoses/memelo/storage-service/ocr/gapi" "github.com/weoses/memelo/storage-service/service" storage2 "github.com/weoses/memelo/storage-service/storage" @@ -24,37 +26,55 @@ import ( func main() { config.InitConfig() - loggingConfig, err := config.NewLoggingConfig() + cfg, err := conf.NewConfig() if err != nil { log.Fatal(err) } - config.InitLogs(loggingConfig) + config.InitLogs(cfg.Log) fx.New( fx.WithLogger(func() fxevent.Logger { return &fxevent.SlogLogger{Logger: slog.With()} }), fx.Provide(NewValidator), - fx.Provide(config.NewServerConfig), - - fx.Provide(conf.NewImageEmbeddingConfig), - fx.Provide(conf.NewImageConverterConfig), - fx.Provide(conf.NewImageStorageConfig), - fx.Provide(conf.NewMetadataStorageConfig), - fx.Provide(conf.NewImageOcrConfig), - fx.Provide(conf.NewElasticTagConfig), + fx.Supply(cfg), + fx.Provide(func(c *conf.Config) *conf.FfmpegConfig { return c.Ffmpeg }), fx.Provide(gapi.NewOcrProcessor), fx.Provide(ocr.NewImageConverter), fx.Provide(gapi.NewImageEmbeddingExtractor), + fx.Provide(gapi.NewAudio2TextExtractor), + fx.Provide(ffmpeg.NewVideo2Mp4Converter), + fx.Provide(ffmpeg.NewVideo2FrameExtractor), + fx.Provide(ffmpeg.NewVideo2AudioExtractor), 
fx.Provide(storage2.NewElasticTagStorage), + fx.Provide( + fx.Annotate( + func(s storage2.ElasticTagStorage) storage2.ElasticMigrating { + return s.(storage2.ElasticMigrating) + }, + fx.ResultTags(`group:"migrators"`), + ), + ), fx.Provide(service.NewTagMetadataExtractService), fx.Provide(service.NewTagService), fx.Provide(api.NewTagsGrpcApi), fx.Provide(storage2.NewMetadataStorageService), - fx.Provide(storage2.NewImageStorageService), + fx.Provide( + fx.Annotate( + func(s storage2.MetadataStorageService) storage2.ElasticMigrating { + return s.(storage2.ElasticMigrating) + }, + fx.ResultTags(`group:"migrators"`), + ), + ), + + fx.Provide(storage2.NewMediaStorageServiceS3Adapter), + fx.Provide(storage2.NewMediaStorageService), + fx.Provide(storage2.NewTmpDataServiceS3Adapter), + fx.Provide(storage2.NewTmpDataService), fx.Provide(service.NewExportService), fx.Provide(service.NewMemeCrudService), @@ -70,19 +90,19 @@ func main() { ), fx.Provide( fx.Annotate( - service.NewCheckDuplicateByHashPipelineStep, + service.NewImageCheckDuplicateByHashPipelineStep, fx.ResultTags(`group:"pipeline_steps"`), ), ), fx.Provide( fx.Annotate( - service.NewToJpegPipelineStep, + service.NewImageToJpegPipelineStep, fx.ResultTags(`group:"pipeline_steps"`), ), ), fx.Provide( fx.Annotate( - service.NewCalcEmbeddingPipelineStep, + service.NewImageCalcEmbeddingPipelineStep, fx.ResultTags(`group:"pipeline_steps"`), ), ), @@ -94,19 +114,19 @@ func main() { ), fx.Provide( fx.Annotate( - service.NewOcrImagePipelineStep, + service.NewImageOcrImagePipelineStep, fx.ResultTags(`group:"pipeline_steps"`), ), ), fx.Provide( fx.Annotate( - service.NewCreateThumbnailPipelineStep, + service.NewImageCreateThumbnailPipelineStep, fx.ResultTags(`group:"pipeline_steps"`), ), ), fx.Provide( fx.Annotate( - service.NewCalcSizesPipelineStep, + service.NewImageCalcSizesPipelineStep, fx.ResultTags(`group:"pipeline_steps"`), ), ), @@ -116,6 +136,48 @@ func main() { fx.ResultTags(`group:"pipeline_steps"`), ), ), + 
fx.Provide( + fx.Annotate( + service.NewVidToMp4PipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), + fx.Provide( + fx.Annotate( + service.NewVidExtractFramesPipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), + fx.Provide( + fx.Annotate( + service.NewVidExtractAudioPipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), + fx.Provide( + fx.Annotate( + service.NewVidCalcEmbeddingsPipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), + fx.Provide( + fx.Annotate( + service.NewVidSttPipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), + fx.Provide( + fx.Annotate( + service.NewVidOcrFramesPipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), + fx.Provide( + fx.Annotate( + service.NewVidCreateThumbnailPipelineStep, + fx.ResultTags(`group:"pipeline_steps"`), + ), + ), fx.Provide( fx.Annotate( service.NewImageMetadataExtractService, @@ -163,6 +225,12 @@ func main() { fx.Provide(api.NewSearchServiceApi), fx.Provide(api.NewExportServiceApi), fx.Provide(NewHealthCheck), + fx.Invoke( + fx.Annotate( + storage2.RunMigrations, + fx.ParamTags(`group:"migrators"`), + ), + ), fx.Invoke(Startup), ).Run() } @@ -174,7 +242,7 @@ func Startup( tagsApi v1connect.TagsServiceHandler, recomputeApi v1connect.RecomputeServiceHandler, check *HealthCheck, - cfg *config.ServerConfig, + cfg *conf.Config, ) { mux := http.NewServeMux() pathSearch, handlerSearch := v1connect.NewSearchServiceHandler(searchApi) @@ -189,8 +257,9 @@ func Startup( mux.Handle("/health", check) srv := &http.Server{ - Addr: cfg.ListenAddress, - Handler: h2c.NewHandler(mux, &http2.Server{}), + Addr: cfg.Server.ListenAddress, + Handler: h2c.NewHandler(mux, &http2.Server{}), + WriteTimeout: time.Second * 300, } lc.Append(fx.Hook{ diff --git a/storage-service/ocr/Audio2TextExtractor.go b/storage-service/ocr/Audio2TextExtractor.go new file mode 100644 index 0000000..065c107 --- /dev/null +++ b/storage-service/ocr/Audio2TextExtractor.go @@ -0,0 +1,11 
@@ +package ocr + +import ( + "context" + + "github.com/weoses/memelo/common/temp" +) + +type Audio2TextExtractor interface { + Transcript(ctx context.Context, audio temp.Data) (string, error) +} diff --git a/storage-service/ocr/EmbeddingExtractor.go b/storage-service/ocr/EmbeddingExtractor.go index c5df89b..7ecca32 100644 --- a/storage-service/ocr/EmbeddingExtractor.go +++ b/storage-service/ocr/EmbeddingExtractor.go @@ -3,10 +3,12 @@ package ocr import ( "context" + "github.com/weoses/memelo/common/temp" "github.com/weoses/memelo/storage-service/entity" ) type EmbeddingExtractor interface { - GetImageEmbeddingV1(ctx context.Context, image []byte) (*entity.ElasticEmbeddingV1, error) - GetTextEmbeddingV1(ctx context.Context, text string) (*entity.ElasticEmbeddingV1, error) + GetImageEmbedding(ctx context.Context, image temp.Data) (*entity.EmbeddingItem, error) + GetTextEmbedding(ctx context.Context, text string) (*entity.EmbeddingItem, error) + GetVideoEmbedding(ctx context.Context, video temp.Data) ([]*entity.EmbeddingItem, error) } diff --git a/storage-service/ocr/Image2TextExtractor.go b/storage-service/ocr/Image2TextExtractor.go new file mode 100644 index 0000000..8eba25f --- /dev/null +++ b/storage-service/ocr/Image2TextExtractor.go @@ -0,0 +1,12 @@ +package ocr + +import ( + "context" + + "github.com/weoses/memelo/common/temp" +) + +type Image2TextExtractor interface { + GetName() string + DoOcr(ctx context.Context, image temp.Data) (string, error) +} diff --git a/storage-service/ocr/ImageConverter.go b/storage-service/ocr/ImageConverter.go index d2ce5bc..7baf185 100644 --- a/storage-service/ocr/ImageConverter.go +++ b/storage-service/ocr/ImageConverter.go @@ -6,21 +6,28 @@ import ( "github.com/h2non/bimg" "github.com/pkg/errors" + + "github.com/weoses/memelo/common/temp" "github.com/weoses/memelo/storage-service/conf" ) type ImageConveter interface { - ProcessOriginalImage(ctx context.Context, rawImage []byte) ([]byte, error) - MakeThumbnail(ctx 
context.Context, rawImage []byte) ([]byte, error) - GetSize(ctx context.Context, rawImage []byte) (int, int, error) + Convert2Jpeg(ctx context.Context, rawImage temp.Data) (temp.Data, error) + MakeThumbnail(ctx context.Context, rawImage temp.Data) (temp.Data, error) + GetSize(ctx context.Context, rawImage temp.Data) (int, int, error) } type ImageConveterImpl struct { config *conf.ImageConverterConfig } -func (i *ImageConveterImpl) GetSize(ctx context.Context, rawImage []byte) (int, int, error) { - img := bimg.NewImage(rawImage) +func (i *ImageConveterImpl) GetSize(ctx context.Context, rawImage temp.Data) (int, int, error) { + imgData, err := rawImage.ReadAll() + if err != nil { + return 0, 0, fmt.Errorf("error reading image bytes: %w", err) + } + + img := bimg.NewImage(imgData) size, err := img.Size() if err != nil { return 0, 0, fmt.Errorf("error getting size of image: %w", err) @@ -28,18 +35,27 @@ func (i *ImageConveterImpl) GetSize(ctx context.Context, rawImage []byte) (int, return size.Width, size.Height, nil } -func (i *ImageConveterImpl) ProcessOriginalImage(ctx context.Context, rawImage []byte) ([]byte, error) { - img := bimg.NewImage(rawImage) +func (i *ImageConveterImpl) Convert2Jpeg(ctx context.Context, rawImage temp.Data) (temp.Data, error) { + imgData, err := rawImage.ReadAll() + if err != nil { + return nil, fmt.Errorf("error reading image bytes: %w", err) + } + + img := bimg.NewImage(imgData) bytesData, err := img.Convert(bimg.JPEG) if err != nil { return nil, errors.Wrap(err, "Image Convert() to JPEG failed") } - return bytesData, nil + return temp.DataBytes(bytesData), nil } -func (i *ImageConveterImpl) MakeThumbnail(ctx context.Context, rawImage []byte) ([]byte, error) { - img := bimg.NewImage(rawImage) +func (i *ImageConveterImpl) MakeThumbnail(ctx context.Context, rawImage temp.Data) (temp.Data, error) { + imgData, err := rawImage.ReadAll() + if err != nil { + return nil, fmt.Errorf("error reading image bytes: %w", err) + } + img := 
bimg.NewImage(imgData) size, err := img.Size() if err != nil { return nil, fmt.Errorf("error getting size of image: %w", err) @@ -53,11 +69,11 @@ func (i *ImageConveterImpl) MakeThumbnail(ctx context.Context, rawImage []byte) return nil, errors.Wrap(err, "Image Resize() failed") } - return bytesData, nil + return temp.DataBytes(bytesData), nil } -func NewImageConverter(config *conf.ImageConverterConfig) (ImageConveter, error) { +func NewImageConverter(cfg *conf.Config) (ImageConveter, error) { return &ImageConveterImpl{ - config: config, + config: cfg.ImageConverter, }, nil } diff --git a/storage-service/ocr/TextExtractor.go b/storage-service/ocr/TextExtractor.go deleted file mode 100644 index 5a50b83..0000000 --- a/storage-service/ocr/TextExtractor.go +++ /dev/null @@ -1,8 +0,0 @@ -package ocr - -import "context" - -type TextExtractor interface { - GetName() string - DoOcr(ctx context.Context, image []byte) (string, error) -} diff --git a/storage-service/ocr/Video2AudioExtractor.go b/storage-service/ocr/Video2AudioExtractor.go new file mode 100644 index 0000000..86d2b30 --- /dev/null +++ b/storage-service/ocr/Video2AudioExtractor.go @@ -0,0 +1,11 @@ +package ocr + +import ( + "context" + + "github.com/weoses/memelo/common/temp" +) + +type Video2AudioExtractor interface { + ExtractAudio(ctx context.Context, video temp.Data) (temp.Data, error) +} diff --git a/storage-service/ocr/Video2FrameExtractor.go b/storage-service/ocr/Video2FrameExtractor.go new file mode 100644 index 0000000..a3c98f2 --- /dev/null +++ b/storage-service/ocr/Video2FrameExtractor.go @@ -0,0 +1,11 @@ +package ocr + +import ( + "context" + + "github.com/weoses/memelo/common/temp" +) + +type Video2FrameExtractor interface { + ExtractFrames(ctx context.Context, video temp.Data) ([]temp.Data, error) +} diff --git a/storage-service/ocr/Video2Mp4Converter.go b/storage-service/ocr/Video2Mp4Converter.go new file mode 100644 index 0000000..c14f0dc --- /dev/null +++ 
b/storage-service/ocr/Video2Mp4Converter.go @@ -0,0 +1,11 @@ +package ocr + +import ( + "context" + + "github.com/weoses/memelo/common/temp" +) + +type Video2Mp4Converter interface { + ConvertToMp4(ctx context.Context, video temp.Data) (temp.Data, error) +} diff --git a/storage-service/ocr/ffmpeg/FfmpegVideo2AudioExtractor.go b/storage-service/ocr/ffmpeg/FfmpegVideo2AudioExtractor.go new file mode 100644 index 0000000..df0317b --- /dev/null +++ b/storage-service/ocr/ffmpeg/FfmpegVideo2AudioExtractor.go @@ -0,0 +1,93 @@ +package ffmpeg + +import ( + "context" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/conf" + "github.com/weoses/memelo/storage-service/ocr" +) + +type Video2AudioExtractorImpl struct { + slogger *slog.Logger + cfg *conf.FfmpegConfig +} + +var _ ocr.Video2AudioExtractor = (*Video2AudioExtractorImpl)(nil) + +func (f *Video2AudioExtractorImpl) ExtractAudio(ctx context.Context, video temp.Data) (temp.Data, error) { + f.slogger.InfoContext(ctx, "ExtractAudio: start") + + dir, err := os.MkdirTemp("", "video2audio-*") + if err != nil { + return nil, fmt.Errorf("ExtractAudio: create temp dir: %w", err) + } + defer func(path string) { + errRemoveAll := os.RemoveAll(path) + if errRemoveAll != nil { + f.slogger.WarnContext(ctx, "ExtractAudio: remove temp dir failed: ", "error", errRemoveAll) + } + }(dir) + + ffmpegInputPath := filepath.Join(dir, "input.mp4") + ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600) + if err != nil { + return nil, fmt.Errorf("ExtractAudio: create input file: %w", err) + } + + videoInputReader, err := video.Reader() + if err != nil { + helper.QuietClose(ffmpegInputFile, f.slogger) + return nil, fmt.Errorf("ExtractAudio: get videoInputReader: %w", err) + } + + _, err = io.Copy(ffmpegInputFile, videoInputReader) + helper.QuietClose(ffmpegInputFile, f.slogger) + 
helper.QuietClose(videoInputReader, f.slogger) + if err != nil { + return nil, fmt.Errorf("ExtractAudio: write input: %w", err) + } + + outputPath := filepath.Join(dir, "audio.wav") + cmd := buildCmd(ctx, f.cfg, + "-i", ffmpegInputPath, + "-vn", + "-acodec", "pcm_s16le", + "-ar", "44100", + "-ac", "1", + outputPath, + ) + f.slogger.InfoContext(ctx, "ExtractAudio: running ffmpeg", "cmd", cmd.String()) + out, err := cmd.CombinedOutput() + f.slogger.DebugContext(ctx, "ExtractAudio: ffmpeg output", "output", string(out)) + if err != nil { + return nil, fmt.Errorf("ExtractAudio: ffmpeg failed: %w\n%s", err, out) + } + + outputFile, err := os.Open(outputPath) + if err != nil { + return nil, fmt.Errorf("ExtractAudio: open output: %w", err) + } + defer helper.QuietClose(outputFile, f.slogger) + + data, err := temp.DataTemp(outputFile) + if err != nil { + return nil, fmt.Errorf("ExtractAudio: read output: %w", err) + } + + f.slogger.InfoContext(ctx, "ExtractAudio: done") + return data, nil +} + +func NewVideo2AudioExtractor(cfg *conf.FfmpegConfig) ocr.Video2AudioExtractor { + return &Video2AudioExtractorImpl{ + slogger: slog.With("service", "Video2AudioExtractor"), + cfg: cfg, + } +} diff --git a/storage-service/ocr/ffmpeg/FfmpegVideo2FrameExtractor.go b/storage-service/ocr/ffmpeg/FfmpegVideo2FrameExtractor.go new file mode 100644 index 0000000..3f57df6 --- /dev/null +++ b/storage-service/ocr/ffmpeg/FfmpegVideo2FrameExtractor.go @@ -0,0 +1,114 @@ +package ffmpeg + +import ( + "context" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/conf" + "github.com/weoses/memelo/storage-service/ocr" +) + +type Video2FrameExtractorImpl struct { + slogger *slog.Logger + cfg *conf.FfmpegConfig +} + +var _ ocr.Video2FrameExtractor = (*Video2FrameExtractorImpl)(nil) + +func (f *Video2FrameExtractorImpl) ExtractFrames(ctx context.Context, video temp.Data) 
([]temp.Data, error) { + f.slogger.InfoContext(ctx, "ExtractFrames: start") + + dir, err := os.MkdirTemp("", "video2frames-*") + if err != nil { + return nil, fmt.Errorf("ExtractFrames: create temp dir: %w", err) + } + defer func(path string) { + errRemoveAll := os.RemoveAll(path) + if errRemoveAll != nil { + f.slogger.WarnContext(ctx, "ExtractFrames: remove temp dir failed: ", "error", errRemoveAll) + } + }(dir) + + ffmpegInputPath := filepath.Join(dir, "input.mp4") + ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600) + if err != nil { + return nil, fmt.Errorf("ExtractFrames: create input file: %w", err) + } + + videoInputReader, err := video.Reader() + if err != nil { + helper.QuietClose(ffmpegInputFile, f.slogger) + return nil, fmt.Errorf("ExtractFrames: get videoInputReader: %w", err) + } + + _, err = io.Copy(ffmpegInputFile, videoInputReader) + helper.QuietClose(videoInputReader, f.slogger) + helper.QuietClose(ffmpegInputFile, f.slogger) + if err != nil { + return nil, fmt.Errorf("ExtractFrames: write input: %w", err) + } + + outputPattern := filepath.Join(dir, "frame%06d.jpg") + cmd := buildCmd(ctx, f.cfg, + "-i", ffmpegInputPath, + "-vf", "fps=1", + "-f", "image2", + outputPattern, + ) + f.slogger.InfoContext(ctx, "ExtractFrames: running ffmpeg", "cmd", cmd.String()) + out, err := cmd.CombinedOutput() + if out != nil { + f.slogger.DebugContext(ctx, "ExtractFrames: ffmpeg output", "output", string(out)) + } + + if err != nil { + return nil, fmt.Errorf("ExtractFrames: ffmpeg failed: %w\n%s", err, out) + } + + matches, err := filepath.Glob(filepath.Join(dir, "frame*.jpg")) + if err != nil { + return nil, fmt.Errorf("ExtractFrames: glob frames: %w", err) + } + f.slogger.InfoContext(ctx, "ExtractFrames: frames produced", "count", len(matches), "files", matches) + + frames := make([]temp.Data, len(matches)) + for i, path := range matches { + frameData, err := f.processExtractedFrame(path) + if err != nil { + 
helper.QuietCloseAll(frames, f.slogger) + return nil, err + } + frames[i] = frameData + } + + f.slogger.InfoContext(ctx, "ExtractFrames: done", "frames", len(frames)) + return frames, nil +} + +func (f *Video2FrameExtractorImpl) processExtractedFrame(path string) (temp.Data, error) { + frameFile, err := os.Open(path) + defer helper.QuietClose(frameFile, f.slogger) + + if err != nil { + return nil, fmt.Errorf("ExtractFrames: open frame %s: %w", path, err) + } + data, err := temp.DataTemp(frameFile) + + if err != nil { + return nil, fmt.Errorf("ExtractFrames: read frame %s: %w", path, err) + } + return data, nil +} + +func NewVideo2FrameExtractor(cfg *conf.FfmpegConfig) ocr.Video2FrameExtractor { + return &Video2FrameExtractorImpl{ + slogger: slog.With("service", "Video2FrameExtractor"), + cfg: cfg, + } +} diff --git a/storage-service/ocr/ffmpeg/FfmpegVideo2Mp4Converter.go b/storage-service/ocr/ffmpeg/FfmpegVideo2Mp4Converter.go new file mode 100644 index 0000000..e650e27 --- /dev/null +++ b/storage-service/ocr/ffmpeg/FfmpegVideo2Mp4Converter.go @@ -0,0 +1,92 @@ +package ffmpeg + +import ( + "context" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/conf" + "github.com/weoses/memelo/storage-service/ocr" +) + +type Video2Mp4ConverterImpl struct { + slogger *slog.Logger + cfg *conf.FfmpegConfig +} + +var _ ocr.Video2Mp4Converter = (*Video2Mp4ConverterImpl)(nil) + +func (f *Video2Mp4ConverterImpl) ConvertToMp4(ctx context.Context, video temp.Data) (temp.Data, error) { + f.slogger.InfoContext(ctx, "ConvertToMp4: start") + + dir, err := os.MkdirTemp("", "video2mp4-*") + if err != nil { + return nil, fmt.Errorf("ConvertToMp4: create temp dir: %w", err) + } + defer func(path string) { + errRemoveAll := os.RemoveAll(path) + if errRemoveAll != nil { + f.slogger.WarnContext(ctx, "ConvertToMp4: remove temp dir failed: ", "error", errRemoveAll) + 
} + }(dir) + + ffmpegInputPath := filepath.Join(dir, "input") + ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600) + if err != nil { + return nil, fmt.Errorf("ConvertToMp4: create input file: %w", err) + } + + videoInputReader, err := video.Reader() + if err != nil { + helper.QuietClose(ffmpegInputFile, f.slogger) + return nil, fmt.Errorf("ConvertToMp4: get videoInputReader: %w", err) + } + + _, err = io.Copy(ffmpegInputFile, videoInputReader) + helper.QuietClose(ffmpegInputFile, f.slogger) + helper.QuietClose(videoInputReader, f.slogger) + if err != nil { + return nil, fmt.Errorf("ConvertToMp4: write input: %w", err) + } + + outputPath := filepath.Join(dir, "output.mp4") + cmd := buildCmd(ctx, f.cfg, + "-i", ffmpegInputPath, + "-c:v", "libx264", + "-c:a", "aac", + "-movflags", "+faststart", + outputPath, + ) + f.slogger.InfoContext(ctx, "ConvertToMp4: running ffmpeg", "cmd", cmd.String()) + out, err := cmd.CombinedOutput() + f.slogger.DebugContext(ctx, "ConvertToMp4: ffmpeg output", "output", string(out)) + if err != nil { + return nil, fmt.Errorf("ConvertToMp4: ffmpeg failed: %w\n%s", err, out) + } + + outputFile, err := os.Open(outputPath) + if err != nil { + return nil, fmt.Errorf("ConvertToMp4: open output: %w", err) + } + defer helper.QuietClose(outputFile, f.slogger) + + data, err := temp.DataTemp(outputFile) + if err != nil { + return nil, fmt.Errorf("ConvertToMp4: read output: %w", err) + } + + f.slogger.InfoContext(ctx, "ConvertToMp4: done") + return data, nil +} + +func NewVideo2Mp4Converter(cfg *conf.FfmpegConfig) ocr.Video2Mp4Converter { + return &Video2Mp4ConverterImpl{ + slogger: slog.With("service", "Video2Mp4Converter"), + cfg: cfg, + } +} diff --git a/storage-service/ocr/ffmpeg/ffmpeg.go b/storage-service/ocr/ffmpeg/ffmpeg.go new file mode 100644 index 0000000..2411847 --- /dev/null +++ b/storage-service/ocr/ffmpeg/ffmpeg.go @@ -0,0 +1,29 @@ +package ffmpeg + +import ( + "context" + "os/exec" + "strconv" + + 
"github.com/weoses/memelo/storage-service/conf" +) + +// buildCmd constructs an exec.Cmd for ffmpeg, applying optional thread limit +// and optionally wrapping the invocation with cpulimit. +func buildCmd(ctx context.Context, cfg *conf.FfmpegConfig, args ...string) *exec.Cmd { + if cfg.ThreadsLimit > 0 { + args = append([]string{"-threads", strconv.Itoa(cfg.ThreadsLimit)}, args...) + } + + binary := cfg.Binary + if binary == "" { + binary = "ffmpeg" + } + + if cfg.CpuLimit > 0 { + cpuArgs := append([]string{"-l", strconv.Itoa(cfg.CpuLimit), "--", binary}, args...) + return exec.CommandContext(ctx, "cpulimit", cpuArgs...) + } + + return exec.CommandContext(ctx, binary, args...) +} diff --git a/storage-service/ocr/gapi/GapiAudio2TextExtractor.go b/storage-service/ocr/gapi/GapiAudio2TextExtractor.go new file mode 100644 index 0000000..46ab92a --- /dev/null +++ b/storage-service/ocr/gapi/GapiAudio2TextExtractor.go @@ -0,0 +1,91 @@ +package gapi + +import ( + "context" + "fmt" + "log/slog" + "strings" + + speech "cloud.google.com/go/speech/apiv2" + "cloud.google.com/go/speech/apiv2/speechpb" + "github.com/weoses/memelo/common/temp" + + "github.com/weoses/memelo/storage-service/conf" + "github.com/weoses/memelo/storage-service/ocr" + "google.golang.org/api/option" +) + +type GcloudAudio2TextExtractorImpl struct { + client *speech.Client + languageCodes []string + recognizer string + model string + slogger *slog.Logger +} + +func (g *GcloudAudio2TextExtractorImpl) Transcript(ctx context.Context, audio temp.Data) (string, error) { + g.slogger.InfoContext(ctx, "Transcript start") + + req := &speechpb.RecognizeRequest{ + Recognizer: g.recognizer, + Config: &speechpb.RecognitionConfig{ + DecodingConfig: &speechpb.RecognitionConfig_AutoDecodingConfig{}, + LanguageCodes: g.languageCodes, + Model: g.model, + }, + } + + usedGcs := false + if s3data, ok := audio.(temp.S3BackedData); ok && s3data.IsGsSupported() { + if s3path, pathErr := s3data.GetS3Url(ctx); pathErr == nil { + if 
gcsUri, ok := toGcsUri(s3path); ok { + g.slogger.InfoContext(ctx, "Transcript using gcsUri", "uri", gcsUri) + req.AudioSource = &speechpb.RecognizeRequest_Uri{Uri: gcsUri} + usedGcs = true + } + } + } + if !usedGcs { + g.slogger.InfoContext(ctx, "Transcript using base64") + audioBytes, err := audio.ReadAll() + if err != nil { + return "", fmt.Errorf("transcript: read audio: %w", err) + } + req.AudioSource = &speechpb.RecognizeRequest_Content{Content: audioBytes} + } + + resp, err := g.client.Recognize(ctx, req) + if err != nil { + return "", fmt.Errorf("transcript: recognize failed: %w", err) + } + + var parts []string + for _, result := range resp.Results { + if len(result.Alternatives) > 0 { + parts = append(parts, result.Alternatives[0].Transcript) + } + } + result := strings.Join(parts, "\n") + g.slogger.InfoContext(ctx, "Transcript done", "chars", len(result)) + return result, nil +} + +func NewAudio2TextExtractor(cfg *conf.Config) (ocr.Audio2TextExtractor, error) { + sttConfig := cfg.AudioStt + client, err := speech.NewClient( + context.Background(), + option.WithEndpoint(sttConfig.ApiEndpoint), + ) + recognizer := fmt.Sprintf("projects/%s/locations/%s/recognizers/%s", sttConfig.ProjectName, sttConfig.ApiLocation, sttConfig.Recognizer) + + if err != nil { + return nil, fmt.Errorf("NewAudio2TextExtractor: create client: %w", err) + } + return &GcloudAudio2TextExtractorImpl{ + client: client, + languageCodes: sttConfig.LanguageCodes, + recognizer: recognizer, + model: sttConfig.Model, + slogger: slog.With("service", "GcloudAudio2TextExtractor"), + }, nil +} diff --git a/storage-service/ocr/gapi/GapiEmbedder.go b/storage-service/ocr/gapi/GapiEmbedder.go index eafe616..6d27542 100644 --- a/storage-service/ocr/gapi/GapiEmbedder.go +++ b/storage-service/ocr/gapi/GapiEmbedder.go @@ -6,57 +6,127 @@ import ( "encoding/base64" "encoding/json" "fmt" + "io" + "log/slog" + "net/url" + "strings" + "time" aiplatform "cloud.google.com/go/aiplatform/apiv1beta1" - aiplatformpb 
"cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb" + "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb" + "github.com/googleapis/gax-go/v2" + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/conf" "github.com/weoses/memelo/storage-service/entity" "github.com/weoses/memelo/storage-service/ocr" + "golang.org/x/time/rate" "google.golang.org/api/option" "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/types/known/structpb" ) +const embeddingRatePerSecond = 2 + type GcloudImageEmbeddingExtractorImpl struct { - client *aiplatform.PredictionClient - endpoint string - dimension int - model string + client *aiplatform.PredictionClient + endpoint string + dimension int + model string + videoIntervalSec int + limiter *rate.Limiter + slogger *slog.Logger } -// GetIconMatrix implements ImageCompareKeyExtractor. -func (i *GcloudImageEmbeddingExtractorImpl) GetImageEmbeddingV1(ctx context.Context, rawImage []byte) (*entity.ElasticEmbeddingV1, error) { - bufBase64Reader := bytes.NewBufferString("") - bufBytesWriter := base64.NewEncoder(base64.RawStdEncoding, bufBase64Reader) - _, err := bufBytesWriter.Write(rawImage) - if err != nil { - return nil, fmt.Errorf("failed to write image: %w", err) +// toGcsUri converts a Google Storage HTTP URL to a gs:// URI. +// Accepts gs:// URIs as-is, and converts storage.googleapis.com / storage.google.com +// HTTP URLs to gs://bucket/path form. Returns ("", false) for non-GCS URLs. +func toGcsUri(rawURL string) (string, bool) { + if strings.HasPrefix(rawURL, "gs://") { + return rawURL, true + } + // Normalise: add a scheme so url.Parse works for scheme-less URLs. 
+ normalized := rawURL + if !strings.Contains(rawURL, "://") { + normalized = "https://" + rawURL } - err = bufBytesWriter.Close() + u, err := url.Parse(normalized) if err != nil { - return nil, fmt.Errorf("failed to close image writer: %w", err) + return "", false + } + host := strings.ToLower(u.Hostname()) + if host != "storage.googleapis.com" && host != "storage.google.com" { + return "", false + } + // Path is /bucket/rest/of/object — trim leading slash. + return "gs:/" + u.Path, true +} + +func (i *GcloudImageEmbeddingExtractorImpl) GetImageEmbedding(ctx context.Context, rawImage temp.Data) (*entity.EmbeddingItem, error) { + i.slogger.InfoContext(ctx, "GetImageEmbedding start") + if err := i.limiter.Wait(ctx); err != nil { + return nil, fmt.Errorf("rate limiter: %w", err) } - base64Str := bufBase64Reader.String() - return i.generateWithLowerDimension(&base64Str) + var embeddingItem *entity.EmbeddingItem + var err error + if s3data, ok := rawImage.(temp.S3BackedData); ok && s3data.IsGsSupported() { + if s3path, pathErr := s3data.GetS3Url(ctx); pathErr == nil { + if gcsURI, ok := toGcsUri(s3path); ok { + i.slogger.InfoContext(ctx, "GetImageEmbedding using gcsUri", "uri", gcsURI) + embeddingItem, err = i.generateWithLowerDimension(nil, &gcsURI) + if err != nil { + return nil, fmt.Errorf("failed to generate embedding: %w", err) + } + i.slogger.InfoContext(ctx, "GetImageEmbedding done") + return embeddingItem, nil + } + } + } + { + i.slogger.InfoContext(ctx, "GetImageEmbedding using base64") + bufBase64 := bytes.NewBufferString("") + base64encoder := base64.NewEncoder(base64.RawStdEncoding, bufBase64) + var reader io.ReadCloser + reader, err = rawImage.Reader() + if err != nil { + return nil, fmt.Errorf("error reading temp: %w", err) + } + defer helper.QuietClose(reader, i.slogger) + if _, err = io.Copy(base64encoder, reader); err != nil { + return nil, fmt.Errorf("error encoding temp: %w", err) + } + base64Str := bufBase64.String() + embeddingItem, err = 
i.generateWithLowerDimension(&base64Str, nil) + } + + if err != nil { + return nil, fmt.Errorf("failed to generate embedding: %w", err) + } + i.slogger.InfoContext(ctx, "GetImageEmbedding done") + return embeddingItem, nil } // generateWithLowerDimension shows how to generate lower-dimensional embeddings for text and image inputs. +// Exactly one of dataImageBase64 or gcsUri must be non-nil. func (i *GcloudImageEmbeddingExtractorImpl) generateWithLowerDimension( dataImageBase64 *string, -) (*entity.ElasticEmbeddingV1, error) { - // location = "us-central1" + gcsUri *string, +) (*entity.EmbeddingItem, error) { ctx := context.Background() + imagePayload := map[string]any{} + if gcsUri != nil { + imagePayload["gcsUri"] = *gcsUri + } else { + imagePayload["bytesBase64Encoded"] = *dataImageBase64 + } + // This is the input to the model's prediction call. For schema, see: // https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api#request_body instance, err := structpb.NewValue(map[string]any{ - "image": map[string]any{ - // Image input can be provided either as a Google Cloud Storage URI or as - // base64-encoded bytes using the "bytesBase64Encoded" field. 
- //"gcsUri": "gs://cloud-samples-data/vertex-ai/llm/prompts/landmark1.png", - "bytesBase64Encoded": *dataImageBase64, - }, + "image": imagePayload, }) if err != nil { @@ -101,17 +171,115 @@ func (i *GcloudImageEmbeddingExtractorImpl) generateWithLowerDimension( imageEmbedding := instanceEmbeddings.ImageEmbeddings //textEmbedding := instanceEmbeddings.TextEmbeddings - return &entity.ElasticEmbeddingV1{ - Data: &imageEmbedding, + return &entity.EmbeddingItem{ + Data: imageEmbedding, Model: i.model, + Type: entity.EmbeddingTypeImage, }, nil } -func (i *GcloudImageEmbeddingExtractorImpl) GetTextEmbeddingV1(ctx context.Context, text string) (*entity.ElasticEmbeddingV1, error) { - return i.generateTextEmbedding(ctx, text) +func (i *GcloudImageEmbeddingExtractorImpl) GetVideoEmbedding(ctx context.Context, video temp.Data) ([]*entity.EmbeddingItem, error) { + i.slogger.InfoContext(ctx, "GetVideoEmbedding start") + if err := i.limiter.Wait(ctx); err != nil { + return nil, fmt.Errorf("rate limiter: %w", err) + } + + videoPayload := map[string]any{} + if s3data, ok := video.(temp.S3BackedData); ok && s3data.IsGsSupported() { + if s3path, pathErr := s3data.GetS3Url(ctx); pathErr == nil { + if gcsUri, ok := toGcsUri(s3path); ok { + i.slogger.InfoContext(ctx, "GetVideoEmbedding using gcsUri", "uri", gcsUri) + videoPayload["gcsUri"] = gcsUri + } + } + } + if _, hasGcs := videoPayload["gcsUri"]; !hasGcs { + i.slogger.InfoContext(ctx, "GetVideoEmbedding using base64") + bufBase64 := bytes.NewBufferString("") + base64encoder := base64.NewEncoder(base64.RawStdEncoding, bufBase64) + reader, err := video.Reader() + if err != nil { + return nil, fmt.Errorf("error reading temp: %w", err) + } + defer helper.QuietClose(reader, i.slogger) + if _, err = io.Copy(base64encoder, reader); err != nil { + return nil, fmt.Errorf("error encoding temp: %w", err) + } + videoPayload["bytesBase64Encoded"] = bufBase64.String() + } + + videoPayload["videoSegmentConfig"] = map[string]any{ + "intervalSec": 
i.videoIntervalSec, + } + // This is the input to the model's prediction call. For schema, see: + // https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-embeddings-api#request_body + instance, err := structpb.NewValue(map[string]any{ + "video": videoPayload, + }) + + if err != nil { + return nil, fmt.Errorf("failed to construct request payload: %w", err) + } + + params, err := structpb.NewValue(map[string]any{ + "dimension": i.dimension, + }) + if err != nil { + return nil, fmt.Errorf("failed to construct request params: %w", err) + } + + req := &aiplatformpb.PredictRequest{ + Endpoint: i.endpoint, + Instances: []*structpb.Value{instance}, + Parameters: params, + } + + resp, err := i.client.Predict(ctx, req, gax.WithTimeout(40*time.Second)) + if err != nil { + return nil, fmt.Errorf("failed to generate video embeddings: %w", err) + } + + predictionJSON, err := protojson.Marshal(resp.GetPredictions()[0]) + if err != nil { + return nil, fmt.Errorf("failed to convert protobuf value to JSON: %w", err) + } + + var prediction struct { + VideoEmbeddings []struct { + Embedding []float32 `json:"embedding"` + } `json:"videoEmbeddings"` + } + if err := json.Unmarshal(predictionJSON, &prediction); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON: %w", err) + } + + items := make([]*entity.EmbeddingItem, len(prediction.VideoEmbeddings)) + for idx, ve := range prediction.VideoEmbeddings { + items[idx] = &entity.EmbeddingItem{ + Data: ve.Embedding, + Model: i.model, + Type: entity.EmbeddingTypeVideo, + } + } + + i.slogger.InfoContext(ctx, "GetVideoEmbedding done", "segments", len(items)) + return items, nil +} + +func (i *GcloudImageEmbeddingExtractorImpl) GetTextEmbedding(ctx context.Context, text string) (*entity.EmbeddingItem, error) { + i.slogger.InfoContext(ctx, "GetTextEmbedding start") + if err := i.limiter.Wait(ctx); err != nil { + return nil, fmt.Errorf("rate limiter: %w", err) + } + embedding, err := 
i.generateTextEmbedding(ctx, text) + if err != nil { + return nil, fmt.Errorf("failed to generate embedding: %w", err) + } + i.slogger.InfoContext(ctx, "GetTextEmbedding done") + return embedding, nil } -func (i *GcloudImageEmbeddingExtractorImpl) generateTextEmbedding(ctx context.Context, text string) (*entity.ElasticEmbeddingV1, error) { +func (i *GcloudImageEmbeddingExtractorImpl) generateTextEmbedding(ctx context.Context, text string) (*entity.EmbeddingItem, error) { instance, err := structpb.NewValue(map[string]any{ "text": text, }) @@ -150,15 +318,17 @@ func (i *GcloudImageEmbeddingExtractorImpl) generateTextEmbedding(ctx context.Co } textEmbedding := instanceEmbeddings.TextEmbeddings - return &entity.ElasticEmbeddingV1{ - Data: &textEmbedding, + return &entity.EmbeddingItem{ + Data: textEmbedding, Model: i.model, + Type: entity.EmbeddingTypeText, }, nil } -func NewImageEmbeddingExtractor(cnf *conf.ImageEmbeddingConfig) (ocr.EmbeddingExtractor, error) { - apiEndpoint := cnf.ApiEndpoint - client, err := aiplatform.NewPredictionClient(context.Background(), option.WithEndpoint(apiEndpoint)) +func NewImageEmbeddingExtractor(cfg *conf.Config) (ocr.EmbeddingExtractor, error) { + cnf := cfg.MediaEmbedding + cnfEmbeddings := cfg.Embeddings + client, err := aiplatform.NewPredictionClient(context.Background(), option.WithEndpoint(cnf.ApiEndpoint)) if err != nil { return nil, fmt.Errorf("failed to construct API client: %w", err) } @@ -166,9 +336,12 @@ func NewImageEmbeddingExtractor(cnf *conf.ImageEmbeddingConfig) (ocr.EmbeddingEx endpoint := fmt.Sprintf("projects/%s/locations/%s/publishers/google/models/%s", cnf.ProjectName, cnf.ApiLocation, cnf.Model) return &GcloudImageEmbeddingExtractorImpl{ - client: client, - endpoint: endpoint, - dimension: cnf.Dimension, - model: cnf.Model, + client: client, + endpoint: endpoint, + dimension: cnfEmbeddings.Dimensions, + model: cnf.Model, + videoIntervalSec: cnfEmbeddings.VideoEmbeddingIntervalSec, + limiter: 
rate.NewLimiter(embeddingRatePerSecond, embeddingRatePerSecond), + slogger: slog.With("service", "EmbeddingExtractor"), }, nil } diff --git a/storage-service/ocr/gapi/GapiTextExtractor.go b/storage-service/ocr/gapi/GapiTextExtractor.go index 0f4b384..aad1e9c 100644 --- a/storage-service/ocr/gapi/GapiTextExtractor.go +++ b/storage-service/ocr/gapi/GapiTextExtractor.go @@ -1,28 +1,61 @@ package gapi import ( - "bytes" "context" + "fmt" + "log/slog" vision "cloud.google.com/go/vision/apiv1" + pb "cloud.google.com/go/vision/v2/apiv1/visionpb" + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/conf" "github.com/weoses/memelo/storage-service/ocr" + "golang.org/x/time/rate" "google.golang.org/api/option" ) +const ocrRatePerSecond = 25 + type GcloudTextExtractorImpl struct { - client *vision.ImageAnnotatorClient + client *vision.ImageAnnotatorClient + limiter *rate.Limiter + slogger *slog.Logger } -// GetName implements TextExtractor. +// GetName implements Image2TextExtractor. 
func (m *GcloudTextExtractorImpl) GetName() string { return "GCloud" } -func (m *GcloudTextExtractorImpl) DoOcr(ctx context.Context, image []byte) (string, error) { - img, err := vision.NewImageFromReader(bytes.NewReader(image)) - if err != nil { - return "", err +func (m *GcloudTextExtractorImpl) DoOcr(ctx context.Context, image temp.Data) (string, error) { + m.slogger.InfoContext(ctx, "DoOcr start") + + if err := m.limiter.Wait(ctx); err != nil { + return "", fmt.Errorf("rate limiter: %w", err) + } + + var img *pb.Image + if s3data, ok := image.(temp.S3BackedData); ok && s3data.IsGsSupported() { + if s3path, pathErr := s3data.GetS3Url(ctx); pathErr == nil { + if gcsUri, ok := toGcsUri(s3path); ok { + m.slogger.InfoContext(ctx, "DoOcr using gcsUri", "uri", gcsUri) + img = vision.NewImageFromURI(gcsUri) + } + } + } + if img == nil { + m.slogger.InfoContext(ctx, "DoOcr using base64") + reader, err := image.Reader() + if err != nil { + return "", err + } + defer helper.QuietClose(reader, m.slogger) + img, err = vision.NewImageFromReader(reader) + if err != nil { + return "", err + } } texts, err := m.client.DetectTexts(ctx, img, nil, 100) @@ -31,21 +64,26 @@ func (m *GcloudTextExtractorImpl) DoOcr(ctx context.Context, image []byte) (stri } if len(texts) > 0 { + m.slogger.InfoContext(ctx, "DoOcr done", "chars", len(texts[0].Description)) return texts[0].Description, nil } + + m.slogger.InfoContext(ctx, "DoOcr done", "chars", 0) return "", nil } -func NewOcrProcessor(ocrConf *conf.ImageOcrConfig) (ocr.TextExtractor, error) { +func NewOcrProcessor(cfg *conf.Config) (ocr.Image2TextExtractor, error) { visionClient, err := vision.NewImageAnnotatorClient( context.Background(), - option.WithEndpoint(ocrConf.ApiEndpoint), + option.WithEndpoint(cfg.ImageOcr.ApiEndpoint), ) if err != nil { return nil, err } return &GcloudTextExtractorImpl{ - client: visionClient, + client: visionClient, + limiter: rate.NewLimiter(ocrRatePerSecond, ocrRatePerSecond/2), + slogger: 
slog.With("service", "GcloudTextExtractor"), }, nil } diff --git a/storage-service/service/EP00CalcHash.go b/storage-service/service/EP00CalcHash.go new file mode 100644 index 0000000..42a2a6d --- /dev/null +++ b/storage-service/service/EP00CalcHash.go @@ -0,0 +1,52 @@ +package service + +import ( + "bytes" + "context" + "encoding/base64" + "fmt" + "io" + "log/slog" + + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/entity" +) + +type CalcHashPipelineStep struct { + BasePipelineStep +} + +func (c *CalcHashPipelineStep) Do(_ context.Context, inputContext MetadataInputContext, pipelineContext *MetadataPipelineContext) error { + hash, err := calcRawImageHash(inputContext.RawInput) + if err != nil { + return fmt.Errorf("create pipeline: error calculating hash: %w", err) + } + pipelineContext.Hash = hash + return nil +} + +func calcRawImageHash(raw temp.Data) (string, error) { + base64DataBuffer := bytes.NewBuffer(make([]byte, 0)) + encoder := base64.NewEncoder(base64.RawStdEncoding, base64DataBuffer) + reader, err := raw.Reader() + if err != nil { + return "", fmt.Errorf("failed to read incoming temp %w", err) + } + defer helper.QuietClose(reader, slog.With("calcRawImageHash")) + _, err = io.Copy(encoder, reader) + if err != nil { + return "", fmt.Errorf("failed to write raw temp to base64 encode buffer: %w", err) + } + hash := helper.CalcHash(base64DataBuffer.String()) + return hash, nil +} + +func NewCalcHashPipelineStep() ExtractPipelineStep { + return &CalcHashPipelineStep{ + BasePipelineStep{ + pos: 0, + typ: []entity.MetadataType{entity.ImageMetadataType, entity.VideoMetadataType}, + }, + } +} diff --git a/storage-service/service/EP10CheckDuplicateByHash.go b/storage-service/service/EP10CheckDuplicateByHash.go new file mode 100644 index 0000000..c1eb8bb --- /dev/null +++ b/storage-service/service/EP10CheckDuplicateByHash.go @@ -0,0 +1,37 @@ +package service + +import ( + "context" + 
"fmt" + + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/storage" +) + +type ImageCheckDuplicateByHashPipelineStep struct { + BasePipelineStep + + metadataService storage.MetadataStorageService +} + +func (s *ImageCheckDuplicateByHashPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + items, err := s.metadataService.GetByHash(ctx, inputContext.AccountId, pCtx.Hash, helper.Addr(1)) + if err != nil { + return fmt.Errorf("error getting items by hash: %w", err) + } + if len(items) > 0 { + pCtx.Duplicate = items[0] + } + return nil +} + +func NewImageCheckDuplicateByHashPipelineStep(metadata storage.MetadataStorageService) ExtractPipelineStep { + return &ImageCheckDuplicateByHashPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 10, + typ: []entity.MetadataType{entity.ImageMetadataType, entity.VideoMetadataType}, + }, + metadataService: metadata, + } +} diff --git a/storage-service/service/EP20ImgToJpeg.go b/storage-service/service/EP20ImgToJpeg.go new file mode 100644 index 0000000..b9990a8 --- /dev/null +++ b/storage-service/service/EP20ImgToJpeg.go @@ -0,0 +1,44 @@ +package service //nolint:dupl + +import ( + "context" + "fmt" + + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type ImageToJpegPipelineStep struct { + BasePipelineStep + + imageConverter ocr.ImageConveter + tmpDataService commonservice.TmpDataService +} + +func (s *ImageToJpegPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + imgJpeg, err := s.imageConverter.Convert2Jpeg(ctx, inputContext.RawInput) + if err != nil { + return fmt.Errorf("ImageToJpegPipelineStep: cannot process original image: %w", err) + } + + s3WrappedData, err := s.tmpDataService.WrapData(ctx, imgJpeg) 
+ if err != nil { + return fmt.Errorf("ImageToJpegPipelineStep: cannot wrap s3 for image: %w", err) + } + + pCtx.ImageOriginalJpeg = s3WrappedData + pCtx.StorageArtifacts = append(pCtx.StorageArtifacts, MetadataStorageArtifact{Type: SavedOriginal, Data: s3WrappedData}) + return nil +} + +func NewImageToJpegPipelineStep(imageConverter ocr.ImageConveter, tmpDataService commonservice.TmpDataService) ExtractPipelineStep { + return &ImageToJpegPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 20, + typ: []entity.MetadataType{entity.ImageMetadataType}, + }, + imageConverter: imageConverter, + tmpDataService: tmpDataService, + } +} diff --git a/storage-service/service/EP21VidToMp4.go b/storage-service/service/EP21VidToMp4.go new file mode 100644 index 0000000..63b95b8 --- /dev/null +++ b/storage-service/service/EP21VidToMp4.go @@ -0,0 +1,44 @@ +package service //nolint:dupl + +import ( + "context" + "fmt" + + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type VidToMp4PipelineStep struct { + BasePipelineStep + + converter ocr.Video2Mp4Converter + tmpDataService commonservice.TmpDataService +} + +func (s *VidToMp4PipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + resultMp4, err := s.converter.ConvertToMp4(ctx, inputContext.RawInput) + if err != nil { + return fmt.Errorf("VidToMp4PipelineStep: cannot convert video to mp4: %w", err) + } + + s3WrappedMp4, err := s.tmpDataService.WrapData(ctx, resultMp4) + if err != nil { + return fmt.Errorf("VidToMp4PipelineStep: cannot wrap s3 for image: %w", err) + } + + pCtx.VideoMp4 = s3WrappedMp4 + pCtx.StorageArtifacts = append(pCtx.StorageArtifacts, MetadataStorageArtifact{Type: SavedOriginal, Data: s3WrappedMp4}) + return nil +} + +func NewVidToMp4PipelineStep(converter ocr.Video2Mp4Converter, tmpDataService commonservice.TmpDataService) 
ExtractPipelineStep { + return &VidToMp4PipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 21, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + converter: converter, + tmpDataService: tmpDataService, + } +} diff --git a/storage-service/service/EP22VidExtractFrames.go b/storage-service/service/EP22VidExtractFrames.go new file mode 100644 index 0000000..46a7f2f --- /dev/null +++ b/storage-service/service/EP22VidExtractFrames.go @@ -0,0 +1,54 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/common/helper" + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type VidExtractFramesPipelineStep struct { + BasePipelineStep + + extractor ocr.Video2FrameExtractor + tmpDataService commonservice.TmpDataService +} + +func (s *VidExtractFramesPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if pCtx.VideoMp4 == nil { + return nil + } + frames, err := s.extractor.ExtractFrames(ctx, pCtx.VideoMp4) + if err != nil { + return fmt.Errorf("cannot extract frames from video: %w", err) + } + + framesS3Backed, err := helper.TransformSliceErr[temp.Data, temp.S3BackedData]( + frames, + make([]temp.S3BackedData, len(frames)), + func(data temp.Data) (temp.S3BackedData, error) { + return s.tmpDataService.WrapData(ctx, data) + }) + + if err != nil { + return fmt.Errorf("cannot transform frames to s3 backed data: %w", err) + } + + pCtx.VideoFrames = framesS3Backed + return nil +} + +func NewVidExtractFramesPipelineStep(extractor ocr.Video2FrameExtractor, tmpDataService commonservice.TmpDataService) ExtractPipelineStep { + return &VidExtractFramesPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 22, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + extractor: extractor, + tmpDataService: tmpDataService, + } 
+} diff --git a/storage-service/service/EP23VidExtractAudio.go b/storage-service/service/EP23VidExtractAudio.go new file mode 100644 index 0000000..6de492b --- /dev/null +++ b/storage-service/service/EP23VidExtractAudio.go @@ -0,0 +1,46 @@ +package service + +import ( + "context" + "fmt" + + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type VidExtractAudioPipelineStep struct { + BasePipelineStep + + extractor ocr.Video2AudioExtractor + tmpDataService commonservice.TmpDataService +} + +func (s *VidExtractAudioPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if pCtx.VideoMp4 == nil { + return nil + } + audio, err := s.extractor.ExtractAudio(ctx, pCtx.VideoMp4) + if err != nil { + return fmt.Errorf("cannot extract audio from video: %w", err) + } + + s3WrappedAudio, err := s.tmpDataService.WrapData(ctx, audio) + if err != nil { + return fmt.Errorf("cannot wrap s3 for image: %w", err) + } + + pCtx.VideoAudio = s3WrappedAudio + return nil +} + +func NewVidExtractAudioPipelineStep(extractor ocr.Video2AudioExtractor, tmpDataService commonservice.TmpDataService) ExtractPipelineStep { + return &VidExtractAudioPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 23, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + extractor: extractor, + tmpDataService: tmpDataService, + } +} diff --git a/storage-service/service/EP30ImgCalcEmbedding.go b/storage-service/service/EP30ImgCalcEmbedding.go new file mode 100644 index 0000000..6f6b1a4 --- /dev/null +++ b/storage-service/service/EP30ImgCalcEmbedding.go @@ -0,0 +1,33 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type ImageCalcEmbeddingPipelineStep struct { + BasePipelineStep + + imageEmbedder ocr.EmbeddingExtractor +} + 
+func (s *ImageCalcEmbeddingPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + embedding, err := s.imageEmbedder.GetImageEmbedding(ctx, pCtx.ImageOriginalJpeg) + if err != nil { + return fmt.Errorf("error getting image embedding: %w", err) + } + pCtx.Embedding = append(pCtx.Embedding, *embedding) + return nil +} + +func NewImageCalcEmbeddingPipelineStep(imageEmbedder ocr.EmbeddingExtractor) ExtractPipelineStep { + return &ImageCalcEmbeddingPipelineStep{ + BasePipelineStep: BasePipelineStep{ + typ: []entity.MetadataType{entity.ImageMetadataType}, + pos: 30, + }, + imageEmbedder: imageEmbedder} +} diff --git a/storage-service/service/EP31VidCalcEmbeddings.go b/storage-service/service/EP31VidCalcEmbeddings.go new file mode 100644 index 0000000..ba5ad9d --- /dev/null +++ b/storage-service/service/EP31VidCalcEmbeddings.go @@ -0,0 +1,40 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type VidCalcEmbeddingsPipelineStep struct { + BasePipelineStep + + embedder ocr.EmbeddingExtractor +} + +func (s *VidCalcEmbeddingsPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if pCtx.VideoMp4 == nil { + return nil + } + + embeddings, err := s.embedder.GetVideoEmbedding(ctx, pCtx.VideoMp4) + if err != nil { + return fmt.Errorf("cannot get embedding for video: %w", err) + } + for _, e := range embeddings { + pCtx.Embedding = append(pCtx.Embedding, *e) + } + return nil +} + +func NewVidCalcEmbeddingsPipelineStep(embedder ocr.EmbeddingExtractor) ExtractPipelineStep { + return &VidCalcEmbeddingsPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 31, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + embedder: embedder, + } +} diff --git a/storage-service/service/EP33VidStt.go b/storage-service/service/EP33VidStt.go new file mode 100644 
index 0000000..01a652f --- /dev/null +++ b/storage-service/service/EP33VidStt.go @@ -0,0 +1,37 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type VidSttPipelineStep struct { + BasePipelineStep + + stt ocr.Audio2TextExtractor +} + +func (s *VidSttPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if pCtx.VideoAudio == nil { + return nil + } + transcript, err := s.stt.Transcript(ctx, pCtx.VideoAudio) + if err != nil { + return fmt.Errorf("cannot transcribe video audio: %w", err) + } + pCtx.Transcription = transcript + return nil +} + +func NewVidSttPipelineStep(stt ocr.Audio2TextExtractor) ExtractPipelineStep { + return &VidSttPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 33, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + stt: stt, + } +} diff --git a/storage-service/service/EP40CheckDuplicateByEmbedding.go b/storage-service/service/EP40CheckDuplicateByEmbedding.go new file mode 100644 index 0000000..b4fc1fb --- /dev/null +++ b/storage-service/service/EP40CheckDuplicateByEmbedding.go @@ -0,0 +1,44 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/storage-service/conf" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/storage" +) + +type CheckDuplicateByEmbeddingPipelineStep struct { + BasePipelineStep + + metadata storage.MetadataStorageService + searchConfig *conf.SearchConfig +} + +func (s *CheckDuplicateByEmbeddingPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + for i := range len(pCtx.Embedding) { + items, err := s.metadata.SearchByEmbeddingV1(ctx, inputContext.AccountId, pCtx.Embedding[i], helper.Addr(1), s.searchConfig.SemanticDuplicateThreshold) + if err != nil { + return 
fmt.Errorf("error getting items by embedding: %w", err) + } + if len(items) > 0 { + pCtx.Duplicate = items[0] + break + } + } + + return nil +} + +func NewCheckDuplicateByEmbeddingPipelineStep(metadata storage.MetadataStorageService, cfg *conf.Config) ExtractPipelineStep { + return &CheckDuplicateByEmbeddingPipelineStep{ + BasePipelineStep: BasePipelineStep{ + typ: []entity.MetadataType{entity.ImageMetadataType, entity.VideoMetadataType}, + pos: 40, + }, + metadata: metadata, + searchConfig: cfg.Search, + } +} diff --git a/storage-service/service/EP50ImgOcr.go b/storage-service/service/EP50ImgOcr.go new file mode 100644 index 0000000..0f64852 --- /dev/null +++ b/storage-service/service/EP50ImgOcr.go @@ -0,0 +1,33 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type ImageOcrImagePipelineStep struct { + BasePipelineStep + + image2text ocr.Image2TextExtractor +} + +func (s *ImageOcrImagePipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + ocrResult, err := s.image2text.DoOcr(ctx, pCtx.ImageOriginalJpeg) + if err != nil { + return fmt.Errorf("error ocring image: %w", err) + } + pCtx.Transcription = ocrResult + return nil +} + +func NewImageOcrImagePipelineStep(image2text ocr.Image2TextExtractor) ExtractPipelineStep { + return &ImageOcrImagePipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 50, + typ: []entity.MetadataType{entity.ImageMetadataType}, + }, + image2text: image2text} +} diff --git a/storage-service/service/EP51VidOcrFrames.go b/storage-service/service/EP51VidOcrFrames.go new file mode 100644 index 0000000..f1b1f95 --- /dev/null +++ b/storage-service/service/EP51VidOcrFrames.go @@ -0,0 +1,98 @@ +package service + +import ( + "context" + "fmt" + "math" + "strings" + "sync" + + "github.com/agnivade/levenshtein" + "github.com/weoses/memelo/storage-service/entity" + 
"github.com/weoses/memelo/storage-service/ocr" +) + +type VidOcrFramesPipelineStep struct { + BasePipelineStep + + image2text ocr.Image2TextExtractor +} + +func (s *VidOcrFramesPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if len(pCtx.VideoFrames) == 0 { + return nil + } + var results []string + var errors []error + + waitGroup := &sync.WaitGroup{} + for _, frame := range pCtx.VideoFrames { + resultsPtr := &results + errorsPtr := &errors + + waitGroup.Go(func() { + text, err := s.image2text.DoOcr(ctx, frame) + if err != nil { + *errorsPtr = append(*errorsPtr, fmt.Errorf("cannot ocr video frame: %w", err)) + return + } + + if text != "" { + *resultsPtr = append(*resultsPtr, text) + } + }) + } + waitGroup.Wait() + if len(errors) > 0 { + return fmt.Errorf("cannot ocr video frames: %w", errors[0]) + } + + var mergedResults []string + + for _, result := range results { + duplicate := false + duplicateIdx := -1 + + for j, prevResult := range mergedResults { + length := len(result) + distance := levenshtein.ComputeDistance(prevResult, result) + percentChange := math.Abs(float64(distance) / float64(length)) + if percentChange < 0.35 { + duplicate = true + duplicateIdx = j + break + } + } + + if duplicate { + duplicateItem := mergedResults[duplicateIdx] + if len(duplicateItem) < len(result) { + mergedResults[duplicateIdx] = result + } + } else { + mergedResults = append(mergedResults, result) + } + } + + if len(mergedResults) > 0 { + joined := strings.Join(mergedResults, "\n") + if pCtx.Transcription != "" { + pCtx.Transcription = pCtx.Transcription + "\n\n" + joined + } else { + pCtx.Transcription = joined + + } + } + + return nil +} + +func NewVidOcrFramesPipelineStep(image2text ocr.Image2TextExtractor) ExtractPipelineStep { + return &VidOcrFramesPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 51, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + image2text: image2text, + } +} diff 
--git a/storage-service/service/EP60ImgCreateThumbnail.go b/storage-service/service/EP60ImgCreateThumbnail.go new file mode 100644 index 0000000..4679688 --- /dev/null +++ b/storage-service/service/EP60ImgCreateThumbnail.go @@ -0,0 +1,44 @@ +package service //nolint:dupl + +import ( + "context" + "fmt" + + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type ImageCreateThumbnailPipelineStep struct { + BasePipelineStep + + imageConverter ocr.ImageConveter + tmpDataService commonservice.TmpDataService +} + +func (s *ImageCreateThumbnailPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + imgThumb, err := s.imageConverter.MakeThumbnail(ctx, pCtx.ImageOriginalJpeg) + if err != nil { + return fmt.Errorf("cannot create thumbnail: %w", err) + } + + s3WrappedImgThumb, err := s.tmpDataService.WrapData(ctx, imgThumb) + if err != nil { + return fmt.Errorf("cannot wrap data to s3 data: %w", err) + } + + pCtx.ImageThumbnail = s3WrappedImgThumb + pCtx.StorageArtifacts = append(pCtx.StorageArtifacts, MetadataStorageArtifact{Type: SavedThumb, Data: s3WrappedImgThumb}) + return nil +} + +func NewImageCreateThumbnailPipelineStep(imageConverter ocr.ImageConveter, tmpDataService commonservice.TmpDataService) ExtractPipelineStep { + return &ImageCreateThumbnailPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 60, + typ: []entity.MetadataType{entity.ImageMetadataType}, + }, + imageConverter: imageConverter, + tmpDataService: tmpDataService, + } +} diff --git a/storage-service/service/EP61VidCreateThumbnail.go b/storage-service/service/EP61VidCreateThumbnail.go new file mode 100644 index 0000000..c725418 --- /dev/null +++ b/storage-service/service/EP61VidCreateThumbnail.go @@ -0,0 +1,53 @@ +package service + +import ( + "context" + "fmt" + + commonservice "github.com/weoses/memelo/common/service" + 
"github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type VidCreateThumbnailPipelineStep struct { + BasePipelineStep + + imageConverter ocr.ImageConveter + tmpDataService commonservice.TmpDataService +} + +func (s *VidCreateThumbnailPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if len(pCtx.VideoFrames) == 0 { + return nil + } + thumb, err := s.imageConverter.MakeThumbnail(ctx, pCtx.VideoFrames[0]) + if err != nil { + return fmt.Errorf("cannot create video thumbnail: %w", err) + } + + s3WrappedThumb, err := s.tmpDataService.WrapData(ctx, thumb) + if err != nil { + return fmt.Errorf("cannot wrap data to s3 data: %w", err) + } + + pCtx.VideoThumbnail = s3WrappedThumb + pCtx.StorageArtifacts = append(pCtx.StorageArtifacts, MetadataStorageArtifact{Type: SavedThumb, Data: s3WrappedThumb}) + + w, h, err := s.imageConverter.GetSize(ctx, thumb) + if err != nil { + return fmt.Errorf("cannot get video thumbnail size: %w", err) + } + pCtx.VideoThumbnailSizes = entity.Sizes{Width: w, Height: h} + return nil +} + +func NewVidCreateThumbnailPipelineStep(imageConverter ocr.ImageConveter, tmpDataService commonservice.TmpDataService) ExtractPipelineStep { + return &VidCreateThumbnailPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 61, + typ: []entity.MetadataType{entity.VideoMetadataType}, + }, + imageConverter: imageConverter, + tmpDataService: tmpDataService, + } +} diff --git a/storage-service/service/EP70ImgCalcSizes.go b/storage-service/service/EP70ImgCalcSizes.go new file mode 100644 index 0000000..27d8a56 --- /dev/null +++ b/storage-service/service/EP70ImgCalcSizes.go @@ -0,0 +1,44 @@ +package service + +import ( + "context" + "fmt" + + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/ocr" +) + +type ImageCalcSizesPipelineStep struct { + BasePipelineStep + + imageConverter ocr.ImageConveter +} + +func (s 
*ImageCalcSizesPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + if pCtx.ImageThumbnail == nil { + return fmt.Errorf("error: image thumbnail can't be nil") + } + + wRaw, hRaw, err := s.imageConverter.GetSize(ctx, pCtx.ImageOriginalJpeg) + if err != nil { + return fmt.Errorf("error getting size of raw image: %w", err) + } + + wThumb, hThumb, err := s.imageConverter.GetSize(ctx, pCtx.ImageThumbnail) + if err != nil { + return fmt.Errorf("error getting size of thumbnail: %w", err) + } + + pCtx.ImageOriginalSize = entity.Sizes{Width: wRaw, Height: hRaw} + pCtx.ImageThumbnailSize = entity.Sizes{Width: wThumb, Height: hThumb} + return nil +} + +func NewImageCalcSizesPipelineStep(imageConverter ocr.ImageConveter) ExtractPipelineStep { + return &ImageCalcSizesPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 70, + typ: []entity.MetadataType{entity.ImageMetadataType}, + }, + imageConverter: imageConverter} +} diff --git a/storage-service/service/EP80CalcTags.go b/storage-service/service/EP80CalcTags.go new file mode 100644 index 0000000..34dcd76 --- /dev/null +++ b/storage-service/service/EP80CalcTags.go @@ -0,0 +1,37 @@ +package service + +import ( + "context" + + "github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/storage" +) + +type CalcTagsPipelineStep struct { + BasePipelineStep + + tagStorage storage.ElasticTagStorage +} + +func (s *CalcTagsPipelineStep) Do(ctx context.Context, inputContext MetadataInputContext, pCtx *MetadataPipelineContext) error { + tags := make([]entity.ElasticTag, 0) + for i := range len(pCtx.Embedding) { + tagsChunk, err := s.tagStorage.SearchTagsByEmbedding(ctx, inputContext.AccountId, pCtx.Embedding[i], 0.8, 0.0) + if err != nil { + return err + } + tags = append(tags, tagsChunk...) 
+ } + pCtx.Tags = tags + return nil +} + +func NewCalcTagsPipelineStep(tagStorage storage.ElasticTagStorage) ExtractPipelineStep { + return &CalcTagsPipelineStep{ + BasePipelineStep: BasePipelineStep{ + pos: 80, + typ: []entity.MetadataType{entity.ImageMetadataType, entity.VideoMetadataType}, + }, + tagStorage: tagStorage, + } +} diff --git a/storage-service/service/ExportService.go b/storage-service/service/ExportService.go index cf76c51..7932558 100644 --- a/storage-service/service/ExportService.go +++ b/storage-service/service/ExportService.go @@ -6,6 +6,7 @@ import ( "log/slog" "github.com/google/uuid" + "github.com/weoses/memelo/common/helper" "github.com/weoses/memelo/storage-service/entity" storage2 "github.com/weoses/memelo/storage-service/storage" ) @@ -13,7 +14,7 @@ import ( const exportPageSize = 100 type ExportService interface { - // Export streams a "ready to pack" dtos of all exists images in database with metadata + // Export streams a "ready to pack" dtos of all exists images in database with metadataService Export(ctx context.Context, accountId *uuid.UUID, id *uuid.UUID, @@ -28,7 +29,7 @@ type ExportItem struct { } type ExportServiceImpl struct { - imageStorageService storage2.ImageStorageService + imageStorageService storage2.MediaStorageService metadataStorageService storage2.MetadataStorageService slogger *slog.Logger } @@ -47,7 +48,7 @@ func (e *ExportServiceImpl) Export( for { page, err := e.metadataStorageService.List(ctx, accountId, id, afterId, &pageSize) if err != nil { - return fmt.Errorf("export: query metadata page failed: %w", err) + return fmt.Errorf("export: query metadataService page failed: %w", err) } if len(page) == 0 { break @@ -55,25 +56,11 @@ func (e *ExportServiceImpl) Export( items := make([]ExportItem, len(page)) for i, meta := range page { - e.slogger.DebugContext(ctx, "export: processing item", "imageId", meta.ImageId) - item := ExportItem{} - // Original image - origBytes, err := e.imageStorageService.GetImageBytes(ctx, 
meta.S3Id) + item, err := e.exportOne(ctx, meta) if err != nil { - return fmt.Errorf("export: fetch original image %s failed: %w", meta.ImageId, err) + return fmt.Errorf("export: export item failed: %w", err) } - item.ImageOriginal = origBytes - - // CreateThumbnail - thumbBytes, err := e.imageStorageService.GetImageThumbBytes(ctx, meta.S3Id) - if err != nil { - return fmt.Errorf("export: fetch thumbnail %s failed: %w", meta.ImageId, err) - } - item.ImageThumbnail = thumbBytes - - // Metadata JSON → metadata/{imageId}.json - item.Metadata = meta - items[i] = item + items[i] = *item processed++ } @@ -86,7 +73,7 @@ func (e *ExportServiceImpl) Export( return fmt.Errorf("export: callback failed: %w", err) } - // Last page — no more data + // Last page — no more temp if len(page) < pageSize { break } @@ -96,7 +83,39 @@ func (e *ExportServiceImpl) Export( return nil } -func NewExportService(imageStore storage2.ImageStorageService, metadataStore storage2.MetadataStorageService) ExportService { +func (e *ExportServiceImpl) exportOne(ctx context.Context, meta *entity.ElasticImageMetaData) (*ExportItem, error) { + e.slogger.DebugContext(ctx, "export: processing item", "imageId", meta.ImageId) + item := &ExportItem{} + // Original image + origData, err := e.imageStorageService.Read(ctx, meta.S3Id, storageMediaType(meta.Type, SavedOriginal)) + if err != nil { + return nil, fmt.Errorf("export: fetch original image %s failed: %w", meta.ImageId, err) + } + defer helper.QuietClose(origData, e.slogger) + item.ImageOriginal, err = origData.ReadAll() + + if err != nil { + return nil, fmt.Errorf("export: read original image %s failed: %w", meta.ImageId, err) + } + + // CreateThumbnail + thumbData, err := e.imageStorageService.Read(ctx, meta.S3Id, storageMediaType(meta.Type, SavedThumb)) + if err != nil { + return nil, fmt.Errorf("export: fetch thumbnail %s failed: %w", meta.ImageId, err) + } + defer helper.QuietClose(thumbData, e.slogger) + + item.ImageThumbnail, err = 
thumbData.ReadAll() + if err != nil { + return nil, fmt.Errorf("export: read thumbnail %s failed: %w", meta.ImageId, err) + } + + // Metadata JSON → metadataService/{imageId}.json + item.Metadata = meta + return item, nil +} + +func NewExportService(imageStore storage2.MediaStorageService, metadataStore storage2.MetadataStorageService) ExportService { return &ExportServiceImpl{ imageStorageService: imageStore, metadataStorageService: metadataStore, diff --git a/storage-service/service/ExtractPipeline00CalcHash.go b/storage-service/service/ExtractPipeline00CalcHash.go deleted file mode 100644 index 8fed853..0000000 --- a/storage-service/service/ExtractPipeline00CalcHash.go +++ /dev/null @@ -1,40 +0,0 @@ -package service - -import ( - "bytes" - "context" - "encoding/base64" - "fmt" - - "github.com/weoses/memelo/common/helper" -) - -type CalcHashPipelineStep struct{} - -func (c *CalcHashPipelineStep) GetPos() int { - return 0 -} - -func (c *CalcHashPipelineStep) Do(_ context.Context, pipelineContext *ImageMetadataPipelineContext) error { - hash, err := calcRawImageHash(pipelineContext.ImageRaw) - if err != nil { - return fmt.Errorf("create pipeline: error calculating hash: %w", err) - } - pipelineContext.ImageHash = hash - return nil -} - -func calcRawImageHash(raw []byte) (string, error) { - base64DataBuffer := bytes.NewBuffer(make([]byte, 0)) - encoder := base64.NewEncoder(base64.RawStdEncoding, base64DataBuffer) - _, err := encoder.Write(raw) - if err != nil { - return "", fmt.Errorf("failed to write raw data to base64 encode buffer: %w", err) - } - hash := helper.CalcHash(base64DataBuffer.String()) - return hash, nil -} - -func NewCalcHashPipelineStep() ExtractPipelineStep { - return &CalcHashPipelineStep{} -} diff --git a/storage-service/service/ExtractPipeline10CheckDuplicateByHash.go b/storage-service/service/ExtractPipeline10CheckDuplicateByHash.go deleted file mode 100644 index 34efd39..0000000 --- 
a/storage-service/service/ExtractPipeline10CheckDuplicateByHash.go +++ /dev/null @@ -1,32 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/common/helper" - "github.com/weoses/memelo/storage-service/storage" -) - -type CheckDuplicateByHashPipelineStep struct { - metadata storage.MetadataStorageService -} - -func (s *CheckDuplicateByHashPipelineStep) GetPos() int { - return 10 -} - -func (s *CheckDuplicateByHashPipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - items, err := s.metadata.GetByHash(ctx, pCtx.AccountId, pCtx.ImageHash, helper.Addr(1)) - if err != nil { - return fmt.Errorf("error getting items by hash: %w", err) - } - if len(items) > 0 { - pCtx.Duplicate = items[0] - } - return nil -} - -func NewCheckDuplicateByHashPipelineStep(metadata storage.MetadataStorageService) ExtractPipelineStep { - return &CheckDuplicateByHashPipelineStep{metadata: metadata} -} diff --git a/storage-service/service/ExtractPipeline20ToJpeg.go b/storage-service/service/ExtractPipeline20ToJpeg.go deleted file mode 100644 index babcb71..0000000 --- a/storage-service/service/ExtractPipeline20ToJpeg.go +++ /dev/null @@ -1,29 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/storage-service/ocr" -) - -type ToJpegPipelineStep struct { - imageConverter ocr.ImageConveter -} - -func (s *ToJpegPipelineStep) GetPos() int { - return 20 -} - -func (s *ToJpegPipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - imgJpeg, err := s.imageConverter.ProcessOriginalImage(ctx, pCtx.ImageRaw) - if err != nil { - return fmt.Errorf("cannot process original image: %w", err) - } - pCtx.ImageRaw = imgJpeg - return nil -} - -func NewToJpegPipelineStep(imageConverter ocr.ImageConveter) ExtractPipelineStep { - return &ToJpegPipelineStep{imageConverter: imageConverter} -} diff --git a/storage-service/service/ExtractPipeline30CalcEmbedding.go 
b/storage-service/service/ExtractPipeline30CalcEmbedding.go deleted file mode 100644 index 3b2b0b2..0000000 --- a/storage-service/service/ExtractPipeline30CalcEmbedding.go +++ /dev/null @@ -1,29 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/storage-service/ocr" -) - -type CalcEmbeddingPipelineStep struct { - imageEmbedder ocr.EmbeddingExtractor -} - -func (s *CalcEmbeddingPipelineStep) GetPos() int { - return 30 -} - -func (s *CalcEmbeddingPipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - embedding, err := s.imageEmbedder.GetImageEmbeddingV1(ctx, pCtx.ImageRaw) - if err != nil { - return fmt.Errorf("error getting image embedding: %w", err) - } - pCtx.ImageEmbedding = *embedding - return nil -} - -func NewCalcEmbeddingPipelineStep(imageEmbedder ocr.EmbeddingExtractor) ExtractPipelineStep { - return &CalcEmbeddingPipelineStep{imageEmbedder: imageEmbedder} -} diff --git a/storage-service/service/ExtractPipeline40CheckDuplicateByEmbedding.go b/storage-service/service/ExtractPipeline40CheckDuplicateByEmbedding.go deleted file mode 100644 index a7efe15..0000000 --- a/storage-service/service/ExtractPipeline40CheckDuplicateByEmbedding.go +++ /dev/null @@ -1,32 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/storage-service/storage" -) - -type CheckDuplicateByEmbeddingPipelineStep struct { - metadata storage.MetadataStorageService -} - -func (s *CheckDuplicateByEmbeddingPipelineStep) GetPos() int { - return 40 -} - -func (s *CheckDuplicateByEmbeddingPipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - - items, err := s.metadata.SearchByEmbeddingV1(ctx, pCtx.AccountId, pCtx.ImageEmbedding, 1, true) - if err != nil { - return fmt.Errorf("error getting items by embedding: %w", err) - } - if len(items) > 0 { - pCtx.Duplicate = items[0] - } - return nil -} - -func NewCheckDuplicateByEmbeddingPipelineStep(metadata 
storage.MetadataStorageService) ExtractPipelineStep { - return &CheckDuplicateByEmbeddingPipelineStep{metadata: metadata} -} diff --git a/storage-service/service/ExtractPipeline50OcrImage.go b/storage-service/service/ExtractPipeline50OcrImage.go deleted file mode 100644 index c469bd1..0000000 --- a/storage-service/service/ExtractPipeline50OcrImage.go +++ /dev/null @@ -1,29 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/storage-service/ocr" -) - -type OcrImagePipelineStep struct { - image2text ocr.TextExtractor -} - -func (s *OcrImagePipelineStep) GetPos() int { - return 50 -} - -func (s *OcrImagePipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - ocrResult, err := s.image2text.DoOcr(ctx, pCtx.ImageRaw) - if err != nil { - return fmt.Errorf("error ocring image: %w", err) - } - pCtx.ImageOcrResult = ocrResult - return nil -} - -func NewOcrImagePipelineStep(image2text ocr.TextExtractor) ExtractPipelineStep { - return &OcrImagePipelineStep{image2text: image2text} -} diff --git a/storage-service/service/ExtractPipeline60CreateThumbnail.go b/storage-service/service/ExtractPipeline60CreateThumbnail.go deleted file mode 100644 index 3105f83..0000000 --- a/storage-service/service/ExtractPipeline60CreateThumbnail.go +++ /dev/null @@ -1,29 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/storage-service/ocr" -) - -type CreateThumbnailPipelineStep struct { - imageConverter ocr.ImageConveter -} - -func (s *CreateThumbnailPipelineStep) GetPos() int { - return 60 -} - -func (s *CreateThumbnailPipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - imgThumb, err := s.imageConverter.MakeThumbnail(ctx, pCtx.ImageRaw) - if err != nil { - return fmt.Errorf("cannot create thumbnail: %w", err) - } - pCtx.ImageThumbnail = imgThumb - return nil -} - -func NewCreateThumbnailPipelineStep(imageConverter ocr.ImageConveter) ExtractPipelineStep { - return 
&CreateThumbnailPipelineStep{imageConverter: imageConverter} -} diff --git a/storage-service/service/ExtractPipeline70CalcSizes.go b/storage-service/service/ExtractPipeline70CalcSizes.go deleted file mode 100644 index 10d8366..0000000 --- a/storage-service/service/ExtractPipeline70CalcSizes.go +++ /dev/null @@ -1,41 +0,0 @@ -package service - -import ( - "context" - "fmt" - - "github.com/weoses/memelo/storage-service/entity" - "github.com/weoses/memelo/storage-service/ocr" -) - -type CalcSizesPipelineStep struct { - imageConverter ocr.ImageConveter -} - -func (s *CalcSizesPipelineStep) GetPos() int { - return 70 -} - -func (s *CalcSizesPipelineStep) Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - if pCtx.ImageThumbnail == nil { - return fmt.Errorf("error: image thumbnail can't be nil") - } - - wRaw, hRaw, err := s.imageConverter.GetSize(ctx, pCtx.ImageRaw) - if err != nil { - return fmt.Errorf("error getting size of raw image: %w", err) - } - - wThumb, hThumb, err := s.imageConverter.GetSize(ctx, pCtx.ImageThumbnail) - if err != nil { - return fmt.Errorf("error getting size of thumbnail: %w", err) - } - - pCtx.ImageRawSize = entity.ElasticSizes{Width: wRaw, Height: hRaw} - pCtx.ImageThumbnailSize = entity.ElasticSizes{Width: wThumb, Height: hThumb} - return nil -} - -func NewCalcSizesPipelineStep(imageConverter ocr.ImageConveter) ExtractPipelineStep { - return &CalcSizesPipelineStep{imageConverter: imageConverter} -} diff --git a/storage-service/service/ExtractPipeline80CalcTags.go b/storage-service/service/ExtractPipeline80CalcTags.go deleted file mode 100644 index ba8c57b..0000000 --- a/storage-service/service/ExtractPipeline80CalcTags.go +++ /dev/null @@ -1,28 +0,0 @@ -package service - -import ( - "context" - - "github.com/weoses/memelo/storage-service/storage" -) - -type CalcTagsPipelineStep struct { - tagStorage storage.ElasticTagStorage -} - -func (s *CalcTagsPipelineStep) GetPos() int { - return 80 -} - -func (s *CalcTagsPipelineStep) 
Do(ctx context.Context, pCtx *ImageMetadataPipelineContext) error { - tags, err := s.tagStorage.SearchTagsByEmbedding(ctx, pCtx.AccountId, pCtx.ImageEmbedding, 0.8, 0.0) - if err != nil { - return err - } - pCtx.Tags = tags - return nil -} - -func NewCalcTagsPipelineStep(tagStorage storage.ElasticTagStorage) ExtractPipelineStep { - return &CalcTagsPipelineStep{tagStorage: tagStorage} -} diff --git a/storage-service/service/ExtractPipelineStep.go b/storage-service/service/ExtractPipelineStep.go deleted file mode 100644 index 6ce1717..0000000 --- a/storage-service/service/ExtractPipelineStep.go +++ /dev/null @@ -1,25 +0,0 @@ -package service - -import ( - "context" - - "github.com/google/uuid" - "github.com/weoses/memelo/storage-service/entity" -) - -type ExtractPipelineStep interface { - GetPos() int - Do(ctx context.Context, pipelineContext *ImageMetadataPipelineContext) error -} -type ImageMetadataPipelineContext struct { - AccountId uuid.UUID - ImageHash string - ImageEmbedding entity.ElasticEmbeddingV1 - ImageOcrResult string - ImageThumbnail []byte - ImageThumbnailSize entity.ElasticSizes - ImageRaw []byte - ImageRawSize entity.ElasticSizes - Duplicate *entity.ElasticImageMetaData - Tags []entity.ElasticTag -} diff --git a/storage-service/service/ImageExtractPipelineStep.go b/storage-service/service/ImageExtractPipelineStep.go new file mode 100644 index 0000000..729a092 --- /dev/null +++ b/storage-service/service/ImageExtractPipelineStep.go @@ -0,0 +1,90 @@ +package service + +import ( + "context" + "log/slog" + + "github.com/google/uuid" + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/entity" +) + +type ExtractPipelineStep interface { + GetPos() int + GetAllowedPipelineTypes() []entity.MetadataType + Do(ctx context.Context, inputContext MetadataInputContext, pipelineContext *MetadataPipelineContext) error +} +type BasePipelineStep struct { + pos int + typ []entity.MetadataType +} + 
+func (bp *BasePipelineStep) GetPos() int { + return bp.pos +} + +func (bp *BasePipelineStep) GetAllowedPipelineTypes() []entity.MetadataType { + return bp.typ +} + +type MetadataInputContext struct { + RawInput temp.S3BackedData + AccountId uuid.UUID + Type entity.MetadataType +} + +type MetadataStorageArtifact struct { + Type SavedArtifactType + Data temp.S3BackedData +} + +func (m *MetadataStorageArtifact) Close() error { + if m.Data != nil { + tmpLogger := slog.With("service", "MetadataStorageArtifact") + helper.QuietClose(m.Data, tmpLogger) + } + return nil +} + +type MetadataPipelineContext struct { + Hash string + Embedding []entity.EmbeddingItem + Transcription string + StorageArtifacts []MetadataStorageArtifact + Duplicate *entity.ElasticImageMetaData + Tags []entity.ElasticTag + + ImageOriginalJpeg temp.S3BackedData + ImageOriginalSize entity.Sizes + ImageThumbnail temp.S3BackedData + ImageThumbnailSize entity.Sizes + + VideoMp4 temp.S3BackedData + VideoFrames []temp.S3BackedData + VideoAudio temp.S3BackedData + VideoThumbnail temp.S3BackedData + VideoThumbnailSizes entity.Sizes +} + +func (m *MetadataPipelineContext) Close() error { + tmpLogger := slog.With("service", "MetadataPipelineContext") + if m.StorageArtifacts != nil { + for i := range m.StorageArtifacts { + helper.QuietClose(&m.StorageArtifacts[i], tmpLogger) + } + } + + helper.QuietClose(m.ImageOriginalJpeg, tmpLogger) + helper.QuietClose(m.ImageThumbnail, tmpLogger) + + if m.VideoFrames != nil { + for i := range m.VideoFrames { + helper.QuietClose(m.VideoFrames[i], tmpLogger) + } + } + helper.QuietClose(m.VideoMp4, tmpLogger) + helper.QuietClose(m.VideoAudio, tmpLogger) + + return nil +} diff --git a/storage-service/service/ImageMetadataExtractService.go b/storage-service/service/ImageMetadataExtractService.go deleted file mode 100644 index 2d4bd12..0000000 --- a/storage-service/service/ImageMetadataExtractService.go +++ /dev/null @@ -1,42 +0,0 @@ -package service - -import ( - "context" - 
"fmt" - "slices" - - "github.com/google/uuid" -) - -type ImageMetadataExtractService interface { - ProcessCreate(ctx context.Context, accountId uuid.UUID, raw []byte, checkDup bool) (*ImageMetadataPipelineContext, error) -} - -type CreateImageServiceImpl struct { - steps []ExtractPipelineStep -} - -func (c *CreateImageServiceImpl) ProcessCreate(ctx context.Context, accountId uuid.UUID, raw []byte, checkDup bool) (*ImageMetadataPipelineContext, error) { - pipelineCtx := &ImageMetadataPipelineContext{ - AccountId: accountId, - ImageRaw: raw, - } - - for _, step := range c.steps { - if err := step.Do(ctx, pipelineCtx); err != nil { - return nil, fmt.Errorf("create pipeline: step failed (pos=%d): %w", step.GetPos(), err) - } - if checkDup && pipelineCtx.Duplicate != nil { - return pipelineCtx, nil - } - } - - return pipelineCtx, nil -} - -func NewImageMetadataExtractService(steps []ExtractPipelineStep) ImageMetadataExtractService { - slices.SortFunc(steps, func(a, b ExtractPipelineStep) int { - return a.GetPos() - b.GetPos() - }) - return &CreateImageServiceImpl{steps: steps} -} diff --git a/storage-service/service/MemeCrudService.go b/storage-service/service/MemeCrudService.go index d215a4c..6307376 100644 --- a/storage-service/service/MemeCrudService.go +++ b/storage-service/service/MemeCrudService.go @@ -2,12 +2,16 @@ package service import ( "context" + "errors" "fmt" "log/slog" "time" "github.com/google/uuid" + "github.com/minio/minio-go/v7" "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/entity" storage2 "github.com/weoses/memelo/storage-service/storage" ) @@ -36,24 +40,25 @@ type SearchResult struct { type MemeCrudService interface { SearchMeme(ctx context.Context, accountId uuid.UUID, query string, afterId *uuid.UUID, size *int) (*SearchResult, error) - CreateMeme(ctx context.Context, accountId uuid.UUID, imgRaw []byte) (*CreateResult, error) + CreateMeme(ctx context.Context, 
accountId uuid.UUID, typ entity.MetadataType, raw temp.S3BackedData) (*CreateResult, error) DeleteMeme(ctx context.Context, accountId uuid.UUID, id uuid.UUID) error DeleteAll(ctx context.Context, accountId uuid.UUID) error } type MemeCrudServiceImpl struct { - imageStorageService storage2.ImageStorageService + imageStorageService storage2.MediaStorageService metadataStorageService storage2.MetadataStorageService - imageExtractService ImageMetadataExtractService + metadataExtractService MetadataExtractService searchService SearchService slogger *slog.Logger } -func (m *MemeCrudServiceImpl) CreateMeme(ctx context.Context, accountId uuid.UUID, imgRaw []byte) (*CreateResult, error) { - pipelineResult, err := m.imageExtractService.ProcessCreate(ctx, accountId, imgRaw, true) +func (m *MemeCrudServiceImpl) CreateMeme(ctx context.Context, accountId uuid.UUID, mediaType entity.MetadataType, raw temp.S3BackedData) (*CreateResult, error) { + pipelineResult, err := m.metadataExtractService.Extract(ctx, accountId, mediaType, raw, true) if err != nil { - return nil, fmt.Errorf("metadata extract pipeline failed: %w", err) + return nil, fmt.Errorf("metadataService extract pipeline failed: %w", err) } + defer helper.QuietClose(pipelineResult, m.slogger) if pipelineResult.Duplicate != nil { results, err := m.constructMetadataWithUrls(ctx, []*entity.ElasticImageMetaData{pipelineResult.Duplicate}) @@ -69,23 +74,26 @@ func (m *MemeCrudServiceImpl) CreateMeme(ctx context.Context, accountId uuid.UUI s3id := uuid.New() imgId := uuid.New() - - err = m.imageStorageService.Save(ctx, s3id, pipelineResult.ImageRaw, pipelineResult.ImageThumbnail) - if err != nil { - return nil, fmt.Errorf("save image files failed: %w", err) + for i := range pipelineResult.StorageArtifacts { + artifact := pipelineResult.StorageArtifacts[i] + err = m.imageStorageService.Save(ctx, s3id, storageMediaType(mediaType, artifact.Type), artifact.Data) + if err != nil { + return nil, fmt.Errorf("save artifact with type %s 
failed: %w", artifact.Type, err) + } } metadataEntity := &entity.ElasticImageMetaData{ - ImageId: imgId, - S3Id: s3id, - AccountId: accountId, - Result: pipelineResult.ImageOcrResult, - Hash: pipelineResult.ImageHash, - EmbeddingV1: &pipelineResult.ImageEmbedding, - ImageSize: &pipelineResult.ImageRawSize, - ThumbSize: &pipelineResult.ImageThumbnailSize, - Created: time.Now().UnixMicro(), - Updated: time.Now().UnixMicro(), + ImageId: imgId, + Type: mediaType, + S3Id: s3id, + AccountId: accountId, + Result: pipelineResult.Transcription, + Hash: pipelineResult.Hash, + EmbeddingList: pipelineResult.Embedding, + ImageSize: &pipelineResult.ImageOriginalSize, + ThumbSize: &pipelineResult.ImageThumbnailSize, + Created: time.Now().UnixMicro(), + Updated: time.Now().UnixMicro(), Tags: helper.TransformSlice( pipelineResult.Tags, make([]string, len(pipelineResult.Tags)), @@ -95,7 +103,7 @@ func (m *MemeCrudServiceImpl) CreateMeme(ctx context.Context, accountId uuid.UUI } err = m.metadataStorageService.Save(ctx, metadataEntity) if err != nil { - return nil, fmt.Errorf("save metadata failed: %w", err) + return nil, fmt.Errorf("save metadataService failed: %w", err) } entities, err := m.constructMetadataWithUrls(ctx, []*entity.ElasticImageMetaData{metadataEntity}) @@ -129,18 +137,20 @@ func (m *MemeCrudServiceImpl) SearchMeme(ctx context.Context, accountId uuid.UUI func (m *MemeCrudServiceImpl) DeleteMeme(ctx context.Context, accountId uuid.UUID, id uuid.UUID) error { metadata, err := m.metadataStorageService.GetById(ctx, accountId, id) if err != nil { - return fmt.Errorf("get metadata failed: %w", err) + return fmt.Errorf("get metadataService failed: %w", err) } if metadata == nil { return fmt.Errorf("meme not found: %s", id) } - if err = m.imageStorageService.DeleteImage(ctx, metadata.S3Id); err != nil { + err1 := m.imageStorageService.Delete(ctx, metadata.S3Id, storageMediaType(metadata.Type, SavedOriginal)) + err2 := m.imageStorageService.Delete(ctx, metadata.S3Id, 
storageMediaType(metadata.Type, SavedThumb)) + if err := errors.Join(err1, err2); err != nil { return fmt.Errorf("delete image failed: %w", err) } if err = m.metadataStorageService.DeleteById(ctx, accountId, id); err != nil { - return fmt.Errorf("delete metadata failed: %w", err) + return fmt.Errorf("delete metadataService failed: %w", err) } return nil @@ -157,8 +167,21 @@ func (m *MemeCrudServiceImpl) DeleteAll(ctx context.Context, accountId uuid.UUID } for _, meta := range results { - if err := m.imageStorageService.DeleteImage(ctx, meta.S3Id); err != nil { - return fmt.Errorf("delete image %s failed: %w", meta.S3Id, err) + errMinioOrig := m.imageStorageService.Delete(ctx, meta.S3Id, storageMediaType(meta.Type, SavedOriginal)) + errMinioThumb := m.imageStorageService.Delete(ctx, meta.S3Id, storageMediaType(meta.Type, SavedThumb)) + + var minioError minio.ErrorResponse + if errMinioOrig != nil && (!errors.As(errMinioOrig, &minioError) || minioError.Code != "NoSuchKey") { + return fmt.Errorf("delete image %s failed: %w", meta.S3Id, errMinioOrig) + } + + if errMinioThumb != nil && (!errors.As(errMinioThumb, &minioError) || minioError.Code != "NoSuchKey") { + return fmt.Errorf("delete image %s failed: %w", meta.S3Id, errMinioThumb) + } + + errElastic := m.metadataStorageService.DeleteById(ctx, accountId, meta.ImageId) + if errElastic != nil { + return fmt.Errorf("delete elastic %s failed: %w", meta.ImageId, errElastic) } } @@ -168,11 +191,6 @@ func (m *MemeCrudServiceImpl) DeleteAll(ctx context.Context, accountId uuid.UUID afterId = &results[len(results)-1].ImageId } - - if err := m.metadataStorageService.DeleteByAccountId(ctx, accountId); err != nil { - return fmt.Errorf("delete metadata failed: %w", err) - } - return nil } @@ -181,12 +199,12 @@ func (m *MemeCrudServiceImpl) constructMetadataWithUrls(ctx context.Context, ela for i, elasticDataObject := range elasticData { - urlOriginal, err := m.imageStorageService.GetUrl(ctx, elasticDataObject.S3Id) + urlOriginal, 
err := m.imageStorageService.GetUrl(ctx, elasticDataObject.S3Id, storageMediaType(elasticDataObject.Type, SavedOriginal)) if err != nil { return nil, fmt.Errorf("get original url by %s failed: %w", elasticDataObject.ImageId, err) } - urlThumb, err := m.imageStorageService.GetUrlThumb(ctx, elasticDataObject.S3Id) + urlThumb, err := m.imageStorageService.GetUrl(ctx, elasticDataObject.S3Id, storageMediaType(elasticDataObject.Type, SavedThumb)) if err != nil { return nil, fmt.Errorf("get thumbnail url by %s failed: %w", elasticDataObject.ImageId, err) } @@ -201,15 +219,15 @@ func (m *MemeCrudServiceImpl) constructMetadataWithUrls(ctx context.Context, ela } func NewMemeCrudService( - imageStore storage2.ImageStorageService, + imageStore storage2.MediaStorageService, metadataStore storage2.MetadataStorageService, - imageMetadataExtract ImageMetadataExtractService, + imageMetadataExtract MetadataExtractService, searchService SearchService, ) MemeCrudService { return &MemeCrudServiceImpl{ imageStorageService: imageStore, metadataStorageService: metadataStore, - imageExtractService: imageMetadataExtract, + metadataExtractService: imageMetadataExtract, searchService: searchService, slogger: slog.With("service", "MemeCrudService"), } diff --git a/storage-service/service/MetadataExtractService.go b/storage-service/service/MetadataExtractService.go new file mode 100644 index 0000000..4cdd389 --- /dev/null +++ b/storage-service/service/MetadataExtractService.go @@ -0,0 +1,58 @@ +package service + +import ( + "context" + "fmt" + "log/slog" + "slices" + + "github.com/google/uuid" + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" + + "github.com/weoses/memelo/storage-service/entity" +) + +type MetadataExtractService interface { + Extract(ctx context.Context, accountId uuid.UUID, typ entity.MetadataType, raw temp.S3BackedData, checkDup bool) (*MetadataPipelineContext, error) +} + +type MetadataExtractServiceImpl struct { + steps 
[]ExtractPipelineStep + slogger *slog.Logger +} + +func (c *MetadataExtractServiceImpl) Extract(ctx context.Context, accountId uuid.UUID, typ entity.MetadataType, raw temp.S3BackedData, checkDup bool) (*MetadataPipelineContext, error) { + pipelineCtx := &MetadataPipelineContext{} + inputCtx := MetadataInputContext{ + AccountId: accountId, + Type: typ, + RawInput: raw, + } + + for _, step := range c.steps { + if !slices.Contains(step.GetAllowedPipelineTypes(), typ) { + continue + } + + if err := step.Do(ctx, inputCtx, pipelineCtx); err != nil { + helper.QuietClose(pipelineCtx, c.slogger) + return nil, fmt.Errorf("create pipeline: step failed (pos=%d): %w", step.GetPos(), err) + } + if checkDup && pipelineCtx.Duplicate != nil { + return pipelineCtx, nil + } + } + + return pipelineCtx, nil +} + +func NewImageMetadataExtractService(steps []ExtractPipelineStep) MetadataExtractService { + slices.SortFunc(steps, func(a, b ExtractPipelineStep) int { + return a.GetPos() - b.GetPos() + }) + return &MetadataExtractServiceImpl{ + steps: steps, + slogger: slog.With("service", "MetadataExtractService"), + } +} diff --git a/storage-service/service/RecomputeService.go b/storage-service/service/RecomputeService.go index e3047fa..7f6b3ff 100644 --- a/storage-service/service/RecomputeService.go +++ b/storage-service/service/RecomputeService.go @@ -7,6 +7,7 @@ import ( "github.com/google/uuid" "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/service" "github.com/weoses/memelo/storage-service/entity" storage2 "github.com/weoses/memelo/storage-service/storage" ) @@ -25,9 +26,10 @@ type RecomputeService interface { type RecomputeServiceImpl struct { slogger *slog.Logger - extractService ImageMetadataExtractService + extractService MetadataExtractService metadataStorageService storage2.MetadataStorageService - imageStorageService storage2.ImageStorageService + imageStorageService storage2.MediaStorageService + tmpDataService service.TmpDataService } func (r 
*RecomputeServiceImpl) Recompute( @@ -42,7 +44,7 @@ func (r *RecomputeServiceImpl) Recompute( for { page, err := r.metadataStorageService.List(ctx, accountId, id, afterId, &pageSize) if err != nil { - return fmt.Errorf("export: query metadata page failed: %w", err) + return fmt.Errorf("export: query metadataService page failed: %w", err) } if len(page) == 0 { break @@ -76,58 +78,68 @@ func (r *RecomputeServiceImpl) Recompute( } func (r *RecomputeServiceImpl) recomputeOne(ctx context.Context, data *entity.ElasticImageMetaData) error { - rawImg, err := r.imageStorageService.GetImageBytes(ctx, data.S3Id) + rawImg, err := r.imageStorageService.Read(ctx, data.S3Id, storageMediaType(data.Type, SavedOriginal)) + defer helper.QuietClose(rawImg, r.slogger) if err != nil { return fmt.Errorf("recompute: get image bytes failed: %w", err) } - resultCtx, err := r.extractService.ProcessCreate(ctx, + rawImgS3Backed, err := r.tmpDataService.WrapData(ctx, rawImg) + defer helper.QuietClose(rawImgS3Backed, r.slogger) + if err != nil { + return fmt.Errorf("recompute: wrap data failed: %w", err) + } + + pipelineResult, err := r.extractService.Extract(ctx, data.AccountId, - rawImg, + data.Type, + rawImgS3Backed, false, ) - if err != nil { return fmt.Errorf("recompute: process create failed: %w", err) } - data.Hash = resultCtx.ImageHash - data.Result = resultCtx.ImageOcrResult - if resultCtx.ImageRaw != nil && resultCtx.ImageThumbnail != nil { - err = r.imageStorageService.Save(ctx, data.S3Id, resultCtx.ImageRaw, resultCtx.ImageThumbnail) + defer helper.QuietClose(pipelineResult, r.slogger) + + data.Hash = pipelineResult.Hash + data.Result = pipelineResult.Transcription + for i := range pipelineResult.StorageArtifacts { + artifact := pipelineResult.StorageArtifacts[i] + err = r.imageStorageService.Save(ctx, data.S3Id, storageMediaType(data.Type, artifact.Type), artifact.Data) if err != nil { - return fmt.Errorf("recompute: save image failed: %w", err) + return fmt.Errorf("save artifact 
with type %s failed: %w", artifact.Type, err) } } - data.ImageSize = &entity.ElasticSizes{ - Width: resultCtx.ImageRawSize.Width, - Height: resultCtx.ImageRawSize.Height, + data.ImageSize = &entity.Sizes{ + Width: pipelineResult.ImageOriginalSize.Width, + Height: pipelineResult.ImageOriginalSize.Height, } - data.ThumbSize = &entity.ElasticSizes{ - Width: resultCtx.ImageThumbnailSize.Width, - Height: resultCtx.ImageThumbnailSize.Height, + data.ThumbSize = &entity.Sizes{ + Width: pipelineResult.ImageThumbnailSize.Width, + Height: pipelineResult.ImageThumbnailSize.Height, } - data.EmbeddingV1 = &resultCtx.ImageEmbedding + data.EmbeddingList = pipelineResult.Embedding data.Tags = helper.TransformSlice( - resultCtx.Tags, - make([]string, len(resultCtx.Tags)), + pipelineResult.Tags, + make([]string, len(pipelineResult.Tags)), func(tag entity.ElasticTag) string { return tag.Tag }) err = r.metadataStorageService.Save(ctx, data) if err != nil { - return fmt.Errorf("recompute: save metadata failed: %w", err) + return fmt.Errorf("recompute: save metadataService failed: %w", err) } return nil } func NewRecomputeService( - extractService ImageMetadataExtractService, + extractService MetadataExtractService, metadataStorageService storage2.MetadataStorageService, - imageStorageService storage2.ImageStorageService, + imageStorageService storage2.MediaStorageService, ) RecomputeService { return &RecomputeServiceImpl{ slogger: slog.With("service", "RecomputeService"), diff --git a/storage-service/service/SearchPipeline20FuzzySearcher.go b/storage-service/service/SearchPipeline20FuzzySearcher.go index 3e1b249..2bec32a 100644 --- a/storage-service/service/SearchPipeline20FuzzySearcher.go +++ b/storage-service/service/SearchPipeline20FuzzySearcher.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/google/uuid" + "github.com/weoses/memelo/storage-service/conf" "github.com/weoses/memelo/storage-service/entity" "github.com/weoses/memelo/storage-service/storage" ) @@ -12,7 +13,8 @@ import ( 
type FuzzySearcher struct { SearcherBase - metadata storage.MetadataStorageService + metadata storage.MetadataStorageService + searchConfig *conf.SearchConfig } func (s FuzzySearcher) Search(ctx context.Context, accountId uuid.UUID, query string, afterId *uuid.UUID, size *int) ([]*entity.ElasticImageMetaData, error) { @@ -27,6 +29,7 @@ func (s FuzzySearcher) Search(ctx context.Context, accountId uuid.UUID, query st ctx, accountId, query, + s.searchConfig.Fuzziness, size, ) @@ -41,9 +44,10 @@ func (s FuzzySearcher) Search(ctx context.Context, accountId uuid.UUID, query st return matchedMetadataAll, nil } -func NewFuzzySearcher(m storage.MetadataStorageService) SearchPipelineStep { +func NewFuzzySearcher(m storage.MetadataStorageService, config *conf.Config) SearchPipelineStep { return &FuzzySearcher{ SearcherBase: SearcherBase{Name: "fuzzy_searcher", Index: 20}, metadata: m, + searchConfig: config.Search, } } diff --git a/storage-service/service/SearchPipeline30TextEmbeddingSearcher.go b/storage-service/service/SearchPipeline30TextEmbeddingSearcher.go index a4a5df3..5e80a4c 100644 --- a/storage-service/service/SearchPipeline30TextEmbeddingSearcher.go +++ b/storage-service/service/SearchPipeline30TextEmbeddingSearcher.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/google/uuid" + "github.com/weoses/memelo/storage-service/conf" "github.com/weoses/memelo/storage-service/entity" "github.com/weoses/memelo/storage-service/ocr" "github.com/weoses/memelo/storage-service/storage" @@ -13,8 +14,9 @@ import ( type TextEmbeddingSearcher struct { SearcherBase - metadata storage.MetadataStorageService - embedder ocr.EmbeddingExtractor + metadata storage.MetadataStorageService + embedder ocr.EmbeddingExtractor + searchConfig *conf.SearchConfig } func (s TextEmbeddingSearcher) Search(ctx context.Context, accountId uuid.UUID, query string, afterId *uuid.UUID, size *int) ([]*entity.ElasticImageMetaData, error) { @@ -25,14 +27,12 @@ func (s TextEmbeddingSearcher) Search(ctx 
context.Context, accountId uuid.UUID, return make([]*entity.ElasticImageMetaData, 0), nil } - embedding, err := s.embedder.GetTextEmbeddingV1(ctx, query) + embedding, err := s.embedder.GetTextEmbedding(ctx, query) if err != nil { - return nil, fmt.Errorf("searcher %s: GetTextEmbeddingV1 failed: %w", s.GetName(), err) + return nil, fmt.Errorf("searcher %s: GetTextEmbedding failed: %w", s.GetName(), err) } - count := 5 - - results, err := s.metadata.SearchByEmbeddingV1(ctx, accountId, *embedding, count, false) + results, err := s.metadata.SearchByEmbeddingV1(ctx, accountId, *embedding, size, s.searchConfig.SemanticTextSearchThreshold) if err != nil { return nil, fmt.Errorf("searcher %s: SearchByEmbeddingV1 failed: %w", s.GetName(), err) } @@ -40,10 +40,11 @@ func (s TextEmbeddingSearcher) Search(ctx context.Context, accountId uuid.UUID, return results, nil } -func NewTextEmbeddingSearcher(m storage.MetadataStorageService, e ocr.EmbeddingExtractor) SearchPipelineStep { +func NewTextEmbeddingSearcher(m storage.MetadataStorageService, e ocr.EmbeddingExtractor, cfg *conf.Config) SearchPipelineStep { return &TextEmbeddingSearcher{ SearcherBase: SearcherBase{Name: "text_embedding_searcher", Index: 30}, metadata: m, embedder: e, + searchConfig: cfg.Search, } } diff --git a/storage-service/service/SearchService.go b/storage-service/service/SearchService.go index d590b88..cc76470 100644 --- a/storage-service/service/SearchService.go +++ b/storage-service/service/SearchService.go @@ -24,7 +24,13 @@ type SearchServiceImpl struct { slogger *slog.Logger } -func (m *SearchServiceImpl) Search(ctx context.Context, accountId uuid.UUID, query string, afterId *uuid.UUID, size *int) (*SearchServiceResponse, error) { +func (m *SearchServiceImpl) Search( + ctx context.Context, + accountId uuid.UUID, + query string, + afterId *uuid.UUID, + size *int, +) (*SearchServiceResponse, error) { selectedSearcherName := "" selectedElasticData := make([]*entity.ElasticImageMetaData, 0) for _, 
searcher := range m.searchers { diff --git a/storage-service/service/TagMetadataExtractService.go b/storage-service/service/TagMetadataExtractService.go index 899e272..00deff6 100644 --- a/storage-service/service/TagMetadataExtractService.go +++ b/storage-service/service/TagMetadataExtractService.go @@ -12,7 +12,7 @@ import ( type TagMetadataPipelineContext struct { Name string Description string - Embedding *entity.ElasticEmbeddingV1 + Embedding *entity.EmbeddingItem } type TagMetadataExtractService interface { @@ -29,7 +29,7 @@ func (s *TagMetadataExtractServiceImpl) ProcessTagMetadata(ctx context.Context, s.slogger.InfoContext(ctx, "ProcessTagMetadata: computing embedding", "name", name) - embedding, err := s.embedder.GetTextEmbeddingV1(ctx, text) + embedding, err := s.embedder.GetTextEmbedding(ctx, text) if err != nil { return nil, fmt.Errorf("get text embedding failed: %w", err) } diff --git a/storage-service/service/TagService.go b/storage-service/service/TagService.go index 0207d2f..9a8f45e 100644 --- a/storage-service/service/TagService.go +++ b/storage-service/service/TagService.go @@ -40,7 +40,7 @@ func (s *TagServiceImpl) CreateTag(ctx context.Context, accountId uuid.UUID, tag metadata, err := s.tagMetadataExtractService.ProcessTagMetadata(ctx, tag, description) if err != nil { - return nil, fmt.Errorf("process tag metadata failed: %w", err) + return nil, fmt.Errorf("process tag metadataService failed: %w", err) } tagEntity := entity.ElasticTag{ @@ -48,7 +48,7 @@ func (s *TagServiceImpl) CreateTag(ctx context.Context, accountId uuid.UUID, tag AccountId: accountId, Tag: tag, Description: description, - EmbeddingV1: metadata.Embedding, + Embedding: metadata.Embedding, Created: time.Now().UnixMicro(), } diff --git a/storage-service/service/util.go b/storage-service/service/util.go new file mode 100644 index 0000000..91683f5 --- /dev/null +++ b/storage-service/service/util.go @@ -0,0 +1,33 @@ +package service + +import ( + 
"github.com/weoses/memelo/storage-service/entity" + "github.com/weoses/memelo/storage-service/storage" +) + +type SavedArtifactType string + +const SavedOriginal SavedArtifactType = "original" +const SavedThumb SavedArtifactType = "thumbnail" + +func storageMediaType(metadataType entity.MetadataType, savedItem SavedArtifactType) storage.MediaType { + if metadataType == entity.VideoMetadataType { + if savedItem == SavedOriginal { + return storage.VideoMp4Original + } + if savedItem == SavedThumb { + return storage.VideoMp4ThumbV1 + } + } + + if metadataType == entity.ImageMetadataType { + if savedItem == SavedOriginal { + return storage.ImageJpegOriginal + } + if savedItem == SavedThumb { + return storage.ImageJpegThumbV1 + } + } + + panic("Unsupported metadata type") +} diff --git a/storage-service/storage/ElasticTagStorage.go b/storage-service/storage/ElasticTagStorage.go index cd53974..0c64375 100644 --- a/storage-service/storage/ElasticTagStorage.go +++ b/storage-service/storage/ElasticTagStorage.go @@ -2,10 +2,12 @@ package storage import ( "context" + "embed" "encoding/json" "fmt" "log/slog" "sort" + "strconv" "time" elasticsearch8 "github.com/elastic/go-elasticsearch/v8" @@ -18,21 +20,28 @@ import ( "github.com/weoses/memelo/storage-service/entity" ) +//go:embed migrations/tags +var tagMigrationFS embed.FS + type ElasticTagStorage interface { SaveTag(ctx context.Context, tag entity.ElasticTag) error ListTag(ctx context.Context, accountId uuid.UUID, queryName *string, queryDescription *string) ([]entity.ElasticTag, error) DeleteTag(ctx context.Context, accountId uuid.UUID, id uuid.UUID) error DeleteAllTags(ctx context.Context, accountId uuid.UUID) error - SearchTagsByEmbedding(ctx context.Context, accountId uuid.UUID, tag entity.ElasticEmbeddingV1, percentileMatch float32, threshold float32) ([]entity.ElasticTag, error) + SearchTagsByEmbedding(ctx context.Context, accountId uuid.UUID, tag entity.EmbeddingItem, percentileMatch float32, threshold float32) 
([]entity.ElasticTag, error) } type ElasticTagStorageImpl struct { + *ElasticMigrator + client *elasticsearch8.TypedClient index string slogger *slog.Logger } -func NewElasticTagStorage(config *conf.ElasticTagConfig) (ElasticTagStorage, error) { +func NewElasticTagStorage(cfg *conf.Config) (ElasticTagStorage, error) { + config := cfg.TagDb + configEmbeddings := cfg.Embeddings es8, _ := elasticsearch8.NewTypedClient(*config.Elastic) logger := slog.With("service", "ElasticTagStorage") indexExists, err := es8.Indices. @@ -54,30 +63,25 @@ func NewElasticTagStorage(config *conf.ElasticTagConfig) (ElasticTagStorage, err "error", err) } - indexTypeMapping := types.NewTypeMapping() - indexTypeMapping.Properties["Created"] = types.NewLongNumberProperty() - indexTypeMapping.Properties["Updated"] = types.NewLongNumberProperty() - indexTypeMapping.Properties["AccountId"] = types.NewKeywordProperty() - indexTypeMapping.Properties["Tag"] = types.NewKeywordProperty() - - denseProp := types.NewDenseVectorProperty() - denseProp.Index = helper.Addr(true) - denseProp.Dims = helper.Addr(config.EmbeddingV1Dimensions) - denseProp.Similarity = helper.Addr("cosine") - indexTypeMapping.Properties["EmbeddingV1.Data"] = denseProp - - responseMapping, err := es8.Indices.PutMapping(config.Index). - Properties(indexTypeMapping.Properties). 
- Do(context.Background()) - - logger.InfoContext(context.Background(), "Elastic create mapping index", - "response", render.Render(responseMapping), - "error", err) + migrator, err := NewElasticMigrator( + config.Elastic, + tagMigrationFS, "migrations/tags", + config.Index, MigrationHistoryIndex, + map[string]string{ + "index": config.Index, + "dims": strconv.Itoa(configEmbeddings.Dimensions), + }, + logger, + ) + if err != nil { + return nil, fmt.Errorf("create tag migrator failed: %w", err) + } return &ElasticTagStorageImpl{ - client: es8, - index: config.Index, - slogger: logger, + ElasticMigrator: migrator, + client: es8, + index: config.Index, + slogger: logger, }, nil } @@ -190,9 +194,9 @@ func (s *ElasticTagStorageImpl) DeleteAllTags(ctx context.Context, accountId uui return nil } -func (s *ElasticTagStorageImpl) SearchTagsByEmbedding(ctx context.Context, accountId uuid.UUID, tag entity.ElasticEmbeddingV1, percentileMatch float32, threshold float32) ([]entity.ElasticTag, error) { +func (s *ElasticTagStorageImpl) SearchTagsByEmbedding(ctx context.Context, accountId uuid.UUID, tag entity.EmbeddingItem, percentileMatch float32, threshold float32) ([]entity.ElasticTag, error) { script := types.NewScript() - script.Source = helper.Addr("cosineSimilarity(params.queryVector, 'EmbeddingV1.Data') + 1.0") + script.Source = helper.Addr("cosineSimilarity(params.queryVector, 'Embedding.Data') + 1.0") items, err := json.Marshal(tag.Data) if err != nil { diff --git a/storage-service/storage/ImageStorageService.go b/storage-service/storage/ImageStorageService.go deleted file mode 100644 index 82c62c3..0000000 --- a/storage-service/storage/ImageStorageService.go +++ /dev/null @@ -1,181 +0,0 @@ -package storage - -import ( - "bytes" - "context" - "errors" - "fmt" - "io" - "log/slog" - "net/url" - "time" - - "github.com/google/uuid" - "github.com/minio/minio-go/v7" - "github.com/minio/minio-go/v7/pkg/credentials" - "github.com/weoses/memelo/storage-service/conf" -) - -type 
ImageStorageService interface { - Save(ctx context.Context, id uuid.UUID, imgRaw []byte, imgThumbnail []byte) error - GetImageBytes(ctx context.Context, id uuid.UUID) ([]byte, error) - GetImageThumbBytes(ctx context.Context, id uuid.UUID) ([]byte, error) - - GetUrl(ctx context.Context, id uuid.UUID) (string, error) - GetUrlThumb(ctx context.Context, id uuid.UUID) (string, error) - DeleteImage(ctx context.Context, id uuid.UUID) error -} - -type MinioFileStorageServiceImpl struct { - client minio.Client - bucketName string - slogger *slog.Logger -} - -func (m *MinioFileStorageServiceImpl) DeleteImage(ctx context.Context, id uuid.UUID) error { - err1 := m.client.RemoveObject(ctx, m.bucketName, getObjectNameV1(id, false), minio.RemoveObjectOptions{}) - err2 := m.client.RemoveObject(ctx, m.bucketName, getObjectNameV1(id, true), minio.RemoveObjectOptions{}) - - return errors.Join(err1, err2) -} - -// GetImageBytes implements ImageStorageService. -func (m *MinioFileStorageServiceImpl) GetImageBytes(ctx context.Context, id uuid.UUID) ([]byte, error) { - return m.getObjectBytes(ctx, getObjectNameV1(id, false)) -} - -// GetImageThumbBytes implements ImageStorageService. -func (m *MinioFileStorageServiceImpl) GetImageThumbBytes(ctx context.Context, id uuid.UUID) ([]byte, error) { - return m.getObjectBytes(ctx, getObjectNameV1(id, true)) -} - -func (m *MinioFileStorageServiceImpl) getObjectBytes(ctx context.Context, objectName string) ([]byte, error) { - obj, err := m.client.GetObject(ctx, m.bucketName, objectName, minio.GetObjectOptions{}) - if err != nil { - return nil, err - } - defer func(obj *minio.Object) { - err2 := obj.Close() - if err2 != nil { - m.slogger.Error("failed to close minio object", "error", err2) - } - }(obj) - return io.ReadAll(obj) -} - -// GetUrl implements ImageStorageService. 
-func (m *MinioFileStorageServiceImpl) GetUrl(ctx context.Context, id uuid.UUID) (string, error) { - u, err := m.client.PresignedGetObject( - ctx, - m.bucketName, - getObjectNameV1(id, false), - time.Hour*5, - url.Values{}, - ) - - if err != nil { - return "", err - } - return u.String(), err -} - -// GetUrlThumb implements ImageStorageService. -func (m *MinioFileStorageServiceImpl) GetUrlThumb(ctx context.Context, id uuid.UUID) (string, error) { - u, err := m.client.PresignedGetObject( - ctx, - m.bucketName, - getObjectNameV1(id, true), - time.Hour*5, - url.Values{}, - ) - - if err != nil { - return "", err - } - - return u.String(), err -} - -// Save implements ImageStorageService. -func (m *MinioFileStorageServiceImpl) Save(ctx context.Context, id uuid.UUID, image []byte, imgThumbnail []byte) error { - _, err := m.client.PutObject( - ctx, - m.bucketName, - getObjectNameV1(id, false), - bytes.NewReader(image), - -1, - minio.PutObjectOptions{ - ContentType: "image/jpeg", - }, - ) - - if err != nil { - return fmt.Errorf("PutObject failed for source doc: %w", err) - } - - _, err = m.client.PutObject( - ctx, - m.bucketName, - getObjectNameV1(id, true), - bytes.NewReader(imgThumbnail), - -1, - minio.PutObjectOptions{ - ContentType: "image/jpeg", - }, - ) - - if err != nil { - return fmt.Errorf("PutObject failed for thumb doc: %w", err) - } - - return err -} - -func NewMinioFileStorageServiceImpl(config *conf.ImageStorageConfig) (ImageStorageService, error) { - - minioClient, err := minio.New(config.S3.Endpoint, &minio.Options{ - Creds: credentials.NewStaticV4( - config.S3.AccessKey, - config.S3.SecretKey, - ""), - Secure: config.S3.Secure, - }) - - if err != nil { - return nil, fmt.Errorf("failed to create minio client: %w", err) - } - - exists, err := minioClient.BucketExists(context.Background(), config.S3.Bucket) - - if err != nil { - return nil, fmt.Errorf("failed to check if bucket exists: %w", err) - } - - if !exists { - err = 
minioClient.MakeBucket(context.Background(), config.S3.Bucket, minio.MakeBucketOptions{}) - if err != nil { - return nil, fmt.Errorf("failed to create minio bucket: %w", err) - } - } - return &MinioFileStorageServiceImpl{ - bucketName: config.S3.Bucket, - client: *minioClient, - slogger: slog.With("service", "MinioFileStorageServiceImpl"), - }, - nil -} - -func getObjectNameV1(id uuid.UUID, thumb bool) string { - var imgName string - if !thumb { - imgName = "image.jpg" - } else { - imgName = "thumb-1.jpg" - } - - return id.String() + "/" + imgName -} - -func NewImageStorageService(config *conf.ImageStorageConfig) (ImageStorageService, error) { - return NewMinioFileStorageServiceImpl(config) -} diff --git a/storage-service/storage/MediaStorageService.go b/storage-service/storage/MediaStorageService.go new file mode 100644 index 0000000..f14a4ce --- /dev/null +++ b/storage-service/storage/MediaStorageService.go @@ -0,0 +1,80 @@ +package storage + +import ( + "context" + + "github.com/google/uuid" + commonstorage "github.com/weoses/memelo/common/storage" + "github.com/weoses/memelo/common/temp" + "github.com/weoses/memelo/storage-service/conf" +) + +type MediaType string +type MediaStorageServiceS3Adapter commonstorage.S3OperationsAdapter + +const ( + ImageJpegOriginal MediaType = "image-jpeg-original" + ImageJpegThumbV1 MediaType = "image-jpeg-thumbv1" + VideoMp4Original MediaType = "video-mp4-original" + VideoMp4ThumbV1 MediaType = "video-mp4-thumb" +) + +func objectName(id uuid.UUID, t MediaType) string { + switch t { + case ImageJpegOriginal: + return id.String() + "/image.jpg" + case ImageJpegThumbV1: + return id.String() + "/thumb-1.jpg" + case VideoMp4Original: + return id.String() + "/video.mp4" + case VideoMp4ThumbV1: + return id.String() + "/thumb-1.mp4" + default: + return id.String() + "/" + string(t) + } +} + +func contentType(t MediaType) string { + switch t { + case ImageJpegOriginal, ImageJpegThumbV1: + return "image/jpeg" + case VideoMp4Original, 
VideoMp4ThumbV1: + return "video/mp4" + default: + return "application/octet-stream" + } +} + +type MediaStorageService interface { + Save(ctx context.Context, id uuid.UUID, mediaType MediaType, data temp.Data) error + Read(ctx context.Context, id uuid.UUID, mediaType MediaType) (temp.Data, error) + GetUrl(ctx context.Context, id uuid.UUID, mediaType MediaType) (string, error) + Delete(ctx context.Context, id uuid.UUID, mediaType MediaType) error +} + +type MediaStorageServiceImpl struct { + storage MediaStorageServiceS3Adapter +} + +func (m *MediaStorageServiceImpl) Save(ctx context.Context, id uuid.UUID, mediaType MediaType, data temp.Data) error { + return m.storage.Save(ctx, objectName(id, mediaType), data, commonstorage.WithContentType(contentType(mediaType))) +} + +func (m *MediaStorageServiceImpl) Read(ctx context.Context, id uuid.UUID, mediaType MediaType) (temp.Data, error) { + return m.storage.Read(ctx, objectName(id, mediaType)) +} + +func (m *MediaStorageServiceImpl) GetUrl(ctx context.Context, id uuid.UUID, mediaType MediaType) (string, error) { + return m.storage.GetPresignedUrl(ctx, objectName(id, mediaType)) +} + +func (m *MediaStorageServiceImpl) Delete(ctx context.Context, id uuid.UUID, mediaType MediaType) error { + return m.storage.Delete(ctx, objectName(id, mediaType)) +} + +func NewMediaStorageServiceS3Adapter(cfg *conf.Config) (MediaStorageServiceS3Adapter, error) { + return commonstorage.NewS3OperationsAdapter(cfg.MediaStorage) +} +func NewMediaStorageService(imageStorage MediaStorageServiceS3Adapter) (MediaStorageService, error) { + return &MediaStorageServiceImpl{storage: imageStorage}, nil +} diff --git a/storage-service/storage/MetadataStorageService.go b/storage-service/storage/MetadataStorageService.go index 055b6ce..94d51f4 100644 --- a/storage-service/storage/MetadataStorageService.go +++ b/storage-service/storage/MetadataStorageService.go @@ -3,9 +3,11 @@ package storage import ( "bytes" "context" + "embed" "encoding/json" "fmt" 
"log/slog" + "strconv" "time" elasticsearch8 "github.com/elastic/go-elasticsearch/v8" @@ -22,6 +24,9 @@ import ( "github.com/weoses/memelo/storage-service/entity" ) +//go:embed migrations/metadata +var metadataMigrationFS embed.FS + const MaxFuzzy = 10 type MetadataStorageService interface { @@ -51,23 +56,25 @@ type MetadataStorageService interface { SearchFuzzy(ctx context.Context, accountId uuid.UUID, query string, + fuzziness string, pageSize *int, ) ([]*entity.ElasticImageMetaData, error) GetById(ctx context.Context, accountId uuid.UUID, id uuid.UUID) (*entity.ElasticImageMetaData, error) GetByHash(ctx context.Context, accountId uuid.UUID, hash string, count *int) ([]*entity.ElasticImageMetaData, error) - SearchByEmbeddingV1(ctx context.Context, accountId uuid.UUID, img entity.ElasticEmbeddingV1, count int, filterSimilarity bool) ([]*entity.ElasticImageMetaData, error) + SearchByEmbeddingV1(ctx context.Context, accountId uuid.UUID, img entity.EmbeddingItem, count *int, threshold float64) ([]*entity.ElasticImageMetaData, error) DeleteById(ctx context.Context, accountId uuid.UUID, id uuid.UUID) error DeleteByAccountId(ctx context.Context, accountId uuid.UUID) error } type ElasticMetadataStorageServiceImpl struct { - client *elasticsearch8.TypedClient - embeddingMatchTreshold float64 - indexName string - validate *validator.Validate - slogger *slog.Logger + *ElasticMigrator + + client *elasticsearch8.TypedClient + indexName string + validate *validator.Validate + slogger *slog.Logger } func (e *ElasticMetadataStorageServiceImpl) SearchByAccountId( @@ -122,8 +129,8 @@ func (e *ElasticMetadataStorageServiceImpl) SearchSimple(ctx context.Context, ac return results, nil } -func (e *ElasticMetadataStorageServiceImpl) SearchFuzzy(ctx context.Context, accountId uuid.UUID, query string, pageSize *int) ([]*entity.ElasticImageMetaData, error) { - result, err := e.searchFuzzy(ctx, accountId, query, pageSize) +func (e *ElasticMetadataStorageServiceImpl) SearchFuzzy(ctx 
context.Context, accountId uuid.UUID, query string, fuzziness string, pageSize *int) ([]*entity.ElasticImageMetaData, error) { + result, err := e.searchFuzzy(ctx, accountId, query, fuzziness, pageSize) if err != nil { return nil, fmt.Errorf("search_pipeline all failed: %w", err) } @@ -253,9 +260,9 @@ func (e *ElasticMetadataStorageServiceImpl) GetByHash( func (e *ElasticMetadataStorageServiceImpl) SearchByEmbeddingV1( ctx context.Context, accountId uuid.UUID, - img entity.ElasticEmbeddingV1, - count int, - filterSimilarity bool, + img entity.EmbeddingItem, + count *int, + semanticTreshold float64, ) ([]*entity.ElasticImageMetaData, error) { e.slogger.InfoContext(ctx, "Search embedding start", "pageSize", count, @@ -272,7 +279,7 @@ func (e *ElasticMetadataStorageServiceImpl) SearchByEmbeddingV1( resultsEntity := make([]*entity.ElasticImageMetaData, 0) for index := range resultsSize { - if filterSimilarity && float64(*(result.Hits.Hits[index].Score_)) < e.embeddingMatchTreshold { + if float64(*(result.Hits.Hits[index].Score_)) < semanticTreshold { continue } @@ -328,16 +335,16 @@ func (e *ElasticMetadataStorageServiceImpl) Save(ctx context.Context, file *enti } func (e *ElasticMetadataStorageServiceImpl) embeddingV1KnnAllQuery( - img entity.ElasticEmbeddingV1, + img entity.EmbeddingItem, accountIdQuery *types.Query, - count int, + count *int, ) *types.KnnSearch { query := types.NewKnnSearch() - query.Field = "EmbeddingV1.Data" - query.QueryVector = *img.Data + query.Field = "EmbeddingList.Data" + query.QueryVector = img.Data query.NumCandidates = helper.Addr(1000) - query.K = helper.Addr(count) + query.K = count query.Filter = []types.Query{*accountIdQuery} return query } @@ -395,12 +402,13 @@ func (e *ElasticMetadataStorageServiceImpl) stringAndAccountQuery( func (e *ElasticMetadataStorageServiceImpl) fuzzyStringAndAccountQuery( accountId uuid.UUID, queryString string, + fuzziness string, ) *types.Query { q1 := types.NewQuery() q1.Match = 
map[string]types.MatchQuery{ "Result": { Query: queryString, - Fuzziness: "AUTO", + Fuzziness: fuzziness, Operator: &operator.And, }, } @@ -482,13 +490,18 @@ func (e *ElasticMetadataStorageServiceImpl) runSearchQuery( return resp, nil } -func (e *ElasticMetadataStorageServiceImpl) searchFuzzy(ctx context.Context, accountId uuid.UUID, queryString string, pageSize *int) (*search.Response, error) { +func (e *ElasticMetadataStorageServiceImpl) searchFuzzy( + ctx context.Context, + accountId uuid.UUID, + queryString string, + fuzziness string, + pageSize *int) (*search.Response, error) { e.slogger.InfoContext(ctx, "Search FUZZY start", "query", queryString, "pageSize", pageSize, ) - queryFuzzy := e.fuzzyStringAndAccountQuery(accountId, queryString) + queryFuzzy := e.fuzzyStringAndAccountQuery(accountId, queryString, fuzziness) resultFuzzy, err := e.runSearchQuery(ctx, queryFuzzy, nil, pageSize) if err != nil { return nil, fmt.Errorf("failed to search_pipeline FUZZY query : %w", err) @@ -603,9 +616,11 @@ func unmarshalSourceDocument(result json.RawMessage) (*entity.ElasticImageMetaDa } func NewElasticMetadataStorage( - config *conf.MetadataStorageConfig, + cfg *conf.Config, validate *validator.Validate, ) (MetadataStorageService, error) { + config := cfg.MetadataDb + configEmbeddings := cfg.Embeddings es8, _ := elasticsearch8.NewTypedClient(*config.Elastic) logger := slog.With("service", "ElasticMetadataStorage") indexExists, err := es8.Indices. 
@@ -627,37 +642,29 @@ func NewElasticMetadataStorage( "error", err) } - indexTypeMapping := types.NewTypeMapping() - indexTypeMapping.Properties["Created"] = types.NewLongNumberProperty() - indexTypeMapping.Properties["Updated"] = types.NewLongNumberProperty() - indexTypeMapping.Properties["AccountId"] = types.NewKeywordProperty() - indexTypeMapping.Properties["CalcHash"] = types.NewKeywordProperty() - indexTypeMapping.Properties["ImageId"] = types.NewKeywordProperty() - indexTypeMapping.Properties["Tags"] = types.NewKeywordProperty() - - denseProp := types.NewDenseVectorProperty() - denseProp.Index = helper.Addr(true) - denseProp.Dims = helper.Addr(config.EmbeddingV1Dimensions) - denseProp.Similarity = helper.Addr("cosine") - indexTypeMapping.Properties["EmbeddingV1.Data"] = denseProp - - responseMapping, err := es8.Indices.PutMapping(config.Index). - Properties(indexTypeMapping.Properties). - Do(context.Background()) - - logger.InfoContext(context.Background(), "Elastic create mapping index", - "response", render.Render(responseMapping), - "error", err) + migrator, err := NewElasticMigrator( + config.Elastic, + metadataMigrationFS, "migrations/metadata", + config.Index, MigrationHistoryIndex, + map[string]string{ + "index": config.Index, + "dims": strconv.Itoa(configEmbeddings.Dimensions), + }, + logger, + ) + if err != nil { + return nil, fmt.Errorf("create metadata migrator failed: %w", err) + } return &ElasticMetadataStorageServiceImpl{ - client: es8, - embeddingMatchTreshold: config.EmbeddingMatchTreshold, - indexName: config.Index, - validate: validate, - slogger: logger, + ElasticMigrator: migrator, + client: es8, + indexName: config.Index, + validate: validate, + slogger: logger, }, nil } -func NewMetadataStorageService(config *conf.MetadataStorageConfig, validate *validator.Validate) (MetadataStorageService, error) { - return NewElasticMetadataStorage(config, validate) +func NewMetadataStorageService(cfg *conf.Config, validate *validator.Validate) 
(MetadataStorageService, error) { + return NewElasticMetadataStorage(cfg, validate) } diff --git a/storage-service/storage/TmpDataService.go b/storage-service/storage/TmpDataService.go new file mode 100644 index 0000000..f2d0da7 --- /dev/null +++ b/storage-service/storage/TmpDataService.go @@ -0,0 +1,17 @@ +package storage + +import ( + commonservice "github.com/weoses/memelo/common/service" + commonstorage "github.com/weoses/memelo/common/storage" + "github.com/weoses/memelo/storage-service/conf" +) + +type TmpDataServiceS3OperationsAdapter commonstorage.S3OperationsAdapter + +func NewTmpDataServiceS3Adapter(cfg *conf.Config) (TmpDataServiceS3OperationsAdapter, error) { + return commonstorage.NewS3OperationsAdapter(cfg.TempStorage) +} + +func NewTmpDataService(adapter TmpDataServiceS3OperationsAdapter) (commonservice.TmpDataService, error) { + return commonservice.NewTmpDataS3Service(adapter) +} diff --git a/storage-service/storage/elastic_migrator.go b/storage-service/storage/elastic_migrator.go new file mode 100644 index 0000000..6863937 --- /dev/null +++ b/storage-service/storage/elastic_migrator.go @@ -0,0 +1,223 @@ +package storage + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + iofs "io/fs" + "log/slog" + "net/http" + "strings" + + "embed" + + elasticsearch8 "github.com/elastic/go-elasticsearch/v8" + "github.com/elastic/go-elasticsearch/v8/typedapi/types" + "github.com/elastic/go-elasticsearch/v8/typedapi/types/enums/refresh" + "github.com/weoses/memelo/common/helper" +) + +const MigrationHistoryIndex = "elastic-migrations-history" + +type ElasticMigrating interface { + Migrate(ctx context.Context) error +} + +type migrationVersion struct { + IndexName string `json:"IndexName"` + LastMigration string `json:"LastMigration"` +} + +type migration struct { + Method string `json:"method"` + URL string `json:"url"` + Body json.RawMessage `json:"body"` +} + +type ElasticMigrator struct { + typedClient *elasticsearch8.TypedClient + 
rawClient *elasticsearch8.Client + migrationFS embed.FS + fsRoot string + index string + historyIndex string + vars map[string]string + slogger *slog.Logger +} + +func NewElasticMigrator( + esConfig *elasticsearch8.Config, + fs embed.FS, + fsRoot string, + index string, + historyIndex string, + vars map[string]string, + logger *slog.Logger, +) (*ElasticMigrator, error) { + typedClient, err := elasticsearch8.NewTypedClient(*esConfig) + if err != nil { + return nil, fmt.Errorf("create typed elastic client failed: %w", err) + } + rawClient, err := elasticsearch8.NewClient(*esConfig) + if err != nil { + return nil, fmt.Errorf("create raw elastic client failed: %w", err) + } + + return &ElasticMigrator{ + typedClient: typedClient, + rawClient: rawClient, + migrationFS: fs, + fsRoot: fsRoot, + index: index, + historyIndex: historyIndex, + vars: vars, + slogger: logger, + }, nil +} + +func (m *ElasticMigrator) Migrate(ctx context.Context) error { + if err := m.ensureHistoryIndex(ctx); err != nil { + return fmt.Errorf("ensure history index failed: %w", err) + } + + entries, err := iofs.ReadDir(m.migrationFS, m.fsRoot) + if err != nil { + return fmt.Errorf("read migration dir %s failed: %w", m.fsRoot, err) + } + + lastMigration, err := m.getLastMigration(ctx) + if err != nil { + return fmt.Errorf("get last migration failed: %w", err) + } + + m.slogger.InfoContext(ctx, "Migrate: last applied migration", + "index", m.index, + "lastMigration", lastMigration, + ) + + for _, entry := range entries { + if entry.IsDir() { + continue + } + name := entry.Name() + if name <= lastMigration { + continue + } + + err = m.applyEntry(ctx, name) + if err != nil { + return fmt.Errorf("apply migration entry failed: %w", err) + } + } + + return nil +} + +func (m *ElasticMigrator) applyEntry(ctx context.Context, name string) error { + m.slogger.InfoContext(ctx, "Migrate: applying migration", + "index", m.index, + "migration", name, + ) + + data, err := m.migrationFS.ReadFile(m.fsRoot + "/" + 
name) + if err != nil { + return fmt.Errorf("read migration file %s failed: %w", name, err) + } + + content := string(data) + for k, v := range m.vars { + content = strings.ReplaceAll(content, "{"+k+"}", v) + } + + var mig migration + if err := json.Unmarshal([]byte(content), &mig); err != nil { + return fmt.Errorf("unmarshal migration %s failed: %w", name, err) + } + + req, err := http.NewRequestWithContext(ctx, mig.Method, mig.URL, bytes.NewReader(mig.Body)) + if err != nil { + return fmt.Errorf("create request for migration %s failed: %w", name, err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := m.rawClient.Perform(req) + if err != nil { + return fmt.Errorf("execute migration %s failed: %w", name, err) + } + defer helper.QuietClose(resp.Body, m.slogger) + responseBodyBytes, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("read elastic response failed: %w", err) + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("migration %s returned non-2xx status: %d with body %s", name, resp.StatusCode, string(responseBodyBytes)) + } + + if err := m.saveLastMigration(ctx, name); err != nil { + return fmt.Errorf("save last migration %s failed: %w", name, err) + } + + m.slogger.InfoContext(ctx, "Migrate: migration applied", + "index", m.index, + "migration", name, + ) + return nil +} + +func (m *ElasticMigrator) ensureHistoryIndex(ctx context.Context) error { + exists, err := m.typedClient.Indices.Exists(m.historyIndex).Do(ctx) + if err != nil { + return err + } + if !exists { + _, err = m.typedClient.Indices.Create(m.historyIndex).Do(ctx) + return err + } + return nil +} + +func (m *ElasticMigrator) getLastMigration(ctx context.Context) (string, error) { + resp, err := m.typedClient.Get(m.historyIndex, m.index).Do(ctx) + if err != nil { + var esErr *types.ElasticsearchError + if errors.As(err, &esErr) && esErr.Status == 404 { + return "", nil + } + return "", err + } + if !resp.Found { + return "", 
nil + } + var ver migrationVersion + if err := json.Unmarshal(resp.Source_, &ver); err != nil { + return "", err + } + return ver.LastMigration, nil +} + +func (m *ElasticMigrator) saveLastMigration(ctx context.Context, name string) error { + ver := migrationVersion{ + IndexName: m.index, + LastMigration: name, + } + _, err := m.typedClient.Index(m.historyIndex). + Id(m.index). + Document(ver). + Refresh(refresh.True). + Do(ctx) + return err +} + +func RunMigrations(migrators []ElasticMigrating) error { + ctx := context.Background() + for _, mg := range migrators { + if err := mg.Migrate(ctx); err != nil { + return err + } + } + return nil +} diff --git a/storage-service/storage/migrations/metadata/0001_setup_mapping.json b/storage-service/storage/migrations/metadata/0001_setup_mapping.json new file mode 100644 index 0000000..caee2bb --- /dev/null +++ b/storage-service/storage/migrations/metadata/0001_setup_mapping.json @@ -0,0 +1,32 @@ +{ + "method": "PUT", + "url": "/{index}/_mapping", + "body": { + "properties": { + "Created": { "type": "long" }, + "Updated": { "type": "long" }, + "AccountId": { "type": "keyword" }, + "CalcHash": { "type": "keyword" }, + "ImageId": { "type": "keyword" }, + "Tags": { "type": "keyword" }, + "EmbeddingV1.Data": { + "type": "dense_vector", + "dims": {dims}, + "index": true, + "similarity": "cosine" + }, + "EmbeddingList": { + "type": "nested", + "properties": { + "Data": { + "type": "dense_vector", + "dims": {dims}, + "index": true, + "similarity": "cosine" + } + } + } + } + }, + "timeout": "60s" +} diff --git a/storage-service/storage/migrations/metadata/0002_copy_embeddingv1.json b/storage-service/storage/migrations/metadata/0002_copy_embeddingv1.json new file mode 100644 index 0000000..355d798 --- /dev/null +++ b/storage-service/storage/migrations/metadata/0002_copy_embeddingv1.json @@ -0,0 +1,16 @@ +{ + "method": "POST", + "url": "/{index}/_update_by_query", + "body": { + "script": { + "lang": "painless", + "source": "if 
(ctx._source.EmbeddingV1 != null && ctx._source.EmbeddingV1.Data != null) { if (ctx._source.EmbeddingList == null) { ctx._source.EmbeddingList = new ArrayList(); } boolean alreadyMigrated = false; for (def item : ctx._source.EmbeddingList) { if ('image'.equals(item.get('Type'))) { alreadyMigrated = true; break; } } if (!alreadyMigrated) { Map newItem = new HashMap(); newItem.put('Data', ctx._source.EmbeddingV1.Data); newItem.put('Model', ctx._source.EmbeddingV1.Model); newItem.put('Type', 'image'); newItem.put('TimeStart', 0); newItem.put('TimeEnd', 0); ctx._source.EmbeddingList.add(newItem); } }" + }, + "query": { + "exists": { + "field": "EmbeddingV1.Data" + } + } + }, + "timeout": "160s" +} diff --git a/storage-service/storage/migrations/metadata/0003_add_tags_type_mapping.json b/storage-service/storage/migrations/metadata/0003_add_tags_type_mapping.json new file mode 100644 index 0000000..c16b267 --- /dev/null +++ b/storage-service/storage/migrations/metadata/0003_add_tags_type_mapping.json @@ -0,0 +1,11 @@ +{ + "method": "PUT", + "url": "/{index}/_mapping", + "body": { + "properties": { + "Tags": { "type": "keyword" }, + "Type": { "type": "keyword" } + } + }, + "timeout": "160s" +} diff --git a/storage-service/storage/migrations/metadata/0004_clear_embeddingv1.json b/storage-service/storage/migrations/metadata/0004_clear_embeddingv1.json new file mode 100644 index 0000000..66da5b5 --- /dev/null +++ b/storage-service/storage/migrations/metadata/0004_clear_embeddingv1.json @@ -0,0 +1,16 @@ +{ + "method": "POST", + "url": "/{index}/_update_by_query", + "body": { + "script": { + "lang": "painless", + "source": "ctx._source.remove('EmbeddingV1')" + }, + "query": { + "exists": { + "field": "EmbeddingV1.Data" + } + } + }, + "timeout": "160s" +} diff --git a/storage-service/storage/migrations/metadata/0005_set_type_image.json b/storage-service/storage/migrations/metadata/0005_set_type_image.json new file mode 100644 index 0000000..8a72531 --- /dev/null +++ 
b/storage-service/storage/migrations/metadata/0005_set_type_image.json @@ -0,0 +1,20 @@ +{ + "method": "POST", + "url": "/{index}/_update_by_query", + "body": { + "script": { + "lang": "painless", + "source": "ctx._source.Type = 'image'" + }, + "query": { + "bool": { + "must_not": { + "exists": { + "field": "Type" + } + } + } + } + }, + "timeout": "160s" +} diff --git a/storage-service/storage/migrations/tags/0001_setup_mapping.json b/storage-service/storage/migrations/tags/0001_setup_mapping.json new file mode 100644 index 0000000..1bbec29 --- /dev/null +++ b/storage-service/storage/migrations/tags/0001_setup_mapping.json @@ -0,0 +1,29 @@ +{ + "method": "PUT", + "url": "/{index}/_mapping", + "body": { + "properties": { + "Created": { "type": "long" }, + "Updated": { "type": "long" }, + "AccountId": { "type": "keyword" }, + "Tag": { "type": "keyword" }, + "EmbeddingV1.Data": { + "type": "dense_vector", + "dims": {dims}, + "index": true, + "similarity": "cosine" + }, + "Embedding": { + "type": "nested", + "properties": { + "Data": { + "type": "dense_vector", + "dims": {dims}, + "index": true, + "similarity": "cosine" + } + } + } + } + } +} diff --git a/storage-service/storage/migrations/tags/0002_copy_embeddingv1.json b/storage-service/storage/migrations/tags/0002_copy_embeddingv1.json new file mode 100644 index 0000000..2ced7ac --- /dev/null +++ b/storage-service/storage/migrations/tags/0002_copy_embeddingv1.json @@ -0,0 +1,15 @@ +{ + "method": "POST", + "url": "/{index}/_update_by_query", + "body": { + "script": { + "lang": "painless", + "source": "if (ctx._source.EmbeddingV1 != null && ctx._source.EmbeddingV1.Data != null && ctx._source.Embedding == null) { Map newEmb = new HashMap(); newEmb.put('Data', ctx._source.EmbeddingV1.Data); newEmb.put('Model', ctx._source.EmbeddingV1.Model); newEmb.put('Type', 'text'); newEmb.put('TimeStart', 0); newEmb.put('TimeEnd', 0); ctx._source.Embedding = newEmb; }" + }, + "query": { + "exists": { + "field": "EmbeddingV1.Data" + 
} + } + } +} diff --git a/storage-service/storage/migrations/tags/0003_clear_embeddingv1.json b/storage-service/storage/migrations/tags/0003_clear_embeddingv1.json new file mode 100644 index 0000000..53df9ed --- /dev/null +++ b/storage-service/storage/migrations/tags/0003_clear_embeddingv1.json @@ -0,0 +1,15 @@ +{ + "method": "POST", + "url": "/{index}/_update_by_query", + "body": { + "script": { + "lang": "painless", + "source": "ctx._source.remove('EmbeddingV1')" + }, + "query": { + "exists": { + "field": "EmbeddingV1.Data" + } + } + } +} diff --git a/telegram-service/.env.example b/telegram-service/.env.example new file mode 100644 index 0000000..03d5e4b --- /dev/null +++ b/telegram-service/.env.example @@ -0,0 +1,3 @@ +TELEGRAM_TOKEN= +POSTGRES_DSN= +WEBHOOK_EXTERNALURL= \ No newline at end of file diff --git a/telegram-service/conf/Config.go b/telegram-service/conf/Config.go index 8c8d83b..92c4df9 100644 --- a/telegram-service/conf/Config.go +++ b/telegram-service/conf/Config.go @@ -1,7 +1,10 @@ package conf import ( + "fmt" + "github.com/spf13/viper" + commonconfig "github.com/weoses/memelo/common/config" ) type TelegramConfig struct { @@ -25,32 +28,26 @@ type UserAccountConfig struct { StaticUuid string } -func NewTelegramConfig() (*TelegramConfig, error) { - conf := &TelegramConfig{} - err := viper.UnmarshalKey("telegram", conf) - return conf, err -} - -func NewPostgresConfig() (*PostgresConfig, error) { - conf := &PostgresConfig{} - err := viper.UnmarshalKey("postgres", conf) - return conf, err -} - -func NewInlineConfig() (*InlineConfig, error) { - conf := &InlineConfig{} - err := viper.UnmarshalKey("inline", conf) - return conf, err +type WebhookConfig struct { + ExternalUrl string } -func NewStorageConfig() (*StorageServiceConfig, error) { - conf := &StorageServiceConfig{} - err := viper.UnmarshalKey("storage-service", conf) - return conf, err +type Config struct { + Server *commonconfig.ServerConfig `mapstructure:"server"` + Log 
*commonconfig.LoggingConfig `mapstructure:"log"` + Webhook *WebhookConfig `mapstructure:"webhook"` + Telegram *TelegramConfig `mapstructure:"telegram"` + Postgres *PostgresConfig `mapstructure:"postgres"` + Inline *InlineConfig `mapstructure:"inline"` + StorageService *StorageServiceConfig `mapstructure:"storage-service"` + UserAccount *UserAccountConfig `mapstructure:"user-account"` + TempStorage *commonconfig.MediaStorageConfig `mapstructure:"temp-storage"` } -func NewUserAccountConfig() (*UserAccountConfig, error) { - conf := &UserAccountConfig{} - err := viper.UnmarshalKey("user-account", conf) - return conf, err +func NewConfig() (*Config, error) { + cfg := &Config{} + if err := viper.Unmarshal(cfg); err != nil { + return nil, fmt.Errorf("error reading config: %w", err) + } + return cfg, nil } diff --git a/telegram-service/config.yaml b/telegram-service/config.yaml index fe05162..39ca814 100644 --- a/telegram-service/config.yaml +++ b/telegram-service/config.yaml @@ -1,3 +1,9 @@ +log: + Level: info + +server: + ListenAddress: :7002 + telegram: Token: Debug: false @@ -13,4 +19,14 @@ storage-service: Uri: "http://localhost:7001" user-account: - StaticUuid: "00000000-0000-0000-0000-000000000000" \ No newline at end of file + StaticUuid: "00000000-0000-0000-0000-000000000000" + +webhook: + ExternalUrl: "" + +temp-storage: + Endpoint: + AccessKey: + SecretKey: + Bucket: melo-temp + Secure: false diff --git a/telegram-service/entity/Entity.go b/telegram-service/entity/Entity.go index 0a5f34b..382ea40 100644 --- a/telegram-service/entity/Entity.go +++ b/telegram-service/entity/Entity.go @@ -9,13 +9,22 @@ type MemeCreateResult struct { Id uuid.UUID Text string DuplicateStatus string + Tags []string } +const ResultTypeImage = "image" +const ResultTypeVideo = "video" + type MemeSearchResult struct { - Id uuid.UUID - SortId string - ImageUrl string + Id string + + MediaUrl string + MediaWidth int + MediaHeight int + ThumbUrl string ThumbWidth int ThumbHeight int + + Type 
string } diff --git a/telegram-service/go.mod b/telegram-service/go.mod index 22756a3..7636a47 100644 --- a/telegram-service/go.mod +++ b/telegram-service/go.mod @@ -1,6 +1,6 @@ module github.com/weoses/memelo/telegram-service -go 1.24.10 +go 1.25.0 require ( github.com/golang-migrate/migrate/v4 v4.18.1 @@ -13,6 +13,8 @@ require ( require ( connectrpc.com/connect v1.19.1 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/go-ini/ini v1.67.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/jackc/pgerrcode v0.0.0-20220416144525-469b46aa5efa // indirect @@ -20,11 +22,22 @@ require ( github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect github.com/joho/godotenv v1.5.1 // indirect + github.com/klauspost/compress v1.18.2 // indirect + github.com/klauspost/cpuid/v2 v2.2.11 // indirect + github.com/klauspost/crc32 v1.3.0 // indirect + github.com/minio/crc64nvme v1.1.1 // indirect + github.com/minio/md5-simd v1.1.2 // indirect + github.com/minio/minio-go/v7 v7.0.100 // indirect + github.com/philhofer/fwd v1.2.0 // indirect + github.com/rs/xid v1.6.0 // indirect + github.com/tinylib/msgp v1.6.1 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/dig v1.18.0 // indirect go.uber.org/zap v1.26.0 // indirect - golang.org/x/crypto v0.44.0 // indirect - golang.org/x/sync v0.18.0 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.46.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/sync v0.19.0 // indirect google.golang.org/protobuf v1.36.11 // indirect ) @@ -45,8 +58,8 @@ require ( go.uber.org/fx v1.23.0 go.uber.org/multierr v1.10.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect - golang.org/x/sys v0.38.0 // indirect - golang.org/x/text v0.31.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/text v0.32.0 // 
indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/telegram-service/go.sum b/telegram-service/go.sum index a696345..bedf25c 100644 --- a/telegram-service/go.sum +++ b/telegram-service/go.sum @@ -18,12 +18,16 @@ github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A= +github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= @@ -57,6 +61,13 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= 
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= +github.com/klauspost/compress v1.18.2/go.mod h1:R0h/fSBs8DE4ENlcrlib3PsXS61voFxhIs2DeRhCvJ4= +github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.11 h1:0OwqZRYI2rFrjS4kvkDnqJkKHdHaRnCm68/DY4OxRzU= +github.com/klauspost/cpuid/v2 v2.2.11/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= +github.com/klauspost/crc32 v1.3.0 h1:sSmTt3gUt81RP655XGZPElI0PelVTZ6YwCRnPSupoFM= +github.com/klauspost/crc32 v1.3.0/go.mod h1:D7kQaZhnkX/Y0tstFGf8VUzv2UofNGqCjnC3zdHB0Hw= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -65,6 +76,12 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/minio/crc64nvme v1.1.1 h1:8dwx/Pz49suywbO+auHCBpCtlW1OfpcLN7wYgVR6wAI= +github.com/minio/crc64nvme v1.1.1/go.mod h1:eVfm2fAzLlxMdUGc0EEBGSMmPwmXD5XiNRpnu9J3bvg= +github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34= +github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM= +github.com/minio/minio-go/v7 v7.0.100 h1:ShkWi8Tyj9RtU57OQB2HIXKz4bFgtVib0bbT1sbtLI8= +github.com/minio/minio-go/v7 v7.0.100/go.mod h1:EtGNKtlX20iL2yaYnxEigaIvj0G0GwSDnifnG8ClIdw= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod 
h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= @@ -79,6 +96,8 @@ github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQ github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= +github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -86,6 +105,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= +github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= @@ -114,6 +135,8 @@ github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/subosito/gotenv v1.6.0 
h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tinylib/msgp v1.6.1 h1:ESRv8eL3u+DNHUoSAAQRE50Hm162zqAnBoGv9PzScPY= +github.com/tinylib/msgp v1.6.1/go.mod h1:RSp0LW9oSxFut3KzESt5Voq4GVWyS+PSulT77roAqEA= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= @@ -134,16 +157,28 @@ go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU= golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc= +golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= +golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.19.0 
h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/telegram-service/main.go b/telegram-service/main.go index 242f3d9..7f7a30c 100644 --- a/telegram-service/main.go +++ b/telegram-service/main.go @@ -4,48 +4,65 @@ import ( "context" "log" "log/slog" + "net" + "net/http" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5" "github.com/weoses/memelo/common/config" "github.com/weoses/memelo/telegram-service/conf" "github.com/weoses/memelo/telegram-service/service" + tgstorage "github.com/weoses/memelo/telegram-service/storage" "go.uber.org/fx" "go.uber.org/fx/fxevent" ) -func Startup(lc fx.Lifecycle, serv service.TelegramBotService) { - ctx, cancel := context.WithCancel(context.Background()) +func Startup(lc fx.Lifecycle, cfg *conf.Config, svc service.TelegramBotService) { + var srv *http.Server lc.Append(fx.Hook{ - OnStart: func(startCtx context.Context) error { - go serv.StartBot(ctx) + OnStart: func(ctx context.Context) error { + if err := svc.RegisterWebhook(); err != nil { + 
return err + } + mux := http.NewServeMux() + mux.Handle("/webhook", svc.Handler()) + mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusOK) + }) + srv = &http.Server{ + Addr: cfg.Server.ListenAddress, + Handler: mux, + } + ln, err := net.Listen("tcp", cfg.Server.ListenAddress) + if err != nil { + return err + } + go func() { _ = srv.Serve(ln) }() return nil }, - OnStop: func(stopCtx context.Context) error { - cancel() - return nil + OnStop: func(ctx context.Context) error { + _ = svc.RemoveWebhook() + return srv.Shutdown(ctx) }, }) } func main() { config.InitConfig() - loggingConfig, err := config.NewLoggingConfig() + cfg, err := conf.NewConfig() if err != nil { log.Fatal(err) } - config.InitLogs(loggingConfig) + config.InitLogs(cfg.Log) fx.New( fx.WithLogger(func() fxevent.Logger { return &fxevent.SlogLogger{Logger: slog.With()} }), - fx.Provide(conf.NewTelegramConfig), - fx.Provide(conf.NewUserAccountConfig), - fx.Provide(conf.NewPostgresConfig), - fx.Provide(conf.NewStorageConfig), - fx.Provide(conf.NewInlineConfig), + fx.Supply(cfg), fx.Provide(service.NewTelegramBot), + fx.Provide(tgstorage.NewTmpDataServiceS3Adapter), + fx.Provide(tgstorage.NewTmpDataService), fx.Provide(service.NewStorageConnector), fx.Provide(fx.Annotate(service.NewTelegramFileResolverService, fx.From(new(*tgbotapi.BotAPI)))), fx.Provide(service.NewUserAccountService), diff --git a/telegram-service/service/InlineHandlerService.go b/telegram-service/service/InlineHandlerService.go index 08fe113..635e65b 100644 --- a/telegram-service/service/InlineHandlerService.go +++ b/telegram-service/service/InlineHandlerService.go @@ -7,6 +7,8 @@ import ( "strings" "github.com/google/uuid" + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/telegram-service/entity" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5" "github.com/weoses/memelo/telegram-service/conf" @@ -29,7 +31,7 @@ type InlineHandlerService interface { type 
InineHandlerServiceImpl struct { userAccount UserAccountService storage StorageConnector - config *conf.InlineConfig + config *conf.Config log *slog.Logger } @@ -75,9 +77,7 @@ func (i *InineHandlerServiceImpl) ProcessQuery( query := request.Query delQuery := false - i.log.InfoContext(ctx, "Inline query:", - "userId", userId, - "requestId", request.ID, + i.log.InfoContext(ctx, "ProcessQuery start:", "query", request.Query, "offset", request.Offset) @@ -102,7 +102,7 @@ func (i *InineHandlerServiceImpl) ProcessQuery( ctx, accountId, query, - i.config.PageSize, + i.config.Inline.PageSize, searchAfter, ) if err != nil { @@ -123,36 +123,56 @@ func (i *InineHandlerServiceImpl) ProcessQuery( return &retval, nil } - photos := make([]interface{}, len(results)) - for index, item := range results { - i.log.DebugContext(ctx, "SearchResultItem", - "userId", userId, - "requestId", request.ID, - "index", index, - "id", item.Id, - "sortId", item.SortId, - "url", item.ImageUrl, - ) - - inlineChoice := tgbotapi.NewInlineQueryResultPhotoWithThumb( - item.Id.String(), - item.ImageUrl, - item.ImageUrl, - ) - inlineChoice.MimeType = "image/jpeg" - inlineChoice.Height = item.ThumbHeight - inlineChoice.Width = item.ThumbWidth - - if delQuery { - inlineChoice.Caption = "Deleted" - } - - photos[index] = inlineChoice - } + photos := helper.TransformSlice( + results, + make([]interface{}, len(results)), + func(item *entity.MemeSearchResult) interface{} { + i.log.DebugContext(ctx, "SearchResultItem", + "id", item.Id, + "url", item.MediaUrl, + ) + + switch item.Type { + case entity.ResultTypeImage: + { + inlineChoice := tgbotapi.NewInlineQueryResultPhotoWithThumb( + item.Id, + item.MediaUrl, + item.ThumbUrl, + ) + inlineChoice.MimeType = "image/jpeg" + inlineChoice.Height = item.ThumbHeight + inlineChoice.Width = item.ThumbWidth + + if delQuery { + inlineChoice.Caption = "Deleted" + } + return inlineChoice + } + + case entity.ResultTypeVideo: + { + inlineChoice := 
tgbotapi.NewInlineQueryResultVideo( + item.Id, + item.MediaUrl) + inlineChoice.MimeType = "video/mp4" + inlineChoice.ThumbURL = item.ThumbUrl + inlineChoice.Width = item.ThumbWidth + inlineChoice.Height = item.ThumbHeight + inlineChoice.Title = "memelo-video" + + if delQuery { + inlineChoice.Caption = "Deleted" + } + return inlineChoice + } + } + panic("unknown result type") + }) nextOffset := "" - if len(results) == i.config.PageSize && i.config.PageSize > 0 { - nextOffset = results[i.config.PageSize-1].SortId + if len(results) == i.config.Inline.PageSize && i.config.Inline.PageSize > 0 { + nextOffset = results[i.config.Inline.PageSize-1].Id } i.log.InfoContext(ctx, "Search next offset", @@ -162,7 +182,7 @@ func (i *InineHandlerServiceImpl) ProcessQuery( retval := tgbotapi.InlineConfig{ InlineQueryID: request.ID, - CacheTime: 5, + CacheTime: 50, IsPersonal: true, NextOffset: nextOffset, } @@ -174,7 +194,7 @@ func (i *InineHandlerServiceImpl) ProcessQuery( func NewInlineService( userAccount UserAccountService, storage StorageConnector, - config *conf.InlineConfig, + config *conf.Config, ) InlineHandlerService { return &InineHandlerServiceImpl{ diff --git a/telegram-service/service/MessageHandlerService.go b/telegram-service/service/MessageHandlerService.go index f1e4b14..6f04dfd 100644 --- a/telegram-service/service/MessageHandlerService.go +++ b/telegram-service/service/MessageHandlerService.go @@ -5,12 +5,19 @@ import ( "errors" "fmt" "log/slog" + "net/http" + "strings" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5" + "github.com/weoses/memelo/common/helper" + commonservice "github.com/weoses/memelo/common/service" + "github.com/weoses/memelo/common/temp" ) type MessageHandlerService interface { - ProcessMessage(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) + ProcessImageMessage(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) + ProcessVideoMessage(ctx context.Context, message 
*tgbotapi.Message) (*MessageHandlerResponse, error) + ProcessCommandAddTag(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) } type MessageHandlerResponse struct { @@ -22,11 +29,42 @@ type MessageHandlerServiceImpl struct { storage StorageConnector fileResolver TelegramFileResolverService userAccountService UserAccountService - log *slog.Logger + tmpDataService commonservice.TmpDataService + slogger *slog.Logger } -// ProcessMessage implements MessageHandlerService. -func (m MessageHandlerServiceImpl) ProcessMessage(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) { +func (m MessageHandlerServiceImpl) ProcessCommandAddTag(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) { + arguments := message.CommandArguments() + if arguments == "" { + return nil, errors.New("empty arguments for add tag, need NAME DESCRIPTION") + } + args := strings.SplitN(arguments, " ", 2) + if len(args) < 2 { + return nil, errors.New("invalid arguments for add tag, need NAME DESCRIPTION") + } + + name := args[0] + description := args[1] + if len(name) == 0 || len(description) == 0 { + return nil, errors.New("empty arguments for add tag, need NAME DESCRIPTION") + } + + accountId, err := m.userAccountService.MapUserToAccount(ctx, message.Chat.ID) + if err != nil { + return nil, fmt.Errorf("messageHandlerService: MapUserToAccount failed: %w", err) + } + + if err := m.storage.AddTag(ctx, accountId, name, description); err != nil { + return nil, fmt.Errorf("messageHandlerService: AddTag failed: %w", err) + } + + return &MessageHandlerResponse{ + Message: fmt.Sprintf("Tag `%s` created", name), + ParseMode: "Markdown", + }, nil +} + +func (m MessageHandlerServiceImpl) ProcessImageMessage(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) { var fileId string if len(message.Photo) >= 1 { fileId = message.Photo[len(message.Photo)-1].FileID @@ -36,40 +74,118 @@ func (m 
MessageHandlerServiceImpl) ProcessMessage(ctx context.Context, message * return nil, errors.New("messageHandlerService: message dont contain image") } - file, err := m.fileResolver.GetFile(ctx, fileId) + fileURL, err := m.fileResolver.GetFileURL(ctx, fileId) if err != nil { - return nil, fmt.Errorf("messageHandlerService: GetFile failed, fileId: %s : %w", fileId, err) + return nil, fmt.Errorf("messageHandlerService: GetFileURL failed, fileId: %s : %w", fileId, err) } + s3data, err := m.downloadToS3(ctx, fileURL) + if err != nil { + return nil, fmt.Errorf("messageHandlerService: downloadToS3 failed: %w", err) + } + defer helper.QuietClose(s3data, m.slogger) + accountId, err := m.userAccountService.MapUserToAccount(ctx, message.Chat.ID) if err != nil { return nil, fmt.Errorf("messageHandlerService: MapUserToAccount failed : %w", err) } - result, err := m.storage.CreateMeme(ctx, file, "image/jpeg", accountId) + result, err := m.storage.CreateMeme(ctx, s3data, "image/jpeg", accountId) if err != nil { return nil, fmt.Errorf("messageHandlerService: CreateMeme failed : %w", err) } - m.log.InfoContext(ctx, "meme created", + m.slogger.InfoContext(ctx, "meme created", "imageId", result.Id, "duplicate", result.DuplicateStatus) return &MessageHandlerResponse{ - Message: fmt.Sprintf("\n```Text\n%s\n```\n ID: `%s` \n Status: `%s`", result.Text, result.Id, result.DuplicateStatus), + Message: fmt.Sprintf( + "\n```Text\n%s\n```\n ID: `%s` \n Status: `%s`\n Tags: ```%s```", + result.Text, + result.Id, + result.DuplicateStatus, + strings.Join(result.Tags, ", ")), ParseMode: "Markdown", }, nil } +func (m MessageHandlerServiceImpl) ProcessVideoMessage(ctx context.Context, message *tgbotapi.Message) (*MessageHandlerResponse, error) { + if message.Video == nil { + return nil, errors.New("messageHandlerService: message does not contain a video") + } + + fileURL, err := m.fileResolver.GetFileURL(ctx, message.Video.FileID) + if err != nil { + return nil, fmt.Errorf("messageHandlerService: 
GetFileURL failed, fileId: %s : %w", message.Video.FileID, err) + } + + s3data, err := m.downloadToS3(ctx, fileURL) + if err != nil { + return nil, fmt.Errorf("messageHandlerService: downloadToS3 failed: %w", err) + } + defer helper.QuietClose(s3data, m.slogger) + + accountId, err := m.userAccountService.MapUserToAccount(ctx, message.Chat.ID) + if err != nil { + return nil, fmt.Errorf("messageHandlerService: MapUserToAccount failed : %w", err) + } + + result, err := m.storage.CreateVideo(ctx, s3data, accountId) + if err != nil { + return nil, fmt.Errorf("messageHandlerService: CreateVideo failed : %w", err) + } + + m.slogger.InfoContext(ctx, "video meme created", + "memeId", result.Id, + "duplicate", result.DuplicateStatus) + + return &MessageHandlerResponse{ + Message: fmt.Sprintf( + "\n```Text\n%s\n```\n ID: `%s` \n Status: `%s`\n Tags: ```%s```", + result.Text, + result.Id, + result.DuplicateStatus, + strings.Join(result.Tags, ", ")), + ParseMode: "Markdown", + }, nil +} + +func (m MessageHandlerServiceImpl) downloadToS3(ctx context.Context, fileURL string) (temp.S3BackedData, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, fileURL, nil) + if err != nil { + return nil, fmt.Errorf("downloadToS3: create request: %w", err) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("downloadToS3: http get: %w", err) + } + defer helper.QuietClose(resp.Body, m.slogger) + + if resp.StatusCode >= 400 { + return nil, fmt.Errorf("downloadToS3: non-2xx status: %d", resp.StatusCode) + } + + s3data, err := m.tmpDataService.ByReader(ctx, resp.Body) + if err != nil { + return nil, fmt.Errorf("downloadToS3: upload to s3: %w", err) + } + + return s3data, nil +} + func NewMessageHandlerService( storage StorageConnector, fileResolver TelegramFileResolverService, userAccountService UserAccountService, + tmpDataService commonservice.TmpDataService, ) MessageHandlerService { return &MessageHandlerServiceImpl{ storage: storage, 
fileResolver: fileResolver, userAccountService: userAccountService, - log: slog.With("service", "MessageHandlerService"), + tmpDataService: tmpDataService, + slogger: slog.With("service", "MessageHandlerService"), } } diff --git a/telegram-service/service/StorageConnector.go b/telegram-service/service/StorageConnector.go index 3423501..18cd2f2 100644 --- a/telegram-service/service/StorageConnector.go +++ b/telegram-service/service/StorageConnector.go @@ -2,11 +2,15 @@ package service import ( "context" + "errors" "fmt" "log/slog" "net/http" + "connectrpc.com/connect" "github.com/google/uuid" + "github.com/weoses/memelo/common/helper" + "github.com/weoses/memelo/common/temp" v1 "github.com/weoses/memelo/gen/proto/v1" "github.com/weoses/memelo/gen/proto/v1/v1connect" "github.com/weoses/memelo/telegram-service/conf" @@ -22,14 +26,34 @@ type StorageConnector interface { pageAfterId *string, ) ([]*entity.MemeSearchResult, error) - CreateMeme(ctx context.Context, file []byte, mime string, accountId uuid.UUID) (*entity.MemeCreateResult, error) + CreateMeme(ctx context.Context, data temp.S3BackedData, mime string, accountId uuid.UUID) (*entity.MemeCreateResult, error) + CreateVideo(ctx context.Context, data temp.S3BackedData, accountId uuid.UUID) (*entity.MemeCreateResult, error) DeleteMeme(ctx context.Context, accountId uuid.UUID, memeId uuid.UUID) error + + AddTag(ctx context.Context, accountId uuid.UUID, name string, description string) error } type StorageConnectorImpl struct { - cl v1connect.SearchServiceClient - log *slog.Logger + cl v1connect.SearchServiceClient + tagsCl v1connect.TagsServiceClient + log *slog.Logger +} + +func (s *StorageConnectorImpl) AddTag(ctx context.Context, accountId uuid.UUID, name string, description string) error { + _, err := s.tagsCl.CreateTag(ctx, &v1.CreateTagRequest{ + AccountId: accountId.String(), + Tag: name, + Description: description, + }) + if err != nil { + var connectErr *connect.Error + if errors.As(err, &connectErr) && 
connectErr.Code() == connect.CodeAlreadyExists { + return fmt.Errorf("tag '%s' already exists", name) + } + return fmt.Errorf("AddTag failed: name=%s %w", name, err) + } + return nil } func (s *StorageConnectorImpl) DeleteMeme(ctx context.Context, accountId uuid.UUID, memeId uuid.UUID) error { @@ -61,33 +85,40 @@ func (s *StorageConnectorImpl) ProcessSearchQuery( return nil, fmt.Errorf("storageService: search_pipeline meme query failed query: %s : %w", query, err) } - entityResult := make([]*entity.MemeSearchResult, len(response.Results)) - for i, dto := range response.Results { - id, err := uuid.Parse(dto.GetId()) - if err != nil { - return nil, fmt.Errorf("storageService: failed to parse meme id: %s: %w", dto.GetId(), err) - } - result := &entity.MemeSearchResult{ - Id: id, - SortId: dto.GetId(), - } - if dto.GetImageOriginal() != nil { - result.ImageUrl = dto.GetImageOriginal().GetUrl() - } - if dto.GetImageThumbnail() != nil { - result.ThumbUrl = dto.GetImageThumbnail().GetUrl() - result.ThumbWidth = int(dto.GetImageThumbnail().GetWidth()) - result.ThumbHeight = int(dto.GetImageThumbnail().GetHeight()) - } - entityResult[i] = result - } + entityResult := helper.TransformSlice( + response.Results, + make([]*entity.MemeSearchResult, len(response.Results)), + func(dto *v1.MemeDto) *entity.MemeSearchResult { + result := &entity.MemeSearchResult{ + Id: dto.GetId(), + } + + if dto.GetMediaOriginal() != nil { + result.MediaUrl = dto.GetMediaOriginal().GetUrl() + result.MediaWidth = int(dto.GetMediaOriginal().GetImageWidth()) + result.MediaHeight = int(dto.GetMediaOriginal().GetImageHeight()) + } + + if dto.GetImageThumbnail() != nil { + result.ThumbUrl = dto.GetImageThumbnail().GetUrl() + result.ThumbWidth = int(dto.GetImageThumbnail().GetImageWidth()) + result.ThumbHeight = int(dto.GetImageThumbnail().GetImageHeight()) + } + result.Type = dto.GetType() + return result + }) return entityResult, nil } -func (s *StorageConnectorImpl) CreateMeme(ctx context.Context, file 
[]byte, mime string, accountId uuid.UUID) (*entity.MemeCreateResult, error) { +func (s *StorageConnectorImpl) CreateMeme(ctx context.Context, data temp.S3BackedData, mime string, accountId uuid.UUID) (*entity.MemeCreateResult, error) { + s3path, err := data.GetS3Path(ctx) + if err != nil { + return nil, fmt.Errorf("storageService: get s3 path for image failed: %w", err) + } + response, err := s.cl.CreateMeme(ctx, &v1.CreateMemeRequest{ AccountId: accountId.String(), - RawImage: file, + Image: &v1.MediaDataDto{S3Path: &s3path}, }) if err != nil { return nil, fmt.Errorf("storageService: create meme failed: %w", err) @@ -101,13 +132,42 @@ func (s *StorageConnectorImpl) CreateMeme(ctx context.Context, file []byte, mime Id: memeId, Text: response.Result.GetOcrResult(), DuplicateStatus: response.Status.String(), + Tags: response.Result.GetTags(), + }, nil +} + +func (s *StorageConnectorImpl) CreateVideo(ctx context.Context, data temp.S3BackedData, accountId uuid.UUID) (*entity.MemeCreateResult, error) { + s3path, err := data.GetS3Path(ctx) + if err != nil { + return nil, fmt.Errorf("storageService: get s3 path for video failed: %w", err) + } + + response, err := s.cl.CreateMeme(ctx, &v1.CreateMemeRequest{ + AccountId: accountId.String(), + Video: &v1.MediaDataDto{S3Path: &s3path}, + }) + if err != nil { + return nil, fmt.Errorf("storageService: create video meme failed: %w", err) + } + + memeId, err := uuid.Parse(response.Result.GetId()) + if err != nil { + return nil, fmt.Errorf("storageService: failed to parse created meme id: %s: %w", response.Result.GetId(), err) + } + return &entity.MemeCreateResult{ + Id: memeId, + Text: response.Result.GetOcrResult(), + DuplicateStatus: response.Status.String(), + Tags: response.Result.GetTags(), }, nil } -func NewStorageConnector(config *conf.StorageServiceConfig) (StorageConnector, error) { - cl := v1connect.NewSearchServiceClient(http.DefaultClient, config.Uri) +func NewStorageConnector(config *conf.Config) (StorageConnector, 
error) { + cl := v1connect.NewSearchServiceClient(http.DefaultClient, config.StorageService.Uri) + tagsCl := v1connect.NewTagsServiceClient(http.DefaultClient, config.StorageService.Uri) return &StorageConnectorImpl{ - cl: cl, - log: slog.With("service", "StorageConnectorService"), + cl: cl, + tagsCl: tagsCl, + log: slog.With("service", "StorageConnectorService"), }, nil } diff --git a/telegram-service/service/Telegram.go b/telegram-service/service/Telegram.go index c2241e2..d15d3fa 100644 --- a/telegram-service/service/Telegram.go +++ b/telegram-service/service/Telegram.go @@ -2,118 +2,194 @@ package service import ( "context" + "encoding/json" + "errors" "log/slog" + "net/http" tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api/v5" "github.com/weoses/memelo/telegram-service/conf" ) type TelegramBotService interface { - StartBot(ctx context.Context) + Handler() http.Handler + RegisterWebhook() error + RemoveWebhook() error } type TelegramBotServiceImpl struct { - inline InlineHandlerService - message MessageHandlerService - bot *tgbotapi.BotAPI - log *slog.Logger + inline InlineHandlerService + message MessageHandlerService + bot *tgbotapi.BotAPI + webhookCfg *conf.WebhookConfig + log *slog.Logger + cancel context.CancelFunc } -func (srv *TelegramBotServiceImpl) StartBot(ctx context.Context) { - srv.log.InfoContext(ctx, "Authorized", "account", srv.bot.Self.UserName) +func (s *TelegramBotServiceImpl) Handler() http.Handler { + ctx, cancel := context.WithCancel(context.Background()) + s.cancel = cancel + updates := make(chan tgbotapi.Update, 100) + go s.dispatchUpdates(ctx, updates) + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + var update tgbotapi.Update + if err := json.NewDecoder(r.Body).Decode(&update); err != nil { + http.Error(w, "bad request", http.StatusBadRequest) + return + } + updates <- update + }) +} - u := tgbotapi.NewUpdate(0) - u.Timeout = 60 +func (s *TelegramBotServiceImpl) RegisterWebhook() error { + wh, err := 
tgbotapi.NewWebhook(s.webhookCfg.ExternalUrl) + if err != nil { + return err + } + _, err = s.bot.Request(wh) + return err +} - updates := srv.bot.GetUpdatesChan(u) +func (s *TelegramBotServiceImpl) RemoveWebhook() error { + if s.cancel != nil { + s.cancel() + } + _, err := s.bot.Request(tgbotapi.DeleteWebhookConfig{}) + return err +} +func (s *TelegramBotServiceImpl) dispatchUpdates(ctx context.Context, updates <-chan tgbotapi.Update) { + s.log.InfoContext(ctx, "Authorized", "account", s.bot.Self.UserName) for { select { case <-ctx.Done(): - srv.bot.StopReceivingUpdates() return case update := <-updates: if update.InlineQuery != nil { - srv.handleInlineRequest(ctx, &update) + s.handleInlineRequest(ctx, &update) } else if update.Message != nil { - srv.handleMessage(ctx, &update) + if update.Message.IsCommand() { + s.handleCommand(ctx, update.Message) + } else { + s.handleMessage(ctx, update.Message) + } } else if update.ChosenInlineResult != nil { - srv.handleChosenResult(ctx, &update) + s.handleChosenResult(ctx, &update) } } } } -func (srv *TelegramBotServiceImpl) handleMessage(ctx context.Context, update *tgbotapi.Update) { - srv.log.InfoContext(ctx, "Bot message request") - srv.log.DebugContext(ctx, "Bot message request details", - "request", update.Message) +func (s *TelegramBotServiceImpl) handleCommand(ctx context.Context, requestMessage *tgbotapi.Message) { + s.log.InfoContext(ctx, "Bot message request") + s.log.DebugContext(ctx, "Bot message request details", + "request", requestMessage) - answer, err := srv.message.ProcessMessage(ctx, update.Message) - if err != nil { - srv.log.ErrorContext(ctx, "Failed to process message", "error", err) - message := tgbotapi.NewMessage(update.Message.Chat.ID, err.Error()) - message.ReplyToMessageID = update.Message.MessageID - _, err = srv.bot.Send(message) + if requestMessage.Command() == "addtag" { + responseData, err := s.message.ProcessCommandAddTag(ctx, requestMessage) + if err != nil { + 
s.sendCommonErrorMessage(ctx, requestMessage, err) + } + + err = s.sendCommonResponseMessage(ctx, requestMessage, responseData) if err != nil { - srv.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) + s.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) + s.sendCommonErrorMessage(ctx, requestMessage, err) + return } + + } +} + +func (s *TelegramBotServiceImpl) handleMessage(ctx context.Context, requestMessage *tgbotapi.Message) { + s.log.InfoContext(ctx, "Bot message request") + s.log.DebugContext(ctx, "Bot message request details", + "request", requestMessage) + + var answer *MessageHandlerResponse + var err error + if requestMessage.Video != nil { + answer, err = s.message.ProcessVideoMessage(ctx, requestMessage) + } else if requestMessage.Photo != nil { + answer, err = s.message.ProcessImageMessage(ctx, requestMessage) + } else { + err = errors.New("message dont contain any media temp") + } + + if err != nil { + s.log.ErrorContext(ctx, "Failed to process message", "error", err) + s.sendCommonErrorMessage(ctx, requestMessage, err) return } - message := tgbotapi.NewMessage(update.Message.Chat.ID, answer.Message) - message.ReplyToMessageID = update.Message.MessageID - message.ParseMode = answer.ParseMode - _, err = srv.bot.Send(message) + err = s.sendCommonResponseMessage(ctx, requestMessage, answer) if err != nil { - srv.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) + s.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) + s.sendCommonErrorMessage(ctx, requestMessage, err) return } } -func (srv *TelegramBotServiceImpl) handleInlineRequest(ctx context.Context, update *tgbotapi.Update) { - srv.log.InfoContext(ctx, "Bot inline request:", +func (s *TelegramBotServiceImpl) sendCommonResponseMessage(ctx context.Context, requestMessage *tgbotapi.Message, answer *MessageHandlerResponse) error { + responseMessage := tgbotapi.NewMessage(requestMessage.Chat.ID, answer.Message) + 
responseMessage.ReplyToMessageID = requestMessage.MessageID + responseMessage.ParseMode = answer.ParseMode + _, err := s.bot.Send(responseMessage) + return err +} + +func (s *TelegramBotServiceImpl) sendCommonErrorMessage(ctx context.Context, requestMessage *tgbotapi.Message, err error) { + errorResponseMessage := tgbotapi.NewMessage(requestMessage.Chat.ID, err.Error()) + errorResponseMessage.ReplyToMessageID = requestMessage.MessageID + _, err = s.bot.Send(errorResponseMessage) + if err != nil { + s.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) + } +} + +func (s *TelegramBotServiceImpl) handleInlineRequest(ctx context.Context, update *tgbotapi.Update) { + s.log.InfoContext(ctx, "Bot inline request:", "query", update.InlineQuery.Query) - srv.log.DebugContext(ctx, "Bot inline request details:", - "data", update.InlineQuery) + s.log.DebugContext(ctx, "Bot inline request details:", + "temp", update.InlineQuery) - inlineResponse, err := srv.inline.ProcessQuery(ctx, update.InlineQuery) + inlineResponse, err := s.inline.ProcessQuery(ctx, update.InlineQuery) if err != nil { - srv.log.ErrorContext(ctx, "failed to process inline query:", "error", err) + s.log.ErrorContext(ctx, "failed to process inline query:", "error", err) return } - srv.log.DebugContext(ctx, "Bot inline response details:", - "data", inlineResponse) + s.log.DebugContext(ctx, "Bot inline response details:", + "temp", inlineResponse) - _, err = srv.bot.Request(inlineResponse) + _, err = s.bot.Request(inlineResponse) if err != nil { - srv.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) + s.log.ErrorContext(ctx, "Failed to send message to bot", "error", err) return } } -func (srv *TelegramBotServiceImpl) handleChosenResult(ctx context.Context, u *tgbotapi.Update) { - srv.log.InfoContext(ctx, "Bot choose result:", +func (s *TelegramBotServiceImpl) handleChosenResult(ctx context.Context, u *tgbotapi.Update) { + s.log.InfoContext(ctx, "Bot choose result:", "query", 
u.ChosenInlineResult.Query, "resultId", u.ChosenInlineResult.ResultID) - srv.log.DebugContext(ctx, "Bot chosen result details:", - "data", u.ChosenInlineResult) + s.log.DebugContext(ctx, "Bot chosen result details:", + "temp", u.ChosenInlineResult) - err := srv.inline.ProcessChosenInlineQuery(ctx, u.ChosenInlineResult) + err := s.inline.ProcessChosenInlineQuery(ctx, u.ChosenInlineResult) if err != nil { - srv.log.ErrorContext(ctx, "Failed to process chosen result", "error", err) + s.log.ErrorContext(ctx, "Failed to process chosen result", "error", err) } } -func NewTelegramBot(config *conf.TelegramConfig) *tgbotapi.BotAPI { - bot, err := tgbotapi.NewBotAPI(config.Token) +func NewTelegramBot(config *conf.Config) *tgbotapi.BotAPI { + bot, err := tgbotapi.NewBotAPI(config.Telegram.Token) if err != nil { slog.ErrorContext(context.Background(), "Bot api creation failed", "error", err) panic("bot api creation failed") } - bot.Debug = config.Debug + bot.Debug = config.Telegram.Debug return bot } @@ -121,11 +197,13 @@ func NewTelegramBotService( bot *tgbotapi.BotAPI, inline InlineHandlerService, message MessageHandlerService, + cfg *conf.Config, ) TelegramBotService { return &TelegramBotServiceImpl{ - bot: bot, - inline: inline, - message: message, - log: slog.With("service", "TelegramBotService"), + bot: bot, + inline: inline, + message: message, + webhookCfg: cfg.Webhook, + log: slog.With("service", "TelegramBotService"), } } diff --git a/telegram-service/service/TelegramFileResolver.go b/telegram-service/service/TelegramFileResolver.go index f312e8b..75f7949 100644 --- a/telegram-service/service/TelegramFileResolver.go +++ b/telegram-service/service/TelegramFileResolver.go @@ -2,11 +2,8 @@ package service import ( "context" - "errors" "fmt" - "io" "log/slog" - "net/http" ) type BotFileGetter interface { @@ -14,7 +11,7 @@ type BotFileGetter interface { } type TelegramFileResolverService interface { - GetFile(ctx context.Context, fileID string) ([]byte, error) + 
GetFileURL(ctx context.Context, fileID string) (string, error) } type TelegramFileResolverServiceImpl struct { @@ -22,28 +19,12 @@ type TelegramFileResolverServiceImpl struct { log *slog.Logger } -// GetFile implements TelegramFileResolverService. -func (t *TelegramFileResolverServiceImpl) GetFile(ctx context.Context, fileID string) ([]byte, error) { +func (t *TelegramFileResolverServiceImpl) GetFileURL(ctx context.Context, fileID string) (string, error) { url, err := t.fileGetter.GetFileDirectURL(fileID) if err != nil { - return nil, fmt.Errorf("TelegramFileResolverService: GetFileDirectURL failed, fileId: %s : %w", fileID, err) + return "", fmt.Errorf("TelegramFileResolverService: GetFileDirectURL failed, fileId: %s : %w", fileID, err) } - - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) - if err != nil { - return nil, fmt.Errorf("TelegramFileResolverService: create request failed, url: %s : %w", url, err) - } - - resp, err := http.DefaultClient.Do(req) - if err != nil { - return nil, fmt.Errorf("TelegramFileResolverService: download file by got url failed, url: %s : %w", url, err) - } - - if resp.StatusCode >= 400 { - return nil, errors.New("TelegramFileResolverService: download file by got url failed url: %s : non 2xx status code") - } - - return io.ReadAll(resp.Body) + return url, nil } func NewTelegramFileResolverService(fileGetter BotFileGetter) TelegramFileResolverService { diff --git a/telegram-service/service/UserAccountService.go b/telegram-service/service/UserAccountService.go index 59df7cb..f61b9c8 100644 --- a/telegram-service/service/UserAccountService.go +++ b/telegram-service/service/UserAccountService.go @@ -80,17 +80,17 @@ func runMigrations(pool *pgxpool.Pool) error { return nil } -func NewUserAccountService(config *conf.PostgresConfig, userAccountConfig *conf.UserAccountConfig) (UserAccountService, error) { +func NewUserAccountService(config *conf.Config) (UserAccountService, error) { logger := slog.With("service", 
"UserAccountService") - if userAccountConfig.StaticUuid != "" { + if config.UserAccount.StaticUuid != "" { return &UserAccountServiceStaticImpl{ - staticUuid: uuid.MustParse(userAccountConfig.StaticUuid), + staticUuid: uuid.MustParse(config.UserAccount.StaticUuid), log: logger, }, nil } - pool, err := pgxpool.New(context.Background(), config.DSN) + pool, err := pgxpool.New(context.Background(), config.Postgres.DSN) if err != nil { return nil, fmt.Errorf("pgxpool.New: %w", err) } diff --git a/telegram-service/storage/TmpDataService.go b/telegram-service/storage/TmpDataService.go new file mode 100644 index 0000000..884422b --- /dev/null +++ b/telegram-service/storage/TmpDataService.go @@ -0,0 +1,17 @@ +package storage + +import ( + commonservice "github.com/weoses/memelo/common/service" + commonstorage "github.com/weoses/memelo/common/storage" + "github.com/weoses/memelo/telegram-service/conf" +) + +type TmpDataServiceS3OperationsAdapter commonstorage.S3OperationsAdapter + +func NewTmpDataServiceS3Adapter(cfg *conf.Config) (TmpDataServiceS3OperationsAdapter, error) { + return commonstorage.NewS3OperationsAdapter(cfg.TempStorage) +} + +func NewTmpDataService(adapter TmpDataServiceS3OperationsAdapter) (commonservice.TmpDataService, error) { + return commonservice.NewTmpDataS3Service(adapter) +} diff --git a/tests/e2e/search_test.go b/tests/e2e/search_test.go index 09e28f0..1db05b2 100644 --- a/tests/e2e/search_test.go +++ b/tests/e2e/search_test.go @@ -2,6 +2,7 @@ package e2e_test import ( "context" + "net/http" "os" "testing" @@ -9,6 +10,7 @@ import ( ) func TestCreateMeme(t *testing.T) { + defer cleanup() imageData, err := os.ReadFile("images/test-pic-cat.jpeg") if err != nil { t.Fatalf("failed to read test image: %v", err) @@ -16,7 +18,9 @@ func TestCreateMeme(t *testing.T) { resp, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ AccountId: testAccountId, - RawImage: imageData, + Image: &v1.MediaDataDto{ + Data: imageData, + }, }) if err 
!= nil { t.Fatalf("CreateMeme failed: %v", err) @@ -31,6 +35,7 @@ func TestCreateMeme(t *testing.T) { } func TestCreateDuplicate(t *testing.T) { + defer cleanup() imageData, err := os.ReadFile("images/test-pic-cat.jpeg") if err != nil { t.Fatalf("failed to read test image: %v", err) @@ -38,7 +43,9 @@ func TestCreateDuplicate(t *testing.T) { resp, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ AccountId: testAccountId, - RawImage: imageData, + Image: &v1.MediaDataDto{ + Data: imageData, + }, }) if err != nil { t.Fatalf("CreateMeme failed: %v", err) @@ -49,7 +56,9 @@ func TestCreateDuplicate(t *testing.T) { resp2, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ AccountId: testAccountId, - RawImage: imageData, + Image: &v1.MediaDataDto{ + Data: imageData, + }, }) if err != nil { t.Fatalf("second CreateMeme failed: %v", err) @@ -68,6 +77,7 @@ func TestCreateDuplicate(t *testing.T) { } func TestSearchMeme_Simple(t *testing.T) { + defer cleanup() imageData, err := os.ReadFile("images/test-pic-cat.jpeg") if err != nil { t.Fatalf("failed to read test image: %v", err) @@ -75,7 +85,9 @@ func TestSearchMeme_Simple(t *testing.T) { respCreate, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ AccountId: testAccountId, - RawImage: imageData, + Image: &v1.MediaDataDto{ + Data: imageData, + }, }) if err != nil { t.Fatalf("CreateMeme failed: %v", err) @@ -110,6 +122,7 @@ func TestSearchMeme_Simple(t *testing.T) { } func TestSearchMeme_All(t *testing.T) { + defer cleanup() imageData, err := os.ReadFile("images/test-pic-cat.jpeg") if err != nil { t.Fatalf("failed to read test image: %v", err) @@ -117,7 +130,9 @@ func TestSearchMeme_All(t *testing.T) { respCreate, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ AccountId: testAccountId, - RawImage: imageData, + Image: &v1.MediaDataDto{ + Data: imageData, + }, }) if err != nil { t.Fatalf("CreateMeme failed: %v", err) @@ -147,6 
+162,240 @@ func TestSearchMeme_All(t *testing.T) { } } +func TestSearchMeme_ById(t *testing.T) { + defer cleanup() + imageData, err := os.ReadFile("images/test-pic-cat.jpeg") + if err != nil { + t.Fatalf("failed to read test image: %v", err) + } + + respCreate, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ + AccountId: testAccountId, + Image: &v1.MediaDataDto{ + Data: imageData, + }, + }) + if err != nil { + t.Fatalf("CreateMeme failed: %v", err) + } + id := respCreate.GetResult().GetId() + if id == "" { + t.Fatal("expected non-empty ID in CreateMeme response") + } + + respSearch, err := searchClient.SearchMeme(context.Background(), &v1.SearchMemeRequest{ + AccountId: testAccountId, + Query: id, + }) + if err != nil { + t.Fatalf("SearchMeme by ID failed: %v", err) + } + if len(respSearch.Results) != 1 { + t.Fatalf("expected exactly 1 result, got %d", len(respSearch.Results)) + } + if respSearch.Results[0].Id != id { + t.Fatalf("expected result ID %s, got %s", id, respSearch.Results[0].Id) + } + if respSearch.SearcherName != "id_searcher" { + t.Fatalf("expected id_searcher, got %s", respSearch.SearcherName) + } +} + +func TestSearchMeme_ById_ValidImageLinks(t *testing.T) { + defer cleanup() + imageData, err := os.ReadFile("images/test-pic-cat.jpeg") + if err != nil { + t.Fatalf("failed to read test image: %v", err) + } + + respCreate, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ + AccountId: testAccountId, + Image: &v1.MediaDataDto{ + Data: imageData, + }, + }) + if err != nil { + t.Fatalf("CreateMeme failed: %v", err) + } + id := respCreate.GetResult().GetId() + if id == "" { + t.Fatal("expected non-empty ID in CreateMeme response") + } + + respSearch, err := searchClient.SearchMeme(context.Background(), &v1.SearchMemeRequest{ + AccountId: testAccountId, + Query: id, + }) + if err != nil { + t.Fatalf("SearchMeme by ID failed: %v", err) + } + if len(respSearch.Results) != 1 { + t.Fatalf("expected exactly 
1 result, got %d", len(respSearch.Results)) + } + + meme := respSearch.Results[0] + + original := meme.GetMediaOriginal() + if original == nil { + t.Fatal("expected non-nil ImageOriginal") + } + if original.GetUrl() == "" { + t.Fatal("expected non-empty ImageOriginal URL") + } + + thumbnail := meme.GetImageThumbnail() + if thumbnail == nil { + t.Fatal("expected non-nil ImageThumbnail") + } + if thumbnail.GetUrl() == "" { + t.Fatal("expected non-empty ImageThumbnail URL") + } + + if original.GetUrl() == thumbnail.GetUrl() { + t.Fatalf("expected original and thumbnail URLs to differ, both are %q", original.GetUrl()) + } + + for _, tc := range []struct { + name string + url string + }{ + {"original", original.GetUrl()}, + {"thumbnail", thumbnail.GetUrl()}, + } { + resp, err := http.Get(tc.url) + if err != nil { + t.Fatalf("%s URL %q is not accessible: %v", tc.name, tc.url, err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("%s URL %q returned status %d", tc.name, tc.url, resp.StatusCode) + } + } +} + +func TestSearchMeme_EmbeddingSearch(t *testing.T) { + defer cleanup() + imageData, err := os.ReadFile("images/test-pic-cat.jpeg") + if err != nil { + t.Fatalf("failed to read test image: %v", err) + } + + respCreate, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ + AccountId: testAccountId, + Image: &v1.MediaDataDto{ + Data: imageData, + }, + }) + if err != nil { + t.Fatalf("CreateMeme failed: %v", err) + } + if respCreate.GetResult().GetId() == "" { + t.Fatal("expected non-empty ID in CreateMeme response") + } + + respSearch, err := searchClient.SearchMeme(context.Background(), &v1.SearchMemeRequest{ + AccountId: testAccountId, + Query: "cat", + }) + if err != nil { + t.Fatalf("SearchMeme by embedding failed: %v", err) + } + if len(respSearch.Results) == 0 { + t.Fatal("expected at least one result") + } + if respSearch.SearcherName != "text_embedding_searcher" { + t.Fatalf("expected text_embedding_searcher, got 
%s", respSearch.SearcherName) + } +} + +func TestCreateVideoMemeV1(t *testing.T) { + createVideoTestInternal(t, "videos/video-test-v1-steven.mp4") +} + +func TestCreateVideoMemeV2(t *testing.T) { + createVideoTestInternal(t, "videos/video-test-v2-static-text.mp4") +} + +func TestCreateVideoMemeV3(t *testing.T) { + createVideoTestInternal(t, "videos/video-test-v3-language.mp4") +} + +func createVideoTestInternal(t *testing.T, file string) { + defer cleanup() + videoData, err := os.ReadFile(file) + if err != nil { + t.Fatalf("failed to read test video: %v", err) + } + + resp, err := searchClient.CreateMeme(context.Background(), &v1.CreateMemeRequest{ + AccountId: testAccountId, + Video: &v1.MediaDataDto{ + Data: videoData, + }, + }) + if err != nil { + t.Fatalf("CreateMeme (video) failed: %v", err) + } + if resp.GetResult().GetId() == "" { + t.Fatal("expected non-empty ID in CreateMeme response") + } + if resp.GetStatus() != v1.CreateMemeStatus_STATUS_NEW { + t.Fatalf("expected STATUS_NEW, got %v", resp.GetStatus()) + } + + println(resp.GetResult().GetOcrResult()) + + id := resp.GetResult().GetId() + respSearch, err := searchClient.SearchMeme(context.Background(), &v1.SearchMemeRequest{ + AccountId: testAccountId, + Query: id, + }) + if err != nil { + t.Fatalf("SearchMeme by ID failed: %v", err) + } + if len(respSearch.Results) != 1 { + t.Fatalf("expected exactly 1 result, got %d", len(respSearch.Results)) + } + + meme := respSearch.Results[0] + if meme.Id != id { + t.Fatalf("expected result ID %s, got %s", id, meme.Id) + } + + original := meme.GetMediaOriginal() + if original == nil { + t.Fatal("expected non-nil MediaOriginal") + } + if original.GetUrl() == "" { + t.Fatal("expected non-empty MediaOriginal URL") + } + + thumbnail := meme.GetImageThumbnail() + if thumbnail == nil { + t.Fatal("expected non-nil ImageThumbnail") + } + if thumbnail.GetUrl() == "" { + t.Fatal("expected non-empty ImageThumbnail URL") + } + + for _, tc := range []struct { + name string + url 
string + }{ + {"original", original.GetUrl()}, + {"thumbnail", thumbnail.GetUrl()}, + } { + resp, err := http.Get(tc.url) + if err != nil { + t.Fatalf("%s URL %q is not accessible: %v", tc.name, tc.url, err) + } + resp.Body.Close() + if resp.StatusCode != http.StatusOK { + t.Fatalf("%s URL %q returned status %d", tc.name, tc.url, resp.StatusCode) + } + } +} + func TestSearchMeme_Empty(t *testing.T) { resp, err := searchClient.SearchMeme(context.Background(), &v1.SearchMemeRequest{ AccountId: testAccountId, @@ -158,4 +407,7 @@ func TestSearchMeme_Empty(t *testing.T) { if resp == nil { t.Fatal("expected non-nil response from SearchMeme") } + if len(resp.Results) > 0 { + t.Fatal("expected zero results") + } } diff --git a/tests/e2e/tags_test.go b/tests/e2e/tags_test.go index 5675c87..4486aab 100644 --- a/tests/e2e/tags_test.go +++ b/tests/e2e/tags_test.go @@ -16,6 +16,7 @@ func uniqueTagName() string { } func TestCreateTag(t *testing.T) { + defer cleanup() resp, err := tagsClient.CreateTag(context.Background(), &v1.CreateTagRequest{ AccountId: testAccountId, Tag: uniqueTagName(), @@ -30,18 +31,38 @@ func TestCreateTag(t *testing.T) { } func TestListTags(t *testing.T) { - resp, err := tagsClient.ListTags(context.Background(), &v1.ListTagRequest{ + defer cleanup() + + createResp, err := tagsClient.CreateTag(context.Background(), &v1.CreateTagRequest{ + AccountId: testAccountId, + Tag: uniqueTagName(), + Description: "e2e test tag", + }) + if err != nil { + t.Fatalf("CreateTag failed: %v", err) + } + + resp2, err := tagsClient.ListTags(context.Background(), &v1.ListTagRequest{ AccountId: testAccountId, }) if err != nil { t.Fatalf("ListTags failed: %v", err) } - if resp.GetResult() == nil { + if resp2.GetResult() == nil { t.Fatal("expected non-nil result slice from ListTags") } + + if len(resp2.Result) != 1 { + t.Fatal("expected 1 result from ListTags") + } + + if createResp.Result.Id != resp2.Result[0].Id { + t.Fatal("expected same ID in ListTags response") + } } func 
TestDeleteTag(t *testing.T) { + defer cleanup() name := uniqueTagName() created, err := tagsClient.CreateTag(context.Background(), &v1.CreateTagRequest{ @@ -76,6 +97,7 @@ func TestDeleteTag(t *testing.T) { } func TestCreateTag_Duplicate(t *testing.T) { + defer cleanup() name := uniqueTagName() req := &v1.CreateTagRequest{ AccountId: testAccountId, diff --git a/tests/e2e/videos/video-test-v1-steven.mp4 b/tests/e2e/videos/video-test-v1-steven.mp4 new file mode 100644 index 0000000..7435ca3 Binary files /dev/null and b/tests/e2e/videos/video-test-v1-steven.mp4 differ diff --git a/tests/e2e/videos/video-test-v2-static-text.mp4 b/tests/e2e/videos/video-test-v2-static-text.mp4 new file mode 100644 index 0000000..d34f05b Binary files /dev/null and b/tests/e2e/videos/video-test-v2-static-text.mp4 differ diff --git a/tests/e2e/videos/video-test-v3-language.mp4 b/tests/e2e/videos/video-test-v3-language.mp4 new file mode 100644 index 0000000..f952739 Binary files /dev/null and b/tests/e2e/videos/video-test-v3-language.mp4 differ