Skip to content

Commit

Permalink
feat(rerankers): Add new backend, support jina rerankers API
Browse files Browse the repository at this point in the history
Signed-off-by: Ettore Di Giacinto <[email protected]>
  • Loading branch information
mudler committed Apr 24, 2024
1 parent d65214a commit 1e6ec05
Show file tree
Hide file tree
Showing 19 changed files with 358 additions and 5 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/test-extra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,37 @@ jobs:
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers
make --jobs=5 --output-sync=target -C backend/python/sentencetransformers test
# Builds the rerankers Python backend and runs its `test` make target.
tests-rerankers:
  runs-on: ubuntu-latest
  steps:
    - name: Clone
      uses: actions/checkout@v4
      with:
        submodules: true
    - name: Dependencies
      run: |
        sudo apt-get update
        # -y keeps the install non-interactive on the runner.
        sudo apt-get install -y build-essential ffmpeg
        # Register the conda apt repository: install the signing key, check its
        # fingerprint, then write exactly one sources.list entry (the entry was
        # previously written twice, leaving a duplicate line in conda.list).
        curl https://repo.anaconda.com/pkgs/misc/gpgkeys/anaconda.asc | gpg --dearmor > conda.gpg && \
        sudo install -o root -g root -m 644 conda.gpg /usr/share/keyrings/conda-archive-keyring.gpg && \
        gpg --keyring /usr/share/keyrings/conda-archive-keyring.gpg --no-default-keyring --fingerprint 34161F5BF5EB1D4BFBBB8F0A8AEB4F8B29D82806 && \
        sudo /bin/bash -c 'echo "deb [arch=amd64 signed-by=/usr/share/keyrings/conda-archive-keyring.gpg] https://repo.anaconda.com/pkgs/misc/debrepo/conda stable main" > /etc/apt/sources.list.d/conda.list' && \
        sudo apt-get update && \
        sudo apt-get install -y conda
        sudo apt-get install -y ca-certificates cmake curl patch python3-pip
        sudo apt-get install -y libopencv-dev
        pip install --user grpcio-tools
        # NOTE(review): presumably removed so the conda under /opt/conda/bin
        # (appended to PATH in the next step) is the one that gets used.
        sudo rm -rfv /usr/bin/conda || true
    - name: Test rerankers
      run: |
        export PATH=$PATH:/opt/conda/bin
        make --jobs=5 --output-sync=target -C backend/python/rerankers
        make --jobs=5 --output-sync=target -C backend/python/rerankers test
tests-diffusers:
runs-on: ubuntu-latest
steps:
Expand Down
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ ARG TARGETVARIANT

ENV BUILD_TYPE=${BUILD_TYPE}
ENV DEBIAN_FRONTEND=noninteractive
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"
ENV EXTERNAL_GRPC_BACKENDS="coqui:/build/backend/python/coqui/run.sh,huggingface-embeddings:/build/backend/python/sentencetransformers/run.sh,petals:/build/backend/python/petals/run.sh,transformers:/build/backend/python/transformers/run.sh,sentencetransformers:/build/backend/python/sentencetransformers/run.sh,rerankers:/build/backend/python/rerankers/run.sh,autogptq:/build/backend/python/autogptq/run.sh,bark:/build/backend/python/bark/run.sh,diffusers:/build/backend/python/diffusers/run.sh,exllama:/build/backend/python/exllama/run.sh,vall-e-x:/build/backend/python/vall-e-x/run.sh,vllm:/build/backend/python/vllm/run.sh,mamba:/build/backend/python/mamba/run.sh,exllama2:/build/backend/python/exllama2/run.sh,transformers-musicgen:/build/backend/python/transformers-musicgen/run.sh,parler-tts:/build/backend/python/parler-tts/run.sh"

ARG GO_TAGS="stablediffusion tinydream tts"

Expand Down Expand Up @@ -259,6 +259,9 @@ RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/sentencetransformers \
; fi
# Build the rerankers Python backend; skipped unless IMAGE_TYPE is "extras".
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/rerankers \
; fi
RUN if [ "${IMAGE_TYPE}" = "extras" ]; then \
make -C backend/python/transformers \
; fi
Expand Down
13 changes: 11 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -437,10 +437,10 @@ protogen-go-clean:
$(RM) bin/*

.PHONY: protogen-python
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen
protogen-python: autogptq-protogen bark-protogen coqui-protogen diffusers-protogen exllama-protogen exllama2-protogen mamba-protogen petals-protogen rerankers-protogen sentencetransformers-protogen transformers-protogen parler-tts-protogen transformers-musicgen-protogen vall-e-x-protogen vllm-protogen

.PHONY: protogen-python-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean
protogen-python-clean: autogptq-protogen-clean bark-protogen-clean coqui-protogen-clean diffusers-protogen-clean exllama-protogen-clean exllama2-protogen-clean mamba-protogen-clean petals-protogen-clean sentencetransformers-protogen-clean rerankers-protogen-clean transformers-protogen-clean transformers-musicgen-protogen-clean parler-tts-protogen-clean vall-e-x-protogen-clean vllm-protogen-clean

.PHONY: autogptq-protogen
autogptq-protogen:
Expand Down Expand Up @@ -506,6 +506,14 @@ petals-protogen:
petals-protogen-clean:
$(MAKE) -C backend/python/petals protogen-clean

# Runs the `protogen` target of the rerankers backend Makefile
# (presumably generates that backend's protobuf/gRPC stubs — confirm there).
.PHONY: rerankers-protogen
rerankers-protogen:
$(MAKE) -C backend/python/rerankers protogen

# Runs the `protogen-clean` target of the rerankers backend Makefile.
.PHONY: rerankers-protogen-clean
rerankers-protogen-clean:
$(MAKE) -C backend/python/rerankers protogen-clean

.PHONY: sentencetransformers-protogen
sentencetransformers-protogen:
$(MAKE) -C backend/python/sentencetransformers protogen
Expand Down Expand Up @@ -564,6 +572,7 @@ prepare-extra-conda-environments: protogen-python
$(MAKE) -C backend/python/vllm
$(MAKE) -C backend/python/mamba
$(MAKE) -C backend/python/sentencetransformers
$(MAKE) -C backend/python/rerankers
$(MAKE) -C backend/python/transformers
$(MAKE) -C backend/python/transformers-musicgen
$(MAKE) -C backend/python/parler-tts
Expand Down
27 changes: 27 additions & 0 deletions aio/cpu/rerank.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Reranker model definition; `name` is the value clients send in the request's "model" field.
name: jina-reranker-v1-base-en
# Served by the external `rerankers` gRPC backend.
backend: rerankers
parameters:
# NOTE(review): "cross-encoder" reads like a model type rather than a checkpoint name — confirm how the backend resolves it.
model: cross-encoder

usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'
2 changes: 1 addition & 1 deletion aio/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ detect_gpu
detect_gpu_size

PROFILE="${PROFILE:-$GPU_SIZE}" # default to cpu
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"
export MODELS="${MODELS:-/aio/${PROFILE}/embeddings.yaml,/aio/${PROFILE}/rerank.yaml,/aio/${PROFILE}/text-to-speech.yaml,/aio/${PROFILE}/image-gen.yaml,/aio/${PROFILE}/text-to-text.yaml,/aio/${PROFILE}/speech-to-text.yaml,/aio/${PROFILE}/vision.yaml}"

check_vars

Expand Down
27 changes: 27 additions & 0 deletions aio/gpu-8g/rerank.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Reranker model definition; `name` is the value clients send in the request's "model" field.
name: jina-reranker-v1-base-en
# Served by the external `rerankers` gRPC backend.
backend: rerankers
parameters:
# NOTE(review): "cross-encoder" reads like a model type rather than a checkpoint name — confirm how the backend resolves it.
model: cross-encoder

usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'
27 changes: 27 additions & 0 deletions aio/intel/rerank.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Reranker model definition; `name` is the value clients send in the request's "model" field.
name: jina-reranker-v1-base-en
# Served by the external `rerankers` gRPC backend.
backend: rerankers
parameters:
# NOTE(review): "cross-encoder" reads like a model type rather than a checkpoint name — confirm how the backend resolves it.
model: cross-encoder

usage: |
You can test this model with curl like this:
curl http://localhost:8080/v1/rerank \
-H "Content-Type: application/json" \
-d '{
"model": "jina-reranker-v1-base-en",
"query": "Organic skincare products for sensitive skin",
"documents": [
"Eco-friendly kitchenware for modern homes",
"Biodegradable cleaning supplies for eco-conscious consumers",
"Organic cotton baby clothes for sensitive skin",
"Natural organic skincare range for sensitive skin",
"Tech gadgets for smart homes: 2024 edition",
"Sustainable gardening tools and compost solutions",
"Sensitive skin-friendly facial cleansers and toners",
"Organic food wraps and storage solutions",
"All-natural pet food for dogs with allergies",
"Yoga mats made from recycled materials"
],
"top_n": 3
}'
24 changes: 24 additions & 0 deletions backend/backend.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,30 @@ service Backend {
rpc StoresDelete(StoresDeleteOptions) returns (Result) {}
rpc StoresGet(StoresGetOptions) returns (StoresGetResult) {}
rpc StoresFind(StoresFindOptions) returns (StoresFindResult) {}

rpc Rerank(RerankRequest) returns (RerankResult) {}
}

// RerankRequest is the input of the Rerank RPC: a query plus the candidate
// documents to be ranked for it.
message RerankRequest {
// Query text.
string query = 1;
// Candidate documents.
repeated string documents = 2;
// Requested number of results — presumably an upper bound on what is
// returned; confirm against the backend implementation.
int32 top_n = 3;
}

// RerankResult is the output of the Rerank RPC.
message RerankResult {
// Token accounting for the call; as a sub-message it may be left unset.
Usage usage = 1;
// One entry per returned document.
repeated DocumentResult results = 2;
}

// Usage carries token counts reported by the backend for a rerank call.
message Usage {
// NOTE(review): exact counting rules are backend-defined — confirm.
int32 total_tokens = 1;
int32 prompt_tokens = 2;
}

// DocumentResult is a single scored document in a RerankResult.
message DocumentResult {
// Presumably the position of the document in RerankRequest.documents — confirm.
int32 index = 1;
// Document text.
string text = 2;
// Relevance score assigned by the backend; scale is backend-defined.
float relevance_score = 3;
}

message StoresKey {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,4 +120,6 @@ dependencies:
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
- rerankers[transformers]
- pydantic
prefix: /opt/conda/envs/transformers
2 changes: 2 additions & 0 deletions backend/python/common-env/transformers/transformers-rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,4 +108,6 @@ dependencies:
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
- rerankers[transformers]
- pydantic
prefix: /opt/conda/envs/transformers
4 changes: 3 additions & 1 deletion backend/python/common-env/transformers/transformers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,5 +111,7 @@ dependencies:
- vllm>=0.4.0
- transformers>=4.38.2 # Updated Version
- transformers_stream_generator==0.0.5
- xformers==0.0.23.post1
- xformers==0.0.23.post1
- rerankers[transformers]
- pydantic
prefix: /opt/conda/envs/transformers
39 changes: 39 additions & 0 deletions core/backend/rerank.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package backend

import (
"context"
"fmt"

"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
model "github.com/go-skynet/LocalAI/pkg/model"
)

// Rerank loads modelFile on the named backend through loader and forwards
// request to that backend's Rerank RPC.
//
// backend must be non-empty: there is no sensible default reranking backend
// (the previous fallback to the Piper TTS backend was a copy-paste leftover),
// so an empty value is rejected with an error. backendConfig is used to build
// the gRPC load-model options. The returned result is whatever the backend
// produced; it is nil whenever the error is non-nil.
func Rerank(backend, modelFile string, request *proto.RerankRequest, loader *model.ModelLoader, appConfig *config.ApplicationConfig, backendConfig config.BackendConfig) (*proto.RerankResult, error) {
	if backend == "" {
		return nil, fmt.Errorf("backend is required")
	}

	grpcOpts := gRPCModelOpts(backendConfig)

	// NOTE(review): modelOpts receives an empty BackendConfig rather than
	// backendConfig; kept as-is because modelOpts is not visible here —
	// confirm this is intentional.
	opts := modelOpts(config.BackendConfig{}, appConfig, []model.Option{
		model.WithBackendString(backend),
		model.WithModel(modelFile),
		model.WithContext(appConfig.Context),
		model.WithAssetDir(appConfig.AssetsDestination),
		model.WithLoadGRPCLoadModelOpts(grpcOpts),
	})

	rerankModel, err := loader.BackendLoader(opts...)
	if err != nil {
		return nil, fmt.Errorf("loading rerank model %q: %w", modelFile, err)
	}
	if rerankModel == nil {
		return nil, fmt.Errorf("could not load rerank model")
	}

	return rerankModel.Rerank(context.Background(), request)
}
1 change: 1 addition & 0 deletions core/http/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ func App(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *confi
routes.RegisterOpenAIRoutes(app, cl, ml, appConfig, auth)
routes.RegisterPagesRoutes(app, cl, ml, appConfig, auth)
routes.RegisterUIRoutes(app, cl, ml, appConfig, galleryService, auth)
routes.RegisterJINARoutes(app, cl, ml, appConfig, auth)

// Define a custom 404 handler
// Note: keep this at the bottom!
Expand Down
84 changes: 84 additions & 0 deletions core/http/endpoints/jina/rerank.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package jina

import (
"github.com/go-skynet/LocalAI/core/backend"
"github.com/go-skynet/LocalAI/core/config"

fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
"github.com/go-skynet/LocalAI/core/schema"
"github.com/go-skynet/LocalAI/pkg/grpc/proto"
"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
"github.com/rs/zerolog/log"
)

// JINARerankEndpoint returns the fiber handler for the Jina-compatible rerank
// API. The handler parses the JSON body, resolves the model and its backend
// configuration, calls backend.Rerank and writes the results back as a
// schema.JINARerankResponse with status 200.
//
// A body that cannot be parsed yields a 400 JSON error. Errors returned by
// backend.Rerank are returned to fiber unchanged.
func JINARerankEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfig *config.ApplicationConfig) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		req := new(schema.JINARerankRequest)
		if err := c.BodyParser(req); err != nil {
			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
				"error": "Cannot parse JSON",
			})
		}

		// The body is decoded a second time into a TTSRequest purely to pick
		// up its Model and Backend fields.
		// NOTE(review): consider a Backend field on JINARerankRequest so this
		// borrowed type can go away.
		input := new(schema.TTSRequest)
		if err := c.BodyParser(input); err != nil {
			return c.Status(fiber.StatusBadRequest).JSON(fiber.Map{
				"error": "Cannot parse JSON",
			})
		}

		modelFile, err := fiberContext.ModelFromContext(c, ml, input.Model, false)
		if err != nil {
			modelFile = input.Model
			log.Warn().Msgf("Model not found in context: %s", input.Model)
		}

		cfg, err := cl.LoadBackendConfigFileByName(modelFile, appConfig.ModelPath,
			config.LoadOptionDebug(appConfig.Debug),
			config.LoadOptionThreads(appConfig.Threads),
			config.LoadOptionContextSize(appConfig.ContextSize),
			config.LoadOptionF16(appConfig.F16),
		)
		if err != nil || cfg == nil {
			// Without a usable config, continue with an empty one instead of
			// dereferencing a nil pointer below.
			log.Warn().Err(err).Msgf("Could not load backend config for model: %s", input.Model)
			cfg = &config.BackendConfig{}
			modelFile = input.Model
		} else {
			modelFile = cfg.Model
		}
		log.Debug().Msgf("Request for model: %s", modelFile)

		// A backend named in the request overrides the configured one.
		if input.Backend != "" {
			cfg.Backend = input.Backend
		}

		request := &proto.RerankRequest{
			Query:     req.Query,
			TopN:      int32(req.TopN),
			Documents: req.Documents,
		}

		results, err := backend.Rerank(cfg.Backend, modelFile, request, ml, appConfig, *cfg)
		if err != nil {
			return err
		}

		response := &schema.JINARerankResponse{
			Model: req.Model,
		}

		for _, r := range results.Results {
			response.Results = append(response.Results, schema.JINADocumentResult{
				Index:          int(r.Index),
				Document:       schema.JINAText{Text: r.Text},
				RelevanceScore: float64(r.RelevanceScore),
			})
		}

		// Usage is an optional sub-message; a backend that does not report it
		// leaves the pointer nil.
		if usage := results.Usage; usage != nil {
			response.Usage.TotalTokens = int(usage.TotalTokens)
			response.Usage.PromptTokens = int(usage.PromptTokens)
		}

		return c.Status(fiber.StatusOK).JSON(response)
	}
}
19 changes: 19 additions & 0 deletions core/http/routes/jina.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package routes

import (
"github.com/go-skynet/LocalAI/core/config"
"github.com/go-skynet/LocalAI/core/http/endpoints/jina"

"github.com/go-skynet/LocalAI/pkg/model"
"github.com/gofiber/fiber/v2"
)

// RegisterJINARoutes mounts the Jina-compatible API routes on app.
//
// auth is installed in front of the rerank handler so the endpoint is subject
// to the same authentication middleware the caller passes to the other route
// registrars; previously the parameter was accepted but never used.
func RegisterJINARoutes(app *fiber.App,
	cl *config.BackendConfigLoader,
	ml *model.ModelLoader,
	appConfig *config.ApplicationConfig,
	auth func(*fiber.Ctx) error) {

	// POST endpoint to mimic the reranking
	app.Post("/v1/rerank", auth, jina.JINARerankEndpoint(cl, ml, appConfig))
}
Loading

0 comments on commit 1e6ec05

Please sign in to comment.