Skip to content

Commit

Permalink
Merge pull request #5 from Dynom/ImplementingAListEndpoint
Browse files Browse the repository at this point in the history
Config and endpoints
  • Loading branch information
Dynom authored Jul 3, 2018
2 parents ce050f5 + 44bbedc commit 141c1ae
Show file tree
Hide file tree
Showing 20 changed files with 706 additions and 163 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ jobs:
- run: dep ensure -v -vendor-only
- run: go get -v -t -d ./...
- run: go get -u github.com/alecthomas/gometalinter && gometalinter -i
- run: gometalinter -t -D gotype -D errcheck --vendor --cyclo-over=10 --deadline=10m --enable=testify --enable=test --enable=gofmt --enable=unused -e "should have" ./...
# Lint step. The linter name passed to -D must be spelled "ineffassign"; a misspelled name does not disable anything.
- run: gometalinter -t -D errcheck -D gotype -D ineffassign --vendor --cyclo-over=10 --deadline=5m --enable=testify --enable=test --enable=gofmt --enable=megacheck --enable=nakedret -e "Errors unhandled" ./...
- run: go test -test.v -test.cover -test.race -test.coverprofile=coverage.txt -test.covermode=atomic ./...
- run: bash <(curl -s https://codecov.io/bash)
26 changes: 25 additions & 1 deletion Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 18 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,18 @@ out-of-the-box as a library, a webservice or as a set of packages to build your
By default it uses [Jaro-Winkler](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) to calculate similarity.

## As a webservice
@todo

In a nutshell
```bash
curl 'http://localhost:1337/list/domains' -d '{"input": "foomail.com"}'
{"result":"hotmail.com","score":0.9030303030303031}
```

### The path `/list/<name>`
The name corresponds to a list definition in the config.yml. Using this approach, the service can be used for various
types of data. This is both for efficiency (shorter lists to iterate over) and to be more opinionated. When no list by
that name is found, a 404 is returned.


## As a library
You can use the various components that make up TySug individually or as a whole.
Expand Down Expand Up @@ -99,24 +110,23 @@ To help people avoid submitting an incorrect e-mail address, one could try the f

```go
func SuggestAlternative(email string, domains []string) (string, float64) {

i := strings.LastIndex(email, "@")
if i <= 0 || i >= len(email) {
return email, 0
}

// Extracting the local and domain parts
localPart := email[:i]
hostname := email[i+1:]

sug, _ := finder.New(domains)
alternative, score := sug.Find(hostname)
if score > 0.9 {
alternative, score, exact := sug.Find(hostname)

if exact || score > 0.9 {
combined := localPart + "@" + alternative
return combined, score
}

return email, score
}
```
Expand Down
26 changes: 12 additions & 14 deletions algorithm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"testing"

"fmt"
"math"
"strings"

"github.com/Dynom/TySug/finder"
Expand All @@ -14,15 +13,14 @@ import (

const (
defaultTestAlgorithm = `JaroWinkler .7/4`
floatTolerance = 0.000001
)

// Several algorithms to test with.
var algorithms = map[string]finder.Algorithm{
"Ukkonen 1/1/1": func(a, b string) float64 {
return -1 * float64(smetrics.Ukkonen(a, b, 1, 1, 1))
},
"JaroWinkler .7/4": func(a, b string) float64 {
defaultTestAlgorithm: func(a, b string) float64 {
return smetrics.JaroWinkler(a, b, .7, 4)
},
"WagnerFischer 1/1/1": func(a, b string) float64 {
Expand Down Expand Up @@ -82,7 +80,7 @@ func TestAlgorithms(t *testing.T) {
for expectedDomain, emailsToTest := range testData {
for _, domain := range emailsToTest {

bestMatch, score := sug.Find(domain)
bestMatch, score, _ := sug.Find(domain)
if bestMatch != expectedDomain {
t.Logf("Related score: %f", score)
t.Logf("Expected '%s' to result in '%s'. Instead I got: '%s'.", domain, expectedDomain, bestMatch)
Expand All @@ -96,7 +94,7 @@ func TestAlgorithms(t *testing.T) {
func TestNew(t *testing.T) {
expect := "example"
sug, _ := finder.New([]string{expect, "ample"}, finder.WithAlgorithm(algorithms[defaultTestAlgorithm]))
alt, _ := sug.Find("exampel")
alt, _, _ := sug.Find("exampel")

if alt != expect {
t.Errorf("Expected '%s' to be '%s'.", alt, expect)
Expand All @@ -114,14 +112,14 @@ func TestTestExactMatch(t *testing.T) {

for _, td := range cases {
sug, _ := finder.New([]string{"foo", "example", "CaseSensitive", "cASEsENSITIVE"}, finder.WithAlgorithm(algorithms[defaultTestAlgorithm]))
match, score := sug.Find(td.Input)
match, _, exact := sug.Find(td.Input)

if match != td.Expect {
t.Errorf("Expected the input '%s' to result in '%s', however the best match is '%s'", td.Input, td.Expect, match)
}

if math.Abs(1-score) > floatTolerance {
t.Errorf("Expected a score of ~1.0, instead it is: %f", score)
if !exact {
t.Errorf("Expected an exact match, instead I got %t", exact)
}
}
}
Expand All @@ -137,7 +135,7 @@ func TestApproximateMatch(t *testing.T) {

for _, td := range cases {
sug, _ := finder.New([]string{td.Reference}, finder.WithAlgorithm(algorithms[defaultTestAlgorithm]))
match, _ := sug.Find(td.Input)
match, _, _ := sug.Find(td.Input)

if match != td.Reference {
t.Errorf("Expected the input '%s' to result in '%s', however the best match '%s'", td.Input, td.Reference, match)
Expand All @@ -150,19 +148,19 @@ func BenchmarkBasicUsage(b *testing.B) {

b.Run("Direct match", func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, _ = sug.Find("foo")
_, _, _ = sug.Find("foo")
}
})

b.Run("Non direct match, low score", func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, _ = sug.Find("juice")
_, _, _ = sug.Find("juice")
}
})

b.Run("Non direct match, high score", func(b *testing.B) {
for i := 0; i < b.N; i++ {
_, _ = sug.Find("butterfyl")
_, _, _ = sug.Find("butterfyl")
}
})
}
Expand Down Expand Up @@ -190,9 +188,9 @@ func SuggestAlternative(email string, domains []string) (string, float64) {
hostname := email[i+1:]

sug, _ := finder.New(domains, finder.WithAlgorithm(algorithms[defaultTestAlgorithm]))
alternative, score := sug.Find(strings.ToLower(hostname))
alternative, score, exact := sug.Find(strings.ToLower(hostname))

if score > 0.9 {
if exact || score > 0.9 {
combined := localPart + "@" + alternative
return combined, score
}
Expand Down
118 changes: 118 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
client:
referencesMax: 100
inputLengthMax: 50

CORS:
# Not defining allowedOrigins will use wildcard '*', meaning all clients are allowed.
#allowedOrigins:
# - http://example.org
# - https://api.example.com

server:
listenOn: "0.0.0.0:1337"

references:
domains:
- gmail.com
- yahoo.com
- hotmail.com
- aol.com
- hotmail.co.uk
- hotmail.fr
- msn.com
- yahoo.fr
- wanadoo.fr
- orange.fr
- comcast.net
- yahoo.co.uk
- yahoo.com.br
- yahoo.co.in
- live.com
- rediffmail.com
- free.fr
- gmx.de
- web.de
- yandex.ru
- ymail.com
- libero.it
- outlook.com
- uol.com.br
- bol.com.br
- mail.ru
- cox.net
- hotmail.it
- sbcglobal.net
- sfr.fr
- live.fr
- verizon.net
- live.co.uk
- googlemail.com
- yahoo.es
- ig.com.br
- live.nl
- bigpond.com
- terra.com.br
- yahoo.it
- neuf.fr
- yahoo.de
- alice.it
- rocketmail.com
- att.net
- laposte.net
- facebook.com
- bellsouth.net
- yahoo.in
- hotmail.es
- charter.net
- yahoo.ca
- yahoo.com.au
- rambler.ru
- hotmail.de
- tiscali.it
- shaw.ca
- yahoo.co.jp
- sky.com
- earthlink.net
- optonline.net
- freenet.de
- t-online.de
- aliceadsl.fr
- virgilio.it
- home.nl
- qq.com
- telenet.be
- me.com
- yahoo.com.ar
- tiscali.co.uk
- yahoo.com.mx
- voila.fr
- gmx.net
- mail.com
- planet.nl
- tin.it
- live.it
- ntlworld.com
- arcor.de
- yahoo.co.id
- frontiernet.net
- hetnet.nl
- live.com.au
- yahoo.com.sg
- zonnet.nl
- club-internet.fr
- juno.com
- optusnet.com.au
- blueyonder.co.uk
- bluewin.ch
- skynet.be
- sympatico.ca
- windstream.net
- mac.com
- centurytel.net
- chello.nl
- live.ca
- aim.com
- bigpond.net.au
- hotmail.nl
- ziggo.nl
- live.com
Loading

0 comments on commit 141c1ae

Please sign in to comment.