Selaa lähdekoodia

Just a tiny script.

Frédéric G. MARAND 4 kuukautta sitten
vanhempi
sitoutus
202dcf46e5
7 muutettua tiedostoa jossa 303 lisäystä ja 1 poistoa
  1. 1 0
      .gitignore
  2. 13 0
      Makefile
  3. 40 1
      README.md
  4. 4 0
      example.env
  5. 21 0
      go.mod
  6. 54 0
      go.sum
  7. 170 0
      main.go

+ 1 - 0
.gitignore

@@ -1,3 +1,4 @@
+.idea
 # ---> Go
 # Compiled Object files, Static and Dynamic libs (Shared Objects)
 *.o

+ 13 - 0
Makefile

@@ -0,0 +1,13 @@
+# Default goal: build the binary.
+all: build
+
+# None of these targets produce a file named after themselves, so declare them
+# all phony: a stray file or directory called "build" or "install" must not
+# stop the recipe from running.
+.PHONY: all build install lint
+
+# Run the static checkers over every package.
+lint:
+	golint ./...
+	go vet ./...
+	staticcheck -checks=all ./...
+
+# Compile the command into ./mongodb_duplicates.
+build:
+	go build -o mongodb_duplicates .
+
+# Install the command with the Go toolchain.
+install:
+	go install

+ 40 - 1
README.md

@@ -1,3 +1,42 @@
 # mongodb_duplicates
 
-A CLI command to find duplicate values in a MongoDB collection.
+## Install
+
+- Prerequisites:
+  - Go SDK 1.22+
+  - The URL and credentials to a working MongoDB server
+- Optional: `make lint` to verify code
+- `make`
+- `make install`
+
+## Use
+
+A CLI command to find duplicate values in a MongoDB collection.
+
+1. In your working directory, create a `.env` file based on the example provided
+   in https://code.osinet.fr/fgm/mongodb_duplicates/src/master/example.env
+2. Install the `envrun` command to load environment variables from that file:
+  - `go install github.com/fgm/envrun@latest`
+3. Adjust your `.env`. For the first check, configure it for an empty collection
+  in an empty database:
+   - MONGODB_URI: default = `mongodb://localhost:27017`
+   - MONGODB_DB: default = `test`
+   - MONGODB_COLLECTION: default = `test`
+   - MONGODB_FIELD: the duplicate field. Default = `email`
+3. Run the command in read-only mode
+   - `$ envrun go run ./docs/osinet/cmd/duplicates`
+   - it should not display anything since the collection is empty
+4. Re-run the command with seed generation. The seed data will remain in the collection.
+   - `$ envrun go run ./docs/osinet/cmd/duplicates -command seed`
+   - `user1@example.com: 3`
+   - `user2@example.com: 2`
+   - The results show 3 duplicates of user1 and 2 of user2.
+5. Now adjust configuration for the actual collection you want to check.
+6. Run the command in read-only mode
+  - `$ envrun go run ./docs/osinet/cmd/duplicates`
+  - It will give you the values of the field for which duplicates exist,
+    and the document count for that value
+
+## License
+
+Licensed under the Apache 2.0 license.

+ 4 - 0
example.env

@@ -0,0 +1,4 @@
+MONGODB_URI=mongodb://localhost:27017
+MONGODB_DB=test
+MONGODB_COLLECTION=test
+MONGODB_FIELD=email

+ 21 - 0
go.mod

@@ -0,0 +1,21 @@
+module code.osinet.fr/fgm/mongodb_duplicates
+
+go 1.22.4
+
+require (
+	go.mongodb.org/mongo-driver v1.16.0
+	gopkg.in/yaml.v3 v3.0.1
+)
+
+require (
+	github.com/golang/snappy v0.0.4 // indirect
+	github.com/klauspost/compress v1.13.6 // indirect
+	github.com/montanaflynn/stats v0.7.1 // indirect
+	github.com/xdg-go/pbkdf2 v1.0.0 // indirect
+	github.com/xdg-go/scram v1.1.2 // indirect
+	github.com/xdg-go/stringprep v1.0.4 // indirect
+	github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
+	golang.org/x/crypto v0.22.0 // indirect
+	golang.org/x/sync v0.7.0 // indirect
+	golang.org/x/text v0.14.0 // indirect
+)

+ 54 - 0
go.sum

@@ -0,0 +1,54 @@
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM=
+github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
+github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
+github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
+github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
+github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
+github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
+github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY=
+github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4=
+github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8=
+github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
+github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA=
+github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+go.mongodb.org/mongo-driver v1.16.0 h1:tpRsfBJMROVHKpdGyc1BBEzzjDUWjItxbVSZ8Ls4BQ4=
+go.mongodb.org/mongo-driver v1.16.0/go.mod h1:oB6AhJQvFQL4LEHyXi6aJzQJtBiTQHiAd83l0GdFaiw=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30=
+golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
+golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

+ 170 - 0
main.go

@@ -0,0 +1,170 @@
+/*
+Package main contains command "duplicates", which detects documents containing
+a duplicated field in a MongoDB collection.
+
+It takes its configuration from environment variables: refer to file `example.env`
+for a sample.
+
+(c) 2024 Ouest Systèmes Informatiques
+
+Licensed under the Apache 2.0 license.
+*/
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"slices"
+
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+	"gopkg.in/yaml.v3"
+)
+
+const ( // Default configuration values and the names of the supported sub-commands.
+	defaultMongoDBURI = "mongodb://localhost:27017" // Used when MONGODB_URI is not set.
+	defaultDatabase   = "test" // Used when MONGODB_DB is not set.
+	defaultCollection = "test" // Used when MONGODB_COLLECTION is not set.
+	defaultField      = "email" // Used when MONGODB_FIELD is not set.
+	defaultCommand    = "check" // Default value of the -command flag.
+	seedCommand       = "seed" // -command value that runs seed before the check.
+)
+
+type conf struct { // conf holds the configuration assembled by configure.
+	dbURI    string // MongoDB connection URI.
+	client   *mongo.Client // Client created from dbURI.
+	dbName   string // Name of the database holding the collection.
+	collName string // Name of the collection to inspect.
+	command  string // Sub-command selected with the -command flag.
+	field    string // Name of the document field checked for duplicates.
+}
+
+// configure builds the run-time configuration for the command.
+//
+// The connection URI, database, collection and field names are read from the
+// MONGODB_URI, MONGODB_DB, MONGODB_COLLECTION and MONGODB_FIELD environment
+// variables, each falling back to its default when the variable is not set.
+// The sub-command comes from the -command flag parsed from args; name is the
+// name given to the flag set.
+//
+// Arguments are parsed and validated before connecting, so that an invalid
+// command line never leaves a client behind that nobody can disconnect. On
+// success the caller owns the returned client and must disconnect it.
+func configure(ctx context.Context, name string, args []string) (*conf, error) {
+	var cfg conf
+
+	fs := flag.NewFlagSet(name, flag.ContinueOnError)
+	fs.StringVar(&cfg.command, "command", defaultCommand, "sub-command to run")
+	if err := fs.Parse(args); err != nil {
+		return nil, fmt.Errorf("failed to parse arguments: %w", err)
+	}
+	if !slices.Contains([]string{defaultCommand, seedCommand}, cfg.command) {
+		return nil, fmt.Errorf("unknown command %q", cfg.command)
+	}
+
+	// env returns the value of an environment variable, or def when it is unset.
+	// A variable set to the empty string is kept as is.
+	env := func(key, def string) string {
+		if v, ok := os.LookupEnv(key); ok {
+			return v
+		}
+		return def
+	}
+	cfg.dbURI = env("MONGODB_URI", defaultMongoDBURI)
+	cfg.dbName = env("MONGODB_DB", defaultDatabase)
+	cfg.collName = env("MONGODB_COLLECTION", defaultCollection)
+	cfg.field = env("MONGODB_FIELD", defaultField)
+
+	// Connect last: every earlier failure returns without a client to clean up.
+	client, err := mongo.Connect(ctx, options.Client().ApplyURI(cfg.dbURI))
+	if err != nil {
+		return nil, fmt.Errorf("failed to connect to MongoDB: %w", err)
+	}
+	cfg.client = client
+	return &cfg, nil
+}
+
+// user returns the sample e-mail address of the n-th seed user,
+// in the form "user<n>@example.com".
+func user(n int) string {
+	// Sprint inserts no space here: each pair of neighbours includes a string.
+	return fmt.Sprint("user", n, "@example.com")
+}
+
+// seed resets coll and fills it with sample data containing known duplicates.
+//
+// The collection is dropped first, then users 0 to 4 are inserted once each
+// under field, and users 1, 1 and 2 are inserted again, leaving three documents
+// for user 1 and two for user 2. It stops at the first failing operation and
+// returns its error wrapped with the step that failed.
+func seed(ctx context.Context, coll *mongo.Collection, field string) error {
+	// insertUser stores a single document holding the address of user n.
+	insertUser := func(n int) error {
+		_, err := coll.InsertOne(ctx, bson.D{{Key: field, Value: user(n)}})
+		return err
+	}
+
+	// Start from an empty collection.
+	if err := coll.Drop(ctx); err != nil {
+		return fmt.Errorf("seed/dropping collection: %w", err)
+	}
+
+	// One document per user: no duplicates yet.
+	for n := 0; n < 5; n++ {
+		if err := insertUser(n); err != nil {
+			return fmt.Errorf("seed/inserting initial doc %d: %w", n, err)
+		}
+	}
+
+	// Extra copies create the duplicates: two more for user 1, one more for user 2.
+	extras := [...]int{1, 1, 2}
+	for _, n := range extras {
+		if err := insertUser(n); err != nil {
+			return fmt.Errorf("seed/inserting duplicate doc %d: %w", n, err)
+		}
+	}
+	return nil
+}
+
+// check reports the values of field that appear in more than one document of coll.
+//
+// It lists the distinct values of the field, then counts the documents matching
+// each value. The result maps each duplicated value to its document count; it
+// is empty, not nil, when no value is duplicated.
+//
+// String values are used as keys unchanged. Any other value type is keyed by
+// its default fmt formatting rather than causing a type-assertion panic; values
+// of different types that format identically end up under the same key.
+func check(ctx context.Context, coll *mongo.Collection, field string) (map[string]int, error) {
+	values, err := coll.Distinct(ctx, field, bson.D{})
+	if err != nil {
+		return nil, fmt.Errorf("check/distinct: %w", err)
+	}
+
+	dups := make(map[string]int)
+	for _, value := range values {
+		n, err := coll.CountDocuments(ctx, bson.D{{Key: field, Value: value}})
+		if err != nil {
+			return nil, fmt.Errorf("check/counting: %w", err)
+		}
+		if n < 2 {
+			continue
+		}
+		// The field is not guaranteed to hold strings only.
+		key, ok := value.(string)
+		if !ok {
+			key = fmt.Sprint(value)
+		}
+		dups[key] = int(n)
+	}
+	return dups, nil
+}
+
+// testableMain runs the whole command and returns the process exit code.
+// It is separated from main so it can be driven with a chosen writer, logger
+// and argument list.
+//
+// Exit codes: 0 on success, 1 when configuration fails, 3 when seeding fails,
+// 4 when the duplicate check fails, 5 when the result cannot be written to w.
+// Once the client is connected, a failing disconnect replaces any of those
+// codes with 2. Every error is printed on logger.
+func testableMain(ctx context.Context, w io.Writer, logger *log.Logger, name string, args []string) (exit int) {
+	// fail logs err and hands back code, so a failure is a single return statement.
+	fail := func(code int, err error) int {
+		logger.Println(err)
+		return code
+	}
+
+	config, err := configure(ctx, name, args)
+	if err != nil {
+		return fail(1, err)
+	}
+	// The result is named so that this deferred disconnect can still change
+	// the exit code after the return value has been set.
+	defer func() {
+		if derr := config.client.Disconnect(ctx); derr != nil {
+			logger.Println(derr)
+			exit = 2
+		}
+	}()
+
+	coll := config.client.Database(config.dbName).Collection(config.collName)
+	if config.command == seedCommand {
+		if serr := seed(ctx, coll, config.field); serr != nil {
+			return fail(3, serr)
+		}
+	}
+
+	dups, err := check(ctx, coll, config.field)
+	if err != nil {
+		return fail(4, err)
+	}
+
+	if werr := yaml.NewEncoder(w).Encode(dups); werr != nil {
+		return fail(5, werr)
+	}
+
+	return 0
+}
+
+// main runs testableMain with a background context, standard output, the
+// default logger and the process arguments, then exits with the code it returns.
+func main() {
+	code := testableMain(context.Background(), os.Stdout, log.Default(), os.Args[0], os.Args[1:])
+	os.Exit(code)
+}