lunasec-io / lunasec

LunaSec - Dependency Security Scanner that automatically notifies you about vulnerabilities like Log4Shell or node-ipc in your Pull Requests and Builds. Protect yourself in 30 seconds with the LunaTrace GitHub App: https://github.com/marketplace/lunatrace-by-lunasec/
https://www.lunasec.io/
Other
1.44k stars 164 forks source link

Vulnbot initial commit #1150

Closed breadchris closed 1 year ago

breadchris commented 1 year ago

Commit the MVP of the vulnbot.

_(Screenshot `2023-03-08_06-56.png` was attached here, but the image upload never completed.)_

github-actions[bot] commented 1 year ago

Hasura Semantic Diff

Hasura config files have changed. This comment shows which fields have changed, ignoring formatting.

Click to expand! ``` (root level) + two list entries added: - "!include vulnerability_reference_content.yaml" - "!include vulnerability_reference_embedding.yaml" (root level) + three map entries added: table: name: reference_content schema: vulnerability object_relationships: - name: reference using: foreign_key_constraint_on: reference_id array_relationships: - name: reference_embeddings using: foreign_key_constraint_on: column: reference_content_id table: name: reference_embedding schema: vulnerability (root level) + two map entries added: table: name: reference_embedding schema: vulnerability object_relationships: - name: reference_content using: foreign_key_constraint_on: reference_content_id lunatrace-custom.permissions - three list entries removed: - role: user definition: schema: | type AuthenticatedRepoCloneUrlOutput { url: String } scalar JSON type Mutation { presignManifestUpload(project_id: UUID!): PresignedUrlResponse } type PresignedUrlResponse { bucket: String! headers: JSON! key: String! url: String! } type Query { authenticatedRepoCloneUrl(repoGithubId: Int!): AuthenticatedRepoCloneUrlOutput fakeQueryToHackHasuraBeingABuggyMess: String sbomUrl(buildId: UUID!): String } type SbomUploadUrlOutput { error: Boolean! uploadUrl: UploadUrl } scalar UUID type UploadUrl { headers: JSON! url: String! } - role: service definition: schema: | type AuthenticatedRepoCloneUrlOutput { url: String } scalar JSON type Mutation { presignManifestUpload(project_id: UUID!): PresignedUrlResponse } type PresignedUrlResponse { bucket: String! headers: JSON! key: String! url: String! } type Query { authenticatedRepoCloneUrl(repoGithubId: Int!): AuthenticatedRepoCloneUrlOutput fakeQueryToHackHasuraBeingABuggyMess: String presignSbomUpload(orgId: UUID!, buildId: UUID!): SbomUploadUrlOutput sbomUrl(buildId: UUID!): String } input SbomUploadUrlInput { orgId: UUID! projectId: UUID! } type SbomUploadUrlOutput { error: Boolean! 
uploadUrl: UploadUrl } scalar UUID type UploadUrl { headers: JSON! url: String! } - role: cli definition: schema: | scalar JSON type Query { presignSbomUpload(orgId: UUID!, buildId: UUID!): SbomUploadUrlOutput } type SbomUploadUrlOutput { error: Boolean! uploadUrl: UploadUrl } scalar UUID type UploadUrl { headers: JSON! url: String! } + three list entries added: - role: user definition: schema: | scalar JSON scalar UUID type AuthenticatedRepoCloneUrlOutput { url: String } type Mutation { presignManifestUpload(project_id: UUID!): PresignedUrlResponse } type PresignedUrlResponse { bucket: String! headers: JSON! key: String! url: String! } type Query { authenticatedRepoCloneUrl(repoGithubId: Int!): AuthenticatedRepoCloneUrlOutput fakeQueryToHackHasuraBeingABuggyMess: String sbomUrl(buildId: UUID!): String } type SbomUploadUrlOutput { error: Boolean! uploadUrl: UploadUrl } type UploadUrl { headers: JSON! url: String! } - role: service definition: schema: | scalar JSON scalar UUID type AuthenticatedRepoCloneUrlOutput { url: String } type Mutation { presignManifestUpload(project_id: UUID!): PresignedUrlResponse } type PresignedUrlResponse { bucket: String! headers: JSON! key: String! url: String! } type Query { authenticatedRepoCloneUrl(repoGithubId: Int!): AuthenticatedRepoCloneUrlOutput fakeQueryToHackHasuraBeingABuggyMess: String presignSbomUpload(orgId: UUID!, buildId: UUID!): SbomUploadUrlOutput sbomUrl(buildId: UUID!): String } type SbomUploadUrlOutput { error: Boolean! uploadUrl: UploadUrl } type UploadUrl { headers: JSON! url: String! } input SbomUploadUrlInput { orgId: UUID! projectId: UUID! } - role: cli definition: schema: | scalar JSON scalar UUID type Query { presignSbomUpload(orgId: UUID!, buildId: UUID!): SbomUploadUrlOutput } type SbomUploadUrlOutput { error: Boolean! uploadUrl: UploadUrl } type UploadUrl { headers: JSON! url: String! 
} diff --git a/lunatrace/bsl/hasura/migrations/lunatrace/1677516215552_vulnerable_reference_embeddings/down.sql b/lunatrace/bsl/hasura/migrations/lunatrace/1677516215552_vulnerable_reference_embeddings/down.sql new file mode 100644 index 00000000..6c064981 --- /dev/null +++ b/lunatrace/bsl/hasura/migrations/lunatrace/1677516215552_vulnerable_reference_embeddings/down.sql @@ -0,0 +1,4 @@ +DROP FUNCTION vulnerability.match_reference_embedding_for_vulnerability; +DROP TABLE "vulnerability"."reference_embedding"; +DROP TABLE "vulnerability"."reference_content"; +DROP EXTENSION vector; diff --git a/lunatrace/bsl/hasura/migrations/lunatrace/1677516215552_vulnerable_reference_embeddings/up.sql b/lunatrace/bsl/hasura/migrations/lunatrace/1677516215552_vulnerable_reference_embeddings/up.sql new file mode 100644 index 00000000..cc2ac9c5 --- /dev/null +++ b/lunatrace/bsl/hasura/migrations/lunatrace/1677516215552_vulnerable_reference_embeddings/up.sql @@ -0,0 +1,59 @@ +CREATE EXTENSION vector; + +CREATE TABLE "vulnerability"."reference_content" ( + "id" uuid NOT NULL DEFAULT gen_random_uuid(), + "reference_id" uuid NOT NULL REFERENCES "vulnerability"."reference"("id") ON UPDATE cascade ON DELETE cascade, + "title" text NOT NULL, + "content" text NOT NULL, + "normalized_content" text NOT NULL, + "content_type" text NOT NULL, + "last_successful_fetch" timestamptz DEFAULT NULL, + PRIMARY KEY ("id"), + UNIQUE ("reference_id") +); + +CREATE TABLE "vulnerability"."reference_embedding" ( + "id" uuid NOT NULL DEFAULT gen_random_uuid(), + "content_hash" text NOT NULL, + "reference_content_id" uuid NOT NULL REFERENCES "vulnerability"."reference_content"("id") ON UPDATE cascade ON DELETE cascade, + "content" text NOT NULL, + "embedding" vector (1536) NOT NULL, + PRIMARY KEY ("id"), + UNIQUE ("content_hash") +); + +CREATE INDEX ON "vulnerability"."reference_embedding" +USING ivfflat (embedding vector_cosine_ops) +WITH (lists = 100); + +create or replace function 
vulnerability.match_reference_embedding_for_vulnerability ( + query_embedding vector(1536), + vuln_id text, + similarity_threshold float, + match_count int +) + returns table ( + id uuid, + url text, + content text, + similarity float + ) + language plpgsql +as $$ +begin + return query + select + r.id, + r.url, + re.content, + 1 - (re.embedding <=> query_embedding) as similarity + from vulnerability.reference_embedding re + join vulnerability.reference_content rc on rc.id = re.reference_content_id + join vulnerability.reference r on r.id = rc.reference_id + join vulnerability.vulnerability v on v.id = r.vulnerability_id + where 1 - (re.embedding <=> query_embedding) > similarity_threshold + and v.source_id = vuln_id + order by re.embedding <=> query_embedding + limit match_count; +end; +$$; ```
factoidforrest commented 1 year ago

This is cool. I think a lot of this will change when it gets combined into my Python work, but that's OK!