Skip to content

Commit

Permalink
Merge pull request #216 from Netflix/feature/github-languages
Browse files Browse the repository at this point in the history
Adds language metadata for github repos
  • Loading branch information
sbehrens authored Sep 20, 2017
2 parents 6be95fa + b0a8706 commit 74242da
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 33 deletions.
86 changes: 53 additions & 33 deletions lib/scumblr_tasks/sync_tasks/github_sync.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,35 +47,35 @@ def self.config_options
def self.options
{
:sync_type => {name: "Sync Type (Organization/User)",
description: "Should this task retrieve repos for an organization or for a user?",
required: false,
type: :choice,
default: :both,
choices: [:org, :user]},
description: "Should this task retrieve repos for an organization or for a user?",
required: false,
type: :choice,
default: :both,
choices: [:org, :user]},
:owner => {name: "Organization/User",
description: "Specify the organization or user.",
required: false,
type: :string},
description: "Specify the organization or user.",
required: false,
type: :string},
:owner_metadata => {name: "Organization/Users from Metadata",
description: "Provide a metadata key to pull organizations or users from.",
required: false,
type: :system_metadata},
description: "Provide a metadata key to pull organizations or users from.",
required: false,
type: :system_metadata},
:members => {name: "Import Organization Members' Repos",
description: "If syncing for an organization, should the task also import Repos owned by members of the organization.",
required: false,
type: :boolean},
description: "If syncing for an organization, should the task also import Repos owned by members of the organization.",
required: false,
type: :boolean},
:tags => {name: "Tag Results",
description: "Provide a tag for newly created results",
required: false,
default: "github",
type: :tag
},
:scope_visibility => {name: "Repo Visibility",
description: "Should the task sync public repos, private repos, or both.",
required: true,
type: :choice,
default: :both,
choices: [:both, :public, :private]},
description: "Should the task sync public repos, private repos, or both.",
required: true,
type: :choice,
default: :both,
choices: [:both, :public, :private]},

}
end
Expand Down Expand Up @@ -104,9 +104,10 @@ def initialize(options={})
end

def run

@completed=0
@last_total = 0

owners =[]
if(@options[:owner_metadata])
begin
Expand All @@ -120,7 +121,7 @@ def run

previous_results = @options.try(:[],:_self).try(:metadata).try(:[],"previous_results")
if(previous_results)
@last_total = previous_results["created"].to_a.count + previous_results["updated"].to_a.count
@last_total = previous_results["created"].to_a.count + previous_results["updated"].to_a.count
end

owners.each do |owner|
Expand All @@ -137,7 +138,7 @@ def run
end
end




return []
Expand All @@ -146,27 +147,42 @@ def run

private

# Fetches the language breakdown of a single repository through the GitHub client.
#
# The lookup is best-effort: any failure yields nil so that the caller can
# simply skip the language metadata for that repository.
#
# @param name [String] login of the repository owner (user or organization)
# @param repo [String] name of the repository
# @return [Object, nil] the body of the languages response (presumably a
#   language-name => byte-count mapping -- confirm against the GitHub client),
#   or nil when the lookup failed
def get_languages(name, repo)
  max_attempts = 5
  attempts = 0

  begin
    response = @github.repos.languages name, repo
  rescue Github::Error::Forbidden => e
    attempts += 1
    handle_rate_limit(e)
    # Bound the retries: a 403 that is not cleared by waiting (i.e. one that
    # keeps coming back after handle_rate_limit returns) would otherwise
    # retry forever.
    retry if attempts < max_attempts
    puts "Giving up on languages for #{name}/#{repo} after #{attempts} attempts: #{e.message}"
    return nil
  rescue => e
    # Still best-effort, but leave a trace instead of failing silently.
    puts "Unable to retrieve languages for #{name}/#{repo}: #{e.class}: #{e.message}"
    return nil
  end

  response.body
end

def get_repos(name, type)

if(type == "org")
begin
response = @github.repos.list org: name
rescue Github::Error::Forbidden=>e
handle_rate_limit(e)

retry

end
else
begin

response = @github.repos.list user: name
rescue Github::Error::Forbidden=>e
handle_rate_limit(e)
retry
rescue => e

end
end
parse_results(response)



while(response.has_next_page?)
puts "Getting new page"
response = response.next_page
Expand All @@ -183,7 +199,6 @@ def handle_rate_limit(e)
sleep(wait_for + 1) if wait_for.to_i > 0
elsif(e.try(:http_headers).try(:[],"x-ratelimit-remaining").present? && e.try(:http_headers).try(:[],"x-ratelimit-remaining").to_i <= 1)


wait_for = e.http_headers["x-ratelimit-reset"].to_i - Time.now.to_i

puts "Sleeping for #{wait_for}"
Expand All @@ -196,18 +211,18 @@ def handle_rate_limit(e)

def parse_results(response)
puts "Rate limit: #{response.headers.ratelimit_remaining} of #{response.headers.ratelimit_limit} remaining. Reset in #{response.response.headers["x-ratelimit-reset"].to_i - DateTime.now.to_i} seconds (#{response.response.headers["x-ratelimit-reset"]})"



response.each do |repo|
if(@options[:scope_visibility] == "both" || (repo.private == true && @options[:scope_visibility] == "private") || (repo.private == false && @options[:scope_visibility] == "public"))


res = Result.where(url: repo.html_url.downcase).first_or_initialize

res.title = repo.full_name.to_s + " (Github)"
res.domain = "github.com"
res.metadata ||={}
#search_metadata[:github_analyzer] = true

res.metadata["repository_data"] ||= {}
res.metadata["repository_data"]["name"] = repo["name"]
res.metadata["repository_data"]["slug"] = repo["name"]
Expand All @@ -221,7 +236,16 @@ def parse_results(response)
res.metadata["repository_data"]["link"] = repo["html_url"]
res.metadata["repository_data"]["repository_host"] = @github_api_endpoint.gsub(/\Ahttps?:\/\//,"").gsub(/\/.+/,"")

# Add programming language metadata: the primary language plus the per-language breakdown (GitHub reports bytes of code per language, not lines of code)
if repo["language"].present?
res.metadata["repository_data"]["primary_language"] = repo["language"]
end

languages = get_languages(repo["owner"]["login"], repo["name"])

if languages.present?
res.metadata["repository_data"]["languages"] = languages.to_hash
end

if @options[:tags].present?
res.add_tags(@options[:tags])
Expand All @@ -238,10 +262,6 @@ def parse_results(response)
end
end
end




end


Expand Down
7 changes: 7 additions & 0 deletions test/models/task_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,15 @@ class TaskTest < ActiveSupport::TestCase
# Runs the github_sync task and verifies that the synced result carries
# language metadata. Skipped when no GitHub OAuth token is configured.
test "should execute github sync task" do
  skip("Github OAuth Token not defined") if Rails.configuration.try(:github_oauth_token).blank?
  github_sync.perform_task

  # Check the result summary before looking up a record, so that an empty
  # sync fails on this assertion instead of on the lookup below.
  assert_equal(1, github_sync.metadata[:current_results].count)

  res = Result.find(github_sync.metadata[:current_results]["updated"].first)
  repository_data = res.metadata["repository_data"]

  # The repository's primary language was recorded
  assert_equal("Ruby", repository_data["primary_language"])

  # The per-language breakdown was recorded as well
  assert(repository_data["languages"].key?("Ruby"))
end
test "should execute google search task" do
skip("Google developer key not defined") if Rails.configuration.try(:google_developer_key).blank?
Expand Down

0 comments on commit 74242da

Please sign in to comment.