# File: selenium_scraper.rb
#
# Project setup (from README.md, "rubysite scrapping process"):
#   1. Run `bundle install`
#   2. Run `ruby tour_site_scraper.rb`
#
# Opens a tour.ne.jp flight-search result page in Firefox, clicks the
# "#Act_Airline_Out" element, and prints the first and last airline names
# found on the page together with the sum of the per-company ticket counts.
require 'rubygems'
require 'selenium-webdriver'
require 'pry'

# Alternative driver setup for headless execution, so that no browser window
# has to be opened:
# options = Selenium::WebDriver::Firefox::Options.new(args: ['-headless'])
# driver = Selenium::WebDriver.for(:firefox, options: options)

driver = Selenium::WebDriver.for :firefox

begin
  # Earlier search URL kept for reference:
  # driver.navigate.to "https://www.tour.ne.jp/j_air/list/?adult=1&air_type=2&arr_out=ITM&change_date_in=0&change_date_out=0&date_out=20211221&dpt_out=TYO"
  driver.navigate.to "https://www.tour.ne.jp/j_air/list/?adult=1&air_type=2&arr_out=ITM&change_date_in=0&change_date_out=0&date_out=20211231&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0"

  # Wait's :timeout is expressed in seconds. The previous value of 10000 let a
  # missing element stall the script for almost three hours before failing.
  wait = Selenium::WebDriver::Wait.new(timeout: 30)

  # Fixed delay to let the page load.
  sleep(10)

  # Poll until the button exists; Wait retries on NoSuchElementError and raises
  # Selenium::WebDriver::Error::TimeoutError once the timeout has elapsed.
  ticket_summary_button = wait.until { driver.find_element(:css, '#Act_Airline_Out') }

  sleep(10)

  ticket_summary_button.click

  # Fixed delay before reading the page content after the click.
  sleep(5)

  ticket_summary = driver.find_elements(:class, 'airline-name')
  ticket_available_lists = driver.find_elements(:class, 'toggle-btn-company')

  # Debugging hook:
  # binding.pry

  # Each element's text carries a count mixed with other characters, so keep
  # only the digits before converting to an integer.
  available_ticket = ticket_available_lists.sum { |ticket_count| ticket_count.text.delete('^0-9').to_i }

  # first/last are nil when no airline names were found; compact keeps the
  # report from crashing with NoMethodError in that case.
  company_names = [ticket_summary.first, ticket_summary.last].compact.map(&:text).join(', ')

  puts "available ticket companies = #{company_names}"
  puts "Total available ticket found is = #{available_ticket}"
ensure
  # Always end the session so Firefox and its driver process do not linger,
  # even when a step above raises.
  driver.quit
end
# File: tour_site_scraper.rb
#
# Opens a tour.ne.jp flight-search result page in Firefox, clicks the
# "#Act_Airline_Out" element, and prints the first and last airline names
# found on the page together with the sum of the per-company ticket counts.
require 'rubygems'
require 'selenium-webdriver'
require 'pry'

# Alternative driver setup for headless execution, so that no browser window
# has to be opened:
# options = Selenium::WebDriver::Firefox::Options.new(args: ['-headless'])
# driver = Selenium::WebDriver.for(:firefox, options: options)

driver = Selenium::WebDriver.for :firefox

begin
  # The search URL is generated by hand for a chosen date and time and pasted
  # here; it is meant to become dynamic later.
  driver.navigate.to "https://www.tour.ne.jp/j_air/list/?adult=1&air_type=2&arr_out=ITM&change_date_in=0&change_date_out=0&date_out=20211231&dpt_out=TYO&time_from_out=0600&time_to_out=0700&time_type_out=0"

  # Wait's :timeout is expressed in seconds. The previous value of 10000 let a
  # missing element stall the script for almost three hours before failing.
  wait = Selenium::WebDriver::Wait.new(timeout: 30)

  # Fixed delay to let the page load.
  sleep(5)

  # Poll until the button exists; Wait retries on NoSuchElementError and raises
  # Selenium::WebDriver::Error::TimeoutError once the timeout has elapsed.
  ticket_summary_button = wait.until { driver.find_element(:css, '#Act_Airline_Out') }

  ticket_summary_button.click

  # Fixed delay after clicking the second tab so its AJAX-loaded HTML content
  # can arrive.
  sleep(3)

  # Airline name elements; only used for the informational first line of output.
  ticket_summary = driver.find_elements(:class, 'airline-name')

  # Elements holding the available-ticket count of each company.
  ticket_available_lists = driver.find_elements(:class, 'toggle-btn-company')

  # Each element's text carries a count mixed with other characters, so keep
  # only the digits before converting to an integer, then total them.
  total_available_ticket = ticket_available_lists.sum { |ticket_count| ticket_count.text.delete('^0-9').to_i }

  # first/last are nil when no airline names were found; compact keeps the
  # report from crashing with NoMethodError in that case.
  company_names = [ticket_summary.first, ticket_summary.last].compact.map(&:text).join(', ')

  puts "Available ticket companies name = #{company_names}"
  puts "Total available ticket found is = #{total_available_ticket}"
ensure
  # Always end the session so Firefox and its driver process do not linger,
  # even when a step above raises.
  driver.quit
end