0% found this document useful (0 votes)
26 views4 pages

Emirates Line Web Scraper Function

The document outlines a web scraping function for the Emirates Line schedule using Selenium, which includes error handling and logging. It provides routes for starting the scraping process, checking its status, and retrieving results. The function manages global state variables to track the scraping status, results, and any errors encountered during execution.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd

Topics covered

  • Scraping Completion,
  • User Interface Interaction,
  • User Input Simulation,
  • Scraping Locking Mechanism,
  • Input Suggestions,
  • API Endpoints,
  • Result Processing,
  • JavaScript Execution,
  • XPath Selection,
  • Scraping Techniques
0% found this document useful (0 votes)
26 views4 pages

Emirates Line Web Scraper Function

The document outlines a web scraping function for the Emirates Line schedule using Selenium, which includes error handling and logging. It provides routes for starting the scraping process, checking its status, and retrieving results. The function manages global state variables to track the scraping status, results, and any errors encountered during execution.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd

Topics covered

  • Scraping Completion,
  • User Interface Interaction,
  • User Input Simulation,
  • Scraping Locking Mechanism,
  • Input Suggestions,
  • API Endpoints,
  • Result Processing,
  • JavaScript Execution,
  • XPath Selection,
  • Scraping Techniques

# --- App / Selenium Imports --- # << ADDED
# NOTE(review): module paths below were destroyed by document extraction
# (replaced with "[Link]") and have been reconstructed from the names used
# in the code (By, WebDriverWait, EC, the exception classes). The Flask
# `app` object itself is presumably created elsewhere in this file.
import logging
import threading
import time

from flask import jsonify

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    TimeoutException,
    NoSuchElementException,
    WebDriverException,
)
from webdriver_manager.chrome import ChromeDriverManager  # Optional: auto-manages chromedriver

# --- Web Scraper Function --- # << ADDED


def run_emirates_scrape():
"""
Performs the web scraping task for Emirates Line schedule.
Updates global state variables upon completion or error.
"""
global scraper_status, scraper_result, scraper_error, scraper_lock

[Link]("Starting Emirates Line scraping task...")


driver = None
try:
options = [Link]()
# options.add_argument("--disable-gpu")
# options.add_argument("--headless") # Enable for production/server
environments
# options.add_argument("--no-sandbox") # Often needed in
containerized/headless environments
# options.add_argument("--disable-dev-shm-usage") # Overcomes limited
resource problems

# Use WebDriverManager or specify path directly


try:
# Attempt to use WebDriverManager first
service =
[Link](ChromeDriverManager().install())
driver = [Link](service=service, options=options)
[Link]("ChromeDriver started using WebDriverManager.")
except Exception as wdm_error:
[Link](f"WebDriverManager failed ({wdm_error}). Falling back
to default ChromeDriver path.")
# Fallback if WebDriverManager fails or isn't used
driver = [Link](options=options)
[Link]("ChromeDriver started using default path.")

[Link]("[Link]
wait = WebDriverWait(driver, 20) # Increased wait time

[Link]("Page loaded. Waiting for elements...")


# driver.save_screenshot("debug_page_loaded.png") # Optional debug
screenshot

# --- Origin Port ---


origin_port = [Link](EC.visibility_of_element_located(([Link],
"originPort")))
[Link]("Origin port input found.")
text = "Je"
for ch in text:
origin_port.send_keys(ch)
[Link](1) # Shorter delay might work, adjust if needed

[Link]("Typed 'Je'. Waiting for origin suggestions...")


# Wait for the dropdown suggestion and click
origin_suggestion = [Link](EC.element_to_be_clickable(
([Link], "//li[contains(@class,
'ui-menu-item')]/div[contains(text(),'JEBEL ALI')]"))) # More specific XPath
[Link](f"Found origin suggestion: {origin_suggestion.text}")
origin_suggestion.click()
[Link]("Clicked origin suggestion.")
[Link](0.5) # Small pause after click

# --- Destination Port ---


destination_port = [Link](EC.visibility_of_element_located(([Link],
"destinationPort")))
[Link]("Destination port input found.")
text1 = "Mu"
for ch in text1:
destination_port.send_keys(ch)
[Link](1) # Shorter delay

[Link]("Typed 'Mu'. Waiting for destination suggestions...")


# Wait for the dropdown suggestion and click
dest_suggestion = [Link](EC.element_to_be_clickable(
([Link], "//li[contains(@class,
'ui-menu-item')]/div[contains(text(),'MUNDRA, INDIA')]"))) # More specific XPath
[Link](f"Found destination suggestion: {dest_suggestion.text}")
dest_suggestion.click()
[Link]("Clicked destination suggestion.")
[Link](0.5) # Small pause

# --- Click Search ---


search_button = [Link](EC.element_to_be_clickable(
([Link], "//button[contains(@class, 'primary-btn') and
contains(text(), 'Search')]")))
[Link]("Search button found.")
search_button.click()
[Link]("Clicked search button.")

# --- Wait for and Extract Results ---


[Link]("Waiting for schedule results table...")
schedule_div = [Link](EC.presence_of_element_located(
(By.CLASS_NAME, "schedule-viewer-table-main")))
[Link]("Results table located.")

# Get only the visible text using JavaScript for cleaner output
visible_text = driver.execute_script(
"return arguments[0].innerText || arguments[0].textContent;",
schedule_div
)
[Link]("Extracted visible text from results table.")
# driver.save_screenshot("debug_results_found.png") # Optional debug

# --- Update Global State (Success) ---


with scraper_lock:
scraper_result = visible_text.strip() if visible_text else "No schedule
data found."
scraper_status = "completed"
scraper_error = None
[Link]("Scraping completed successfully.")

except TimeoutException as te:


[Link](f"Scraping timed out waiting for element: {te}",
exc_info=True)
# driver.save_screenshot("debug_timeout_error.png") # Optional debug
with scraper_lock:
scraper_error = f"Timeout waiting for element: {str(te).splitlines()
[0]}"
scraper_status = "error"
scraper_result = None
except NoSuchElementException as nse:
[Link](f"Scraping failed: Element not found: {nse}", exc_info=True)
# driver.save_screenshot("debug_notfound_error.png") # Optional debug
with scraper_lock:
scraper_error = f"Element not found: {str(nse).splitlines()[0]}"
scraper_status = "error"
scraper_result = None
except WebDriverException as wde:
[Link](f"WebDriver error during scraping: {wde}", exc_info=True)
# driver.save_screenshot("debug_webdriver_error.png") # Optional debug
with scraper_lock:
scraper_error = f"Browser/Driver error: {str(wde).splitlines()[0]}"
scraper_status = "error"
scraper_result = None
except Exception as e:
[Link](f"Unexpected error during scraping: {e}", exc_info=True)
# driver.save_screenshot("debug_unexpected_error.png") # Optional debug
with scraper_lock:
scraper_error = f"An unexpected error occurred: {str(e)}"
scraper_status = "error"
scraper_result = None
finally:
if driver:
try:
[Link]()
[Link]("WebDriver closed.")
except Exception as quit_e:
[Link](f"Error closing WebDriver: {quit_e}")
# Ensure status reflects completion or error even if finally block runs
before update
with scraper_lock:
if scraper_status == "running": # If it failed before setting status
if scraper_error is None: # Check if error was already set
scraper_error = "Scraping process ended unexpectedly."
scraper_status = "error"
scraper_result = None
[Link]("Scraping status set to 'error' in finally block.")

# --- Scraper Routes --- # << ADDED

@[Link]('/scrape/start', methods=['POST'])
def start_scrape():
"""Starts the Emirates Line scraping process in a background thread."""
global scraper_status, scraper_result, scraper_error, scraper_thread,
scraper_lock

with scraper_lock:
if scraper_status == "running":
[Link]("Scrape start requested, but already running.")
return jsonify({'success': False, 'message': 'Scraping process is
already running.'}), 409 # Conflict

# Reset state and start


scraper_status = "running"
scraper_result = None
scraper_error = None
[Link]("Starting new scraper thread.")
# Important: Pass the function to run, not the result of calling it
scraper_thread = [Link](target=run_emirates_scrape, daemon=True)
scraper_thread.start()

return jsonify({'success': True, 'message': 'Scraping process started.'})

@[Link]('/scrape/status', methods=['GET'])
def get_scrape_status():
"""Returns the current status of the scraping process."""
global scraper_status, scraper_error, scraper_lock
with scraper_lock:
response = {
'status': scraper_status,
'error': scraper_error
}
# [Link](f"Sending scrape status: {response}") # Can be verbose
return jsonify(response)

@[Link]('/scrape/results', methods=['GET'])
def get_scrape_results():
"""Returns the results of the last completed scrape."""
global scraper_status, scraper_result, scraper_error, scraper_lock
with scraper_lock:
if scraper_status == "completed":
[Link]("Sending completed scrape results.")
return jsonify({'status': 'completed', 'results': scraper_result})
elif scraper_status == "error":
[Link]("Sending scrape error details.")
return jsonify({'status': 'error', 'error': scraper_error})
elif scraper_status == "running":
[Link]("Scrape results requested, but still running.")
return jsonify({'status': 'running', 'message': 'Scraping is still in
progress.'})
else: # idle
[Link]("Scrape results requested, but no scrape has been run
yet.")
return jsonify({'status': 'idle', 'message': 'Scraping has not been
started yet.'})

You might also like