# --- Selenium Imports --- # << ADDED
import threading
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from webdriver_manager.chrome import ChromeDriverManager  # Optional: Auto-manages chromedriver
# --- Web Scraper Function --- # << ADDED
def run_emirates_scrape() -> None:
    """
    Perform the web scraping task for the Emirates Line schedule.

    Drives a Chrome browser to the schedule-search page, enters a fixed
    origin ("Je" -> JEBEL ALI) and destination ("Mu" -> MUNDRA, INDIA),
    submits the search, and extracts the visible text of the results table.

    Communicates exclusively through the module-level shared state
    (``scraper_status``, ``scraper_result``, ``scraper_error``), always
    mutated under ``scraper_lock`` so the Flask routes can poll safely.
    Intended to run in a background daemon thread; returns nothing and
    never raises — every failure is captured into the shared state.
    """
    global scraper_status, scraper_result, scraper_error, scraper_lock
    app.logger.info("Starting Emirates Line scraping task...")
    driver = None
    try:
        options = webdriver.ChromeOptions()
        # options.add_argument("--disable-gpu")
        # options.add_argument("--headless")  # Enable for production/server environments
        # options.add_argument("--no-sandbox")  # Often needed in containerized/headless environments
        # options.add_argument("--disable-dev-shm-usage")  # Overcomes limited resource problems

        # Use WebDriverManager or specify path directly.
        try:
            # Attempt to use WebDriverManager first.
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=options)
            app.logger.info("ChromeDriver started using WebDriverManager.")
        except Exception as wdm_error:
            # Best-effort fallback: rely on a chromedriver already on PATH.
            app.logger.warning(
                f"WebDriverManager failed ({wdm_error}). Falling back to default ChromeDriver path."
            )
            driver = webdriver.Chrome(options=options)
            app.logger.info("ChromeDriver started using default path.")

        # NOTE(review): the original URL was lost in extraction — confirm the
        # correct Emirates Line schedule-search page before deploying.
        driver.get("https://www.emiratesline.com/")  # TODO: confirm URL
        wait = WebDriverWait(driver, 20)  # Increased wait time
        app.logger.info("Page loaded. Waiting for elements...")
        # driver.save_screenshot("debug_page_loaded.png")  # Optional debug screenshot

        # --- Origin Port ---
        origin_port = wait.until(
            EC.visibility_of_element_located((By.ID, "originPort"))
        )
        app.logger.info("Origin port input found.")
        # Type character-by-character so the autocomplete widget fires.
        for ch in "Je":
            origin_port.send_keys(ch)
            time.sleep(1)  # Shorter delay might work, adjust if needed
        app.logger.info("Typed 'Je'. Waiting for origin suggestions...")
        # Wait for the dropdown suggestion and click.
        origin_suggestion = wait.until(EC.element_to_be_clickable(
            (By.XPATH,
             "//li[contains(@class, 'ui-menu-item')]/div[contains(text(),'JEBEL ALI')]")
        ))  # More specific XPath
        app.logger.info(f"Found origin suggestion: {origin_suggestion.text}")
        origin_suggestion.click()
        app.logger.info("Clicked origin suggestion.")
        time.sleep(0.5)  # Small pause after click

        # --- Destination Port ---
        destination_port = wait.until(
            EC.visibility_of_element_located((By.ID, "destinationPort"))
        )
        app.logger.info("Destination port input found.")
        for ch in "Mu":
            destination_port.send_keys(ch)
            time.sleep(1)  # Shorter delay
        app.logger.info("Typed 'Mu'. Waiting for destination suggestions...")
        # Wait for the dropdown suggestion and click.
        dest_suggestion = wait.until(EC.element_to_be_clickable(
            (By.XPATH,
             "//li[contains(@class, 'ui-menu-item')]/div[contains(text(),'MUNDRA, INDIA')]")
        ))  # More specific XPath
        app.logger.info(f"Found destination suggestion: {dest_suggestion.text}")
        dest_suggestion.click()
        app.logger.info("Clicked destination suggestion.")
        time.sleep(0.5)  # Small pause

        # --- Click Search ---
        search_button = wait.until(EC.element_to_be_clickable(
            (By.XPATH,
             "//button[contains(@class, 'primary-btn') and contains(text(), 'Search')]")
        ))
        app.logger.info("Search button found.")
        search_button.click()
        app.logger.info("Clicked search button.")

        # --- Wait for and Extract Results ---
        app.logger.info("Waiting for schedule results table...")
        schedule_div = wait.until(EC.presence_of_element_located(
            (By.CLASS_NAME, "schedule-viewer-table-main")
        ))
        app.logger.info("Results table located.")
        # Get only the visible text using JavaScript for cleaner output.
        visible_text = driver.execute_script(
            "return arguments[0].innerText || arguments[0].textContent;",
            schedule_div
        )
        app.logger.info("Extracted visible text from results table.")
        # driver.save_screenshot("debug_results_found.png")  # Optional debug

        # --- Update Global State (Success) ---
        with scraper_lock:
            scraper_result = visible_text.strip() if visible_text else "No schedule data found."
            scraper_status = "completed"
            scraper_error = None
        app.logger.info("Scraping completed successfully.")
    except TimeoutException as te:
        app.logger.error(f"Scraping timed out waiting for element: {te}", exc_info=True)
        # driver.save_screenshot("debug_timeout_error.png")  # Optional debug
        with scraper_lock:
            # First line only: Selenium exception messages include long stack dumps.
            scraper_error = f"Timeout waiting for element: {str(te).splitlines()[0]}"
            scraper_status = "error"
            scraper_result = None
    except NoSuchElementException as nse:
        app.logger.error(f"Scraping failed: Element not found: {nse}", exc_info=True)
        # driver.save_screenshot("debug_notfound_error.png")  # Optional debug
        with scraper_lock:
            scraper_error = f"Element not found: {str(nse).splitlines()[0]}"
            scraper_status = "error"
            scraper_result = None
    except WebDriverException as wde:
        app.logger.error(f"WebDriver error during scraping: {wde}", exc_info=True)
        # driver.save_screenshot("debug_webdriver_error.png")  # Optional debug
        with scraper_lock:
            scraper_error = f"Browser/Driver error: {str(wde).splitlines()[0]}"
            scraper_status = "error"
            scraper_result = None
    except Exception as e:
        app.logger.error(f"Unexpected error during scraping: {e}", exc_info=True)
        # driver.save_screenshot("debug_unexpected_error.png")  # Optional debug
        with scraper_lock:
            scraper_error = f"An unexpected error occurred: {str(e)}"
            scraper_status = "error"
            scraper_result = None
    finally:
        if driver:
            try:
                driver.quit()
                app.logger.info("WebDriver closed.")
            except Exception as quit_e:
                app.logger.error(f"Error closing WebDriver: {quit_e}")
        # Ensure status reflects completion or error even if finally block
        # runs before update.
        with scraper_lock:
            if scraper_status == "running":  # If it failed before setting status
                if scraper_error is None:  # Check if error was already set
                    scraper_error = "Scraping process ended unexpectedly."
                scraper_status = "error"
                scraper_result = None
                app.logger.warning("Scraping status set to 'error' in finally block.")
# --- Scraper Routes --- # << ADDED
@app.route('/scrape/start', methods=['POST'])
def start_scrape():
    """Start the Emirates Line scraping process in a background thread.

    Returns:
        JSON ``{'success': bool, 'message': str}``; HTTP 409 when a scrape
        is already in progress, 200 otherwise.
    """
    global scraper_status, scraper_result, scraper_error, scraper_thread, scraper_lock
    with scraper_lock:
        if scraper_status == "running":
            app.logger.warning("Scrape start requested, but already running.")
            return jsonify({'success': False,
                            'message': 'Scraping process is already running.'}), 409  # Conflict
        # Reset state and start.
        scraper_status = "running"
        scraper_result = None
        scraper_error = None
    app.logger.info("Starting new scraper thread.")
    # Important: pass the function to run, not the result of calling it.
    # Daemon thread so a hung scrape never blocks interpreter shutdown.
    scraper_thread = threading.Thread(target=run_emirates_scrape, daemon=True)
    scraper_thread.start()
    return jsonify({'success': True, 'message': 'Scraping process started.'})
@app.route('/scrape/status', methods=['GET'])
def get_scrape_status():
    """Return the current status of the scraping process.

    Returns:
        JSON ``{'status': str, 'error': str | None}`` where status is one of
        'idle', 'running', 'completed', or 'error'.
    """
    global scraper_status, scraper_error, scraper_lock
    # Read under the lock so status and error are a consistent pair.
    with scraper_lock:
        response = {
            'status': scraper_status,
            'error': scraper_error
        }
    # app.logger.debug(f"Sending scrape status: {response}")  # Can be verbose
    return jsonify(response)
@app.route('/scrape/results', methods=['GET'])
def get_scrape_results():
    """Return the results of the last completed scrape.

    Returns:
        JSON keyed by current state: ``results`` when completed, ``error``
        when failed, or a progress ``message`` when running/idle.
    """
    global scraper_status, scraper_result, scraper_error, scraper_lock
    with scraper_lock:
        if scraper_status == "completed":
            app.logger.info("Sending completed scrape results.")
            return jsonify({'status': 'completed', 'results': scraper_result})
        elif scraper_status == "error":
            app.logger.info("Sending scrape error details.")
            return jsonify({'status': 'error', 'error': scraper_error})
        elif scraper_status == "running":
            app.logger.info("Scrape results requested, but still running.")
            return jsonify({'status': 'running',
                            'message': 'Scraping is still in progress.'})
        else:  # idle
            app.logger.info("Scrape results requested, but no scrape has been run yet.")
            return jsonify({'status': 'idle',
                            'message': 'Scraping has not been started yet.'})