"""Synchronise user-activity rows from external MySQL databases into Django.

Each active ``ExternalDBSettings`` record describes one external MySQL
database. ``scheduled_sync`` fans out one ``DataSyncManager`` per record; the
manager pulls new rows, derives hotel/room entities from the UTM parameters
and stores the rows as ``UserActivityLog`` records.
"""

import html
import logging
from concurrent.futures import ThreadPoolExecutor, TimeoutError
from datetime import datetime
from urllib.parse import unquote, parse_qs

import pymysql
from decouple import config
from django.utils import timezone

from hotels.models import Room, Hotel
from .models import UserActivityLog, ExternalDBSettings
from touchh.utils.log import CustomLogger


class DatabaseConnector:
    """Open, query and close a PyMySQL connection described by ``ExternalDBSettings``."""

    def __init__(self, db_settings_id):
        """Load the connection settings for ``db_settings_id``.

        Raises:
            ValueError: if no ``ExternalDBSettings`` row has that id.
        """
        self.db_settings_id = db_settings_id
        self.logger = CustomLogger(name="DatabaseConnector", log_level="DEBUG").get_logger()
        self.connection = None
        self.db_settings = self.get_db_settings()

    def get_db_settings(self):
        """Return the connection settings as a plain dict.

        Returns:
            dict with the keys ``host``, ``port``, ``user``, ``password``,
            ``database`` and ``table_name``.

        Raises:
            ValueError: if the settings record does not exist.
            Exception: any other lookup error is logged and re-raised.
        """
        try:
            settings = ExternalDBSettings.objects.get(id=self.db_settings_id)
            self.logger.info(f"Retrieved DB settings: {settings}")
            return {
                "host": settings.host,
                "port": settings.port,
                "user": settings.user,
                "password": settings.password,
                "database": settings.database,
                "table_name": settings.table_name,
            }
        except ExternalDBSettings.DoesNotExist as e:
            self.logger.error(f"Settings with ID {self.db_settings_id} not found.")
            raise ValueError("Invalid db_settings_id") from e
        except Exception as e:
            self.logger.error(f"Error retrieving settings: {e}")
            raise

    def connect(self):
        """Establish the MySQL connection and store it on ``self.connection``.

        Raises:
            ConnectionError: if PyMySQL fails to connect for any reason.
        """
        try:
            # Never write the plaintext password to the log.
            safe_settings = {**self.db_settings, "password": "***"}
            self.logger.info(f"Connecting to DB with settings: {safe_settings}")
            self.connection = pymysql.connect(
                host=self.db_settings["host"],
                port=self.db_settings["port"],
                user=self.db_settings["user"],
                password=self.db_settings["password"],
                database=self.db_settings["database"],
                charset="utf8mb4",
                cursorclass=pymysql.cursors.DictCursor,
            )
            self.logger.info("Database connection established successfully.")
        except pymysql.err.OperationalError as e:
            self.logger.error(f"Operational error during DB connection: {e}")
            raise ConnectionError(f"Operational error: {e}") from e
        except Exception as e:
            self.logger.error(f"Unexpected database connection error: {e}")
            raise ConnectionError(e) from e

    def execute_query(self, query, params=None):
        """Run ``query`` and return all rows.

        Args:
            query: SQL text; use ``%s`` placeholders when ``params`` is given.
            params: optional sequence/dict of values bound by the driver.

        Returns:
            The fetched rows (dicts, because of ``DictCursor``), or an empty
            list when the query fails. Failures are logged, not raised.
        """
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()
        except Exception as e:
            self.logger.error(f"Query execution error: {e}")
            return []

    def close(self):
        """Close the connection if one was opened."""
        if self.connection:
            self.connection.close()
            self.logger.info("Database connection closed.")


class DataProcessor:
    """Stateless helpers for decoding and parsing raw column values."""

    def __init__(self, logger):
        self.logger = logger

    def decode_html_entities(self, text):
        """Percent-decode ``text`` and then unescape HTML entities.

        Non-string or empty values are returned unchanged.
        """
        if text and isinstance(text, str):
            return html.unescape(unquote(text))
        return text

    def parse_datetime(self, dt_str):
        """Convert a ``datetime`` or ``"%Y-%m-%d %H:%M:%S"`` string to an aware datetime.

        Returns:
            An aware ``datetime``, or ``None`` when the value is empty or
            cannot be parsed (the error is logged).
        """
        try:
            if isinstance(dt_str, datetime):
                return timezone.make_aware(dt_str) if timezone.is_naive(dt_str) else dt_str
            if dt_str:
                return timezone.make_aware(datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S"))
        except Exception as e:
            self.logger.error(f"Datetime parsing error: {e}")
        return None

    def url_parameters_parser(self, url_parameters):
        """Parse a query string into ``{name: first_value}``.

        Returns an empty dict for empty input or on a parsing error.
        """
        try:
            if not url_parameters:
                return {}
            decoded_params = unquote(url_parameters)
            parsed_params = parse_qs(decoded_params)
            return {key: value[0] for key, value in parsed_params.items()}
        except Exception as e:
            self.logger.error(f"URL parameters parsing error: {e}")
            return {}


class HotelRoomManager:
    """Get-or-create helpers for ``Hotel`` and ``Room`` records."""

    def __init__(self, logger):
        self.logger = logger

    def get_or_create_hotel(self, hotel_id, page_title):
        """Return the hotel with ``hotel_id``, creating it when missing.

        Args:
            hotel_id: external hotel identifier; a falsy value skips creation.
            page_title: HTML-escaped title used as the name of a new hotel.
                May be ``None``/empty, in which case a generated name is used.

        Returns:
            The ``Hotel`` instance, or ``None`` when ``hotel_id`` is missing.
        """
        if not hotel_id:
            self.logger.warning("Hotel creation skipped: missing hotel_id.")
            return None

        # html.unescape(None) raises TypeError, which made the fallback name
        # unreachable for rows without a title; guard before unescaping.
        title = html.unescape(page_title) if page_title else ""
        hotel, created = Hotel.objects.get_or_create(
            hotel_id=hotel_id,
            defaults={
                "name": title or f"Отель {hotel_id}",
                "description": "Автоматически созданный отель",
            },
        )
        if created:
            self.logger.info(f"Hotel '{hotel.name}' created with hotel_id: {hotel_id}")
        else:
            self.logger.info(f"Hotel '{hotel.name}' already exists with hotel_id: {hotel_id}")
        return hotel

    def get_or_create_room(self, hotel, room_number):
        """Return the room ``room_number`` of ``hotel``, creating it when missing.

        Returns:
            The ``Room`` instance, or ``None`` when ``hotel`` or
            ``room_number`` is missing.
        """
        if not hotel:
            self.logger.warning("Room creation skipped: missing hotel.")
            return None
        if not room_number:
            self.logger.warning(f"Room creation skipped: missing room_number for hotel {hotel.name}.")
            return None

        external_id = f"{hotel.hotel_id}_{room_number}".lower()
        room, created = Room.objects.get_or_create(
            hotel=hotel,
            number=room_number,
            defaults={
                "external_id": external_id,
                "description": "Автоматически созданная комната",
            },
        )
        if created:
            self.logger.info(f"Room '{room.number}' (external_id: {external_id}) created in hotel '{hotel.name}'")
        else:
            self.logger.info(f"Room '{room.number}' already exists in hotel '{hotel.name}'")
        return room


class DataSyncManager:
    """Pull new rows from one external database and persist them locally."""

    def __init__(self, db_settings_id):
        self.logger = CustomLogger(name="DataSyncManager", log_level="DEBUG").get_logger()
        self.db_connector = DatabaseConnector(db_settings_id)
        self.db_settings = self.db_connector.db_settings  # keep the database settings
        self.data_processor = DataProcessor(self.logger)
        self.hotel_manager = HotelRoomManager(self.logger)

    def get_last_saved_record(self):
        """Return the id of the newest ``UserActivityLog`` row, or 0 if none exist.

        NOTE(review): this is the local primary key, yet ``fetch_new_data``
        compares it with the external table's ``id``. If the two sequences
        diverge, rows may be skipped or re-fetched -- confirm whether the
        maximum ``external_id`` should be used instead.
        """
        record = UserActivityLog.objects.order_by("-id").first()
        return record.id if record else 0

    def fetch_new_data(self, last_id):
        """Fetch up to 1000 external rows with ``id > last_id`` that carry UTM data.

        Values are bound as query parameters; the table name cannot be bound,
        so it is embedded as a back-quoted identifier with back-ticks escaped.
        """
        # Double back-ticks so the name cannot break out of the identifier,
        # and double '%' because the driver %-formats the query with params.
        table_name = str(self.db_settings.get("table_name")).replace("`", "``").replace("%", "%%")
        query = f"""
            SELECT * FROM `{table_name}`
            WHERE id > %s
            AND url_parameters IS NOT NULL
            AND url_parameters LIKE %s
            AND page_url IS NOT NULL
            ORDER BY id ASC
            LIMIT 1000;
        """
        params = (last_id, "%utm_medium%")
        self.logger.info(f"Fetching new data with query: {query} params: {params}")
        return self.db_connector.execute_query(query, params)

    def process_and_save_data(self, rows):
        """Create hotels/rooms and upsert a ``UserActivityLog`` for each row.

        A failure on one row is logged and does not stop the remaining rows.
        Rows with zero hits or no page title are reported as invalid and are
        not saved (their hotel/room are still created, as before).
        """
        for row in rows:
            try:
                url_params = self.data_processor.decode_html_entities(row.get("url_parameters", ""))
                params = self.data_processor.url_parameters_parser(url_params)

                hotel_id = params.get("utm_content")
                room_number = params.get("utm_term")
                page_title = row.get("page_title")
                page_url = row.get("page_url")
                external_id = row.get("id")
                hits = row.get("hits") or 0

                hotel = self.hotel_manager.get_or_create_hotel(hotel_id, page_title)
                room = self.hotel_manager.get_or_create_room(hotel, room_number)

                if hits == 0 or page_title is None:
                    # Do not fall through to the success message for rejected rows.
                    self.logger.warning("Invalid data for UserActivityLog.")
                    continue

                UserActivityLog.objects.update_or_create(
                    external_id=external_id,
                    defaults={
                        "user_id": row.get("user_id") or 0,
                        "timestamp": row.get("timestamp"),
                        "date_time": row.get("date_time"),
                        "ip": row.get("ip") or "0.0.0.0",
                        "created": self.data_processor.parse_datetime(row.get("created")) or timezone.now(),
                        "url_parameters": url_params,
                        "page_id": room.id if room else None,
                        "page_title": html.unescape(page_title),
                        "hits": hits,
                        # The query filters NULL URLs, but stay safe for other callers.
                        "page_url": html.unescape(page_url) if page_url else page_url,
                    },
                )
                self.logger.info(f"Record ID {external_id} processed successfully.")
            except Exception as e:
                self.logger.error(f"Error processing record ID {row.get('id')}: {e}")

    def sync(self):
        """Connect, fetch rows newer than the last saved record, store them, disconnect.

        Raises:
            ConnectionError: if the external database cannot be reached.
        """
        self.db_connector.connect()
        try:
            last_id = self.get_last_saved_record()
            rows = self.fetch_new_data(last_id)
            self.process_and_save_data(rows)
            self.logger.info("Sync completed.")
        finally:
            self.db_connector.close()


def scheduled_sync():
    """Run ``DataSyncManager.sync`` for every active ``ExternalDBSettings`` record.

    Connections are synced concurrently on up to five worker threads. Errors
    inside a task are logged there; a task that exceeds 300 seconds is reported
    as timed out, although leaving the executor still waits for it to finish.
    """
    logger = CustomLogger(name="DatabaseSyncScheduler", log_level="DEBUG").get_logger()
    logger.info("Starting scheduled sync.")

    # Materialise once: avoids a separate EXISTS query plus a second fetch.
    active_db_settings = list(ExternalDBSettings.objects.filter(is_active=True))
    if not active_db_settings:
        logger.warning("No active database connections found.")
        return

    logger.info(f"Found {len(active_db_settings)} active database connections.")

    def sync_task(db_settings):
        try:
            logger.info(f"Syncing connection: {db_settings.name} (ID={db_settings.id})")
            sync_manager = DataSyncManager(db_settings.id)
            sync_manager.sync()
            logger.info(f"Sync completed for connection: {db_settings}")
        except Exception as e:
            logger.error(f"Error syncing connection {db_settings}: {e}")

    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(sync_task, db_settings) for db_settings in active_db_settings]
        for future in futures:
            try:
                future.result(timeout=300)
            except TimeoutError:
                logger.error("Sync task timed out.")

    logger.info("Scheduled sync completed.")