modify scripts

This commit is contained in:
oscarz
2025-07-03 16:24:09 +08:00
parent c34cfb458c
commit 9561f57aa5
2 changed files with 19 additions and 13 deletions

View File

@ -60,9 +60,9 @@ class SQLiteDBHandler:
return processed_data
def insert_or_update_common(self, data, tbl_name, uniq_key='url'):
def insert_or_update_common(self, data, tbl_name, uniq_key='url', exists_do_nothing=False):
if self.lower_sqlite_version:
return self.insert_or_update_common_lower(data, tbl_name, uniq_key)
return self.insert_or_update_common_lower(data, tbl_name, uniq_key, exists_do_nothing)
try:
processed_data = self.check_and_process_data(data, tbl_name)
@ -72,12 +72,17 @@ class SQLiteDBHandler:
columns = ', '.join(processed_data.keys())
values = list(processed_data.values())
placeholders = ', '.join(['?' for _ in values])
update_clause = ', '.join([f"{col}=EXCLUDED.{col}" for col in processed_data.keys() if col != uniq_key])
if exists_do_nothing:
conflict_clause = f'ON CONFLICT ({uniq_key}) DO NOTHING'
else:
update_clause = ', '.join([f"{col}=EXCLUDED.{col}" for col in processed_data.keys() if col != uniq_key])
conflict_clause = f"ON CONFLICT ({uniq_key}) DO UPDATE SET {update_clause}"
sql = f'''
INSERT INTO {tbl_name} ({columns})
VALUES ({placeholders})
ON CONFLICT ({uniq_key}) DO UPDATE SET {update_clause}
VALUES ({placeholders})
{conflict_clause}
'''
self.cursor.execute(sql, values)
self.conn.commit()
@ -90,7 +95,7 @@ class SQLiteDBHandler:
logging.error(f"Error inserting or updating data: {e}")
return None
def insert_or_update_common_lower(self, data, tbl_name, uniq_key='url'):
def insert_or_update_common_lower(self, data, tbl_name, uniq_key='url', exists_do_nothing=False):
try:
processed_data = self.check_and_process_data(data, tbl_name)
if processed_data is None:
@ -109,12 +114,13 @@ class SQLiteDBHandler:
self.cursor.execute(sql, values)
self.conn.commit()
except sqlite3.IntegrityError: # 唯一键冲突,执行更新操作
update_clause = ', '.join([f"{col}=?" for col in processed_data.keys() if col != uniq_key])
update_values = [processed_data[col] for col in processed_data.keys() if col != uniq_key]
update_values.append(data[uniq_key])
sql = f"UPDATE {tbl_name} SET {update_clause} WHERE {uniq_key} = ?"
self.cursor.execute(sql, update_values)
self.conn.commit()
if not exists_do_nothing:
update_clause = ', '.join([f"{col}=?" for col in processed_data.keys() if col != uniq_key])
update_values = [processed_data[col] for col in processed_data.keys() if col != uniq_key]
update_values.append(data[uniq_key])
sql = f"UPDATE {tbl_name} SET {update_clause} WHERE {uniq_key} = ?"
self.cursor.execute(sql, update_values)
self.conn.commit()
# 获取插入或更新后的记录 ID
self.cursor.execute(f"SELECT id FROM {tbl_name} WHERE {uniq_key} = ?", (data[uniq_key],))

View File

@ -76,7 +76,7 @@ class SQLitePipeline(SQLiteDBHandler):
def _process_u001_item(self, item, spider):
logging.debug(f"insert one item. href:{spider.name}")
return self.insert_or_update_common(item, tbl_name=self.tbl_name_u3c3, uniq_key='url')
return self.insert_or_update_common(item, tbl_name=self.tbl_name_u3c3, uniq_key='url', exists_do_nothing=True)
def _process_sis001_item(self, item, spider):
self.cursor.execute('''