數(shù)據(jù)庫間數(shù)據(jù)同步方式很多,在上篇博文中有總結(jié)。本文是用py程序?qū)崿F(xiàn)數(shù)據(jù)同步。
A數(shù)據(jù)庫中有幾十張表,要匯聚到B數(shù)據(jù)庫中,且表結(jié)構(gòu)一致,需要準(zhǔn)實(shí)時(shí)的進(jìn)行數(shù)據(jù)同步,用工具實(shí)現(xiàn)時(shí)對(duì)其控制有限且配置較繁瑣,故自寫程序,可自由設(shè)置同步區(qū)間,記錄自己想要的日志
本代碼實(shí)現(xiàn)功能簡(jiǎn)單,采用面向過程,有需求的同學(xué)可以自己優(yōu)化成面向?qū)ο蠓绞?,在日志這塊缺少數(shù)據(jù)監(jiān)控,可根據(jù)需求增加。主要注意點(diǎn):
1、數(shù)據(jù)抽取時(shí)采用區(qū)間抽?。ò磿r(shí)間區(qū)間)、流式游標(biāo)迭代器+fetchone,避免內(nèi)存消耗
2、在數(shù)據(jù)插入時(shí)采用executemany(list),加快插入效率
import pymysql import os import datetime,time def update_time(content): with open(filepathtime, 'w') as f: f.writelines(content) def recode_log(content): with open(filepathlog, 'a') as f: f.writelines(content) def transferdata(): #1、獲取需要抽取的表,抽取數(shù)據(jù)的時(shí)間點(diǎn) with open(filepathtime, 'r') as f: lines = f.readlines() # 讀取所有數(shù)據(jù) print("需要同步的表信息",lines) for line in lines: startdatetime = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) tablename_list =line.split(',') #print(tablename_list) #print(tablename_list[-1]) tablename_list[-1] = tablename_list[-1].replace('\n','') #print(tablename_list) tablename = tablename_list[0] updatetime = tablename_list[1] #print(tablename,updatetime) #2、抽取此表此時(shí)間點(diǎn)的數(shù)據(jù),同步 updatetime_s = datetime.datetime.strptime(updatetime, '%Y-%m-%d %H:%M:%S') updatetime_e = (updatetime_s + datetime.timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S") #print(updatetime_s) #print(q_sql) db = pymysql.connect(host=host_o, port=port_o, user=user_o, passwd=passwd_o, db=db_o) cursor = db.cursor() q_sql = "select a,b,c from %s where c >= '%s' " % \ (tablename, updatetime_s) #2.1 首先判斷下原表中是否有待同步數(shù)據(jù),若有則同步且更新同步的時(shí)間參考點(diǎn),若沒有則不同步且不更新同步的時(shí)間參考點(diǎn) try: cursor.execute(q_sql) results = cursor.fetchone() #print(results) #返回是元組 #print("查詢?cè)頂?shù)據(jù)成功!",tablename) except BaseException as e: print("查詢?cè)頂?shù)據(jù)失??!",tablename, str(e)) #記錄異常日志 updatetime_n = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) eachline_log = updatetime_n + '[erro]:' + tablename + str(e) + '\n' content_log.append(eachline_log) recode_log(content_log) db.close() if results: print("===============================================================================") print("有數(shù)據(jù)可同步",tablename) db = pymysql.connect(host=host_o, port=port_o, user=user_o, passwd=passwd_o, db=db_o, charset='utf8', cursorclass=pymysql.cursors.SSDictCursor) cursor = db.cursor() q_sql1 = "select a,b,c from %s where c >= '%s' and c '%s' " % \ (tablename, updatetime_s, updatetime_e) #print(q_sql1) result_list = [] try: # startdatetime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) cursor.execute(q_sql1) #results = cursor.fetchall() # enddatetime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # print(results) #返回是元組 #使用流式游標(biāo)迭代器+fetchone,減少內(nèi)存消耗 while True: result = cursor.fetchone() if not result: print("此區(qū)間無數(shù)據(jù)", q_sql1) break else: one_list = list(result.values()) # print(result_list) result_list.append(one_list) print(result_list) #返回是列表 #print("查詢數(shù)據(jù)成功!", tablename) except BaseException as e: print("查詢數(shù)據(jù)失?。?, tablename, str(e)) # 記錄異常日志 updatetime_n = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) eachline_log = updatetime_n + '[erro]:' + tablename + str(e) + '\n' content_log.append(eachline_log) recode_log(content_log) db.close() results_len = (len(result_list)) if results_len>0: #3、將數(shù)據(jù)插入到目標(biāo)表中,利用list提高插入效率 i_sql = "insert into table_t(a,b,c) values (%s,%s,%s)" #print(i_sql) db = pymysql.connect(host=host_d, port=port_d, user=user_d, passwd=passwd_d, db=db_d) cursor = db.cursor() try: #startdatetime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) cursor.executemany(i_sql, result_list) db.commit() #enddatetime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) print("插入成功!",tablename) except BaseException as e: db.rollback() print("插入失??!", tablename,str(e)) #記錄異常日志 updatetime_n = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) eachline_log = updatetime_n + '[erro]:' + tablename + str(e) + '\n' content_log.append(eachline_log) recode_log(content_log) db.close() enddatetime = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) #4、如果有數(shù)據(jù)同步,則更新參考點(diǎn)時(shí)間為下一個(gè)節(jié)點(diǎn)時(shí)間 eachline_time = tablename+','+updatetime_e+'\n' #此時(shí)間點(diǎn)是下一個(gè)時(shí)間點(diǎn)updatetime_e content_time.append(eachline_time) print("更新表時(shí)間點(diǎn)",content_time) # 5、記錄成功日志 eachline_log = enddatetime + '[success]:' + tablename + '開始時(shí)間' + startdatetime + \ '結(jié)束時(shí)間' + enddatetime + ',同步數(shù)據(jù)量'+str(results_len)+',當(dāng)前參考點(diǎn)' + updatetime_e + '\n' content_log.append(eachline_log) print("日志信息",content_log) #print("===============================================================================") else: print("===============================================================================") print("無數(shù)據(jù)可同步",tablename) #db.close() enddatetime = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) # 4、如果無數(shù)據(jù)同步,則參考點(diǎn)時(shí)間不更新 eachline_time = tablename + ',' + updatetime + '\n' #此時(shí)間點(diǎn)還是原時(shí)間updatetime content_time.append(eachline_time) print("不更新表時(shí)間點(diǎn)",content_time) # 5、成功日志信息 eachline_log = enddatetime + '[success]:' + tablename + '開始時(shí)間' + startdatetime + \ '結(jié)束時(shí)間' + enddatetime + ',同步數(shù)據(jù)量0'+ ',當(dāng)前參考點(diǎn)' + updatetime_e + '\n' content_log.append(eachline_log) print("日志信息",content_log) #print("===============================================================================") #更新配置文件,記錄日志 update_time(content_time) recode_log(content_log) if __name__ == '__main__': filepathtime = 'D:/test/table-time.txt' filepathlog = 'D:/test/table-log.txt' host_o = 'localhost' port_o = 3306 user_o = 'root' passwd_o = 'root@123' db_o = 'csdn' host_d = 'localhost' port_d = 3306 user_d = 'root' passwd_d = 'root@123' db_d = 'csdn' content_time = [] content_log = [] transferdata() #每5分鐘執(zhí)行一次同步 # while True: # transferdata() # time.sleep(300)
table-time.txt配置文件,格式說明:
每行包括源庫表名、此表的最小時(shí)間time,以逗號(hào)分隔
若多個(gè)表,可配置多個(gè)時(shí)間
每次腳本執(zhí)行后,同步更新時(shí)間time。時(shí)間間隔設(shè)置為1小時(shí),可根據(jù)情況在updatetime_e中對(duì)增量進(jìn)行修改
table-log.txt
記錄每次同步任務(wù)執(zhí)行的結(jié)果,或執(zhí)行中發(fā)生異常的日志
此文件需要定期進(jìn)行清理
到此這篇關(guān)于python實(shí)現(xiàn)不同數(shù)據(jù)庫間數(shù)據(jù)同步功能的文章就介紹到這了,更多相關(guān)python實(shí)現(xiàn)數(shù)據(jù)同步內(nèi)容請(qǐng)搜索腳本之家以前的文章或繼續(xù)瀏覽下面的相關(guān)文章希望大家以后多多支持腳本之家!
標(biāo)簽:昭通 合肥 阜新 興安盟 隨州 信陽 濟(jì)源 淘寶好評(píng)回訪
巨人網(wǎng)絡(luò)通訊聲明:本文標(biāo)題《python實(shí)現(xiàn)不同數(shù)據(jù)庫間數(shù)據(jù)同步功能》,本文關(guān)鍵詞 python,實(shí)現(xiàn),不同,數(shù)據(jù)庫,;如發(fā)現(xiàn)本文內(nèi)容存在版權(quán)問題,煩請(qǐng)?zhí)峁┫嚓P(guān)信息告之我們,我們將及時(shí)溝通與處理。本站內(nèi)容系統(tǒng)采集于網(wǎng)絡(luò),涉及言論、版權(quán)與本站無關(guān)。