日志合并:三天之前的日志,合并为单日单个文件,以解决增量处理时小文件过多问题

This commit is contained in:
lededev
2021-10-21 19:57:09 +08:00
parent c440315488
commit 1f9bf6b4c2

View File

@@ -162,14 +162,15 @@ def close_logfile(logdir: str):
f.unlink(missing_ok=True) f.unlink(missing_ok=True)
except: except:
pass pass
# 合并日志 只检测日志目录内的文本日志,忽略子目录。三个月前的日志,按合并为一个月志, # 合并日志 只检测日志目录内的文本日志,忽略子目录。三前的日志,按合并为单个日志,三个月前的日志,
# 去年及以前的月志今年4月以后将之按年合并为年志 # 按月合并为单个月志,去年及以前的月志今年4月以后将之按年合并为年志
# 测试步骤: # 测试步骤:
""" """
LOGDIR=/tmp/avlog LOGDIR=/tmp/avlog
mkdir -p $LOGDIR mkdir -p $LOGDIR
for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done for f in {2016..2020}{01..12}{01..28};do;echo $f>$LOGDIR/avdc_${f}T235959.txt;done
for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done for f in {01..09}{01..28};do;echo 2021$f>$LOGDIR/avdc_2021${f}T235959.txt;done
for f in {00..23};do;echo 20211001T$f>$LOGDIR/avdc_20211001T${f}5959.txt;done
echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR" echo "$(ls -1 $LOGDIR|wc -l) files in $LOGDIR"
# 1932 files in /tmp/avlog # 1932 files in /tmp/avlog
avdc -zgic1 -d0 -m3 -o $LOGDIR avdc -zgic1 -d0 -m3 -o $LOGDIR
@@ -177,19 +178,40 @@ def close_logfile(logdir: str):
ls $LOGDIR ls $LOGDIR
# rm -rf $LOGDIR # rm -rf $LOGDIR
""" """
# 第一步,合并到月
today = datetime.today() today = datetime.today()
# 第一步合并到日。3天前的日志文件名是同一天的合并为一份日志
for i in range(1):
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}T\d{6}$', f.stem, re.A)]
if not txts or not len(txts):
break
e = [f for f in txts if '_err' in f.stem]
txts.sort()
tmstr_3_days_ago = (today.replace(hour=0) - timedelta(days=3)).strftime("%Y%m%dT99")
deadline_day = f'avdc_{tmstr_3_days_ago}'
day_merge = [f for f in txts if f.stem < deadline_day]
if not day_merge or not len(day_merge):
break
cutday = len('T235959.txt') # cut length avdc_20201201|T235959.txt
for f in day_merge:
try:
day_file_name = str(f)[:-cutday] + '.txt' # avdc_20201201.txt
with open(day_file_name, 'a', encoding='utf-8') as m:
m.write(f.read_text(encoding='utf-8'))
f.unlink(missing_ok=True)
except:
pass
# 第二步,合并到月
for i in range(1): # 利用1次循环的break跳到第二步避免大块if缩进或者使用goto语法 for i in range(1): # 利用1次循环的break跳到第二步避免大块if缩进或者使用goto语法
txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'avdc_\d{8}T\d{6}', f.stem, re.A)] txts = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{8}$', f.stem, re.A)]
if not txts or not len(txts): if not txts or not len(txts):
break break
txts.sort() txts.sort()
tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32T") tmstr_3_month_ago = (today.replace(day=1) - timedelta(days=3*30)).strftime("%Y%m32")
deadline_month = f'avdc_{tmstr_3_month_ago}' deadline_month = f'avdc_{tmstr_3_month_ago}'
month_merge = [f for f in txts if f.stem < deadline_month] month_merge = [f for f in txts if f.stem < deadline_month]
if not month_merge or not len(month_merge): if not month_merge or not len(month_merge):
break break
tomonth = len('01T235959.txt') # cut length avdc_202012|01T235959.txt tomonth = len('01.txt') # cut length avdc_202012|01.txt
for f in month_merge: for f in month_merge:
try: try:
month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt month_file_name = str(f)[:-tomonth] + '.txt' # avdc_202012.txt
@@ -198,10 +220,10 @@ def close_logfile(logdir: str):
f.unlink(missing_ok=True) f.unlink(missing_ok=True)
except: except:
pass pass
# 第步,月合并到年 # 第步,月合并到年
if today.month < 4: if today.month < 4:
return return
mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'avdc_\d{6}', f.stem, re.A)] mons = [f for f in log_dir.glob(r'*.txt') if re.match(r'^avdc_\d{6}$', f.stem, re.A)]
if not mons or not len(mons): if not mons or not len(mons):
return return
mons.sort() mons.sort()
@@ -218,7 +240,7 @@ def close_logfile(logdir: str):
f.unlink(missing_ok=True) f.unlink(missing_ok=True)
except: except:
pass pass
# 第步,压缩年志 如果有压缩需求请自行手工压缩或者使用外部脚本来定时完成。推荐nongnu的lzip对于 # 第步,压缩年志 如果有压缩需求请自行手工压缩或者使用外部脚本来定时完成。推荐nongnu的lzip对于
# 这种粒度的文本日志压缩比是目前最好的。lzip -9的运行参数下日志压缩比要高于xz -9而且内存占用更少 # 这种粒度的文本日志压缩比是目前最好的。lzip -9的运行参数下日志压缩比要高于xz -9而且内存占用更少
# 多核利用率更高(plzip多线程版本)解压速度更快。压缩后的大小差不多是未压缩时的2.4%到3.7%左右, # 多核利用率更高(plzip多线程版本)解压速度更快。压缩后的大小差不多是未压缩时的2.4%到3.7%左右,
# 100MB的日志文件能缩小到3.7MB。 # 100MB的日志文件能缩小到3.7MB。