编辑1:我删除了memcpy,结果稍有改动,但抖动主要依然存在:
new max,time is 3029new max,time is 3746new max,time is 5287new max,time is 6043new max,time is 9882min,max,average,1837,9882,2087.033864
编辑2:我删除了sendto()调用,循环执行memcpy 600次,结果如下:
new max,time is 1894new max,time is 1922new max,time is 1923new max,time is 1925new max,time is 1928new max,time is 2002new max,time is 4530new max,time is 9269new max,time is 9466min,1880,9466,1881.731031
由于这些结果非常接近,它与sendto()调用本身无关,而是与其他一些系统性问题无关.
这是设置:
uname -alinux ubuntu 3.13.0-24-generic #47-Ubuntu SMP Fri May 2 23:30:00 UTC 2014 x86_64 x86_64 x86_64 GNU/linuxcat /proc/cmdlineBOOT_IMAGE=/boot/vmlinuz-3.13.0-24-generic root=UUID=9cd0a310-313a-4445-8594-39a7fc037b1e ro isolcpus=1-11 nohz_full=1-11 rcu_nocbs=1-11 intel_IDle.max_cstate=0 intel_pstate=disablecat /proc/cpuinfo | moreprocessor : 0vendor_ID : GenuineIntelcpu family : 6model : 62model name : Intel(R) Xeon(R) cpu E5-2630 v2 @ 2.60GHzstepPing : 4microcode : 0x415cpu MHz : 2599.941cache size : 15360 KBphysical ID : 0siblings : 6core ID : 0cpu cores : 6APIcID : 0initial APIcID : 0fpu : yesfpu_exception : yescpuID level : 13wp : yesflags : fpu vme de pse tsc msr pae mce cx8 APIc sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcID dca sse4_1 sse4_2 x2APIc popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm arat epb xsaveopt pln pts dtherm tpr_shadow vnmi flexpriority ept vpID fsgsbase smep ermsbogomips : 5199.88clflush size : 64cache_alignment : 64address sizes : 46 bits physical,48 bits virtualpower management:
……切断了5个核心.我关闭了超线程,但这并不重要.
我用以下代码编译以下代码:g -O2 jitter.cpp
#include <stdio.h>#include <sys/time.h>#include <inttypes.h>#include <stdint.h>#include <stdlib.h>#include <string.h>#include <sys/socket.h>#include <netinet/in.h>#include <strings.h>#include <arpa/inet.h>#include <time.h>#include <unistd.h>#include <sys/syscall.h>voID udp_send(){ struct timespec tpe1,tpe2; int sockfd,n; struct sockaddr_in servaddr,cliaddr; char sendline[1000]; sockfd=socket(AF_INET,SOCK_DGRAM,0); bzero(&servaddr,sizeof(servaddr)); servaddr.sin_family = AF_INET; // servaddr.sin_addr.s_addr=inet_addr("127.0.0.1"); servaddr.sin_addr.s_addr=inet_addr("192.168.5.51"); servaddr.sin_port=htons(3000); int nIterations = 2000000; int64_t *tTime = new int64_t[nIterations]; int k = 0; for( k = 0; k < nIterations; k++) { clock_gettime(CLOCK_REALTIME,&tpe1); memcpy( sendline,sendline+200,64 ); int n = sendto(sockfd,sendline,64,(struct sockaddr *)&servaddr,sizeof(servaddr)); if ( n != 64 ) printf("Failed to send\n"); clock_gettime(CLOCK_REALTIME,&tpe2); int64_t t2 = ( tpe2.tv_sec - tpe1.tv_sec ) *1000000000 + tpe2.tv_nsec - tpe1.tv_nsec; if ( k > 0 ) tTime[k] = t2; } int64_t tMax = 0; int64_t tMin = 99999999999; int64_t tTotal = 0; for ( k = 1; k < nIterations; k++ ) { int64_t t2 = tTime[k]; if ( t2 > tMax ) { tMax = t2; printf("new max,time is %ld\n",tMax ); } if ( t2 < tMin ) tMin = t2; if ( t2 > 20000 ) { printf("that took too long,took,%ld\n",t2 ); } tTotal += t2; } printf("min,%ld,%f\n",tMin,tMax,tTotal*1.0 / nIterations );}int main(){ udp_send(); return 0;}
我使用这个脚本(我从linux dynticks项目中获取)(sudo ./run)运行它,它将所有中断移动到核心0并设置跟踪.
#!/bin/bash# Full dyntick cpu on which we'll run the user loop,# it must be part of nohz_full kernel parameterTARGET=5 NR_cpuS=$(getconf _NPROCESSORS_ONLN)# Migrate nocb tasks to cpu 0# Beware,this assume that there is no online cpu > NR_cpuSfor cpu in $(seq $(($NR_cpuS-1)))do PIDS=$(ps -o pID= -C rcuob/$cpu,rcuos/$cpu,rcuop/$cpu) for PID in $PIDS do taskset -cp 0 $PID donedone# Migrate irqs to cpu 0for D in $(ls /proc/irq)do if [[ -x "/proc/irq/$D" && $D != "0" ]] then echo $D echo 1 > /proc/irq/$D/smp_affinity fidone# Delay the annoying vmstat timer far awaysysctl vm.stat_interval=300# Shutdown nmi watchdog as it uses perf eventssysctl -w kernel.watchdog=0# Pin the writeback workqueue to cpu0echo 1 > /sys/bus/workqueue/devices/writeback/cpumaskDIR=/sys/kernel/deBUG/tracingecho > $DIR/traceecho 0 > $DIR/tracing_on# Uncomment the below for more details on what disturbs the cpuecho 0 > $DIR/events/irq/enableecho 1 > $DIR/events/sched/sched_switch/enableecho 1 > $DIR/events/workqueue/workqueue_queue_work/enableecho 1 > $DIR/events/workqueue/workqueue_execute_start/enableecho 1 > $DIR/events/timer/hrtimer_expire_entry/enableecho 1 > $DIR/events/timer/tick_stop/enableecho 1 > $DIR/events/timer/enableecho 0 > $DIR/events/net/enableecho 0 > $DIR/events/sock/enableecho 0 > $DIR/events/udp/enableecho 0 > $DIR/events/skb/enableecho nop > $DIR/current_tracerecho 1 > $DIR/tracing_on# Run a 10 secs user loop on targettaskset -c $TARGET /usr/lib/linux-tools/3.13.0-24-generic/perf stat ./a.out > dfa.tmp &# chrt -f 99 taskset -c 1 ./a.out sleep 30killall a.out# Checkout the trace in trace.* filecat /sys/kernel/deBUG/tracing/per_cpu/cpu$TARGET/trace > trace.$TARGET
这是生成的dfa.tmp文件:
new max,time is 2196new max,time is 2729new max,time is 5900new max,time is 11621new max,time is 14211min,1322.858685
以下是生成的系统跟踪文件:
# tracer: nop## entrIEs-in-buffer/entrIEs-written: 34361/34361 #P:6## _-----=> irqs-off# / _----=> need-resched# | / _---=> hardirq/softirq# || / _--=> preempt-depth# ||| / delay# TASK-PID cpu# |||| TIMESTAMP FUNCTION# | | | |||| | | <IDle>-0 [005] dN.. 733.720555: hrtimer_cancel: hrtimer=ffff88087fd4eca0 <IDle>-0 [005] dN.. 733.720555: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=733280000000 softexpires=733280000000 <IDle>-0 [005] d... 733.720556: sched_switch: prev_comm=swapper/5 prev_pID=0 prev_prio=120 prev_state=R ==> next_comm=taskset next_pID=1312 next_prio=120 perf-1312 [005] .... 733.721806: hrtimer_init: hrtimer=ffff880850b53378 clockID=CLOCK_MONOTONIC mode=HRTIMER_MODE_REL perf-1312 [005] d... 733.721857: tick_stop: success=no msg=more than 1 task in runqueue perf-1312 [005] d... 733.721883: sched_switch: prev_comm=perf prev_pID=1312 prev_prio=120 prev_state=S ==> next_comm=perf next_pID=1314 next_prio=120 a.out-1314 [005] dN.. 733.721934: tick_stop: success=no msg=more than 1 task in runqueue a.out-1314 [005] d... 733.721936: sched_switch: prev_comm=perf prev_pID=1314 prev_prio=120 prev_state=R ==> next_comm=perf next_pID=1312 next_prio=120 perf-1312 [005] d... 733.722111: sched_switch: prev_comm=perf prev_pID=1312 prev_prio=120 prev_state=S ==> next_comm=perf next_pID=1314 next_prio=120 a.out-1314 [005] d.h. 733.722727: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 733.722728: hrtimer_expire_entry: hrtimer=ffff88087fd4eca0 function=tick_sched_timer Now=733280000768 a.out-1314 [005] d.h. 733.722731: hrtimer_expire_exit: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 733.722731: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=733284000000 softexpires=733284000000 a.out-1314 [005] d... 733.722736: tick_stop: success=yes msg= a.out-1314 [005] d... 733.722737: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d... 733.722737: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=734280000000 softexpires=734280000000 a.out-1314 [005] .... 733.722960: timer_init: timer=ffff88084f8ec1d8 a.out-1314 [005] d.s. 733.723023: timer_start: timer=ffff8808518d0650 function=neigh_timer_handler expires=4295075867 [timeout=250] a.out-1314 [005] d... 733.986838: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d... 733.986839: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=734044000000 softexpires=734044000000 a.out-1314 [005] d.h. 734.487025: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 734.487025: hrtimer_expire_entry: hrtimer=ffff88087fd4eca0 function=tick_sched_timer Now=734044000419 a.out-1314 [005] d.h. 734.487028: hrtimer_expire_exit: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 734.487028: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=734048000000 softexpires=734048000000 a.out-1314 [005] d.s. 734.487029: timer_cancel: timer=ffffffff81efd6a0 a.out-1314 [005] ..s. 734.487030: timer_expire_entry: timer=ffffffff81efd6a0 function=clocksource_watchdog Now=4295075808 a.out-1314 [005] d.s. 734.487031: timer_start: timer=ffffffff81efd6a0 function=clocksource_watchdog expires=4295075933 [timeout=125] a.out-1314 [005] ..s. 734.487032: timer_expire_exit: timer=ffffffff81efd6a0 a.out-1314 [005] d... 734.487033: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d... 734.487033: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=734280000000 softexpires=734280000000 a.out-1314 [005] d... 734.487033: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d... 734.487033: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=734280000000 softexpires=734280000000 a.out-1314 [005] d.h. 734.723117: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 734.723117: hrtimer_expire_entry: hrtimer=ffff88087fd4eca0 function=tick_sched_timer Now=734280000410 a.out-1314 [005] d.h. 734.723119: hrtimer_expire_exit: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 734.723119: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=734284000000 softexpires=734284000000 a.out-1314 [005] d.s. 734.723120: timer_cancel: timer=ffff8808518d0650 a.out-1314 [005] ..s. 734.723120: timer_expire_entry: timer=ffff8808518d0650 function=neigh_timer_handler Now=4295075867 a.out-1314 [005] d.s. 734.723122: timer_start: timer=ffff8808518d0650 function=neigh_timer_handler expires=4295076117 [timeout=250] a.out-1314 [005] ..s. 734.723126: timer_expire_exit: timer=ffff8808518d0650 a.out-1314 [005] d... 734.723127: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d... 734.723127: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=735280000000 softexpires=735280000000 a.out-1314 [005] d.h. 735.723507: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 735.723508: hrtimer_expire_entry: hrtimer=ffff88087fd4eca0 function=tick_sched_timer Now=735280000448 a.out-1314 [005] d.h. 735.723509: hrtimer_expire_exit: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 735.723509: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=735284000000 softexpires=735284000000 a.out-1314 [005] d.s. 735.723511: timer_cancel: timer=ffff8808518d0650 a.out-1314 [005] ..s. 735.723511: timer_expire_entry: timer=ffff8808518d0650 function=neigh_timer_handler Now=4295076117 a.out-1314 [005] d.s. 735.723512: timer_start: timer=ffff8808518d0650 function=neigh_timer_handler expires=4295076367 [timeout=250] a.out-1314 [005] ..s. 735.723515: timer_expire_exit: timer=ffff8808518d0650 a.out-1314 [005] d... 735.723516: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d... 735.723516: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=736280000000 softexpires=736280000000 a.out-1314 [005] d... 736.452298: timer_start: timer=ffff8800368823e0 function=commit_timeout expires=4295077736 [timeout=1437] a.out-1314 [005] d.h. 736.452533: tick_stop: success=no msg=more than 1 task in runqueue a.out-1314 [005] d.h. 736.452534: hrtimer_cancel: hrtimer=ffff88087fd4eca0 a.out-1314 [005] d.h. 736.452535: hrtimer_start: hrtimer=ffff88087fd4eca0 function=tick_sched_timer expires=736012000000 softexpires=736012000000 a.out-1314 [005] d... 736.452536: tick_stop: success=no msg=more than 1 task in runqueue a.out-1314 [005] d... 736.452537: sched_switch: prev_comm=a.out prev_pID=1314 prev_prio=120 prev_state=x ==> next_comm=perf next_pID=1312 next_prio=120 perf-1312 [005] d... 736.452647: timer_start: timer=ffffffff81d14ae0 function=delayed_work_timer_fn expires=4295076549 [timeout=250] perf-1312 [005] d.h. 736.455802: hrtimer_cancel: hrtimer=ffff88087fd4eca0
perf输出是:
4223.769000 task-clock (msec) # 1.000 cpus utilized 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.000 K/sec 1,129 page-faults # 0.267 K/sec 10,977,496,067 cycles # 2.599 GHz 5,798,270,864 stalled-cycles-frontend # 52.82% frontend cycles IDle <not supported> stalled-cycles-backend 11,756,238,450 instructions # 1.07 insns per cycle # 0.49 stalled cycles per insn 2,093,679,462 branches # 495.690 M/sec 14,232,955 branch-misses # 0.68% of all branches 4.222272487 seconds time elapsed解决方法 我错了.抖动来自各种软件和 *** 作系统正在做的其他事情.我设法通过运行来看到这个:
sudo trace-cmd record -s 1000 -b 10000 -e all -o trace_1.dat `taskset -c 5 ./a.out > dfa2.tmp`sudo trace-cmd report -i trace_1.dat > tmp_1.txt
文件tmp_1.txt清楚地显示了这样的事情:
a.out-3346 [005] 33832.957658: softirq_raise: vec=1 [action=TIMER] a.out-3346 [005] 33832.957658: rcu_utilization: Start scheduler-tick a.out-3346 [005] 33832.957658: softirq_raise: vec=9 [action=RCU] a.out-3346 [005] 33832.957659: rcu_utilization: End scheduler-tick a.out-3346 [005] 33832.957660: sched_stat_runtime: comm=a.out pID=3346 runtime=2950209 [ns] vruntime=38264172961 [ns] a.out-3346 [005] 33832.957662: hrtimer_expire_exit: hrtimer=0xffff88087fd4eca0 a.out-3346 [005] 33832.957663: hrtimer_start: hrtimer=0xffff88087fd4eca0 function=tick_sched_timer expires=33819608000000 softexpires=33819608000000 a.out-3346 [005] 33832.957663: local_timer_exit: vector=239 a.out-3346 [005] 33832.957664: softirq_entry: vec=1 [action=TIMER] a.out-3346 [005] 33832.957666: softirq_exit: vec=1 [action=TIMER] a.out-3346 [005] 33832.957666: softirq_entry: vec=9 [action=RCU] a.out-3346 [005] 33832.957666: rcu_utilization: Start RCU core a.out-3346 [005] 33832.957667: rcu_utilization: End RCU core a.out-3346 [005] 33832.957667: softirq_exit: vec=9 [action=RCU] a.out-3346 [005] 33832.957668: tick_stop: success=yes msg=
我假设是抖动的原因.我尝试打开并写入/ sys / kernel / deBUG / tracing / trace_marker,这确实显示在跟踪文件中,但它没有我希望的那么有用.您还可以在之前和之后读取/ proc / softirqs,看看是否存在抖动时是否存在问题.
我添加了代码来读取/ proc / softirqs,并确保每次有抖动时,计时器滴答计数增加了一个.相反的情况并非如此,有时计时器计数在/ proc / softirqs中上升,但没有抖动,所以显然它取决于内核在计时器关闭时决定做什么:
HI:,685 TIMER:,33295 NET_TX:,12 NET_RX:,4030892 BLOCK:,0BLOCK_IOPolL:,0 tasklet:,55873 SCHED:,0 HRTIMER:,5 RCU:,7946slow,11148 HI:,33296 NET_TX:,7947
有时RCU计数会增加1但有时不会增加.计时器计数总是加1.
这是最新的代码:
#include <stdio.h>#include <sys/time.h>#include <inttypes.h>#include <stdint.h>#include <stdlib.h>#include <string.h>#include <sys/socket.h>#include <netinet/in.h>#include <strings.h>#include <arpa/inet.h>#include <time.h>#include <unistd.h>#include <sys/syscall.h>/* g++ -O2 jitter.cpp sudo trace-cmd record -s 1000 -b 10000 -e all -o trace_1.dat `taskset -c 5 ./a.out > dfa2.tmp` sudo trace-cmd report -i trace_1.dat > tmp_1.txt*/#pragma pack( push )#pragma pack( 1 )#define MAX_cpuS 6#define TARGET_cpu 5typedef struct{ char chname[13]; char chcpu[11*MAX_cpuS]; char chNewline[8];} procSoftIRQline1;typedef struct{ char chname[13]; char chcpu[11*MAX_cpuS]; char chNewline;} procSoftIRQline2;typedef struct{ procSoftIRQline1 h; procSoftIRQline2 b[30];} procSoftIRQ;#pragma pack(pop)file *pFProcSoftirq = NulL;procSoftIRQ data;voID read_proc_softirqs(){ rewind( pFProcSoftirq ); size_t tR = fread( (char *)&data,1,sizeof( data ),pFProcSoftirq ); size_t nP = sizeof( data.h ); for ( int i = 0; i < sizeof( data.b ) / sizeof( data.b[0] ) && nP < tR; i++ ) { printf("%.13s,%.11s\n",data.b[i].chname,data.b[i].chcpu+11*(TARGET_cpu) ); nP += sizeof( procSoftIRQline2 ) + 1; }}voID udp_send(){ struct timespec tpe1,0); file *pF = NulL; pFProcSoftirq = fopen( "/proc/softirqs","r" ); // pF = fopen( "/sys/kernel/deBUG/tracing/trace_marker","w" ); bzero(&servaddr,sizeof(servaddr)); servaddr.sin_family = AF_INET; // servaddr.sin_addr.s_addr=inet_addr("127.0.0.1"); servaddr.sin_addr.s_addr=inet_addr("192.168.5.51"); servaddr.sin_port=htons(3000); int nIterations = 1000000; // int64_t *tTime = new int64_t[nIterations]; int k = 0; for( k = 0; k < nIterations; k++) { // fputs( "#dfa,starting\n",pF ); read_proc_softirqs(); clock_gettime(CLOCK_REALTIME,&tpe1); for ( int j = 0; j < 600; j++ ) memcpy( sendline,64 ); /* int n = sendto(sockfd,sizeof(servaddr)); if ( n != 64 ) printf("Failed to send\n"); */ clock_gettime(CLOCK_REALTIME,&tpe2); int64_t t2 = ( tpe2.tv_sec - tpe1.tv_sec ) *1000000000 + tpe2.tv_nsec - tpe1.tv_nsec; if ( k > 0 ) { if ( t2 > 10000 ) { printf("slow,t2 ); read_proc_softirqs(); // if ( t2 > 10000 ) // fputs( "#dfa,slow\n",pF ); // else // fputs( "#dfa,fast\n",pF ); // tTime[k] = t2; } } }}int main(){ udp_send(); return 0;}
您必须将TARGET_cpu的值硬编码为要运行程序的cpu. TARGET_cpu应该是您将在taskset中使用的值-c TARGET_cpu ./a.out> out.txt.
我也用echo function_graph>进行了ftrace. $DIR / current_tracer我发现很多时间花在了arch / x86 / kernel / APIc / APIc.c中的函数smp_APIc_timer_interrupt中所以我添加了代码来计算在该函数中花费了多少时间并添加了一个系统这样我可以从用户空间获得在该功能中花费的时间.果然,大部分失踪时间都是来自这个功能,但不是全部.还有许多其他功能也需要一些时间.
总结以上是内存溢出为你收集整理的如何在Linux上查找此抖动的来源?全部内容,希望文章能够帮你解决如何在Linux上查找此抖动的来源?所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)