如何寻找bug(5)

230    while (1) {
231       TS_RECORD(TS_H1,stamp_offset + 2000);
232       if (m_request==0) {
233          DEBUG_TEXT(DFDB_QUEUE, 8, "RequestHandler(" << m_handlerId <<")::run: popping request Q");
234          m_request=m_requestQueue->pop();
235       }
236       else {
237          DEBUG_TEXT(DFDB_QUEUE, 8, "RequestHandler(" << m_handlerId <<")::run: repeating oldRequest");
238       }
239       DEBUG_TEXT(DFDB_QUEUE, 8, "RequestHandler(" << m_handlerId <<")::run: request Q popped OK");
240 
241          int requestOk=-1; // 将requestOk赋值拿到try块外,通过在程序块内打印requestOk的值,推断程序块的执行情况。
242       try {
243          TS_RECORD(TS_H1,stamp_offset + 2100);
244          //int requestOk;
245          //unsigned int tsdata = m_handlerId * 100 + 3000000000;
246          //TS_RECORD(TS_H5,tsdata);
247 
248          requestOk = m_request->execute();
249          TS_RECORD(TS_H1,stamp_offset + 2200);
250 
251          if (requestOk == Request::REQUEST_OK) {
252             m_requestsHandled++;
253             deleteRequest();
254             DEBUG_TEXT(DFDB_ROSCORE, 8, "RequestHandler(" << m_handlerId <<")::run requestOk");
255             TS_RECORD(TS_H1, stamp_offset + 2300);
256          }
257          else if (requestOk == Request::REQUEST_TIMEOUT) {
258             m_requestsTimedOut++;
259             deleteRequest();
260             TS_RECORD(TS_H1, stamp_offset + 2300);
261          }
262          else {
263             TS_RECORD(TS_H1, stamp_offset + 2400);
264             DEBUG_TEXT(DFDB_ROSCORE, 8, "RequestHandler(" << m_handlerId <<")::run: Request non completed: will be requeued.");
265             m_requestsFailed++;
266       m_request = m_requestQueue->swap(m_request);
267             DFThread::yieldOrCancel();
268             TS_RECORD(TS_H1, stamp_offset + 2500);
269          }
270 
271          TS_RECORD(TS_H1, stamp_offset + 2990);
272       }
273       catch (std::exception& issue) {
274          ENCAPSULATE_ROS_EXCEPTION(newIssue, CoreException, UNCLASSIFIED, issue, "RequestHandler caught exception, deleting Request    ");
275          DEBUG_TEXT(DFDB_ROSCORE, 8, "requestOk: " << requestOk<< std::endl);
276          ers::error(newIssue);
277          deleteRequest();
278       }
279    }

查看log文件ROS-Eth-02_cmm03node01_1487247826.out:

3006978 Debug(12,140507831203584): RequestHandler(1)::run: popping request Q
3006979 Debug(7,140507829102336): DcTriggerIn::processDataRequest() EB message (xid 131047)
3006980 Debug(7,140507829102336): new FragmentRequest level1Id:131047
3006981 
3006982 Debug(12,140507831203584): RequestHandler(1)::run: request Q popped OK
3006983 Debug(13,140507831203584): FragmentRequest::execute for L1Id 131047
3006984 
3006985 Debug(13,140507831203584): FragmentRequest::execute calling builder->createFragment for L1Id 131047
3006986 
3006987 Debug(13,140507831203584): FragmentRequest::execute calling main sendData for L1Id 131047
3006988 
3006989 Debug(7,140507829102336): DcTriggerIn::processDataRequest() EB message (xid 131076)
3006990 Debug(7,140507829102336): new FragmentRequest level1Id:131076
3006991 
3006992 Debug(7,140507829102336): DcTriggerIn::processDataRequest() EB message (xid 131052)
3006993 Debug(7,140507829102336): new FragmentRequest level1Id:131052
3006994 
3006995 Debug(13,140507831203584): FragmentRequest::execute finished with L1Id 131047fragmentOk=1
3006996 
3006997 Debug(13,140507831203584): FragmentRequest::~FragmentRequest
3006998 
3006999 Debug(13,140507831203584): RequestHandler(1)::run requestOk
3007000 Debug(12,140507831203584): RequestHandler(1)::run: popping request Q
3007001 Debug(12,140507831203584): RequestHandler(1)::run: request Q popped OK
3007002 Debug(13,140507831203584): ReleaseRequest::execute started
3007003 
3007004 Debug(13,140507831203584): ReleaseRequest::~ReleaseRequest
3007005 
3007006 Debug(13,140507831203584): RequestHandler(1)::run requestOk
3007007 Debug(12,140507831203584): RequestHandler(1)::run: popping request Q
3007008 Debug(12,140507831203584): RequestHandler(1)::run: request Q popped OK
3007009 Debug(13,140507831203584): FragmentRequest::execute for L1Id 131076
3007010 
3007011 Debug(13,140507831203584): FragmentRequest::execute calling builder->createFragment for L1Id 131076
3007012 
3007013 Debug(13,140507831203584): requestOk: -1 //说明try..catch块抓到异常requestOk的值为-1,说明Request的execute执行没有成功完成
3007014 
3007015 Debug(13,140507831203584): FragmentRequest::~FragmentRequest
3007016 
3007017 Debug(12,140507831203584): RequestHandler(1)::run: popping request Q
3007018 Debug(12,140507831203584): RequestHandler(1)::run: request Q popped OK
3007019 Debug(13,140507831203584): FragmentRequest::execute for L1Id 131052
3007020 
3007021 Debug(7,140507829102336): DcTriggerIn::processDataRequest() EB message (xid 131049)
3007022 Debug(7,140507829102336): new FragmentRequest level1Id:131049
3007023 
3007024 Debug(7,140507829102336): DcTriggerIn::processDataRequest() EB message (xid 131077)
3007025 Debug(7,140507829102336): new FragmentRequest level1Id:131077

由于requestOk的值为-1,得出,Request的execute函数没有正确返回,因为正确返回的结果为0,1或者2。

在log文件里查找L1id为131076的FragmentRequest在execute函数执行的最后一句话有没有打印: 

[lhaaso@cmm03node01 part_dk_ef]$ grep "FragmentRequest::execute finished with L1Id 131076fragmentOk=1" ROS-Eth-02_cmm03node01_1487247826.out
[lhaaso@cmm03node01 part_dk_ef]$ 

[lhaaso@cmm03node01 part_dk_ef]$ grep "FragmentRequest::execute calling main sendData for L1Id 131076" ROS-Eth-02_cmm03node01_1487247826.out
[lhaaso@cmm03node01 part_dk_ef]$

 

没有找到,说明是FragmentRequest的execute()没有正常执行,并且FragmentRequest::execute()函数在”FragmentRequest::execute calling main sendData for L1Id”打印出来之前就已经出现问题,后面没有接着执行发送131076的数据片段给SFI,这也导致了SFI报的warning:

[lhaaso@cmm03node13 part_dk_ef]$ grep -n "131076" SFI*.err
SFI-4_cmm03node13_1487247819.err:159:WARNING 2017-Feb-16 20:31:36 [DC::StatusWord EventAssembly::EventCompleted(...) at SFI/src/EventAssembly.cxx:480] Problem with the flow of data: Event with LVL1ID 131076 misses 2 data fragment(s) from: ROS-Eth-01, ROS-Eth-02, 

 

程序出错的范围为第117行~第170行。

107 int FragmentRequest::execute(void)
108 {
109   DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute for L1Id "<< m_level1Id<< std::endl); //已被打印
110    //This code is to record the last runs through this function with 
111    //the ring buffer mode of the rcc_time_stamp package
112    //unsigned int tsdata = (m_level1Id * 100) + 2000000000;
113    //TS_RECORD(TS_H5,(tsdata+1));
114 
115    // Ask the builder to create us an empty fragment.
116    s_mutex->lock() ;
117    DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute calling builder->createFragment for L1Id "<< m_level1Id<< std::endl);//已被打印
118    m_eventFragment=m_builder->createFragment(m_level1Id, m_dataChannels->size());
119    s_mutex->unlock() ;
120 
121 
122    std::vector<DataChannel*>::iterator chanStartIter=m_dataChannels->begin();
123    std::vector<DataChannel*>::iterator chanEndIter=m_dataChannels->end();
124    // Pre-request data from all DataChannels involved
125    int index=0;
126    for (std::vector<DataChannel*>::iterator dc=chanStartIter; dc!=chanEndIter; dc++) {
127       m_ticket[index++] = (*dc)->requestFragment(m_level1Id);
128    }
129 
130    bool fragmentOk=true;
131    index=0;
132    int partsReceived=0;
133    for (std::vector<DataChannel*>::iterator dc=chanStartIter; dc!=chanEndIter; dc++) {
134       DEBUG_TEXT(DFDB_ROSCORE, 20, "FragmentRequest::execute calling dc->getFragment for L1Id "<< m_level1Id<< std::endl);
135       EventFragment* subFragment = ((*dc)->getFragment(m_ticket[index])) ;
136       if (subFragment != 0) {
137          partsReceived++;
138          DEBUG_TEXT(DFDB_ROSCORE, 20, "FragmentRequest::execute calling builder->appendFragment for L1Id "<< m_level1Id<< std::endl    );
139          m_builder->appendFragment(m_eventFragment,subFragment);
140          TS_RECORD(TS_H1,2350);
141 
142          fragmentOk=subFragment->fragmentReady();
143 
144          s_mutex->lock() ;
145          delete subFragment;
146          s_mutex->unlock() ;
147       }
148       else {
149          DEBUG_TEXT(DFDB_ROSCORE, 15, "FragmentRequest for L1Id "<< m_level1Id << " missing data aborting\n");
150          fragmentOk=false;
151          TS_RECORD(TS_H1,2360);
152       }
153 
154       index++;
155    }
156 
157 
158    bool retired=false;
159    if (!fragmentOk) {
160      retired=checkAge(s_maxAge);
161    }
162 
163    if (fragmentOk || (retired && partsReceived>0)) {
164 
165       if (retired) {
166          // Issue warning message
167          CREATE_ROS_EXCEPTION(tIssue,CoreException,CoreException::TIMEOUT,
168                               "in request for fragment with L1 ID "<<m_level1Id);
169          ers::warning(tIssue);
170       }
171       DEBUG_TEXT(DFDB_ROSCORE, 8, "FragmentRequest::execute calling main sendData for L1Id "<< m_level1Id<< std::endl);//未被打印

 

posted @ 2017-02-16 21:38  小荷才楼尖尖角  Views(269)  Comments(0Edit  收藏  举报