html agility pack - I have code in C# that looks like this that I need to extract the same fields, but I am uncertain how to do it -


In Python, I have code that looks like this, using libxml:

    # Scrape the calendar table: collect every row whose class starts with
    # "calendar", then copy the stored rows into the display table.
    # `node`, `getarow`, `newsitems` and `newstable` are defined elsewhere in
    # the original program; their spelling is kept exactly as shown here.
    parser = etree.HTMLParser()

    id = 0
    nodes = node.findall(r'.//div[@id="flexbox_flex_calendar_maincal"]//table/tr[@class]')

    for x in nodes:
        # Only rows whose class attribute begins with "calendar" are data rows.
        if x.attrib['class'].startswith('calendar'):
            item = getarow(x, id)
            newsitems.addrow(item)
            id = id + 1

    for id in range(0, newsitems.getlength()):
        rowdict = newsitems.getrow(id)
        if rowdict is not None:
            rowitems = QStringList([rowdict['time'], rowdict['currency'], rowdict['impact'],
                                    rowdict['event'], rowdict['actual'], rowdict['forecast'],
                                    rowdict['previous']])
            #newsitems[rowdict['time']].append(rowitems)
            newstable.addrow(rowitems)

I have code in C# that looks like this, from which I need to extract the same fields, but I am uncertain how to do it. The whatNodesToFind string is the problematic part.

using System;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using HtmlAgilityPack;

namespace ConsoleApplication276
{
    /// <summary>
    /// Container pairing a URL with the parser action to run on that URL's HTML.
    /// </summary>
    public class Link
    {
        /// <summary>The URL to navigate to.</summary>
        public string link { get; set; }

        /// <summary>Callback that receives the downloaded HTML text.</summary>
        public Action<string> parser { get; set; }
    }

    public class Program
    {
        // Index 0 is a placeholder so that DateTime.Month (1-12) can index the array directly.
        // NOTE(review): "july" is the only entry that is not a three-letter abbreviation -
        // confirm the site accepts it.
        static string[] monthStrings = new string[] { "", "jan", "feb", "mar", "apr", "may", "jun", "july", "aug", "sep", "oct", "nov", "dec" };

        /// <summary>
        /// Builds the value of the calendar page's <c>day</c> query parameter for today's date.
        /// </summary>
        /// <returns>Month name, day number, a dot and the year, e.g. <c>aug7.2015</c>.</returns>
        public static string GetDateInForexFactoryFormat()
        {
            var today = System.DateTime.Now;

            var dayStr = today.Day.ToString();
            var monthStr = monthStrings[today.Month];
            var yearStr = today.Year.ToString();

            // Month comes before the day, matching the known-good URL form "aug7.2015".
            return monthStr + dayStr + '.' + yearStr;
        }

        /// <summary>
        /// Entry point of the console app: downloads the calendar page and hands its HTML to the parser.
        /// </summary>
        public static void Main(string[] args)
        {
            try
            {
                // Download each page and dump its content.
                // More links can be added here; each link carries its own parser action.
                var task = MessageLoopWorker.Run(DoWorkAsync, new Link()
                {
                    link = "http://www.forexfactory.com/calendar.php?day=" + GetDateInForexFactoryFormat(),
                    parser = (string html) =>
                    {
                        // Fully qualified because System.Windows.Forms also declares an HtmlDocument type.
                        var doc = new HtmlAgilityPack.HtmlDocument();
                        doc.LoadHtml(html);
                        string whatNodesToFind = ".//div";
                        //string whatNodesToFind = "table";
                        //var someNodes = doc.DocumentNode.SelectSingleNode(whatNodesToFind);
                        var someNodes = doc.DocumentNode.SelectNodes(whatNodesToFind);

                        // SelectNodes yields null (not an empty collection) when nothing matches.
                        if (someNodes == null)
                        {
                            return;
                        }

                        foreach (var node in someNodes)
                        {
                            Console.WriteLine(node);
                        }
                    }
                });

                // Block the console thread until the STA worker thread has finished.
                task.Wait();

                Console.WriteLine("doworkasync completed.");
            }
            catch (Exception ex)
            {
                Console.WriteLine("doworkasync failed: " + ex.Message);
            }

            Console.WriteLine("press enter exit.");
            Console.ReadLine();
        }

        /// <summary>
        /// Navigates a WebBrowser control to each link in turn and passes the loaded HTML to the link's parser.
        /// </summary>
        /// <param name="args">The links to visit, in order.</param>
        /// <returns>Always <c>null</c>; results are delivered through each link's parser callback.</returns>
        public static async Task<Link> DoWorkAsync(Link[] args)
        {
            Console.WriteLine("start working.");

            using (var wb = new WebBrowser())
            {
                wb.ScriptErrorsSuppressed = true;

                TaskCompletionSource<bool> tcs = null;
                WebBrowserDocumentCompletedEventHandler documentCompletedHandler = (s, e) =>
                    tcs.TrySetResult(true);

                // Navigate to each URL in the list.
                foreach (var arg in args)
                {
                    tcs = new TaskCompletionSource<bool>();
                    wb.DocumentCompleted += documentCompletedHandler;
                    try
                    {
                        wb.Navigate(arg.link.ToString());
                        // Wait for the DocumentCompleted event.
                        await tcs.Task;
                        // After the page loads, pass its HTML to the parser.
                        arg.parser(wb.DocumentText);
                    }
                    finally
                    {
                        // Always unsubscribe so the handler is not attached twice on the next iteration.
                        wb.DocumentCompleted -= documentCompletedHandler;
                    }
                    // The DOM is ready at this point.
                    Console.WriteLine(arg.link.ToString());
                    Console.WriteLine(wb.Document.Body.OuterHtml);
                }
            }

            Console.WriteLine("end working.");
            return null;
        }
    }

    /// <summary>
    /// Helper that starts a message loop on a dedicated STA thread and runs an asynchronous task on it.
    /// </summary>
    public static class MessageLoopWorker
    {
        /// <summary>
        /// Runs <paramref name="worker"/> inside a message loop on a new STA thread.
        /// </summary>
        /// <param name="worker">The asynchronous work to execute on the message-loop thread.</param>
        /// <param name="args">The links forwarded to <paramref name="worker"/>.</param>
        /// <returns>A task carrying the worker's result, or its exception if the worker threw.</returns>
        public static async Task<object> Run(Func<Link[], Task<Link>> worker, params Link[] args)
        {
            var tcs = new TaskCompletionSource<object>();

            var thread = new Thread(() =>
            {
                EventHandler idleHandler = null;

                idleHandler = async (s, e) =>
                {
                    // Handle Application.Idle just once.
                    Application.Idle -= idleHandler;

                    // Return to the message loop...
                    await Task.Yield();

                    // ...and continue asynchronously, propagating the result or the exception.
                    try
                    {
                        var result = await worker(args);
                        tcs.SetResult(result);
                    }
                    catch (Exception ex)
                    {
                        tcs.SetException(ex);
                    }

                    // Signal the message loop to exit; Application.Run returns at this point.
                    Application.ExitThread();
                };

                // Subscribing to Application.Idle makes sure the work starts inside the message loop.
                Application.Idle += idleHandler;
                Application.Run();
            });

            // The new thread uses the STA apartment model.
            thread.SetApartmentState(ApartmentState.STA);

            // Start the thread and await the task.
            thread.Start();
            try
            {
                return await tcs.Task;
            }
            finally
            {
                thread.Join();
            }
        }
    }
}

I also tried the following, but it doesn't work, meaning it returns no nodes. Yet I can see the nodes using Google Chrome's Inspect Element:

              // Select every <div> that has a class attribute and whose id contains "flex".
              // The id is read through Attributes["id"].Value, exactly as in the attempt being discussed.
              var findClasses = doc.DocumentNode.Descendants("div").Where(d =>
                  d.Attributes.Contains("class") && d.Attributes["id"].Value.Contains("flex"));

              foreach (var d in findClasses)
              {
                  Console.WriteLine(d);
              }

Regarding the Edit 1 section, I'd recommend using d.GetAttributeValue("id", "") to replace d.Attributes["id"].Value, because the latter will throw an exception in case the current d element doesn't have an id attribute (and this did happen when parsing the HTML page retrieved from the URL in the sample):

// Load the calendar page directly and list every <div> that has a class
// attribute and an id containing "flex".
var link = "http://www.forexfactory.com/calendar.php?day=aug7.2015";
var doc = new HtmlWeb().Load(link);

// GetAttributeValue("id", "") falls back to "" when the element has no id attribute.
var findClasses = doc.DocumentNode
                     .Descendants("div")
                     .Where(d => d.Attributes.Contains("class")
                                 && d.GetAttributeValue("id", "").Contains("flex"));

foreach (var d in findClasses)
{
    Console.WriteLine("{0}, {1}", d.Name, d.GetAttributeValue("id", ""));
}

dotnetfiddle demo

output :

div, flexbox_flex_minicalendar_
div, flexbox_flex_calendar_maincal
div, flexdatepicker_calendar_maincal_begindate
div, flexdatepicker_calendar_maincal_enddate

Comments

Popular posts from this blog

yii2 - Yii 2 Running a Cron in the basic template -

asp.net - 'System.Web.HttpContext' does not contain a definition for 'GetOwinContext' Mystery -

mercurial graft feature, can it copy? -