←

Parsing JSON and ATOM Twitter Search Results in Go | by ajstarks

→

Back to photostream

ajstarks

Parsing JSON and ATOM Twitter Search Results in Go

The top chart shows the time a Go program takes to search twitter, parse the results, and output the tweets, varying the number of search results from 10-100. The blue shows the results for JSON, the green for ATOM (XML).

Also depicted are the number of bytes parsed and the resulting parsing rate.

The conclusion is that JSON is more efficient; HTTP rates are constant, but JSON requires less data, with less complex data structures to unmarshal. Both methods deliver identical results:

ts -f json -n 10 '#golang'

RT @koizuka: RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
.....

Here is the program: it demonstrates command line parsing, error checking, http processing and unmarshaling both JSON and XML.

// ts -- twitter search
//
// Anthony Starks (ajstarks@gmail.com)
//

package main

import (
   "fmt";
   "http";
   "io";
   "flag";
   "os";
   "xml";
   "json";
)

// Command-line flags. flag.String and flag.Int return pointers, so the
// values must be dereferenced after flag.Parse has run.
var format = flag.String("f", "atom", "Output format (json or atom)")
var nresults = flag.Int("n", 20, "Maximum results (up to 100)")
var since = flag.String("d", "", "Search since this date (YYYY-MM-DD)")

// queryURI is the search URL template. ts fills the verbs, in order, with
// the format ("json" or "atom"), the URL-escaped query, the since date and
// the number of results per page.
const queryURI = "http://search.twitter.com/search.%s?q=%s&since=%s&rpp=%d"
// Disabled two-field variant of outputfmt (two values separated by U+27BE);
// the live format below prints a single value per line.
//const outputfmt = "%s \u27BE %s\n"
const outputfmt = "%s\n"

// Atom Feed Data Structure

// Feed is the top-level value that readatom unmarshals an Atom search
// response into. Of its fields, the code in this file only reads Entry.
type Feed struct {
   // NOTE(review): the tag looks like "<namespace> <element>", i.e. the Atom
   // namespace and a root element named feed -- confirm against the xml
   // package version in use.
   XMLName   xml.Name   "http://www.w3.org/2005/Atom feed";
   Title   string;
   Id   string;
   Link   []Link;
   Updated   Time;
   Author   Person;
   // readatom prints the Title of every element of Entry.
   Entry   []Entry;
}

// Entry is one element of Feed.Entry. Only Title is printed by readatom;
// the remaining fields are unmarshaled but not otherwise used in this file.
type Entry struct {
   Title   string;
   Id   string;
   Link   []Link;
   Updated   Time;
   Author   Person;
   Summary   Text;
}

// Link is the element type of the Link slices in Feed and Entry.
// Both fields carry the "attr" tag, presumably mapping them to XML
// attributes rather than child elements -- confirm against the xml package.
type Link struct {
   Rel   string   "attr";
   Href   string   "attr";
}

// Person is the type of the Author field in Feed and Entry.
type Person struct {
   Name   string;
   URI   string;
   Email   string;
}

// Text is the type of Entry.Summary.
// NOTE(review): the "attr" and "chardata" tags presumably select an XML
// attribute and the element's character data respectively -- confirm
// against the xml package.
type Text struct {
   Type   string   "attr";
   Body   string   "chardata";
}

// Time is the type of the Updated fields. It is a plain string, so the
// timestamp is kept as text and is not parsed by this program.
type Time string

// JSON Data Structure

// JTweets is the top-level value that readjson unmarshals a JSON search
// response into. Of its fields, the code in this file only reads Results.
// NOTE(review): the underscore field names presumably mirror the keys of
// the JSON response -- confirm against the json package's matching rules.
type JTweets struct {
   // readjson prints the Text of every element of Results.
   Results      []Result;
   Max_id      int;
   Since_id   int;
   Refresh_url   string;
   Next_page   string;
   Page      int;
   Completed_in   float;
   Query      string;
}

// Result is one element of JTweets.Results. Only Text is printed by
// readjson; the remaining fields are unmarshaled but not otherwise used
// in this file.
type Result struct {
   Profile_image_url   string;
   Created_at      string;
   From_user      string;
   To_user_id      string;
   Text         string;
   Id         string;
   From_user_id      int;
   Geo         string;
   Iso_language_code   string;
   Source         string;
}

// ts runs one Twitter search for s and prints the matching tweets.
//
// how selects the response format and must be "json" or "atom"; for any
// other value ts returns without doing anything. date and n are substituted
// into the since and rpp parameters of queryURI. Progress and error
// messages are written to standard error; the results themselves are
// printed by readatom or readjson.
func ts(s string, how string, date string, n int) {

   if how != "json" && how != "atom" {
      return
   }

   query := fmt.Sprintf(queryURI, how, http.URLEscape(s), date, n);
   r, _, err := http.Get(query);
   if err != nil {
      fmt.Fprintf(os.Stderr, "%v\n", err);
      return
   }

   fmt.Fprintf(os.Stderr, "\nSearching for '%s' (%d results in %s)\n", s, n, how);
   if r.StatusCode != http.StatusOK {
      fmt.Fprintf(os.Stderr,
         "Twitter is unable to search for %s as %s (%s)\n", s, how, r.Status)
   } else if how == "atom" {
      readatom(r.Body)
   } else {
      // how can only be "json" here: the guard above rejected everything else.
      readjson(r.Body)
   }
   r.Body.Close();
}

func readatom(r io.Reader) {
   var twitter Feed;
   err := xml.Unmarshal(r, &twitter);

   if err == nil {
      for i := 0; i < len(twitter.Entry); i++ {
         fmt.Printf(outputfmt,
            /twitter.Entry[i].Author.Name,/ twitter.Entry[i].Title)
      }
   } else {
      fmt.Fprintf(os.Stderr, "Unable to parse the Atom feed (%v)\n", err)
   }
}

func readjson(r io.ReadCloser) {
   var twitter JTweets;
   var b []byte;
   b, err := io.ReadAll(r);

   if err == nil {
      ok, errtok := json.Unmarshal(string(b), &twitter);
      if ok {
         for i := 0; i < len(twitter.Results); i++ {
            fmt.Printf(outputfmt,
               /twitter.Results[i].From_user,/ twitter.Results[i].Text)
         }
      } else {
         fmt.Fprintf(os.Stderr, "Unable to parse the JSON feed (%v)\n", errtok)
      }
   } else {
      fmt.Fprintf(os.Stderr, "%v\n", err)
   }
}

// dump reads all of r and writes it unchanged to standard output.
// A read error is reported on standard error instead.
func dump(r io.ReadCloser) {
   b, err := io.ReadAll(r);

   if err != nil {
      fmt.Fprintf(os.Stderr, "%v\n", err);
      return
   }
   // Print, not Printf: the data is not a format string, and any '%' in it
   // must come out verbatim.
   fmt.Print(string(b))
}

// main parses the command-line flags and runs one search, via ts, for each
// remaining command-line argument.
func main() {
   flag.Parse();
   for i := 0; i < flag.NArg(); i++ {
      // format, since and nresults are pointers returned by the flag
      // package; ts takes plain values, so each one is dereferenced.
      ts(flag.Arg(i), *format, *since, *nresults)
   }
}

3,199 views

0 faves

0 comments

Uploaded on November 18, 2009

Parsing JSON and ATOM Twitter Search Results in Go

The top chart shows the time a Go program takes to search twitter, parse the results, and output the tweets, varying the number of search results from 10-100. The blue shows the results for JSON, the green for ATOM (XML).

Also depicted are the number of bytes parsed and the resulting parsing rate.

The conclusion is that JSON is more efficient; HTTP rates are constant, but JSON requires less data, with less complex data structures to unmarshal. Both methods deliver identical results:

ts -f json -n 10 '#golang'

RT @koizuka: RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
.....

Here is the program: it demonstrates command line parsing, error checking, http processing and unmarshaling both JSON and XML.

// ts -- twitter search
//
// Anthony Starks (ajstarks@gmail.com)
//

package main

import (
   "fmt";
   "http";
   "io";
   "flag";
   "os";
   "xml";
   "json";
)

// Command-line flags. flag.String and flag.Int return pointers, so the
// values must be dereferenced after flag.Parse has run.
var format = flag.String("f", "atom", "Output format (json or atom)")
var nresults = flag.Int("n", 20, "Maximum results (up to 100)")
var since = flag.String("d", "", "Search since this date (YYYY-MM-DD)")

// queryURI is the search URL template. ts fills the verbs, in order, with
// the format ("json" or "atom"), the URL-escaped query, the since date and
// the number of results per page.
const queryURI = "http://search.twitter.com/search.%s?q=%s&since=%s&rpp=%d"
// Disabled two-field variant of outputfmt (two values separated by U+27BE);
// the live format below prints a single value per line.
//const outputfmt = "%s \u27BE %s\n"
const outputfmt = "%s\n"

// Atom Feed Data Structure

// Feed is the top-level value that readatom unmarshals an Atom search
// response into. Of its fields, the code in this file only reads Entry.
type Feed struct {
   // NOTE(review): the tag looks like "<namespace> <element>", i.e. the Atom
   // namespace and a root element named feed -- confirm against the xml
   // package version in use.
   XMLName   xml.Name   "http://www.w3.org/2005/Atom feed";
   Title   string;
   Id   string;
   Link   []Link;
   Updated   Time;
   Author   Person;
   // readatom prints the Title of every element of Entry.
   Entry   []Entry;
}

// Entry is one element of Feed.Entry. Only Title is printed by readatom;
// the remaining fields are unmarshaled but not otherwise used in this file.
type Entry struct {
   Title   string;
   Id   string;
   Link   []Link;
   Updated   Time;
   Author   Person;
   Summary   Text;
}

// Link is the element type of the Link slices in Feed and Entry.
// Both fields carry the "attr" tag, presumably mapping them to XML
// attributes rather than child elements -- confirm against the xml package.
type Link struct {
   Rel   string   "attr";
   Href   string   "attr";
}

// Person is the type of the Author field in Feed and Entry.
type Person struct {
   Name   string;
   URI   string;
   Email   string;
}

// Text is the type of Entry.Summary.
// NOTE(review): the "attr" and "chardata" tags presumably select an XML
// attribute and the element's character data respectively -- confirm
// against the xml package.
type Text struct {
   Type   string   "attr";
   Body   string   "chardata";
}

// Time is the type of the Updated fields. It is a plain string, so the
// timestamp is kept as text and is not parsed by this program.
type Time string

// JSON Data Structure

// JTweets is the top-level value that readjson unmarshals a JSON search
// response into. Of its fields, the code in this file only reads Results.
// NOTE(review): the underscore field names presumably mirror the keys of
// the JSON response -- confirm against the json package's matching rules.
type JTweets struct {
   // readjson prints the Text of every element of Results.
   Results      []Result;
   Max_id      int;
   Since_id   int;
   Refresh_url   string;
   Next_page   string;
   Page      int;
   Completed_in   float;
   Query      string;
}

// Result is one element of JTweets.Results. Only Text is printed by
// readjson; the remaining fields are unmarshaled but not otherwise used
// in this file.
type Result struct {
   Profile_image_url   string;
   Created_at      string;
   From_user      string;
   To_user_id      string;
   Text         string;
   Id         string;
   From_user_id      int;
   Geo         string;
   Iso_language_code   string;
   Source         string;
}

// ts runs one Twitter search for s and prints the matching tweets.
//
// how selects the response format and must be "json" or "atom"; for any
// other value ts returns without doing anything. date and n are substituted
// into the since and rpp parameters of queryURI. Progress and error
// messages are written to standard error; the results themselves are
// printed by readatom or readjson.
func ts(s string, how string, date string, n int) {

   if how != "json" && how != "atom" {
      return
   }

   query := fmt.Sprintf(queryURI, how, http.URLEscape(s), date, n);
   r, _, err := http.Get(query);
   if err != nil {
      fmt.Fprintf(os.Stderr, "%v\n", err);
      return
   }

   fmt.Fprintf(os.Stderr, "\nSearching for '%s' (%d results in %s)\n", s, n, how);
   if r.StatusCode != http.StatusOK {
      fmt.Fprintf(os.Stderr,
         "Twitter is unable to search for %s as %s (%s)\n", s, how, r.Status)
   } else if how == "atom" {
      readatom(r.Body)
   } else {
      // how can only be "json" here: the guard above rejected everything else.
      readjson(r.Body)
   }
   r.Body.Close();
}

func readatom(r io.Reader) {
   var twitter Feed;
   err := xml.Unmarshal(r, &twitter);

   if err == nil {
      for i := 0; i < len(twitter.Entry); i++ {
         fmt.Printf(outputfmt,
            /twitter.Entry[i].Author.Name,/ twitter.Entry[i].Title)
      }
   } else {
      fmt.Fprintf(os.Stderr, "Unable to parse the Atom feed (%v)\n", err)
   }
}

func readjson(r io.ReadCloser) {
   var twitter JTweets;
   var b []byte;
   b, err := io.ReadAll(r);

   if err == nil {
      ok, errtok := json.Unmarshal(string(b), &twitter);
      if ok {
         for i := 0; i < len(twitter.Results); i++ {
            fmt.Printf(outputfmt,
               /twitter.Results[i].From_user,/ twitter.Results[i].Text)
         }
      } else {
         fmt.Fprintf(os.Stderr, "Unable to parse the JSON feed (%v)\n", errtok)
      }
   } else {
      fmt.Fprintf(os.Stderr, "%v\n", err)
   }
}

// dump reads all of r and writes it unchanged to standard output.
// A read error is reported on standard error instead.
func dump(r io.ReadCloser) {
   b, err := io.ReadAll(r);

   if err != nil {
      fmt.Fprintf(os.Stderr, "%v\n", err);
      return
   }
   // Print, not Printf: the data is not a format string, and any '%' in it
   // must come out verbatim.
   fmt.Print(string(b))
}

// main parses the command-line flags and runs one search, via ts, for each
// remaining command-line argument.
func main() {
   flag.Parse();
   for i := 0; i < flag.NArg(); i++ {
      // format, since and nresults are pointers returned by the flag
      // package; ts takes plain values, so each one is dereferenced.
      ts(flag.Arg(i), *format, *since, *nresults)
   }
}