Parsing JSON and ATOM Twitter Search Results in Go
The top chart shows the time a Go program takes to search twitter, parse the results, and output the tweets, varying the number of search results from 10-100. The blue shows the results for JSON, the green for ATOM (XML).
Also depicted are the number of bytes parsed and the resulting parsing rate.
The conclusion is that JSON is more efficient; HTTP rates are constant, but JSON requires less data, with less complex data structures to unmarshal. Both methods deliver identical results:
ts -f json -n 10 '#golang'
RT @koizuka: RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
.....
Here is the program: it demonstrates command-line parsing, error checking, HTTP processing, and unmarshaling of both JSON and XML.
// ts -- twitter search
//
// Anthony Starks (ajstarks@gmail.com)
//
package main
import (
"fmt";
"http";
"io";
"flag";
"os";
"xml";
"json";
)
// Command line flags.
var (
	// format selects the flavor of the search API; ts only acts on
	// "json" and "atom".
	format = flag.String("f", "atom", "Output format (json or atom)");
	// nresults is sent as the rpp parameter of the query.
	nresults = flag.Int("n", 20, "Maximum results (up to 100)");
	// since is sent as the since parameter of the query.
	since = flag.String("d", "", "Search since this date (YYYY-MM-DD)");
)

const (
	// queryURI is the template of a search request. Its verbs receive, in
	// order: the format, the escaped query, the since date and the number
	// of results.
	queryURI = "http://search.twitter.com/search.%s?q=%s&since=%s&rpp=%d";
	// outputfmt is the format used to print one tweet per line.
	// Alternative that also takes the author as first argument:
	//	outputfmt = "%s \u27BE %s\n"
	outputfmt = "%s\n";
)
// Atom Feed Data Structure

// Feed is the root of the Atom document; readatom unmarshals the search
// response into it. The tag on XMLName names the Atom namespace and the
// "feed" element.
type Feed struct {
	XMLName xml.Name "http://www.w3.org/2005/Atom feed";
	Title, Id string;
	Link []Link;
	Updated Time;
	Author Person;
	Entry []Entry;	// readatom prints the Title of each of these
}
// Entry is one item of a Feed. Only Title is read by this program
// (in readatom); the other fields are declared but not used in the
// visible code.
type Entry struct {
	Title, Id string;
	Link []Link;
	Updated Time;
	Author Person;
	Summary Text;
}
type Link struct {
Rel string "attr";
Href string "attr";
}
// Person describes an author by name, URI and e-mail address. It is the
// type of the Author field of both Feed and Entry.
type Person struct {
	Name, URI, Email string;
}
// Text is an Atom text construct; it is the type of Entry.Summary.
// Type is tagged "attr" (filled from the element's attribute) and Body is
// tagged "chardata" (filled from the element's character data).
type Text struct {
Type string "attr";
Body string "chardata";
}
// Time is the type of the Updated fields of Feed and Entry. It is a plain
// string: the timestamp text is stored as-is and never parsed by this program.
type Time string
// JSON Data Structure

// JTweets is the top-level object of the JSON search response; readjson
// unmarshals into it and prints the Text of every element of Results.
// NOTE(review): the field names and types presumably mirror the keys of the
// response -- confirm against the API before using the fields that this
// program does not read.
type JTweets struct {
	Results []Result;
	Max_id, Since_id int;
	Refresh_url, Next_page string;
	Page int;
	Completed_in float;
	Query string;
}
// Result is one tweet of a JTweets response. Only Text is read by this
// program (in readjson).
// NOTE(review): the string/int choices below are taken as-is from the
// original declaration -- verify them against the actual response.
type Result struct {
	Profile_image_url, Created_at, From_user string;
	To_user_id, Text, Id string;
	From_user_id int;
	Geo, Iso_language_code, Source string;
}
// ts searches Twitter for s and prints the matching tweets.
//
// how selects the response format and must be "json" or "atom". date is sent
// as the since parameter (YYYY-MM-DD, may be empty) and n as the number of
// results requested. Nothing is returned: tweets are written to standard
// output by readatom/readjson, while the progress line and every failure
// (unsupported format, request error, non-OK status) go to standard error.
func ts(s string, how string, date string, n int) {
	// Reject an unknown format loudly instead of silently doing nothing.
	if how != "json" && how != "atom" {
		fmt.Fprintf(os.Stderr, "Unsupported format '%s' (use json or atom)\n", how);
		return;
	}
	// date comes from the command line just like s, so it is escaped too;
	// the second result of http.Get is not needed here.
	r, _, err := http.Get(fmt.Sprintf(queryURI, how, http.URLEscape(s), http.URLEscape(date), n));
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err);
		return;
	}
	// Close the body on every path out of the function.
	defer r.Body.Close();
	fmt.Fprintf(os.Stderr, "\nSearching for '%s' (%d results in %s)\n", s, n, how);
	if r.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr,
			"Twitter is unable to search for %s as %s (%s)\n", s, how, r.Status);
		return;
	}
	if how == "atom" {
		readatom(r.Body)
	} else {
		readjson(r.Body)
	}
}
// readatom unmarshals an Atom feed from r and writes the title of every
// entry to standard output using outputfmt. If unmarshaling fails, a
// diagnostic is written to standard error and no titles are printed.
func readatom(r io.Reader) {
	var feed Feed;
	if err := xml.Unmarshal(r, &feed); err != nil {
		fmt.Fprintf(os.Stderr, "Unable to parse the Atom feed (%v)\n", err);
		return;
	}
	for _, entry := range feed.Entry {
		// To print the author as well, use the two-verb outputfmt and pass
		// entry.Author.Name as the first argument.
		fmt.Printf(outputfmt, entry.Title)
	}
}
// readjson reads all of r, unmarshals it as a JSON search response and
// writes the text of every result to standard output using outputfmt.
// A read failure or a parse failure is reported on standard error and no
// tweets are printed. readjson does not close r.
func readjson(r io.ReadCloser) {
	raw, err := io.ReadAll(r);
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err);
		return;
	}
	var tweets JTweets;
	parsed, errtok := json.Unmarshal(string(raw), &tweets);
	if !parsed {
		fmt.Fprintf(os.Stderr, "Unable to parse the JSON feed (%v)\n", errtok);
		return;
	}
	for _, result := range tweets.Results {
		// To print the sender as well, use the two-verb outputfmt and pass
		// result.From_user as the first argument.
		fmt.Printf(outputfmt, result.Text)
	}
}
// dump reads all of r and writes it to standard output unchanged. It is not
// called anywhere in the visible program (presumably a debugging aid for
// looking at a raw response). A read error is reported on standard error
// and nothing is printed. dump does not close r.
func dump(r io.ReadCloser) {
	b, err := io.ReadAll(r);
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err);
		return;
	}
	// Print, not Printf: the payload is data, so a '%' inside it must not be
	// interpreted as a formatting verb.
	fmt.Print(string(b))
}
func main() {
flag.Parse();
for i := 0; i < flag.NArg(); i++ {
ts(flag.Arg(i), *format, *since, *nresults)
}
}
Parsing JSON and ATOM Twitter Search Results in Go
The top chart shows the time a Go program takes to search twitter, parse the results, and output the tweets, varying the number of search results from 10-100. The blue shows the results for JSON, the green for ATOM (XML).
Also depicted are the number of bytes parsed and the resulting parsing rate.
The conclusion is that JSON is more efficient; HTTP rates are constant, but JSON requires less data, with less complex data structures to unmarshal. Both methods deliver identical results:
ts -f json -n 10 '#golang'
RT @koizuka: RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
RT @tokuhirom: #golang は C/C++ のかわりにつかうというよりは。python のかわりに使うという領域の方がおおきいんだとおもう
.....
Here is the program: it demonstrates command-line parsing, error checking, HTTP processing, and unmarshaling of both JSON and XML.
// ts -- twitter search
//
// Anthony Starks (ajstarks@gmail.com)
//
package main
import (
"fmt";
"http";
"io";
"flag";
"os";
"xml";
"json";
)
// Command line flags.
var (
	// format selects the flavor of the search API; ts only acts on
	// "json" and "atom".
	format = flag.String("f", "atom", "Output format (json or atom)");
	// nresults is sent as the rpp parameter of the query.
	nresults = flag.Int("n", 20, "Maximum results (up to 100)");
	// since is sent as the since parameter of the query.
	since = flag.String("d", "", "Search since this date (YYYY-MM-DD)");
)

const (
	// queryURI is the template of a search request. Its verbs receive, in
	// order: the format, the escaped query, the since date and the number
	// of results.
	queryURI = "http://search.twitter.com/search.%s?q=%s&since=%s&rpp=%d";
	// outputfmt is the format used to print one tweet per line.
	// Alternative that also takes the author as first argument:
	//	outputfmt = "%s \u27BE %s\n"
	outputfmt = "%s\n";
)
// Atom Feed Data Structure

// Feed is the root of the Atom document; readatom unmarshals the search
// response into it. The tag on XMLName names the Atom namespace and the
// "feed" element.
type Feed struct {
	XMLName xml.Name "http://www.w3.org/2005/Atom feed";
	Title, Id string;
	Link []Link;
	Updated Time;
	Author Person;
	Entry []Entry;	// readatom prints the Title of each of these
}
// Entry is one item of a Feed. Only Title is read by this program
// (in readatom); the other fields are declared but not used in the
// visible code.
type Entry struct {
	Title, Id string;
	Link []Link;
	Updated Time;
	Author Person;
	Summary Text;
}
type Link struct {
Rel string "attr";
Href string "attr";
}
// Person describes an author by name, URI and e-mail address. It is the
// type of the Author field of both Feed and Entry.
type Person struct {
	Name, URI, Email string;
}
// Text is an Atom text construct; it is the type of Entry.Summary.
// Type is tagged "attr" (filled from the element's attribute) and Body is
// tagged "chardata" (filled from the element's character data).
type Text struct {
Type string "attr";
Body string "chardata";
}
// Time is the type of the Updated fields of Feed and Entry. It is a plain
// string: the timestamp text is stored as-is and never parsed by this program.
type Time string
// JSON Data Structure

// JTweets is the top-level object of the JSON search response; readjson
// unmarshals into it and prints the Text of every element of Results.
// NOTE(review): the field names and types presumably mirror the keys of the
// response -- confirm against the API before using the fields that this
// program does not read.
type JTweets struct {
	Results []Result;
	Max_id, Since_id int;
	Refresh_url, Next_page string;
	Page int;
	Completed_in float;
	Query string;
}
// Result is one tweet of a JTweets response. Only Text is read by this
// program (in readjson).
// NOTE(review): the string/int choices below are taken as-is from the
// original declaration -- verify them against the actual response.
type Result struct {
	Profile_image_url, Created_at, From_user string;
	To_user_id, Text, Id string;
	From_user_id int;
	Geo, Iso_language_code, Source string;
}
// ts searches Twitter for s and prints the matching tweets.
//
// how selects the response format and must be "json" or "atom". date is sent
// as the since parameter (YYYY-MM-DD, may be empty) and n as the number of
// results requested. Nothing is returned: tweets are written to standard
// output by readatom/readjson, while the progress line and every failure
// (unsupported format, request error, non-OK status) go to standard error.
func ts(s string, how string, date string, n int) {
	// Reject an unknown format loudly instead of silently doing nothing.
	if how != "json" && how != "atom" {
		fmt.Fprintf(os.Stderr, "Unsupported format '%s' (use json or atom)\n", how);
		return;
	}
	// date comes from the command line just like s, so it is escaped too;
	// the second result of http.Get is not needed here.
	r, _, err := http.Get(fmt.Sprintf(queryURI, how, http.URLEscape(s), http.URLEscape(date), n));
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err);
		return;
	}
	// Close the body on every path out of the function.
	defer r.Body.Close();
	fmt.Fprintf(os.Stderr, "\nSearching for '%s' (%d results in %s)\n", s, n, how);
	if r.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr,
			"Twitter is unable to search for %s as %s (%s)\n", s, how, r.Status);
		return;
	}
	if how == "atom" {
		readatom(r.Body)
	} else {
		readjson(r.Body)
	}
}
// readatom unmarshals an Atom feed from r and writes the title of every
// entry to standard output using outputfmt. If unmarshaling fails, a
// diagnostic is written to standard error and no titles are printed.
func readatom(r io.Reader) {
	var feed Feed;
	if err := xml.Unmarshal(r, &feed); err != nil {
		fmt.Fprintf(os.Stderr, "Unable to parse the Atom feed (%v)\n", err);
		return;
	}
	for _, entry := range feed.Entry {
		// To print the author as well, use the two-verb outputfmt and pass
		// entry.Author.Name as the first argument.
		fmt.Printf(outputfmt, entry.Title)
	}
}
// readjson reads all of r, unmarshals it as a JSON search response and
// writes the text of every result to standard output using outputfmt.
// A read failure or a parse failure is reported on standard error and no
// tweets are printed. readjson does not close r.
func readjson(r io.ReadCloser) {
	raw, err := io.ReadAll(r);
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err);
		return;
	}
	var tweets JTweets;
	parsed, errtok := json.Unmarshal(string(raw), &tweets);
	if !parsed {
		fmt.Fprintf(os.Stderr, "Unable to parse the JSON feed (%v)\n", errtok);
		return;
	}
	for _, result := range tweets.Results {
		// To print the sender as well, use the two-verb outputfmt and pass
		// result.From_user as the first argument.
		fmt.Printf(outputfmt, result.Text)
	}
}
// dump reads all of r and writes it to standard output unchanged. It is not
// called anywhere in the visible program (presumably a debugging aid for
// looking at a raw response). A read error is reported on standard error
// and nothing is printed. dump does not close r.
func dump(r io.ReadCloser) {
	b, err := io.ReadAll(r);
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v\n", err);
		return;
	}
	// Print, not Printf: the payload is data, so a '%' inside it must not be
	// interpreted as a formatting verb.
	fmt.Print(string(b))
}
func main() {
flag.Parse();
for i := 0; i < flag.NArg(); i++ {
ts(flag.Arg(i), *format, *since, *nresults)
}
}