all repos — honk @ 0e231e7045b770837ab7f5f8d1acbae29f047a1a

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/csv"
 20	"encoding/json"
 21	"fmt"
 22	"html"
 23	"io/ioutil"
 24	"log"
 25	"os"
 26	"regexp"
 27	"sort"
 28	"strings"
 29	"time"
 30)
 31
 32func importMain(username, flavor, source string) {
 33	switch flavor {
 34	case "mastodon":
 35		importMastodon(username, source)
 36	case "twitter":
 37		importTwitter(username, source)
 38	default:
 39		elog.Fatal("unknown source flavor")
 40	}
 41}
 42
 43type TootObject struct {
 44	Summary      string
 45	Content      string
 46	InReplyTo    string
 47	Conversation string
 48	Published    time.Time
 49	Tag          []struct {
 50		Type string
 51		Name string
 52	}
 53	Attachment []struct {
 54		Type      string
 55		MediaType string
 56		Url       string
 57		Name      string
 58	}
 59}
 60
 61type PlainTootObject TootObject
 62
 63func (obj *TootObject) UnmarshalJSON(b []byte) error {
 64	p := (*PlainTootObject)(obj)
 65	json.Unmarshal(b, p)
 66	return nil
 67}
 68
 69func importMastodon(username, source string) {
 70	user, err := butwhatabout(username)
 71	if err != nil {
 72		elog.Fatal(err)
 73	}
 74
 75	if _, err := os.Stat(source + "/outbox.json"); err == nil {
 76		importMastotoots(user, source)
 77	} else {
 78		ilog.Printf("skipping outbox.json!")
 79	}
 80	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
 81		importMastotooters(user, source)
 82	} else {
 83		ilog.Printf("skipping following_accounts.csv!")
 84	}
 85}
 86
 87func importMastotoots(user *WhatAbout, source string) {
 88	type Toot struct {
 89		Id     string
 90		Type   string
 91		To     []string
 92		Cc     []string
 93		Object TootObject
 94	}
 95	var outbox struct {
 96		OrderedItems []Toot
 97	}
 98	ilog.Println("Importing honks...")
 99	fd, err := os.Open(source + "/outbox.json")
100	if err != nil {
101		elog.Fatal(err)
102	}
103	dec := json.NewDecoder(fd)
104	err = dec.Decode(&outbox)
105	if err != nil {
106		elog.Fatalf("error parsing json: %s", err)
107	}
108	fd.Close()
109
110	havetoot := func(xid string) bool {
111		var id int64
112		row := stmtFindXonk.QueryRow(user.ID, xid)
113		err := row.Scan(&id)
114		if err == nil {
115			return true
116		}
117		return false
118	}
119
120	re_tootid := regexp.MustCompile("[^/]+$")
121	for _, item := range outbox.OrderedItems {
122		toot := item
123		if toot.Type != "Create" {
124			continue
125		}
126		if strings.HasSuffix(toot.Id, "/activity") {
127			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
128		}
129		tootid := re_tootid.FindString(toot.Id)
130		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
131		if havetoot(xid) {
132			continue
133		}
134		honk := Honk{
135			UserID:   user.ID,
136			What:     "honk",
137			Honker:   user.URL,
138			XID:      xid,
139			RID:      toot.Object.InReplyTo,
140			Date:     toot.Object.Published,
141			URL:      xid,
142			Audience: append(toot.To, toot.Cc...),
143			Noise:    toot.Object.Content,
144			Convoy:   toot.Object.Conversation,
145			Whofore:  2,
146			Format:   "html",
147			Precis:   toot.Object.Summary,
148		}
149		if honk.RID != "" {
150			honk.What = "tonk"
151		}
152		if !loudandproud(honk.Audience) {
153			honk.Whofore = 3
154		}
155		for _, att := range toot.Object.Attachment {
156			switch att.Type {
157			case "Document":
158				fname := fmt.Sprintf("%s/%s", source, att.Url)
159				data, err := ioutil.ReadFile(fname)
160				if err != nil {
161					elog.Printf("error reading media: %s", fname)
162					continue
163				}
164				u := xfiltrate()
165				name := att.Name
166				desc := name
167				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
168				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
169				if err != nil {
170					elog.Printf("error saving media: %s", fname)
171					continue
172				}
173				donk := &Donk{
174					FileID: fileid,
175				}
176				honk.Donks = append(honk.Donks, donk)
177			}
178		}
179		for _, t := range toot.Object.Tag {
180			switch t.Type {
181			case "Hashtag":
182				honk.Onts = append(honk.Onts, t.Name)
183			}
184		}
185		savehonk(&honk)
186	}
187}
188
189func importMastotooters(user *WhatAbout, source string) {
190	ilog.Println("Importing honkers...")
191	fd, err := os.Open(source + "/following_accounts.csv")
192	if err != nil {
193		elog.Fatal(err)
194	}
195	r := csv.NewReader(fd)
196	data, err := r.ReadAll()
197	if err != nil {
198		elog.Fatal(err)
199	}
200	fd.Close()
201
202	var meta HonkerMeta
203	mj, _ := jsonify(&meta)
204
205	for i, d := range data {
206		if i == 0 {
207			continue
208		}
209		url := "@" + d[0]
210		name := ""
211		flavor := "peep"
212		combos := ""
213		_, err := savehonker(user, url, name, flavor, combos, mj)
214		if err != nil {
215			elog.Printf("trouble with a honker: %s", err)
216		}
217	}
218}
219
220func importTwitter(username, source string) {
221	user, err := butwhatabout(username)
222	if err != nil {
223		elog.Fatal(err)
224	}
225
226	type Tweet struct {
227		date   time.Time
228		convoy string
229		Tweet  struct {
230			CreatedAt        string   `json:"created_at"`
231			DisplayTextRange []string `json:"display_text_range"`
232			EditInfo         struct {
233				Initial struct {
234					EditTweetIds   []string `json:"editTweetIds"`
235					EditableUntil  string   `json:"editableUntil"`
236					EditsRemaining string   `json:"editsRemaining"`
237					IsEditEligible bool     `json:"isEditEligible"`
238				} `json:"initial"`
239			} `json:"edit_info"`
240			Entities struct {
241				Hashtags []struct {
242					Indices []string `json:"indices"`
243					Text    string   `json:"text"`
244				} `json:"hashtags"`
245				Media []struct {
246					DisplayURL    string   `json:"display_url"`
247					ExpandedURL   string   `json:"expanded_url"`
248					ID            string   `json:"id"`
249					IdStr         string   `json:"id_str"`
250					Indices       []string `json:"indices"`
251					MediaURL      string   `json:"media_url"`
252					MediaUrlHttps string   `json:"media_url_https"`
253					Sizes         struct {
254						Large struct {
255							H      string `json:"h"`
256							Resize string `json:"resize"`
257							W      string `json:"w"`
258						} `json:"large"`
259						Medium struct {
260							H      string `json:"h"`
261							Resize string `json:"resize"`
262							W      string `json:"w"`
263						} `json:"medium"`
264						Small struct {
265							H      string `json:"h"`
266							Resize string `json:"resize"`
267							W      string `json:"w"`
268						} `json:"small"`
269						Thumb struct {
270							H      string `json:"h"`
271							Resize string `json:"resize"`
272							W      string `json:"w"`
273						} `json:"thumb"`
274					} `json:"sizes"`
275					Type string `json:"type"`
276					URL  string `json:"url"`
277				} `json:"media"`
278				Symbols []interface{} `json:"symbols"`
279				Urls    []struct {
280					DisplayURL  string   `json:"display_url"`
281					ExpandedURL string   `json:"expanded_url"`
282					Indices     []string `json:"indices"`
283					URL         string   `json:"url"`
284				} `json:"urls"`
285				UserMentions []interface{} `json:"user_mentions"`
286			} `json:"entities"`
287			ExtendedEntities struct {
288				Media []struct {
289					DisplayURL    string   `json:"display_url"`
290					ExpandedURL   string   `json:"expanded_url"`
291					ID            string   `json:"id"`
292					IdStr         string   `json:"id_str"`
293					Indices       []string `json:"indices"`
294					MediaURL      string   `json:"media_url"`
295					MediaUrlHttps string   `json:"media_url_https"`
296					Sizes         struct {
297						Large struct {
298							H      string `json:"h"`
299							Resize string `json:"resize"`
300							W      string `json:"w"`
301						} `json:"large"`
302						Medium struct {
303							H      string `json:"h"`
304							Resize string `json:"resize"`
305							W      string `json:"w"`
306						} `json:"medium"`
307						Small struct {
308							H      string `json:"h"`
309							Resize string `json:"resize"`
310							W      string `json:"w"`
311						} `json:"small"`
312						Thumb struct {
313							H      string `json:"h"`
314							Resize string `json:"resize"`
315							W      string `json:"w"`
316						} `json:"thumb"`
317					} `json:"sizes"`
318					Type string `json:"type"`
319					URL  string `json:"url"`
320				} `json:"media"`
321			} `json:"extended_entities"`
322			FavoriteCount        string `json:"favorite_count"`
323			Favorited            bool   `json:"favorited"`
324			FullText             string `json:"full_text"`
325			ID                   string `json:"id"`
326			IdStr                string `json:"id_str"`
327			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
328			InReplyToStatusID    string `json:"in_reply_to_status_id"`
329			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
330			InReplyToUserID      string `json:"in_reply_to_user_id"`
331			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
332			Lang                 string `json:"lang"`
333			PossiblySensitive    bool   `json:"possibly_sensitive"`
334			RetweetCount         string `json:"retweet_count"`
335			Retweeted            bool   `json:"retweeted"`
336			Source               string `json:"source"`
337			Truncated            bool   `json:"truncated"`
338		} `json:"tweet"`
339	}
340
341	var tweets []*Tweet
342	fd, err := os.Open(source + "/tweet.js")
343	if err != nil {
344		elog.Fatal(err)
345	}
346	// skip past window.YTD.tweet.part0 =
347	fd.Seek(25, 0)
348	dec := json.NewDecoder(fd)
349	err = dec.Decode(&tweets)
350	if err != nil {
351		elog.Fatalf("error parsing json: %s", err)
352	}
353	fd.Close()
354	tweetmap := make(map[string]*Tweet)
355	for _, t := range tweets {
356		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
357		tweetmap[t.Tweet.IdStr] = t
358	}
359	sort.Slice(tweets, func(i, j int) bool {
360		return tweets[i].date.Before(tweets[j].date)
361	})
362	havetwid := func(xid string) bool {
363		var id int64
364		row := stmtFindXonk.QueryRow(user.ID, xid)
365		err := row.Scan(&id)
366		if err == nil {
367			log.Printf("id = %v", id)
368			return true
369		}
370		return false
371	}
372	log.Printf("importing %v tweets", len(tweets))
373	for _, t := range tweets {
374		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
375		if havetwid(xid) {
376			continue
377		}
378
379		if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
380			log.Printf("skipping, unworthy tweet")
381			continue
382		}
383
384		what := "honk"
385		noise := ""
386		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
387			t.convoy = parent.convoy
388			what = "tonk"
389		} else {
390			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
391			if t.Tweet.InReplyToScreenName != "" {
392				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
393					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
394				what = "tonk"
395			}
396		}
397		audience := []string{thewholeworld}
398		honk := Honk{
399			UserID:   user.ID,
400			Username: user.Name,
401			What:     what,
402			Honker:   user.URL,
403			XID:      xid,
404			Date:     t.date,
405			Format:   "markdown",
406			Audience: audience,
407			Convoy:   t.convoy,
408			Public:   true,
409			Whofore:  2,
410		}
411		noise += t.Tweet.FullText
412		// unbelievable
413		noise = html.UnescapeString(noise)
414		for _, r := range t.Tweet.Entities.Urls {
415			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
416		}
417		for _, m := range t.Tweet.Entities.Media {
418			u := m.MediaURL
419			idx := strings.LastIndexByte(u, '/')
420			u = u[idx+1:]
421			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
422			data, err := ioutil.ReadFile(fname)
423			if err != nil {
424				elog.Printf("error reading media: %s", fname)
425				continue
426			}
427			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
428
429			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
430			if err != nil {
431				elog.Printf("error saving media: %s", fname)
432				continue
433			}
434			donk := &Donk{
435				FileID: fileid,
436			}
437			honk.Donks = append(honk.Donks, donk)
438			noise = strings.Replace(noise, m.URL, "", -1)
439		}
440		for _, ht := range t.Tweet.Entities.Hashtags {
441			honk.Onts = append(honk.Onts, "#"+ht.Text)
442		}
443		honk.Noise = noise
444		err := savehonk(&honk)
445		log.Printf("honk saved %v -> %v", xid, err)
446	}
447}