all repos — honk @ d142286f521764f3c8b71727582a67ad695dfa70

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/csv"
 20	"encoding/json"
 21	"fmt"
 22	"html"
 23	"io/ioutil"
 24	"log"
 25	"os"
 26	"regexp"
 27	"sort"
 28	"strings"
 29	"time"
 30)
 31
 32func importMain(username, flavor, source string) {
 33	switch flavor {
 34	case "mastodon":
 35		importMastodon(username, source)
 36	case "twitter":
 37		importTwitter(username, source)
 38	case "instagram":
 39		importInstagram(username, source)
 40	default:
 41		elog.Fatal("unknown source flavor")
 42	}
 43}
 44
 45type TootObject struct {
 46	Summary      string
 47	Content      string
 48	InReplyTo    string
 49	Conversation string
 50	Published    time.Time
 51	Tag          []struct {
 52		Type string
 53		Name string
 54	}
 55	Attachment []struct {
 56		Type      string
 57		MediaType string
 58		Url       string
 59		Name      string
 60	}
 61}
 62
 63type PlainTootObject TootObject
 64
 65func (obj *TootObject) UnmarshalJSON(b []byte) error {
 66	p := (*PlainTootObject)(obj)
 67	json.Unmarshal(b, p)
 68	return nil
 69}
 70
 71func importMastodon(username, source string) {
 72	user, err := butwhatabout(username)
 73	if err != nil {
 74		elog.Fatal(err)
 75	}
 76
 77	if _, err := os.Stat(source + "/outbox.json"); err == nil {
 78		importMastotoots(user, source)
 79	} else {
 80		ilog.Printf("skipping outbox.json!")
 81	}
 82	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
 83		importMastotooters(user, source)
 84	} else {
 85		ilog.Printf("skipping following_accounts.csv!")
 86	}
 87}
 88
 89func importMastotoots(user *WhatAbout, source string) {
 90	type Toot struct {
 91		Id     string
 92		Type   string
 93		To     []string
 94		Cc     []string
 95		Object TootObject
 96	}
 97	var outbox struct {
 98		OrderedItems []Toot
 99	}
100	ilog.Println("Importing honks...")
101	fd, err := os.Open(source + "/outbox.json")
102	if err != nil {
103		elog.Fatal(err)
104	}
105	dec := json.NewDecoder(fd)
106	err = dec.Decode(&outbox)
107	if err != nil {
108		elog.Fatalf("error parsing json: %s", err)
109	}
110	fd.Close()
111
112	havetoot := func(xid string) bool {
113		var id int64
114		row := stmtFindXonk.QueryRow(user.ID, xid)
115		err := row.Scan(&id)
116		if err == nil {
117			return true
118		}
119		return false
120	}
121
122	re_tootid := regexp.MustCompile("[^/]+$")
123	for _, item := range outbox.OrderedItems {
124		toot := item
125		if toot.Type != "Create" {
126			continue
127		}
128		if strings.HasSuffix(toot.Id, "/activity") {
129			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
130		}
131		tootid := re_tootid.FindString(toot.Id)
132		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
133		if havetoot(xid) {
134			continue
135		}
136		honk := Honk{
137			UserID:   user.ID,
138			What:     "honk",
139			Honker:   user.URL,
140			XID:      xid,
141			RID:      toot.Object.InReplyTo,
142			Date:     toot.Object.Published,
143			URL:      xid,
144			Audience: append(toot.To, toot.Cc...),
145			Noise:    toot.Object.Content,
146			Convoy:   toot.Object.Conversation,
147			Whofore:  2,
148			Format:   "html",
149			Precis:   toot.Object.Summary,
150		}
151		if honk.RID != "" {
152			honk.What = "tonk"
153		}
154		if !loudandproud(honk.Audience) {
155			honk.Whofore = 3
156		}
157		for _, att := range toot.Object.Attachment {
158			switch att.Type {
159			case "Document":
160				fname := fmt.Sprintf("%s/%s", source, att.Url)
161				data, err := ioutil.ReadFile(fname)
162				if err != nil {
163					elog.Printf("error reading media: %s", fname)
164					continue
165				}
166				u := xfiltrate()
167				name := att.Name
168				desc := name
169				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
170				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
171				if err != nil {
172					elog.Printf("error saving media: %s", fname)
173					continue
174				}
175				donk := &Donk{
176					FileID: fileid,
177				}
178				honk.Donks = append(honk.Donks, donk)
179			}
180		}
181		for _, t := range toot.Object.Tag {
182			switch t.Type {
183			case "Hashtag":
184				honk.Onts = append(honk.Onts, t.Name)
185			}
186		}
187		savehonk(&honk)
188	}
189}
190
191func importMastotooters(user *WhatAbout, source string) {
192	ilog.Println("Importing honkers...")
193	fd, err := os.Open(source + "/following_accounts.csv")
194	if err != nil {
195		elog.Fatal(err)
196	}
197	r := csv.NewReader(fd)
198	data, err := r.ReadAll()
199	if err != nil {
200		elog.Fatal(err)
201	}
202	fd.Close()
203
204	var meta HonkerMeta
205	mj, _ := jsonify(&meta)
206
207	for i, d := range data {
208		if i == 0 {
209			continue
210		}
211		url := "@" + d[0]
212		name := ""
213		flavor := "peep"
214		combos := ""
215		_, err := savehonker(user, url, name, flavor, combos, mj)
216		if err != nil {
217			elog.Printf("trouble with a honker: %s", err)
218		}
219	}
220}
221
222func importTwitter(username, source string) {
223	user, err := butwhatabout(username)
224	if err != nil {
225		elog.Fatal(err)
226	}
227
228	type Tweet struct {
229		date   time.Time
230		convoy string
231		Tweet  struct {
232			CreatedAt        string   `json:"created_at"`
233			DisplayTextRange []string `json:"display_text_range"`
234			EditInfo         struct {
235				Initial struct {
236					EditTweetIds   []string `json:"editTweetIds"`
237					EditableUntil  string   `json:"editableUntil"`
238					EditsRemaining string   `json:"editsRemaining"`
239					IsEditEligible bool     `json:"isEditEligible"`
240				} `json:"initial"`
241			} `json:"edit_info"`
242			Entities struct {
243				Hashtags []struct {
244					Indices []string `json:"indices"`
245					Text    string   `json:"text"`
246				} `json:"hashtags"`
247				Media []struct {
248					DisplayURL    string   `json:"display_url"`
249					ExpandedURL   string   `json:"expanded_url"`
250					ID            string   `json:"id"`
251					IdStr         string   `json:"id_str"`
252					Indices       []string `json:"indices"`
253					MediaURL      string   `json:"media_url"`
254					MediaUrlHttps string   `json:"media_url_https"`
255					Sizes         struct {
256						Large struct {
257							H      string `json:"h"`
258							Resize string `json:"resize"`
259							W      string `json:"w"`
260						} `json:"large"`
261						Medium struct {
262							H      string `json:"h"`
263							Resize string `json:"resize"`
264							W      string `json:"w"`
265						} `json:"medium"`
266						Small struct {
267							H      string `json:"h"`
268							Resize string `json:"resize"`
269							W      string `json:"w"`
270						} `json:"small"`
271						Thumb struct {
272							H      string `json:"h"`
273							Resize string `json:"resize"`
274							W      string `json:"w"`
275						} `json:"thumb"`
276					} `json:"sizes"`
277					Type string `json:"type"`
278					URL  string `json:"url"`
279				} `json:"media"`
280				Symbols []interface{} `json:"symbols"`
281				Urls    []struct {
282					DisplayURL  string   `json:"display_url"`
283					ExpandedURL string   `json:"expanded_url"`
284					Indices     []string `json:"indices"`
285					URL         string   `json:"url"`
286				} `json:"urls"`
287				UserMentions []interface{} `json:"user_mentions"`
288			} `json:"entities"`
289			ExtendedEntities struct {
290				Media []struct {
291					DisplayURL    string   `json:"display_url"`
292					ExpandedURL   string   `json:"expanded_url"`
293					ID            string   `json:"id"`
294					IdStr         string   `json:"id_str"`
295					Indices       []string `json:"indices"`
296					MediaURL      string   `json:"media_url"`
297					MediaUrlHttps string   `json:"media_url_https"`
298					Sizes         struct {
299						Large struct {
300							H      string `json:"h"`
301							Resize string `json:"resize"`
302							W      string `json:"w"`
303						} `json:"large"`
304						Medium struct {
305							H      string `json:"h"`
306							Resize string `json:"resize"`
307							W      string `json:"w"`
308						} `json:"medium"`
309						Small struct {
310							H      string `json:"h"`
311							Resize string `json:"resize"`
312							W      string `json:"w"`
313						} `json:"small"`
314						Thumb struct {
315							H      string `json:"h"`
316							Resize string `json:"resize"`
317							W      string `json:"w"`
318						} `json:"thumb"`
319					} `json:"sizes"`
320					Type string `json:"type"`
321					URL  string `json:"url"`
322				} `json:"media"`
323			} `json:"extended_entities"`
324			FavoriteCount        string `json:"favorite_count"`
325			Favorited            bool   `json:"favorited"`
326			FullText             string `json:"full_text"`
327			ID                   string `json:"id"`
328			IdStr                string `json:"id_str"`
329			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
330			InReplyToStatusID    string `json:"in_reply_to_status_id"`
331			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
332			InReplyToUserID      string `json:"in_reply_to_user_id"`
333			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
334			Lang                 string `json:"lang"`
335			PossiblySensitive    bool   `json:"possibly_sensitive"`
336			RetweetCount         string `json:"retweet_count"`
337			Retweeted            bool   `json:"retweeted"`
338			Source               string `json:"source"`
339			Truncated            bool   `json:"truncated"`
340		} `json:"tweet"`
341	}
342
343	var tweets []*Tweet
344	fd, err := os.Open(source + "/tweet.js")
345	if err != nil {
346		elog.Fatal(err)
347	}
348	// skip past window.YTD.tweet.part0 =
349	fd.Seek(25, 0)
350	dec := json.NewDecoder(fd)
351	err = dec.Decode(&tweets)
352	if err != nil {
353		elog.Fatalf("error parsing json: %s", err)
354	}
355	fd.Close()
356	tweetmap := make(map[string]*Tweet)
357	for _, t := range tweets {
358		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
359		tweetmap[t.Tweet.IdStr] = t
360	}
361	sort.Slice(tweets, func(i, j int) bool {
362		return tweets[i].date.Before(tweets[j].date)
363	})
364	havetwid := func(xid string) bool {
365		var id int64
366		row := stmtFindXonk.QueryRow(user.ID, xid)
367		err := row.Scan(&id)
368		if err == nil {
369			log.Printf("id = %v", id)
370			return true
371		}
372		return false
373	}
374	log.Printf("importing %v tweets", len(tweets))
375	for _, t := range tweets {
376		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
377		if havetwid(xid) {
378			continue
379		}
380
381		if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
382			log.Printf("skipping, unworthy tweet")
383			continue
384		}
385
386		what := "honk"
387		noise := ""
388		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
389			t.convoy = parent.convoy
390			what = "tonk"
391		} else {
392			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
393			if t.Tweet.InReplyToScreenName != "" {
394				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
395					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
396				what = "tonk"
397			}
398		}
399		audience := []string{thewholeworld}
400		honk := Honk{
401			UserID:   user.ID,
402			Username: user.Name,
403			What:     what,
404			Honker:   user.URL,
405			XID:      xid,
406			Date:     t.date,
407			Format:   "markdown",
408			Audience: audience,
409			Convoy:   t.convoy,
410			Public:   true,
411			Whofore:  2,
412		}
413		noise += t.Tweet.FullText
414		// unbelievable
415		noise = html.UnescapeString(noise)
416		for _, r := range t.Tweet.Entities.Urls {
417			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
418		}
419		for _, m := range t.Tweet.Entities.Media {
420			u := m.MediaURL
421			idx := strings.LastIndexByte(u, '/')
422			u = u[idx+1:]
423			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
424			data, err := ioutil.ReadFile(fname)
425			if err != nil {
426				elog.Printf("error reading media: %s", fname)
427				continue
428			}
429			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
430
431			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
432			if err != nil {
433				elog.Printf("error saving media: %s", fname)
434				continue
435			}
436			donk := &Donk{
437				FileID: fileid,
438			}
439			honk.Donks = append(honk.Donks, donk)
440			noise = strings.Replace(noise, m.URL, "", -1)
441		}
442		for _, ht := range t.Tweet.Entities.Hashtags {
443			honk.Onts = append(honk.Onts, "#"+ht.Text)
444		}
445		honk.Noise = noise
446		err := savehonk(&honk)
447		log.Printf("honk saved %v -> %v", xid, err)
448	}
449}
450
451func importInstagram(username, source string) {
452	user, err := butwhatabout(username)
453	if err != nil {
454		elog.Fatal(err)
455	}
456
457	type Gram struct {
458		Media []struct {
459			URI      string
460			Creation int64 `json:"creation_timestamp"`
461			Title    string
462		}
463	}
464
465	var grams []*Gram
466	fd, err := os.Open(source + "/content/posts_1.json")
467	if err != nil {
468		elog.Fatal(err)
469	}
470	dec := json.NewDecoder(fd)
471	err = dec.Decode(&grams)
472	if err != nil {
473		elog.Fatalf("error parsing json: %s", err)
474	}
475	fd.Close()
476	log.Printf("importing %d grams", len(grams))
477	sort.Slice(grams, func(i, j int) bool {
478		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
479	})
480	for _, g0 := range grams {
481		g := g0.Media[0]
482		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
483		what := "honk"
484		noise := g.Title
485		convoy := "data:,acoustichonkytonk-" + xfiltrate()
486		date := time.Unix(g.Creation, 0)
487		audience := []string{thewholeworld}
488		honk := Honk{
489			UserID:   user.ID,
490			Username: user.Name,
491			What:     what,
492			Honker:   user.URL,
493			XID:      xid,
494			Date:     date,
495			Format:   "markdown",
496			Audience: audience,
497			Convoy:   convoy,
498			Public:   true,
499			Whofore:  2,
500		}
501		{
502			u := xfiltrate()
503			fname := fmt.Sprintf("%s/%s", source, g.URI)
504			data, err := ioutil.ReadFile(fname)
505			if err != nil {
506				elog.Printf("error reading media: %s", fname)
507				continue
508			}
509			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
510
511			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
512			if err != nil {
513				elog.Printf("error saving media: %s", fname)
514				continue
515			}
516			donk := &Donk{
517				FileID: fileid,
518			}
519			honk.Donks = append(honk.Donks, donk)
520		}
521		honk.Noise = noise
522		err := savehonk(&honk)
523		log.Printf("honk saved %v -> %v", xid, err)
524	}
525}