all repos — honk @ 784f93efdd9bbd18894a5a7f419818fcefc8ce48

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/csv"
 20	"encoding/json"
 21	"fmt"
 22	"html"
 23	"io/ioutil"
 24	"log"
 25	"os"
 26	"regexp"
 27	"sort"
 28	"strings"
 29	"time"
 30)
 31
 32func importMain(username, flavor, source string) {
 33	switch flavor {
 34	case "mastodon":
 35		importMastodon(username, source)
 36	case "twitter":
 37		importTwitter(username, source)
 38	case "instagram":
 39		importInstagram(username, source)
 40	default:
 41		elog.Fatal("unknown source flavor")
 42	}
 43}
 44
 45type TootObject struct {
 46	Summary      string
 47	Content      string
 48	InReplyTo    string
 49	Conversation string
 50	Published    time.Time
 51	Tag          []struct {
 52		Type string
 53		Name string
 54	}
 55	Attachment []struct {
 56		Type      string
 57		MediaType string
 58		Url       string
 59		Name      string
 60	}
 61}
 62
 63type PlainTootObject TootObject
 64
 65func (obj *TootObject) UnmarshalJSON(b []byte) error {
 66	p := (*PlainTootObject)(obj)
 67	json.Unmarshal(b, p)
 68	return nil
 69}
 70
 71func importMastodon(username, source string) {
 72	user, err := butwhatabout(username)
 73	if err != nil {
 74		elog.Fatal(err)
 75	}
 76
 77	if _, err := os.Stat(source + "/outbox.json"); err == nil {
 78		importMastotoots(user, source)
 79	} else {
 80		ilog.Printf("skipping outbox.json!")
 81	}
 82	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
 83		importMastotooters(user, source)
 84	} else {
 85		ilog.Printf("skipping following_accounts.csv!")
 86	}
 87}
 88
 89func importMastotoots(user *WhatAbout, source string) {
 90	type Toot struct {
 91		Id     string
 92		Type   string
 93		To     []string
 94		Cc     []string
 95		Object TootObject
 96	}
 97	var outbox struct {
 98		OrderedItems []Toot
 99	}
100	ilog.Println("Importing honks...")
101	fd, err := os.Open(source + "/outbox.json")
102	if err != nil {
103		elog.Fatal(err)
104	}
105	dec := json.NewDecoder(fd)
106	err = dec.Decode(&outbox)
107	if err != nil {
108		elog.Fatalf("error parsing json: %s", err)
109	}
110	fd.Close()
111
112	havetoot := func(xid string) bool {
113		var id int64
114		row := stmtFindXonk.QueryRow(user.ID, xid)
115		err := row.Scan(&id)
116		if err == nil {
117			return true
118		}
119		return false
120	}
121
122	re_tootid := regexp.MustCompile("[^/]+$")
123	for _, item := range outbox.OrderedItems {
124		toot := item
125		if toot.Type != "Create" {
126			continue
127		}
128		if strings.HasSuffix(toot.Id, "/activity") {
129			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
130		}
131		tootid := re_tootid.FindString(toot.Id)
132		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
133		if havetoot(xid) {
134			continue
135		}
136		honk := Honk{
137			UserID:   user.ID,
138			What:     "honk",
139			Honker:   user.URL,
140			XID:      xid,
141			RID:      toot.Object.InReplyTo,
142			Date:     toot.Object.Published,
143			URL:      xid,
144			Audience: append(toot.To, toot.Cc...),
145			Noise:    toot.Object.Content,
146			Convoy:   toot.Object.Conversation,
147			Whofore:  2,
148			Format:   "html",
149			Precis:   toot.Object.Summary,
150		}
151		if !loudandproud(honk.Audience) {
152			honk.Whofore = 3
153		}
154		for _, att := range toot.Object.Attachment {
155			switch att.Type {
156			case "Document":
157				fname := fmt.Sprintf("%s/%s", source, att.Url)
158				data, err := ioutil.ReadFile(fname)
159				if err != nil {
160					elog.Printf("error reading media: %s", fname)
161					continue
162				}
163				u := xfiltrate()
164				name := att.Name
165				desc := name
166				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
167				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
168				if err != nil {
169					elog.Printf("error saving media: %s", fname)
170					continue
171				}
172				donk := &Donk{
173					FileID: fileid,
174				}
175				honk.Donks = append(honk.Donks, donk)
176			}
177		}
178		for _, t := range toot.Object.Tag {
179			switch t.Type {
180			case "Hashtag":
181				honk.Onts = append(honk.Onts, t.Name)
182			}
183		}
184		savehonk(&honk)
185	}
186}
187
188func importMastotooters(user *WhatAbout, source string) {
189	ilog.Println("Importing honkers...")
190	fd, err := os.Open(source + "/following_accounts.csv")
191	if err != nil {
192		elog.Fatal(err)
193	}
194	r := csv.NewReader(fd)
195	data, err := r.ReadAll()
196	if err != nil {
197		elog.Fatal(err)
198	}
199	fd.Close()
200
201	var meta HonkerMeta
202	mj, _ := jsonify(&meta)
203
204	for i, d := range data {
205		if i == 0 {
206			continue
207		}
208		url := "@" + d[0]
209		name := ""
210		flavor := "peep"
211		combos := ""
212		_, err := savehonker(user, url, name, flavor, combos, mj)
213		if err != nil {
214			elog.Printf("trouble with a honker: %s", err)
215		}
216	}
217}
218
219func importTwitter(username, source string) {
220	user, err := butwhatabout(username)
221	if err != nil {
222		elog.Fatal(err)
223	}
224
225	type Tweet struct {
226		date   time.Time
227		convoy string
228		Tweet  struct {
229			CreatedAt        string   `json:"created_at"`
230			DisplayTextRange []string `json:"display_text_range"`
231			EditInfo         struct {
232				Initial struct {
233					EditTweetIds   []string `json:"editTweetIds"`
234					EditableUntil  string   `json:"editableUntil"`
235					EditsRemaining string   `json:"editsRemaining"`
236					IsEditEligible bool     `json:"isEditEligible"`
237				} `json:"initial"`
238			} `json:"edit_info"`
239			Entities struct {
240				Hashtags []struct {
241					Indices []string `json:"indices"`
242					Text    string   `json:"text"`
243				} `json:"hashtags"`
244				Media []struct {
245					DisplayURL    string   `json:"display_url"`
246					ExpandedURL   string   `json:"expanded_url"`
247					ID            string   `json:"id"`
248					IdStr         string   `json:"id_str"`
249					Indices       []string `json:"indices"`
250					MediaURL      string   `json:"media_url"`
251					MediaUrlHttps string   `json:"media_url_https"`
252					Sizes         struct {
253						Large struct {
254							H      string `json:"h"`
255							Resize string `json:"resize"`
256							W      string `json:"w"`
257						} `json:"large"`
258						Medium struct {
259							H      string `json:"h"`
260							Resize string `json:"resize"`
261							W      string `json:"w"`
262						} `json:"medium"`
263						Small struct {
264							H      string `json:"h"`
265							Resize string `json:"resize"`
266							W      string `json:"w"`
267						} `json:"small"`
268						Thumb struct {
269							H      string `json:"h"`
270							Resize string `json:"resize"`
271							W      string `json:"w"`
272						} `json:"thumb"`
273					} `json:"sizes"`
274					Type string `json:"type"`
275					URL  string `json:"url"`
276				} `json:"media"`
277				Symbols []interface{} `json:"symbols"`
278				Urls    []struct {
279					DisplayURL  string   `json:"display_url"`
280					ExpandedURL string   `json:"expanded_url"`
281					Indices     []string `json:"indices"`
282					URL         string   `json:"url"`
283				} `json:"urls"`
284				UserMentions []interface{} `json:"user_mentions"`
285			} `json:"entities"`
286			ExtendedEntities struct {
287				Media []struct {
288					DisplayURL    string   `json:"display_url"`
289					ExpandedURL   string   `json:"expanded_url"`
290					ID            string   `json:"id"`
291					IdStr         string   `json:"id_str"`
292					Indices       []string `json:"indices"`
293					MediaURL      string   `json:"media_url"`
294					MediaUrlHttps string   `json:"media_url_https"`
295					Sizes         struct {
296						Large struct {
297							H      string `json:"h"`
298							Resize string `json:"resize"`
299							W      string `json:"w"`
300						} `json:"large"`
301						Medium struct {
302							H      string `json:"h"`
303							Resize string `json:"resize"`
304							W      string `json:"w"`
305						} `json:"medium"`
306						Small struct {
307							H      string `json:"h"`
308							Resize string `json:"resize"`
309							W      string `json:"w"`
310						} `json:"small"`
311						Thumb struct {
312							H      string `json:"h"`
313							Resize string `json:"resize"`
314							W      string `json:"w"`
315						} `json:"thumb"`
316					} `json:"sizes"`
317					Type string `json:"type"`
318					URL  string `json:"url"`
319				} `json:"media"`
320			} `json:"extended_entities"`
321			FavoriteCount        string `json:"favorite_count"`
322			Favorited            bool   `json:"favorited"`
323			FullText             string `json:"full_text"`
324			ID                   string `json:"id"`
325			IdStr                string `json:"id_str"`
326			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
327			InReplyToStatusID    string `json:"in_reply_to_status_id"`
328			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
329			InReplyToUserID      string `json:"in_reply_to_user_id"`
330			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
331			Lang                 string `json:"lang"`
332			PossiblySensitive    bool   `json:"possibly_sensitive"`
333			RetweetCount         string `json:"retweet_count"`
334			Retweeted            bool   `json:"retweeted"`
335			Source               string `json:"source"`
336			Truncated            bool   `json:"truncated"`
337		} `json:"tweet"`
338	}
339
340	var tweets []*Tweet
341	fd, err := os.Open(source + "/tweet.js")
342	if err != nil {
343		elog.Fatal(err)
344	}
345	// skip past window.YTD.tweet.part0 =
346	fd.Seek(25, 0)
347	dec := json.NewDecoder(fd)
348	err = dec.Decode(&tweets)
349	if err != nil {
350		elog.Fatalf("error parsing json: %s", err)
351	}
352	fd.Close()
353	tweetmap := make(map[string]*Tweet)
354	for _, t := range tweets {
355		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
356		tweetmap[t.Tweet.IdStr] = t
357	}
358	sort.Slice(tweets, func(i, j int) bool {
359		return tweets[i].date.Before(tweets[j].date)
360	})
361	havetwid := func(xid string) bool {
362		var id int64
363		row := stmtFindXonk.QueryRow(user.ID, xid)
364		err := row.Scan(&id)
365		if err == nil {
366			log.Printf("id = %v", id)
367			return true
368		}
369		return false
370	}
371	log.Printf("importing %v tweets", len(tweets))
372	for _, t := range tweets {
373		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
374		if havetwid(xid) {
375			continue
376		}
377
378		if t.Tweet.FavoriteCount == "0" || t.Tweet.FavoriteCount == "" {
379			log.Printf("skipping, unworthy tweet")
380			continue
381		}
382
383		what := "honk"
384		noise := ""
385		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
386			t.convoy = parent.convoy
387		} else {
388			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
389			if t.Tweet.InReplyToScreenName != "" {
390				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
391					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
392			}
393		}
394		audience := []string{thewholeworld}
395		honk := Honk{
396			UserID:   user.ID,
397			Username: user.Name,
398			What:     what,
399			Honker:   user.URL,
400			XID:      xid,
401			Date:     t.date,
402			Format:   "markdown",
403			Audience: audience,
404			Convoy:   t.convoy,
405			Public:   true,
406			Whofore:  2,
407		}
408		noise += t.Tweet.FullText
409		// unbelievable
410		noise = html.UnescapeString(noise)
411		for _, r := range t.Tweet.Entities.Urls {
412			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
413		}
414		for _, m := range t.Tweet.Entities.Media {
415			u := m.MediaURL
416			idx := strings.LastIndexByte(u, '/')
417			u = u[idx+1:]
418			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
419			data, err := ioutil.ReadFile(fname)
420			if err != nil {
421				elog.Printf("error reading media: %s", fname)
422				continue
423			}
424			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
425
426			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
427			if err != nil {
428				elog.Printf("error saving media: %s", fname)
429				continue
430			}
431			donk := &Donk{
432				FileID: fileid,
433			}
434			honk.Donks = append(honk.Donks, donk)
435			noise = strings.Replace(noise, m.URL, "", -1)
436		}
437		for _, ht := range t.Tweet.Entities.Hashtags {
438			honk.Onts = append(honk.Onts, "#"+ht.Text)
439		}
440		honk.Noise = noise
441		err := savehonk(&honk)
442		log.Printf("honk saved %v -> %v", xid, err)
443	}
444}
445
446func importInstagram(username, source string) {
447	user, err := butwhatabout(username)
448	if err != nil {
449		elog.Fatal(err)
450	}
451
452	type Gram struct {
453		Media []struct {
454			URI      string
455			Creation int64 `json:"creation_timestamp"`
456			Title    string
457		}
458	}
459
460	var grams []*Gram
461	fd, err := os.Open(source + "/content/posts_1.json")
462	if err != nil {
463		elog.Fatal(err)
464	}
465	dec := json.NewDecoder(fd)
466	err = dec.Decode(&grams)
467	if err != nil {
468		elog.Fatalf("error parsing json: %s", err)
469	}
470	fd.Close()
471	log.Printf("importing %d grams", len(grams))
472	sort.Slice(grams, func(i, j int) bool {
473		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
474	})
475	for _, g0 := range grams {
476		g := g0.Media[0]
477		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
478		what := "honk"
479		noise := g.Title
480		convoy := "data:,acoustichonkytonk-" + xfiltrate()
481		date := time.Unix(g.Creation, 0)
482		audience := []string{thewholeworld}
483		honk := Honk{
484			UserID:   user.ID,
485			Username: user.Name,
486			What:     what,
487			Honker:   user.URL,
488			XID:      xid,
489			Date:     date,
490			Format:   "markdown",
491			Audience: audience,
492			Convoy:   convoy,
493			Public:   true,
494			Whofore:  2,
495		}
496		{
497			u := xfiltrate()
498			fname := fmt.Sprintf("%s/%s", source, g.URI)
499			data, err := ioutil.ReadFile(fname)
500			if err != nil {
501				elog.Printf("error reading media: %s", fname)
502				continue
503			}
504			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
505
506			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
507			if err != nil {
508				elog.Printf("error saving media: %s", fname)
509				continue
510			}
511			donk := &Donk{
512				FileID: fileid,
513			}
514			honk.Donks = append(honk.Donks, donk)
515		}
516		honk.Noise = noise
517		err := savehonk(&honk)
518		log.Printf("honk saved %v -> %v", xid, err)
519	}
520}