honk @ d9607090ad6072f10b9d4414f271470d13aec583

my fork of honk

import.go

//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

package main

import (
	"archive/zip"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"

	"humungus.tedunangst.com/r/webs/junk"
)

// importMain dispatches an archive import for username based on the
// source flavor (mastodon, honk, twitter, or instagram).
func importMain(username, flavor, source string) {
	switch flavor {
	case "mastodon":
		importMastodon(username, source)
	case "honk":
		importHonk(username, source)
	case "twitter":
		importTwitter(username, source)
	case "instagram":
		importInstagram(username, source)
	default:
		elog.Fatal("unknown source flavor")
	}
}

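// A sketch of how importMain is reached (hedged: the exact command spelling
// is an assumption; only the (username, flavor, source) signature is taken
// from this file). The command-line front end, not shown here, ends up
// calling something like
//
//	importMain("alice", "mastodon", "/tmp/mastodon-export")
//
// where source is the directory containing outbox.json and friends.
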
// ActivityObject holds the subset of ActivityPub object fields that the
// importers below actually use.
type ActivityObject struct {
	AttributedTo string
	Summary      string
	Content      string
	InReplyTo    string
	Conversation string
	Context      string
	Published    time.Time
	Tag          []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

// PlainActivityObject is an alias type without the custom unmarshaler, so
// UnmarshalJSON below can decode into it without recursing into itself.
type PlainActivityObject ActivityObject

func (obj *ActivityObject) UnmarshalJSON(b []byte) error {
	p := (*PlainActivityObject)(obj)
	// the error is deliberately dropped: an object with an oddly typed field
	// should not abort decoding of the entire archive
	json.Unmarshal(b, p)
	return nil
}

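// For reference, the alias-type trick above is the standard Go idiom for
// customizing UnmarshalJSON without infinite recursion. A minimal standalone
// sketch (generic example, not honk-specific; unlike the method above, this
// one propagates the error):
//
//	type thing struct{ Name string }
//	type plainThing thing
//
//	func (t *thing) UnmarshalJSON(b []byte) error {
//		// plainThing has no UnmarshalJSON method, so this does not recurse
//		return json.Unmarshal(b, (*plainThing)(t))
//	}
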
// importMastodon imports an unpacked Mastodon archive: posts from
// outbox.json and followed accounts from following_accounts.csv.
func importMastodon(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	outbox := source + "/outbox.json"
	if _, err := os.Stat(outbox); err == nil {
		importActivities(user, outbox, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
		importMastotooters(user, source)
	} else {
		ilog.Printf("skipping following_accounts.csv!")
	}
}

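// The directory layout this importer expects from an unpacked Mastodon
// archive, as far as the code above is concerned (either file may be absent
// and is then skipped):
//
//	<source>/outbox.json
//	<source>/following_accounts.csv
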
// importHonk imports posts from another honk instance's export, which uses
// the same outbox.json format (see export below).
func importHonk(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	outbox := source + "/outbox.json"
	if _, err := os.Stat(outbox); err == nil {
		importActivities(user, outbox, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
}

// importActivities reads an ActivityPub outbox.json and saves each Create
// activity as a honk, copying referenced media from the archive.
func importActivities(user *WhatAbout, filename, source string) {
	type Activity struct {
		Id     string
		Type   string
		To     interface{}
		Cc     []string
		Object ActivityObject
	}
	var outbox struct {
		OrderedItems []Activity
	}
	ilog.Println("Importing honks...")
	fd, err := os.Open(filename)
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&outbox)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()

	// skip toots that have already been imported
	havetoot := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		return row.Scan(&id) == nil
	}

	re_tootid := regexp.MustCompile("[^/]+$")
	// reverse the outbox so the oldest items are imported first
	items := outbox.OrderedItems
	for i, j := 0, len(items)-1; i < j; i, j = i+1, j-1 {
		items[i], items[j] = items[j], items[i]
	}
	for _, item := range items {
		toot := item
		if toot.Type != "Create" {
			continue
		}
		if strings.HasSuffix(toot.Id, "/activity") {
			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
		}
		// reuse the trailing id component of the original toot for the new xid
		tootid := re_tootid.FindString(toot.Id)
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
		if havetoot(xid) {
			continue
		}

		convoy := toot.Object.Context
		if convoy == "" {
			convoy = toot.Object.Conversation
		}
		// To may be a single string or an array of strings
		var audience []string
		switch to := toot.To.(type) {
		case string:
			audience = append(audience, to)
		case []interface{}:
			for _, t := range to {
				if s, ok := t.(string); ok {
					audience = append(audience, s)
				}
			}
		}
		audience = append(audience, toot.Cc...)
		honk := Honk{
			UserID:   user.ID,
			What:     "honk",
			Honker:   user.URL,
			XID:      xid,
			RID:      toot.Object.InReplyTo,
			Date:     toot.Object.Published,
			URL:      xid,
			Audience: audience,
			Noise:    toot.Object.Content,
			Convoy:   convoy,
			Whofore:  2,
			Format:   "html",
			Precis:   toot.Object.Summary,
		}
		if !loudandproud(honk.Audience) {
			honk.Whofore = 3
		}
		for _, att := range toot.Object.Attachment {
			switch att.Type {
			case "Document":
				// media files live in the archive relative to its root
				fname := fmt.Sprintf("%s/%s", source, att.Url)
				data, err := ioutil.ReadFile(fname)
				if err != nil {
					elog.Printf("error reading media: %s", fname)
					continue
				}
				u := xfiltrate()
				name := att.Name
				desc := name
				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
				if err != nil {
					elog.Printf("error saving media: %s", fname)
					continue
				}
				donk := &Donk{
					FileID: fileid,
				}
				honk.Donks = append(honk.Donks, donk)
			}
		}
		for _, t := range toot.Object.Tag {
			switch t.Type {
			case "Hashtag":
				honk.Onts = append(honk.Onts, t.Name)
			}
		}
		savehonk(&honk)
	}
}

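// For orientation, a minimal outbox.json shape that importActivities accepts,
// reconstructed from the structs above (a sketch, not a complete ActivityPub
// document):
//
//	{
//	  "orderedItems": [
//	    {
//	      "id": "https://example.com/users/alice/statuses/123/activity",
//	      "type": "Create",
//	      "to": "https://www.w3.org/ns/activitystreams#Public",
//	      "cc": [],
//	      "object": {
//	        "content": "<p>hello world</p>",
//	        "published": "2019-01-01T00:00:00Z",
//	        "attachment": [{"type": "Document", "mediaType": "image/png",
//	                        "url": "media_attachments/1.png", "name": "alt text"}]
//	      }
//	    }
//	  ]
//	}
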
// importMastotooters imports followed accounts from a Mastodon
// following_accounts.csv and saves each one as a honker.
func importMastotooters(user *WhatAbout, source string) {
	ilog.Println("Importing honkers...")
	fd, err := os.Open(source + "/following_accounts.csv")
	if err != nil {
		elog.Fatal(err)
	}
	r := csv.NewReader(fd)
	data, err := r.ReadAll()
	if err != nil {
		elog.Fatal(err)
	}
	fd.Close()

	var meta HonkerMeta
	mj, _ := jsonify(&meta)

	for i, d := range data {
		// the first row is the csv header
		if i == 0 {
			continue
		}
		url := "@" + d[0]
		name := ""
		flavor := "peep"
		combos := ""
		_, err := savehonker(user, url, name, flavor, combos, mj)
		if err != nil {
			elog.Printf("trouble with a honker: %s", err)
		}
	}
}

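// following_accounts.csv as exported by Mastodon looks roughly like this
// (first row is a header and is skipped above; only the first column is used,
// prefixed with "@" to form the honker url):
//
//	Account address,Show boosts
//	alice@example.social,true
//	bob@other.example,false
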
// importTwitter imports tweets from an unpacked Twitter archive, reading
// tweets.js and the files under tweets_media.
func importTwitter(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Tweet struct {
		date   time.Time
		convoy string
		Tweet  struct {
			CreatedAt        string   `json:"created_at"`
			DisplayTextRange []string `json:"display_text_range"`
			EditInfo         struct {
				Initial struct {
					EditTweetIds   []string `json:"editTweetIds"`
					EditableUntil  string   `json:"editableUntil"`
					EditsRemaining string   `json:"editsRemaining"`
					IsEditEligible bool     `json:"isEditEligible"`
				} `json:"initial"`
			} `json:"edit_info"`
			Entities struct {
				Hashtags []struct {
					Indices []string `json:"indices"`
					Text    string   `json:"text"`
				} `json:"hashtags"`
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
				Symbols []interface{} `json:"symbols"`
				Urls    []struct {
					DisplayURL  string   `json:"display_url"`
					ExpandedURL string   `json:"expanded_url"`
					Indices     []string `json:"indices"`
					URL         string   `json:"url"`
				} `json:"urls"`
				UserMentions []interface{} `json:"user_mentions"`
			} `json:"entities"`
			ExtendedEntities struct {
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
			} `json:"extended_entities"`
			FavoriteCount        string `json:"favorite_count"`
			Favorited            bool   `json:"favorited"`
			FullText             string `json:"full_text"`
			ID                   string `json:"id"`
			IdStr                string `json:"id_str"`
			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
			InReplyToStatusID    string `json:"in_reply_to_status_id"`
			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
			InReplyToUserID      string `json:"in_reply_to_user_id"`
			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
			Lang                 string `json:"lang"`
			PossiblySensitive    bool   `json:"possibly_sensitive"`
			RetweetCount         string `json:"retweet_count"`
			Retweeted            bool   `json:"retweeted"`
			Source               string `json:"source"`
			Truncated            bool   `json:"truncated"`
		} `json:"tweet"`
	}

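	// Note: tweets.js in a Twitter archive is not plain JSON; it begins with a
	// JavaScript assignment, roughly
	//
	//	window.YTD.tweet.part0 = [ {"tweet": {...}}, ... ]
	//
	// which is why the decoder below seeks past the first 25 bytes before
	// decoding the array.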
	var tweets []*Tweet
	fd, err := os.Open(source + "/tweets.js")
	if err != nil {
		elog.Fatal(err)
	}
	// skip past window.YTD.tweet.part0 =
	fd.Seek(25, 0)
	dec := json.NewDecoder(fd)
	err = dec.Decode(&tweets)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	// index tweets by id so replies can inherit their parent's convoy
	tweetmap := make(map[string]*Tweet)
	for _, t := range tweets {
		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
		tweetmap[t.Tweet.IdStr] = t
	}
	// oldest tweets first
	sort.Slice(tweets, func(i, j int) bool {
		return tweets[i].date.Before(tweets[j].date)
	})
	// skip tweets that have already been imported
	havetwid := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			log.Printf("id = %v", id)
			return true
		}
		return false
	}
	log.Printf("importing %v tweets", len(tweets))
	for _, t := range tweets {
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
		if havetwid(xid) {
			continue
		}

		what := "honk"
		noise := ""
		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
			t.convoy = parent.convoy
		} else {
			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
			// a reply to a tweet outside the archive just gets a link back
			if t.Tweet.InReplyToScreenName != "" {
				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
			}
		}
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     t.date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   t.convoy,
			Public:   true,
			Whofore:  2,
		}
		noise += t.Tweet.FullText
		// unbelievable
		noise = html.UnescapeString(noise)
		for _, r := range t.Tweet.Entities.Urls {
			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
		}
		for _, m := range t.Tweet.Entities.Media {
			// archive media files are named <tweet id>-<basename of media url>
			u := m.MediaURL
			idx := strings.LastIndexByte(u, '/')
			u = u[idx+1:]
			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
			noise = strings.Replace(noise, m.URL, "", -1)
		}
		for _, ht := range t.Tweet.Entities.Hashtags {
			honk.Onts = append(honk.Onts, "#"+ht.Text)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

// importInstagram imports posts from an unpacked Instagram data export,
// reading content/posts_1.json and the media files it references.
func importInstagram(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Gram struct {
		Media []struct {
			URI      string
			Creation int64 `json:"creation_timestamp"`
			Title    string
		}
	}

	var grams []*Gram
	fd, err := os.Open(source + "/content/posts_1.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&grams)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	log.Printf("importing %d grams", len(grams))
	// oldest posts first
	sort.Slice(grams, func(i, j int) bool {
		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
	})
	for _, g0 := range grams {
		// only the first media item of each post is imported
		g := g0.Media[0]
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
		what := "honk"
		noise := g.Title
		convoy := "data:,acoustichonkytonk-" + xfiltrate()
		date := time.Unix(g.Creation, 0)
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   convoy,
			Public:   true,
			Whofore:  2,
		}
		{
			u := xfiltrate()
			fname := fmt.Sprintf("%s/%s", source, g.URI)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

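// content/posts_1.json in an Instagram export is, roughly, an array of posts,
// each carrying a media list (field names taken from the Gram struct above;
// the exact surrounding layout is an assumption):
//
//	[
//	  {"media": [{"uri": "media/posts/201901/photo.jpg",
//	              "creation_timestamp": 1546300800,
//	              "title": "caption text"}]}
//	]
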
// export writes the user's honks, received honks, and attached media to a
// zip archive whose outbox.json can be read back by the honk importer above.
func export(username, file string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}
	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		elog.Fatal(err)
	}
	zd := zip.NewWriter(fd)
	donks := make(map[string]bool)
	{
		// the user's own honks become outbox.json
		w, err := zd.Create("outbox.json")
		if err != nil {
			elog.Fatal(err)
		}
		var jonks []junk.Junk
		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			noise := honk.Noise
			j, jo := jonkjonk(user, honk)
			if honk.Format == "markdown" {
				jo["source"] = noise
			}
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/outbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	{
		// honks addressed to the user become inbox.json
		w, err := zd.Create("inbox.json")
		if err != nil {
			elog.Fatal(err)
		}
		var jonks []junk.Junk
		rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			j, _ := jonkjonk(user, honk)
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/inbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	// copy every referenced attachment into media/
	zd.Create("media/")
	for donk := range donks {
		var media string
		var data []byte
		w, err := zd.Create("media/" + donk)
		if err != nil {
			elog.Fatal(err)
		}
		row := stmtGetFileData.QueryRow(donk)
		err = row.Scan(&media, &data)
		if err != nil {
			elog.Fatal(err)
		}
		w.Write(data)
	}
	zd.Close()
	fd.Close()
}
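
// The archive written by export contains, per the code above:
//
//	outbox.json   the user's own honks as an ActivityPub OrderedCollection
//	inbox.json    honks addressed to the user, same format
//	media/<xid>   one entry per referenced attachment
//
// which is the same outbox.json layout that importHonk reads back.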