honk @ b4c436d48b5319ab112317ac4c4a31bc12037671

my fork of honk

import.go

//
// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

package main

import (
	"archive/zip"
	"encoding/csv"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"os"
	"regexp"
	"sort"
	"strings"
	"time"

	"humungus.tedunangst.com/r/webs/junk"
)

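// importMain imports a saved archive of the given flavor (mastodon, honk,
// twitter, or instagram) into the named user's account.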
func importMain(username, flavor, source string) {
	switch flavor {
	case "mastodon":
		importMastodon(username, source)
	case "honk":
		importHonk(username, source)
	case "twitter":
		importTwitter(username, source)
	case "instagram":
		importInstagram(username, source)
	default:
		elog.Fatal("unknown source flavor")
	}
}

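// ActivityObject holds the subset of ActivityPub object fields that the
// importers read.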
type ActivityObject struct {
	AttributedTo string
	Summary      string
	Content      string
	Source       struct {
		MediaType string
		Content   string
	}
	InReplyTo    string
	Conversation string
	Context      string
	Published    time.Time
	Tag          []struct {
		Type string
		Name string
	}
	Attachment []struct {
		Type      string
		MediaType string
		Url       string
		Name      string
	}
}

type PlainActivityObject ActivityObject

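// UnmarshalJSON decodes via the PlainActivityObject alias to get the default
// decoder behavior, and ignores decode errors so that a malformed field in
// one object does not stop the rest of the import.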
func (obj *ActivityObject) UnmarshalJSON(b []byte) error {
	p := (*PlainActivityObject)(obj)
	json.Unmarshal(b, p)
	return nil
}

func importMastodon(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	outbox := source + "/outbox.json"
	if _, err := os.Stat(outbox); err == nil {
		importActivities(user, outbox, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
		importMastotooters(user, source)
	} else {
		ilog.Printf("skipping following_accounts.csv!")
	}
}

func importHonk(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	outbox := source + "/outbox.json"
	if _, err := os.Stat(outbox); err == nil {
		importActivities(user, outbox, source)
	} else {
		ilog.Printf("skipping outbox.json!")
	}
}

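// importActivities reads an ActivityPub outbox file and saves each Create
// activity as a honk, copying Document attachments from the export and
// skipping posts that have already been imported.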
func importActivities(user *WhatAbout, filename, source string) {
	type Activity struct {
		Id     string
		Type   string
		To     interface{}
		Cc     []string
		Object ActivityObject
	}
	var outbox struct {
		OrderedItems []Activity
	}
	ilog.Println("Importing honks...")
	fd, err := os.Open(filename)
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&outbox)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()

	havetoot := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			return true
		}
		return false
	}

	re_tootid := regexp.MustCompile("[^/]+$")
	items := outbox.OrderedItems
	// reverse the outbox so the oldest posts are imported first
	for i, j := 0, len(items)-1; i < j; i, j = i+1, j-1 {
		items[i], items[j] = items[j], items[i]
	}
	for _, item := range items {
		toot := item
		if toot.Type != "Create" {
			continue
		}
		if strings.HasSuffix(toot.Id, "/activity") {
			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
		}
		tootid := re_tootid.FindString(toot.Id)
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
		if havetoot(xid) {
			continue
		}

		convoy := toot.Object.Context
		if convoy == "" {
			convoy = toot.Object.Conversation
		}
		var audience []string
		to, ok := toot.To.(string)
		if ok {
			audience = append(audience, to)
		} else {
			for _, t := range toot.To.([]interface{}) {
				audience = append(audience, t.(string))
			}
		}
		content := toot.Object.Content
		format := "html"
		if toot.Object.Source.MediaType == "text/markdown" {
			content = toot.Object.Source.Content
			format = "markdown"
		}
		audience = append(audience, toot.Cc...)
		honk := Honk{
			UserID:   user.ID,
			What:     "honk",
			Honker:   user.URL,
			XID:      xid,
			RID:      toot.Object.InReplyTo,
			Date:     toot.Object.Published,
			URL:      xid,
			Audience: audience,
			Noise:    content,
			Convoy:   convoy,
			Whofore:  2,
			Format:   format,
			Precis:   toot.Object.Summary,
		}
		if !loudandproud(honk.Audience) {
			honk.Whofore = 3
		}
		for _, att := range toot.Object.Attachment {
			var meta DonkMeta
			switch att.Type {
			case "Document":
				fname := fmt.Sprintf("%s/%s", source, att.Url)
				data, err := ioutil.ReadFile(fname)
				if err != nil {
					elog.Printf("error reading media for %s: %s", honk.XID, fname)
					continue
				}
				u := xfiltrate()
				name := att.Name
				desc := name
				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data, &meta)
				if err != nil {
					elog.Printf("error saving media: %s", fname)
					continue
				}
				donk := &Donk{
					FileID: fileid,
				}
				honk.Donks = append(honk.Donks, donk)
			}
		}
		for _, t := range toot.Object.Tag {
			switch t.Type {
			case "Hashtag":
				honk.Onts = append(honk.Onts, t.Name)
			}
		}
		savehonk(&honk)
	}
}

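// importMastotooters reads following_accounts.csv from a Mastodon export and
// adds each followed account as a honker.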
func importMastotooters(user *WhatAbout, source string) {
	ilog.Println("Importing honkers...")
	fd, err := os.Open(source + "/following_accounts.csv")
	if err != nil {
		elog.Fatal(err)
	}
	r := csv.NewReader(fd)
	data, err := r.ReadAll()
	if err != nil {
		elog.Fatal(err)
	}
	fd.Close()

	var meta HonkerMeta
	mj, _ := jsonify(&meta)

	for i, d := range data {
		if i == 0 {
			// skip the csv header row
			continue
		}
		url := "@" + d[0]
		name := ""
		flavor := "peep"
		combos := ""
		_, _, err := savehonker(user, url, name, flavor, combos, mj)
		if err != nil {
			elog.Printf("trouble with a honker: %s", err)
		}
	}
}

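// importTwitter reads tweet.js from a Twitter archive and saves each tweet as
// a honk, grouping replies with their parent tweet where possible and copying
// media from the tweet_media directory.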
func importTwitter(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Tweet struct {
		date   time.Time
		convoy string
		Tweet  struct {
			CreatedAt        string   `json:"created_at"`
			DisplayTextRange []string `json:"display_text_range"`
			EditInfo         struct {
				Initial struct {
					EditTweetIds   []string `json:"editTweetIds"`
					EditableUntil  string   `json:"editableUntil"`
					EditsRemaining string   `json:"editsRemaining"`
					IsEditEligible bool     `json:"isEditEligible"`
				} `json:"initial"`
			} `json:"edit_info"`
			Entities struct {
				Hashtags []struct {
					Indices []string `json:"indices"`
					Text    string   `json:"text"`
				} `json:"hashtags"`
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
				Symbols []interface{} `json:"symbols"`
				Urls    []struct {
					DisplayURL  string   `json:"display_url"`
					ExpandedURL string   `json:"expanded_url"`
					Indices     []string `json:"indices"`
					URL         string   `json:"url"`
				} `json:"urls"`
				UserMentions []interface{} `json:"user_mentions"`
			} `json:"entities"`
			ExtendedEntities struct {
				Media []struct {
					DisplayURL    string   `json:"display_url"`
					ExpandedURL   string   `json:"expanded_url"`
					ID            string   `json:"id"`
					IdStr         string   `json:"id_str"`
					Indices       []string `json:"indices"`
					MediaURL      string   `json:"media_url"`
					MediaUrlHttps string   `json:"media_url_https"`
					Sizes         struct {
						Large struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"large"`
						Medium struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"medium"`
						Small struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"small"`
						Thumb struct {
							H      string `json:"h"`
							Resize string `json:"resize"`
							W      string `json:"w"`
						} `json:"thumb"`
					} `json:"sizes"`
					Type string `json:"type"`
					URL  string `json:"url"`
				} `json:"media"`
			} `json:"extended_entities"`
			FavoriteCount        string `json:"favorite_count"`
			Favorited            bool   `json:"favorited"`
			FullText             string `json:"full_text"`
			ID                   string `json:"id"`
			IdStr                string `json:"id_str"`
			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
			InReplyToStatusID    string `json:"in_reply_to_status_id"`
			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
			InReplyToUserID      string `json:"in_reply_to_user_id"`
			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
			Lang                 string `json:"lang"`
			PossiblySensitive    bool   `json:"possibly_sensitive"`
			RetweetCount         string `json:"retweet_count"`
			Retweeted            bool   `json:"retweeted"`
			Source               string `json:"source"`
			Truncated            bool   `json:"truncated"`
		} `json:"tweet"`
	}

	var tweets []*Tweet
	fd, err := os.Open(source + "/tweet.js")
	if err != nil {
		elog.Fatal(err)
	}
	// skip past the 25 byte prefix: window.YTD.tweet.part0 =
	fd.Seek(25, 0)
	dec := json.NewDecoder(fd)
	err = dec.Decode(&tweets)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	tweetmap := make(map[string]*Tweet)
	for _, t := range tweets {
		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
		tweetmap[t.Tweet.IdStr] = t
	}
	sort.Slice(tweets, func(i, j int) bool {
		return tweets[i].date.Before(tweets[j].date)
	})
	havetwid := func(xid string) bool {
		var id int64
		row := stmtFindXonk.QueryRow(user.ID, xid)
		err := row.Scan(&id)
		if err == nil {
			log.Printf("id = %v", id)
			return true
		}
		return false
	}
	log.Printf("importing %v tweets", len(tweets))
	for _, t := range tweets {
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
		if havetwid(xid) {
			continue
		}

		what := "honk"
		noise := ""
		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
			t.convoy = parent.convoy
		} else {
			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
			if t.Tweet.InReplyToScreenName != "" {
				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
			}
		}
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     t.date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   t.convoy,
			Public:   true,
			Whofore:  2,
		}
		noise += t.Tweet.FullText
		// unbelievable
		noise = html.UnescapeString(noise)
		for _, r := range t.Tweet.Entities.Urls {
			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
		}
		for _, m := range t.Tweet.Entities.Media {
			var meta DonkMeta
			u := m.MediaURL
			idx := strings.LastIndexByte(u, '/')
			u = u[idx+1:]
			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.Tweet.IdStr, u)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data, &meta)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
			noise = strings.Replace(noise, m.URL, "", -1)
		}
		for _, ht := range t.Tweet.Entities.Hashtags {
			honk.Onts = append(honk.Onts, "#"+ht.Text)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

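// importInstagram reads content/posts_1.json from an Instagram export and
// saves each post as a honk with its first media item attached.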
func importInstagram(username, source string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}

	type Gram struct {
		Media []struct {
			URI      string
			Creation int64 `json:"creation_timestamp"`
			Title    string
		}
	}

	var grams []*Gram
	fd, err := os.Open(source + "/content/posts_1.json")
	if err != nil {
		elog.Fatal(err)
	}
	dec := json.NewDecoder(fd)
	err = dec.Decode(&grams)
	if err != nil {
		elog.Fatalf("error parsing json: %s", err)
	}
	fd.Close()
	log.Printf("importing %d grams", len(grams))
	sort.Slice(grams, func(i, j int) bool {
		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
	})
	for _, g0 := range grams {
		g := g0.Media[0]
		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
		what := "honk"
		noise := g.Title
		convoy := "data:,acoustichonkytonk-" + xfiltrate()
		date := time.Unix(g.Creation, 0)
		audience := []string{thewholeworld}
		honk := Honk{
			UserID:   user.ID,
			Username: user.Name,
			What:     what,
			Honker:   user.URL,
			XID:      xid,
			Date:     date,
			Format:   "markdown",
			Audience: audience,
			Convoy:   convoy,
			Public:   true,
			Whofore:  2,
		}
		{
			var meta DonkMeta
			u := xfiltrate()
			fname := fmt.Sprintf("%s/%s", source, g.URI)
			data, err := ioutil.ReadFile(fname)
			if err != nil {
				elog.Printf("error reading media: %s", fname)
				continue
			}
			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)

			fileid, err := savefile(u, u, newurl, "image/jpg", true, data, &meta)
			if err != nil {
				elog.Printf("error saving media: %s", fname)
				continue
			}
			donk := &Donk{
				FileID: fileid,
			}
			honk.Donks = append(honk.Donks, donk)
		}
		honk.Noise = noise
		err := savehonk(&honk)
		log.Printf("honk saved %v -> %v", xid, err)
	}
}

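// export writes the user's outbox, inbox, and attached media files to a zip
// archive.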
func export(username, file string) {
	user, err := butwhatabout(username)
	if err != nil {
		elog.Fatal(err)
	}
	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		elog.Fatal(err)
	}
	zd := zip.NewWriter(fd)
	donks := make(map[string]bool)
	{
		w, err := zd.Create("outbox.json")
		if err != nil {
			elog.Fatal("error creating outbox.json", err)
		}
		var jonks []junk.Junk
		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			noise := honk.Noise
			j, jo := jonkjonk(user, honk)
			if honk.Format == "markdown" {
				source := junk.New()
				source["mediaType"] = "text/markdown"
				source["content"] = noise
				jo["source"] = source
			}
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/outbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	{
		w, err := zd.Create("inbox.json")
		if err != nil {
			elog.Fatal("error creating inbox.json", err)
		}
		var jonks []junk.Junk
		rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
		honks := getsomehonks(rows, err)
		for _, honk := range honks {
			for _, donk := range honk.Donks {
				donk.URL = "media/" + donk.XID
				donks[donk.XID] = true
			}
			j, _ := jonkjonk(user, honk)
			jonks = append(jonks, j)
		}
		j := junk.New()
		j["@context"] = itiswhatitis
		j["id"] = user.URL + "/inbox"
		j["attributedTo"] = user.URL
		j["type"] = "OrderedCollection"
		j["totalItems"] = len(jonks)
		j["orderedItems"] = jonks
		j.Write(w)
	}
	zd.Create("media/")
	for donk := range donks {
		if donk == "" {
			continue
		}
		var media string
		var data []byte
		w, err := zd.Create("media/" + donk)
		if err != nil {
			elog.Printf("error creating %s: %s", donk, err)
			continue
		}
		row := stmtGetFileData.QueryRow(donk)
		err = row.Scan(&media, &data)
		if err != nil {
			elog.Printf("error scanning file %s: %s", donk, err)
			continue
		}
		w.Write(data)
	}
	zd.Close()
	fd.Close()
}