all repos — honk @ 5ca0f4998c8d02c9ac8a95cdf893c09b5d34a1b9

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"archive/zip"
 20	"encoding/csv"
 21	"encoding/json"
 22	"fmt"
 23	"html"
 24	"io/ioutil"
 25	"log"
 26	"os"
 27	"regexp"
 28	"sort"
 29	"strings"
 30	"time"
 31
 32	"humungus.tedunangst.com/r/webs/junk"
 33)
 34
 35func importMain(username, flavor, source string) {
 36	switch flavor {
 37	case "mastodon":
 38		importMastodon(username, source)
 39	case "honk":
 40		importHonk(username, source)
 41	case "twitter":
 42		importTwitter(username, source)
 43	case "instagram":
 44		importInstagram(username, source)
 45	default:
 46		elog.Fatal("unknown source flavor")
 47	}
 48}
 49
 50type ActivityObject struct {
 51	AttributedTo string
 52	Summary      string
 53	Content      string
 54	Source       struct {
 55		MediaType string
 56		Content   string
 57	}
 58	InReplyTo    string
 59	Conversation string
 60	Context      string
 61	Published    time.Time
 62	Tag          []struct {
 63		Type string
 64		Name string
 65	}
 66	Attachment []struct {
 67		Type      string
 68		MediaType string
 69		Url       string
 70		Name      string
 71	}
 72}
 73
 74type PlainActivityObject ActivityObject
 75
 76func (obj *ActivityObject) UnmarshalJSON(b []byte) error {
 77	p := (*PlainActivityObject)(obj)
 78	json.Unmarshal(b, p)
 79	return nil
 80}
 81
 82func importMastodon(username, source string) {
 83	user, err := butwhatabout(username)
 84	if err != nil {
 85		elog.Fatal(err)
 86	}
 87
 88	outbox := source + "/outbox.json"
 89	if _, err := os.Stat(outbox); err == nil {
 90		importActivities(user, outbox, source)
 91	} else {
 92		ilog.Printf("skipping outbox.json!")
 93	}
 94	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
 95		importMastotooters(user, source)
 96	} else {
 97		ilog.Printf("skipping following_accounts.csv!")
 98	}
 99}
100
101func importHonk(username, source string) {
102	user, err := butwhatabout(username)
103	if err != nil {
104		elog.Fatal(err)
105	}
106
107	outbox := source + "/outbox.json"
108	if _, err := os.Stat(outbox); err == nil {
109		importActivities(user, outbox, source)
110	} else {
111		ilog.Printf("skipping outbox.json!")
112	}
113}
114
115func importActivities(user *WhatAbout, filename, source string) {
116	type Activity struct {
117		Id     string
118		Type   string
119		To     interface{}
120		Cc     []string
121		Object ActivityObject
122	}
123	var outbox struct {
124		OrderedItems []Activity
125	}
126	ilog.Println("Importing honks...")
127	fd, err := os.Open(filename)
128	if err != nil {
129		elog.Fatal(err)
130	}
131	dec := json.NewDecoder(fd)
132	err = dec.Decode(&outbox)
133	if err != nil {
134		elog.Fatalf("error parsing json: %s", err)
135	}
136	fd.Close()
137
138	havetoot := func(xid string) bool {
139		var id int64
140		row := stmtFindXonk.QueryRow(user.ID, xid)
141		err := row.Scan(&id)
142		if err == nil {
143			return true
144		}
145		return false
146	}
147
148	re_tootid := regexp.MustCompile("[^/]+$")
149	items := outbox.OrderedItems
150	for i, j := 0, len(items)-1; i < j; i, j = i+1, j-1 {
151		items[i], items[j] = items[j], items[i]
152	}
153	for _, item := range items {
154		toot := item
155		if toot.Type != "Create" {
156			continue
157		}
158		if strings.HasSuffix(toot.Id, "/activity") {
159			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
160		}
161		tootid := re_tootid.FindString(toot.Id)
162		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
163		if havetoot(xid) {
164			continue
165		}
166
167		convoy := toot.Object.Context
168		if convoy == "" {
169			convoy = toot.Object.Conversation
170		}
171		var audience []string
172		to, ok := toot.To.(string)
173		if ok {
174			audience = append(audience, to)
175		} else {
176			for _, t := range toot.To.([]interface{}) {
177				audience = append(audience, t.(string))
178			}
179		}
180		content := toot.Object.Content
181		format := "html"
182		if toot.Object.Source.MediaType == "text/markdown" {
183			content = toot.Object.Source.Content
184			format = "markdown"
185		}
186		audience = append(audience, toot.Cc...)
187		honk := Honk{
188			UserID:   user.ID,
189			What:     "honk",
190			Honker:   user.URL,
191			XID:      xid,
192			RID:      toot.Object.InReplyTo,
193			Date:     toot.Object.Published,
194			URL:      xid,
195			Audience: audience,
196			Noise:    content,
197			Convoy:   convoy,
198			Whofore:  2,
199			Format:   format,
200			Precis:   toot.Object.Summary,
201		}
202		if !loudandproud(honk.Audience) {
203			honk.Whofore = 3
204		}
205		for _, att := range toot.Object.Attachment {
206			switch att.Type {
207			case "Document":
208				fname := fmt.Sprintf("%s/%s", source, att.Url)
209				data, err := ioutil.ReadFile(fname)
210				if err != nil {
211					elog.Printf("error reading media for %s: %s", honk.XID, fname)
212					continue
213				}
214				u := xfiltrate()
215				name := att.Name
216				desc := name
217				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
218				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
219				if err != nil {
220					elog.Printf("error saving media: %s", fname)
221					continue
222				}
223				donk := &Donk{
224					FileID: fileid,
225				}
226				honk.Donks = append(honk.Donks, donk)
227			}
228		}
229		for _, t := range toot.Object.Tag {
230			switch t.Type {
231			case "Hashtag":
232				honk.Onts = append(honk.Onts, t.Name)
233			}
234		}
235		savehonk(&honk)
236	}
237}
238
239func importMastotooters(user *WhatAbout, source string) {
240	ilog.Println("Importing honkers...")
241	fd, err := os.Open(source + "/following_accounts.csv")
242	if err != nil {
243		elog.Fatal(err)
244	}
245	r := csv.NewReader(fd)
246	data, err := r.ReadAll()
247	if err != nil {
248		elog.Fatal(err)
249	}
250	fd.Close()
251
252	var meta HonkerMeta
253	mj, _ := jsonify(&meta)
254
255	for i, d := range data {
256		if i == 0 {
257			continue
258		}
259		url := "@" + d[0]
260		name := ""
261		flavor := "peep"
262		combos := ""
263		_, err := savehonker(user, url, name, flavor, combos, mj)
264		if err != nil {
265			elog.Printf("trouble with a honker: %s", err)
266		}
267	}
268}
269
270func importTwitter(username, source string) {
271	user, err := butwhatabout(username)
272	if err != nil {
273		elog.Fatal(err)
274	}
275
276	type Tweet struct {
277		date   time.Time
278		convoy string
279		Tweet  struct {
280			CreatedAt        string   `json:"created_at"`
281			DisplayTextRange []string `json:"display_text_range"`
282			EditInfo         struct {
283				Initial struct {
284					EditTweetIds   []string `json:"editTweetIds"`
285					EditableUntil  string   `json:"editableUntil"`
286					EditsRemaining string   `json:"editsRemaining"`
287					IsEditEligible bool     `json:"isEditEligible"`
288				} `json:"initial"`
289			} `json:"edit_info"`
290			Entities struct {
291				Hashtags []struct {
292					Indices []string `json:"indices"`
293					Text    string   `json:"text"`
294				} `json:"hashtags"`
295				Media []struct {
296					DisplayURL    string   `json:"display_url"`
297					ExpandedURL   string   `json:"expanded_url"`
298					ID            string   `json:"id"`
299					IdStr         string   `json:"id_str"`
300					Indices       []string `json:"indices"`
301					MediaURL      string   `json:"media_url"`
302					MediaUrlHttps string   `json:"media_url_https"`
303					Sizes         struct {
304						Large struct {
305							H      string `json:"h"`
306							Resize string `json:"resize"`
307							W      string `json:"w"`
308						} `json:"large"`
309						Medium struct {
310							H      string `json:"h"`
311							Resize string `json:"resize"`
312							W      string `json:"w"`
313						} `json:"medium"`
314						Small struct {
315							H      string `json:"h"`
316							Resize string `json:"resize"`
317							W      string `json:"w"`
318						} `json:"small"`
319						Thumb struct {
320							H      string `json:"h"`
321							Resize string `json:"resize"`
322							W      string `json:"w"`
323						} `json:"thumb"`
324					} `json:"sizes"`
325					Type string `json:"type"`
326					URL  string `json:"url"`
327				} `json:"media"`
328				Symbols []interface{} `json:"symbols"`
329				Urls    []struct {
330					DisplayURL  string   `json:"display_url"`
331					ExpandedURL string   `json:"expanded_url"`
332					Indices     []string `json:"indices"`
333					URL         string   `json:"url"`
334				} `json:"urls"`
335				UserMentions []interface{} `json:"user_mentions"`
336			} `json:"entities"`
337			ExtendedEntities struct {
338				Media []struct {
339					DisplayURL    string   `json:"display_url"`
340					ExpandedURL   string   `json:"expanded_url"`
341					ID            string   `json:"id"`
342					IdStr         string   `json:"id_str"`
343					Indices       []string `json:"indices"`
344					MediaURL      string   `json:"media_url"`
345					MediaUrlHttps string   `json:"media_url_https"`
346					Sizes         struct {
347						Large struct {
348							H      string `json:"h"`
349							Resize string `json:"resize"`
350							W      string `json:"w"`
351						} `json:"large"`
352						Medium struct {
353							H      string `json:"h"`
354							Resize string `json:"resize"`
355							W      string `json:"w"`
356						} `json:"medium"`
357						Small struct {
358							H      string `json:"h"`
359							Resize string `json:"resize"`
360							W      string `json:"w"`
361						} `json:"small"`
362						Thumb struct {
363							H      string `json:"h"`
364							Resize string `json:"resize"`
365							W      string `json:"w"`
366						} `json:"thumb"`
367					} `json:"sizes"`
368					Type string `json:"type"`
369					URL  string `json:"url"`
370				} `json:"media"`
371			} `json:"extended_entities"`
372			FavoriteCount        string `json:"favorite_count"`
373			Favorited            bool   `json:"favorited"`
374			FullText             string `json:"full_text"`
375			ID                   string `json:"id"`
376			IdStr                string `json:"id_str"`
377			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
378			InReplyToStatusID    string `json:"in_reply_to_status_id"`
379			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
380			InReplyToUserID      string `json:"in_reply_to_user_id"`
381			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
382			Lang                 string `json:"lang"`
383			PossiblySensitive    bool   `json:"possibly_sensitive"`
384			RetweetCount         string `json:"retweet_count"`
385			Retweeted            bool   `json:"retweeted"`
386			Source               string `json:"source"`
387			Truncated            bool   `json:"truncated"`
388		} `json:"tweet"`
389	}
390
391	var tweets []*Tweet
392	fd, err := os.Open(source + "/tweets.js")
393	if err != nil {
394		elog.Fatal(err)
395	}
396	// skip past window.YTD.tweet.part0 =
397	fd.Seek(25, 0)
398	dec := json.NewDecoder(fd)
399	err = dec.Decode(&tweets)
400	if err != nil {
401		elog.Fatalf("error parsing json: %s", err)
402	}
403	fd.Close()
404	tweetmap := make(map[string]*Tweet)
405	for _, t := range tweets {
406		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
407		tweetmap[t.Tweet.IdStr] = t
408	}
409	sort.Slice(tweets, func(i, j int) bool {
410		return tweets[i].date.Before(tweets[j].date)
411	})
412	havetwid := func(xid string) bool {
413		var id int64
414		row := stmtFindXonk.QueryRow(user.ID, xid)
415		err := row.Scan(&id)
416		if err == nil {
417			log.Printf("id = %v", id)
418			return true
419		}
420		return false
421	}
422	log.Printf("importing %v tweets", len(tweets))
423	for _, t := range tweets {
424		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
425		if havetwid(xid) {
426			continue
427		}
428
429		what := "honk"
430		noise := ""
431		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
432			t.convoy = parent.convoy
433		} else {
434			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
435			if t.Tweet.InReplyToScreenName != "" {
436				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
437					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
438			}
439		}
440		audience := []string{thewholeworld}
441		honk := Honk{
442			UserID:   user.ID,
443			Username: user.Name,
444			What:     what,
445			Honker:   user.URL,
446			XID:      xid,
447			Date:     t.date,
448			Format:   "markdown",
449			Audience: audience,
450			Convoy:   t.convoy,
451			Public:   true,
452			Whofore:  2,
453		}
454		noise += t.Tweet.FullText
455		// unbelievable
456		noise = html.UnescapeString(noise)
457		for _, r := range t.Tweet.Entities.Urls {
458			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
459		}
460		for _, m := range t.Tweet.Entities.Media {
461			u := m.MediaURL
462			idx := strings.LastIndexByte(u, '/')
463			u = u[idx+1:]
464			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
465			data, err := ioutil.ReadFile(fname)
466			if err != nil {
467				elog.Printf("error reading media: %s", fname)
468				continue
469			}
470			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
471
472			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
473			if err != nil {
474				elog.Printf("error saving media: %s", fname)
475				continue
476			}
477			donk := &Donk{
478				FileID: fileid,
479			}
480			honk.Donks = append(honk.Donks, donk)
481			noise = strings.Replace(noise, m.URL, "", -1)
482		}
483		for _, ht := range t.Tweet.Entities.Hashtags {
484			honk.Onts = append(honk.Onts, "#"+ht.Text)
485		}
486		honk.Noise = noise
487		err := savehonk(&honk)
488		log.Printf("honk saved %v -> %v", xid, err)
489	}
490}
491
492func importInstagram(username, source string) {
493	user, err := butwhatabout(username)
494	if err != nil {
495		elog.Fatal(err)
496	}
497
498	type Gram struct {
499		Media []struct {
500			URI      string
501			Creation int64 `json:"creation_timestamp"`
502			Title    string
503		}
504	}
505
506	var grams []*Gram
507	fd, err := os.Open(source + "/content/posts_1.json")
508	if err != nil {
509		elog.Fatal(err)
510	}
511	dec := json.NewDecoder(fd)
512	err = dec.Decode(&grams)
513	if err != nil {
514		elog.Fatalf("error parsing json: %s", err)
515	}
516	fd.Close()
517	log.Printf("importing %d grams", len(grams))
518	sort.Slice(grams, func(i, j int) bool {
519		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
520	})
521	for _, g0 := range grams {
522		g := g0.Media[0]
523		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
524		what := "honk"
525		noise := g.Title
526		convoy := "data:,acoustichonkytonk-" + xfiltrate()
527		date := time.Unix(g.Creation, 0)
528		audience := []string{thewholeworld}
529		honk := Honk{
530			UserID:   user.ID,
531			Username: user.Name,
532			What:     what,
533			Honker:   user.URL,
534			XID:      xid,
535			Date:     date,
536			Format:   "markdown",
537			Audience: audience,
538			Convoy:   convoy,
539			Public:   true,
540			Whofore:  2,
541		}
542		{
543			u := xfiltrate()
544			fname := fmt.Sprintf("%s/%s", source, g.URI)
545			data, err := ioutil.ReadFile(fname)
546			if err != nil {
547				elog.Printf("error reading media: %s", fname)
548				continue
549			}
550			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
551
552			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
553			if err != nil {
554				elog.Printf("error saving media: %s", fname)
555				continue
556			}
557			donk := &Donk{
558				FileID: fileid,
559			}
560			honk.Donks = append(honk.Donks, donk)
561		}
562		honk.Noise = noise
563		err := savehonk(&honk)
564		log.Printf("honk saved %v -> %v", xid, err)
565	}
566}
567
568func export(username, file string) {
569	user, err := butwhatabout(username)
570	if err != nil {
571		elog.Fatal(err)
572	}
573	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
574	if err != nil {
575		elog.Fatal(err)
576	}
577	zd := zip.NewWriter(fd)
578	donks := make(map[string]bool)
579	{
580		w, err := zd.Create("outbox.json")
581		if err != nil {
582			elog.Fatal("error creating outbox.json", err)
583		}
584		var jonks []junk.Junk
585		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
586		honks := getsomehonks(rows, err)
587		for _, honk := range honks {
588			for _, donk := range honk.Donks {
589				donk.URL = "media/" + donk.XID
590				donks[donk.XID] = true
591			}
592			noise := honk.Noise
593			j, jo := jonkjonk(user, honk)
594			if honk.Format == "markdown" {
595				source := junk.New()
596				source["mediaType"] = "text/markdown"
597				source["content"] = noise
598				jo["source"] = source
599			}
600			jonks = append(jonks, j)
601		}
602		j := junk.New()
603		j["@context"] = itiswhatitis
604		j["id"] = user.URL + "/outbox"
605		j["attributedTo"] = user.URL
606		j["type"] = "OrderedCollection"
607		j["totalItems"] = len(jonks)
608		j["orderedItems"] = jonks
609		j.Write(w)
610	}
611	{
612		w, err := zd.Create("inbox.json")
613		if err != nil {
614			elog.Fatal("error creating inbox.json", err)
615		}
616		var jonks []junk.Junk
617		rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
618		honks := getsomehonks(rows, err)
619		for _, honk := range honks {
620			for _, donk := range honk.Donks {
621				donk.URL = "media/" + donk.XID
622				donks[donk.XID] = true
623			}
624			j, _ := jonkjonk(user, honk)
625			jonks = append(jonks, j)
626		}
627		j := junk.New()
628		j["@context"] = itiswhatitis
629		j["id"] = user.URL + "/inbox"
630		j["attributedTo"] = user.URL
631		j["type"] = "OrderedCollection"
632		j["totalItems"] = len(jonks)
633		j["orderedItems"] = jonks
634		j.Write(w)
635	}
636	zd.Create("media/")
637	for donk := range donks {
638		if donk == "" {
639			continue
640		}
641		var media string
642		var data []byte
643		w, err := zd.Create("media/" + donk)
644		if err != nil {
645			elog.Printf("error creating %s: %s", donk, err)
646			continue
647		}
648		row := stmtGetFileData.QueryRow(donk)
649		err = row.Scan(&media, &data)
650		if err != nil {
651			elog.Printf("error scanning file %s: %s", donk, err)
652			continue
653		}
654		w.Write(data)
655	}
656	zd.Close()
657	fd.Close()
658}