all repos — honk @ 891c8562c955782e663693f1de3d802a0d7ce758

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"archive/zip"
 20	"encoding/csv"
 21	"encoding/json"
 22	"fmt"
 23	"html"
 24	"io/ioutil"
 25	"log"
 26	"os"
 27	"regexp"
 28	"sort"
 29	"strings"
 30	"time"
 31
 32	"humungus.tedunangst.com/r/webs/junk"
 33)
 34
 35func importMain(username, flavor, source string) {
 36	switch flavor {
 37	case "mastodon":
 38		importMastodon(username, source)
 39	case "twitter":
 40		importTwitter(username, source)
 41	case "instagram":
 42		importInstagram(username, source)
 43	default:
 44		elog.Fatal("unknown source flavor")
 45	}
 46}
 47
 48type TootObject struct {
 49	Summary      string
 50	Content      string
 51	InReplyTo    string
 52	Conversation string
 53	Published    time.Time
 54	Tag          []struct {
 55		Type string
 56		Name string
 57	}
 58	Attachment []struct {
 59		Type      string
 60		MediaType string
 61		Url       string
 62		Name      string
 63	}
 64}
 65
 66type PlainTootObject TootObject
 67
 68func (obj *TootObject) UnmarshalJSON(b []byte) error {
 69	p := (*PlainTootObject)(obj)
 70	json.Unmarshal(b, p)
 71	return nil
 72}
 73
 74func importMastodon(username, source string) {
 75	user, err := butwhatabout(username)
 76	if err != nil {
 77		elog.Fatal(err)
 78	}
 79
 80	if _, err := os.Stat(source + "/outbox.json"); err == nil {
 81		importMastotoots(user, source)
 82	} else {
 83		ilog.Printf("skipping outbox.json!")
 84	}
 85	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
 86		importMastotooters(user, source)
 87	} else {
 88		ilog.Printf("skipping following_accounts.csv!")
 89	}
 90}
 91
 92func importMastotoots(user *WhatAbout, source string) {
 93	type Toot struct {
 94		Id     string
 95		Type   string
 96		To     []string
 97		Cc     []string
 98		Object TootObject
 99	}
100	var outbox struct {
101		OrderedItems []Toot
102	}
103	ilog.Println("Importing honks...")
104	fd, err := os.Open(source + "/outbox.json")
105	if err != nil {
106		elog.Fatal(err)
107	}
108	dec := json.NewDecoder(fd)
109	err = dec.Decode(&outbox)
110	if err != nil {
111		elog.Fatalf("error parsing json: %s", err)
112	}
113	fd.Close()
114
115	havetoot := func(xid string) bool {
116		var id int64
117		row := stmtFindXonk.QueryRow(user.ID, xid)
118		err := row.Scan(&id)
119		if err == nil {
120			return true
121		}
122		return false
123	}
124
125	re_tootid := regexp.MustCompile("[^/]+$")
126	for _, item := range outbox.OrderedItems {
127		toot := item
128		if toot.Type != "Create" {
129			continue
130		}
131		if strings.HasSuffix(toot.Id, "/activity") {
132			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
133		}
134		tootid := re_tootid.FindString(toot.Id)
135		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
136		if havetoot(xid) {
137			continue
138		}
139		honk := Honk{
140			UserID:   user.ID,
141			What:     "honk",
142			Honker:   user.URL,
143			XID:      xid,
144			RID:      toot.Object.InReplyTo,
145			Date:     toot.Object.Published,
146			URL:      xid,
147			Audience: append(toot.To, toot.Cc...),
148			Noise:    toot.Object.Content,
149			Convoy:   toot.Object.Conversation,
150			Whofore:  2,
151			Format:   "html",
152			Precis:   toot.Object.Summary,
153		}
154		if !loudandproud(honk.Audience) {
155			honk.Whofore = 3
156		}
157		for _, att := range toot.Object.Attachment {
158			switch att.Type {
159			case "Document":
160				fname := fmt.Sprintf("%s/%s", source, att.Url)
161				data, err := ioutil.ReadFile(fname)
162				if err != nil {
163					elog.Printf("error reading media: %s", fname)
164					continue
165				}
166				u := xfiltrate()
167				name := att.Name
168				desc := name
169				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
170				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
171				if err != nil {
172					elog.Printf("error saving media: %s", fname)
173					continue
174				}
175				donk := &Donk{
176					FileID: fileid,
177				}
178				honk.Donks = append(honk.Donks, donk)
179			}
180		}
181		for _, t := range toot.Object.Tag {
182			switch t.Type {
183			case "Hashtag":
184				honk.Onts = append(honk.Onts, t.Name)
185			}
186		}
187		savehonk(&honk)
188	}
189}
190
191func importMastotooters(user *WhatAbout, source string) {
192	ilog.Println("Importing honkers...")
193	fd, err := os.Open(source + "/following_accounts.csv")
194	if err != nil {
195		elog.Fatal(err)
196	}
197	r := csv.NewReader(fd)
198	data, err := r.ReadAll()
199	if err != nil {
200		elog.Fatal(err)
201	}
202	fd.Close()
203
204	var meta HonkerMeta
205	mj, _ := jsonify(&meta)
206
207	for i, d := range data {
208		if i == 0 {
209			continue
210		}
211		url := "@" + d[0]
212		name := ""
213		flavor := "peep"
214		combos := ""
215		_, err := savehonker(user, url, name, flavor, combos, mj)
216		if err != nil {
217			elog.Printf("trouble with a honker: %s", err)
218		}
219	}
220}
221
222func importTwitter(username, source string) {
223	user, err := butwhatabout(username)
224	if err != nil {
225		elog.Fatal(err)
226	}
227
228	type Tweet struct {
229		date   time.Time
230		convoy string
231		Tweet  struct {
232			CreatedAt        string   `json:"created_at"`
233			DisplayTextRange []string `json:"display_text_range"`
234			EditInfo         struct {
235				Initial struct {
236					EditTweetIds   []string `json:"editTweetIds"`
237					EditableUntil  string   `json:"editableUntil"`
238					EditsRemaining string   `json:"editsRemaining"`
239					IsEditEligible bool     `json:"isEditEligible"`
240				} `json:"initial"`
241			} `json:"edit_info"`
242			Entities struct {
243				Hashtags []struct {
244					Indices []string `json:"indices"`
245					Text    string   `json:"text"`
246				} `json:"hashtags"`
247				Media []struct {
248					DisplayURL    string   `json:"display_url"`
249					ExpandedURL   string   `json:"expanded_url"`
250					ID            string   `json:"id"`
251					IdStr         string   `json:"id_str"`
252					Indices       []string `json:"indices"`
253					MediaURL      string   `json:"media_url"`
254					MediaUrlHttps string   `json:"media_url_https"`
255					Sizes         struct {
256						Large struct {
257							H      string `json:"h"`
258							Resize string `json:"resize"`
259							W      string `json:"w"`
260						} `json:"large"`
261						Medium struct {
262							H      string `json:"h"`
263							Resize string `json:"resize"`
264							W      string `json:"w"`
265						} `json:"medium"`
266						Small struct {
267							H      string `json:"h"`
268							Resize string `json:"resize"`
269							W      string `json:"w"`
270						} `json:"small"`
271						Thumb struct {
272							H      string `json:"h"`
273							Resize string `json:"resize"`
274							W      string `json:"w"`
275						} `json:"thumb"`
276					} `json:"sizes"`
277					Type string `json:"type"`
278					URL  string `json:"url"`
279				} `json:"media"`
280				Symbols []interface{} `json:"symbols"`
281				Urls    []struct {
282					DisplayURL  string   `json:"display_url"`
283					ExpandedURL string   `json:"expanded_url"`
284					Indices     []string `json:"indices"`
285					URL         string   `json:"url"`
286				} `json:"urls"`
287				UserMentions []interface{} `json:"user_mentions"`
288			} `json:"entities"`
289			ExtendedEntities struct {
290				Media []struct {
291					DisplayURL    string   `json:"display_url"`
292					ExpandedURL   string   `json:"expanded_url"`
293					ID            string   `json:"id"`
294					IdStr         string   `json:"id_str"`
295					Indices       []string `json:"indices"`
296					MediaURL      string   `json:"media_url"`
297					MediaUrlHttps string   `json:"media_url_https"`
298					Sizes         struct {
299						Large struct {
300							H      string `json:"h"`
301							Resize string `json:"resize"`
302							W      string `json:"w"`
303						} `json:"large"`
304						Medium struct {
305							H      string `json:"h"`
306							Resize string `json:"resize"`
307							W      string `json:"w"`
308						} `json:"medium"`
309						Small struct {
310							H      string `json:"h"`
311							Resize string `json:"resize"`
312							W      string `json:"w"`
313						} `json:"small"`
314						Thumb struct {
315							H      string `json:"h"`
316							Resize string `json:"resize"`
317							W      string `json:"w"`
318						} `json:"thumb"`
319					} `json:"sizes"`
320					Type string `json:"type"`
321					URL  string `json:"url"`
322				} `json:"media"`
323			} `json:"extended_entities"`
324			FavoriteCount        string `json:"favorite_count"`
325			Favorited            bool   `json:"favorited"`
326			FullText             string `json:"full_text"`
327			ID                   string `json:"id"`
328			IdStr                string `json:"id_str"`
329			InReplyToScreenName  string `json:"in_reply_to_screen_name"`
330			InReplyToStatusID    string `json:"in_reply_to_status_id"`
331			InReplyToStatusIdStr string `json:"in_reply_to_status_id_str"`
332			InReplyToUserID      string `json:"in_reply_to_user_id"`
333			InReplyToUserIdStr   string `json:"in_reply_to_user_id_str"`
334			Lang                 string `json:"lang"`
335			PossiblySensitive    bool   `json:"possibly_sensitive"`
336			RetweetCount         string `json:"retweet_count"`
337			Retweeted            bool   `json:"retweeted"`
338			Source               string `json:"source"`
339			Truncated            bool   `json:"truncated"`
340		} `json:"tweet"`
341	}
342
343	var tweets []*Tweet
344	fd, err := os.Open(source + "/tweets.js")
345	if err != nil {
346		elog.Fatal(err)
347	}
348	// skip past window.YTD.tweet.part0 =
349	fd.Seek(25, 0)
350	dec := json.NewDecoder(fd)
351	err = dec.Decode(&tweets)
352	if err != nil {
353		elog.Fatalf("error parsing json: %s", err)
354	}
355	fd.Close()
356	tweetmap := make(map[string]*Tweet)
357	for _, t := range tweets {
358		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Tweet.CreatedAt)
359		tweetmap[t.Tweet.IdStr] = t
360	}
361	sort.Slice(tweets, func(i, j int) bool {
362		return tweets[i].date.Before(tweets[j].date)
363	})
364	havetwid := func(xid string) bool {
365		var id int64
366		row := stmtFindXonk.QueryRow(user.ID, xid)
367		err := row.Scan(&id)
368		if err == nil {
369			log.Printf("id = %v", id)
370			return true
371		}
372		return false
373	}
374	log.Printf("importing %v tweets", len(tweets))
375	for _, t := range tweets {
376		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.Tweet.IdStr)
377		if havetwid(xid) {
378			continue
379		}
380
381		what := "honk"
382		noise := ""
383		if parent := tweetmap[t.Tweet.InReplyToStatusID]; parent != nil {
384			t.convoy = parent.convoy
385		} else {
386			t.convoy = "data:,acoustichonkytonk-" + t.Tweet.IdStr
387			if t.Tweet.InReplyToScreenName != "" {
388				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
389					t.Tweet.InReplyToScreenName, t.Tweet.InReplyToStatusID)
390			}
391		}
392		audience := []string{thewholeworld}
393		honk := Honk{
394			UserID:   user.ID,
395			Username: user.Name,
396			What:     what,
397			Honker:   user.URL,
398			XID:      xid,
399			Date:     t.date,
400			Format:   "markdown",
401			Audience: audience,
402			Convoy:   t.convoy,
403			Public:   true,
404			Whofore:  2,
405		}
406		noise += t.Tweet.FullText
407		// unbelievable
408		noise = html.UnescapeString(noise)
409		for _, r := range t.Tweet.Entities.Urls {
410			noise = strings.Replace(noise, r.URL, r.ExpandedURL, -1)
411		}
412		for _, m := range t.Tweet.Entities.Media {
413			u := m.MediaURL
414			idx := strings.LastIndexByte(u, '/')
415			u = u[idx+1:]
416			fname := fmt.Sprintf("%s/tweets_media/%s-%s", source, t.Tweet.IdStr, u)
417			data, err := ioutil.ReadFile(fname)
418			if err != nil {
419				elog.Printf("error reading media: %s", fname)
420				continue
421			}
422			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
423
424			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
425			if err != nil {
426				elog.Printf("error saving media: %s", fname)
427				continue
428			}
429			donk := &Donk{
430				FileID: fileid,
431			}
432			honk.Donks = append(honk.Donks, donk)
433			noise = strings.Replace(noise, m.URL, "", -1)
434		}
435		for _, ht := range t.Tweet.Entities.Hashtags {
436			honk.Onts = append(honk.Onts, "#"+ht.Text)
437		}
438		honk.Noise = noise
439		err := savehonk(&honk)
440		log.Printf("honk saved %v -> %v", xid, err)
441	}
442}
443
444func importInstagram(username, source string) {
445	user, err := butwhatabout(username)
446	if err != nil {
447		elog.Fatal(err)
448	}
449
450	type Gram struct {
451		Media []struct {
452			URI      string
453			Creation int64 `json:"creation_timestamp"`
454			Title    string
455		}
456	}
457
458	var grams []*Gram
459	fd, err := os.Open(source + "/content/posts_1.json")
460	if err != nil {
461		elog.Fatal(err)
462	}
463	dec := json.NewDecoder(fd)
464	err = dec.Decode(&grams)
465	if err != nil {
466		elog.Fatalf("error parsing json: %s", err)
467	}
468	fd.Close()
469	log.Printf("importing %d grams", len(grams))
470	sort.Slice(grams, func(i, j int) bool {
471		return grams[i].Media[0].Creation < grams[j].Media[0].Creation
472	})
473	for _, g0 := range grams {
474		g := g0.Media[0]
475		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, xfiltrate())
476		what := "honk"
477		noise := g.Title
478		convoy := "data:,acoustichonkytonk-" + xfiltrate()
479		date := time.Unix(g.Creation, 0)
480		audience := []string{thewholeworld}
481		honk := Honk{
482			UserID:   user.ID,
483			Username: user.Name,
484			What:     what,
485			Honker:   user.URL,
486			XID:      xid,
487			Date:     date,
488			Format:   "markdown",
489			Audience: audience,
490			Convoy:   convoy,
491			Public:   true,
492			Whofore:  2,
493		}
494		{
495			u := xfiltrate()
496			fname := fmt.Sprintf("%s/%s", source, g.URI)
497			data, err := ioutil.ReadFile(fname)
498			if err != nil {
499				elog.Printf("error reading media: %s", fname)
500				continue
501			}
502			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
503
504			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
505			if err != nil {
506				elog.Printf("error saving media: %s", fname)
507				continue
508			}
509			donk := &Donk{
510				FileID: fileid,
511			}
512			honk.Donks = append(honk.Donks, donk)
513		}
514		honk.Noise = noise
515		err := savehonk(&honk)
516		log.Printf("honk saved %v -> %v", xid, err)
517	}
518}
519
520func export(username, file string) {
521	user, err := butwhatabout(username)
522	if err != nil {
523		elog.Fatal(err)
524	}
525	fd, err := os.OpenFile(file, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
526	if err != nil {
527		elog.Fatal(err)
528	}
529	zd := zip.NewWriter(fd)
530	donks := make(map[string]bool)
531	{
532		w, err := zd.Create("outbox.json")
533		if err != nil {
534			elog.Fatal(err)
535		}
536		var jonks []junk.Junk
537		rows, err := stmtUserHonks.Query(0, 3, user.Name, "0", 1234567)
538		honks := getsomehonks(rows, err)
539		for _, honk := range honks {
540			noise := honk.Noise
541			j, jo := jonkjonk(user, honk)
542			if honk.Format == "markdown" {
543				jo["source"] = noise
544			}
545			for _, donk := range honk.Donks {
546				donks[donk.XID] = true
547			}
548			jonks = append(jonks, j)
549		}
550		j := junk.New()
551		j["@context"] = itiswhatitis
552		j["id"] = user.URL + "/outbox"
553		j["attributedTo"] = user.URL
554		j["type"] = "OrderedCollection"
555		j["totalItems"] = len(jonks)
556		j["orderedItems"] = jonks
557		j.Write(w)
558	}
559	{
560		w, err := zd.Create("inbox.json")
561		if err != nil {
562			elog.Fatal(err)
563		}
564		var jonks []junk.Junk
565		rows, err := stmtHonksForMe.Query(0, user.ID, "0", user.ID, 1234567)
566		honks := getsomehonks(rows, err)
567		for _, honk := range honks {
568			j, _ := jonkjonk(user, honk)
569			for _, donk := range honk.Donks {
570				donks[donk.XID] = true
571			}
572			jonks = append(jonks, j)
573		}
574		j := junk.New()
575		j["@context"] = itiswhatitis
576		j["id"] = user.URL + "/inbox"
577		j["attributedTo"] = user.URL
578		j["type"] = "OrderedCollection"
579		j["totalItems"] = len(jonks)
580		j["orderedItems"] = jonks
581		j.Write(w)
582	}
583	zd.Create("media/")
584	for donk := range donks {
585		var media string
586		var data []byte
587		w, err := zd.Create("media/" + donk)
588		if err != nil {
589			elog.Fatal(err)
590		}
591		row := stmtGetFileData.QueryRow(donk)
592		err = row.Scan(&media, &data)
593		if err != nil {
594			elog.Fatal(err)
595		}
596		w.Write(data)
597	}
598	zd.Close()
599	fd.Close()
600}