all repos — honk @ a6e299aa8342c31214f2d1ab3b71a22854ba1c53

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/csv"
 20	"encoding/json"
 21	"fmt"
 22	"html"
 23	"io/ioutil"
 24	"os"
 25	"regexp"
 26	"sort"
 27	"strings"
 28	"time"
 29)
 30
 31func importMain(username, flavor, source string) {
 32	switch flavor {
 33	case "mastodon":
 34		importMastodon(username, source)
 35	case "twitter":
 36		importTwitter(username, source)
 37	default:
 38		elog.Fatal("unknown source flavor")
 39	}
 40}
 41
 42type TootObject struct {
 43	Summary      string
 44	Content      string
 45	InReplyTo    string
 46	Conversation string
 47	Published    time.Time
 48	Tag          []struct {
 49		Type string
 50		Name string
 51	}
 52	Attachment []struct {
 53		Type      string
 54		MediaType string
 55		Url       string
 56		Name      string
 57	}
 58}
 59
 60type PlainTootObject TootObject
 61
 62func (obj *TootObject) UnmarshalJSON(b []byte) error {
 63	p := (*PlainTootObject)(obj)
 64	json.Unmarshal(b, p)
 65	return nil
 66}
 67
 68func importMastodon(username, source string) {
 69	user, err := butwhatabout(username)
 70	if err != nil {
 71		elog.Fatal(err)
 72	}
 73
 74	if _, err := os.Stat(source + "/outbox.json"); err == nil {
 75		importMastotoots(user, source)
 76	} else {
 77		ilog.Printf("skipping outbox.json!")
 78	}
 79	if _, err := os.Stat(source + "/following_accounts.csv"); err == nil {
 80		importMastotooters(user, source)
 81	} else {
 82		ilog.Printf("skipping following_accounts.csv!")
 83	}
 84}
 85
 86func importMastotoots(user *WhatAbout, source string) {
 87	type Toot struct {
 88		Id     string
 89		Type   string
 90		To     []string
 91		Cc     []string
 92		Object TootObject
 93	}
 94	var outbox struct {
 95		OrderedItems []Toot
 96	}
 97	ilog.Println("Importing honks...")
 98	fd, err := os.Open(source + "/outbox.json")
 99	if err != nil {
100		elog.Fatal(err)
101	}
102	dec := json.NewDecoder(fd)
103	err = dec.Decode(&outbox)
104	if err != nil {
105		elog.Fatalf("error parsing json: %s", err)
106	}
107	fd.Close()
108
109	havetoot := func(xid string) bool {
110		var id int64
111		row := stmtFindXonk.QueryRow(user.ID, xid)
112		err := row.Scan(&id)
113		if err == nil {
114			return true
115		}
116		return false
117	}
118
119	re_tootid := regexp.MustCompile("[^/]+$")
120	for _, item := range outbox.OrderedItems {
121		toot := item
122		if toot.Type != "Create" {
123			continue
124		}
125		if strings.HasSuffix(toot.Id, "/activity") {
126			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
127		}
128		tootid := re_tootid.FindString(toot.Id)
129		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
130		if havetoot(xid) {
131			continue
132		}
133		honk := Honk{
134			UserID:   user.ID,
135			What:     "honk",
136			Honker:   user.URL,
137			XID:      xid,
138			RID:      toot.Object.InReplyTo,
139			Date:     toot.Object.Published,
140			URL:      xid,
141			Audience: append(toot.To, toot.Cc...),
142			Noise:    toot.Object.Content,
143			Convoy:   toot.Object.Conversation,
144			Whofore:  2,
145			Format:   "html",
146			Precis:   toot.Object.Summary,
147		}
148		if honk.RID != "" {
149			honk.What = "tonk"
150		}
151		if !loudandproud(honk.Audience) {
152			honk.Whofore = 3
153		}
154		for _, att := range toot.Object.Attachment {
155			switch att.Type {
156			case "Document":
157				fname := fmt.Sprintf("%s/%s", source, att.Url)
158				data, err := ioutil.ReadFile(fname)
159				if err != nil {
160					elog.Printf("error reading media: %s", fname)
161					continue
162				}
163				u := xfiltrate()
164				name := att.Name
165				desc := name
166				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
167				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
168				if err != nil {
169					elog.Printf("error saving media: %s", fname)
170					continue
171				}
172				donk := &Donk{
173					FileID: fileid,
174				}
175				honk.Donks = append(honk.Donks, donk)
176			}
177		}
178		for _, t := range toot.Object.Tag {
179			switch t.Type {
180			case "Hashtag":
181				honk.Onts = append(honk.Onts, t.Name)
182			}
183		}
184		savehonk(&honk)
185	}
186}
187
188func importMastotooters(user *WhatAbout, source string) {
189	ilog.Println("Importing honkers...")
190	fd, err := os.Open(source + "/following_accounts.csv")
191	if err != nil {
192		elog.Fatal(err)
193	}
194	r := csv.NewReader(fd)
195	data, err := r.ReadAll()
196	if err != nil {
197		elog.Fatal(err)
198	}
199	fd.Close()
200
201	var meta HonkerMeta
202	mj, _ := jsonify(&meta)
203
204	for i, d := range data {
205		if i == 0 {
206			continue
207		}
208		url := "@" + d[0]
209		name := ""
210		flavor := "peep"
211		combos := ""
212		err := savehonker(user, url, name, flavor, combos, mj)
213		if err != nil {
214			elog.Printf("trouble with a honker: %s", err)
215		}
216	}
217}
218
219func importTwitter(username, source string) {
220	user, err := butwhatabout(username)
221	if err != nil {
222		elog.Fatal(err)
223	}
224
225	type Tweet struct {
226		ID_str                  string
227		Created_at              string
228		Full_text               string
229		In_reply_to_screen_name string
230		In_reply_to_status_id   string
231		Entities                struct {
232			Hashtags []struct {
233				Text string
234			}
235			Media []struct {
236				Url       string
237				Media_url string
238			}
239			Urls []struct {
240				Url          string
241				Expanded_url string
242			}
243		}
244		date   time.Time
245		convoy string
246	}
247
248	var tweets []*Tweet
249	fd, err := os.Open(source + "/tweet.js")
250	if err != nil {
251		elog.Fatal(err)
252	}
253	// skip past window.YTD.tweet.part0 =
254	fd.Seek(25, 0)
255	dec := json.NewDecoder(fd)
256	err = dec.Decode(&tweets)
257	if err != nil {
258		elog.Fatalf("error parsing json: %s", err)
259	}
260	fd.Close()
261	tweetmap := make(map[string]*Tweet)
262	for _, t := range tweets {
263		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
264		tweetmap[t.ID_str] = t
265	}
266	sort.Slice(tweets, func(i, j int) bool {
267		return tweets[i].date.Before(tweets[j].date)
268	})
269	havetwid := func(xid string) bool {
270		var id int64
271		row := stmtFindXonk.QueryRow(user.ID, xid)
272		err := row.Scan(&id)
273		if err == nil {
274			return true
275		}
276		return false
277	}
278
279	for _, t := range tweets {
280		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
281		if havetwid(xid) {
282			continue
283		}
284		what := "honk"
285		noise := ""
286		if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
287			t.convoy = parent.convoy
288			what = "tonk"
289		} else {
290			t.convoy = "data:,acoustichonkytonk-" + t.ID_str
291			if t.In_reply_to_screen_name != "" {
292				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
293					t.In_reply_to_screen_name, t.In_reply_to_status_id)
294				what = "tonk"
295			}
296		}
297		audience := []string{thewholeworld}
298		honk := Honk{
299			UserID:   user.ID,
300			Username: user.Name,
301			What:     what,
302			Honker:   user.URL,
303			XID:      xid,
304			Date:     t.date,
305			Format:   "markdown",
306			Audience: audience,
307			Convoy:   t.convoy,
308			Public:   true,
309			Whofore:  2,
310		}
311		noise += t.Full_text
312		// unbelievable
313		noise = html.UnescapeString(noise)
314		for _, r := range t.Entities.Urls {
315			noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
316		}
317		for _, m := range t.Entities.Media {
318			u := m.Media_url
319			idx := strings.LastIndexByte(u, '/')
320			u = u[idx+1:]
321			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
322			data, err := ioutil.ReadFile(fname)
323			if err != nil {
324				elog.Printf("error reading media: %s", fname)
325				continue
326			}
327			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
328
329			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
330			if err != nil {
331				elog.Printf("error saving media: %s", fname)
332				continue
333			}
334			donk := &Donk{
335				FileID: fileid,
336			}
337			honk.Donks = append(honk.Donks, donk)
338			noise = strings.Replace(noise, m.Url, "", -1)
339		}
340		for _, ht := range t.Entities.Hashtags {
341			honk.Onts = append(honk.Onts, "#"+ht.Text)
342		}
343		honk.Noise = noise
344		savehonk(&honk)
345	}
346}