all repos — honk @ 1dab6623e5cfb202c33a976d3dd63f6f89f6473a

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/json"
 20	"fmt"
 21	"html"
 22	"io/ioutil"
 23	"os"
 24	"regexp"
 25	"sort"
 26	"strings"
 27	"time"
 28)
 29
 30func importMain(username, flavor, source string) {
 31	switch flavor {
 32	case "mastodon":
 33		importMastodon(username, source)
 34	case "twitter":
 35		importTwitter(username, source)
 36	default:
 37		elog.Fatal("unknown source flavor")
 38	}
 39}
 40
 41type TootObject struct {
 42	Summary      string
 43	Content      string
 44	InReplyTo    string
 45	Conversation string
 46	Published    time.Time
 47	Tag          []struct {
 48		Type string
 49		Name string
 50	}
 51	Attachment []struct {
 52		Type      string
 53		MediaType string
 54		Url       string
 55		Name      string
 56	}
 57}
 58
 59type PlainTootObject TootObject
 60
 61func (obj *TootObject) UnmarshalJSON(b []byte) error {
 62	p := (*PlainTootObject)(obj)
 63	json.Unmarshal(b, p)
 64	return nil
 65}
 66
 67func importMastodon(username, source string) {
 68	user, err := butwhatabout(username)
 69	if err != nil {
 70		elog.Fatal(err)
 71	}
 72	type Toot struct {
 73		Id     string
 74		Type   string
 75		To     []string
 76		Cc     []string
 77		Object TootObject
 78	}
 79	var outbox struct {
 80		OrderedItems []Toot
 81	}
 82	fd, err := os.Open(source + "/outbox.json")
 83	if err != nil {
 84		elog.Fatal(err)
 85	}
 86	dec := json.NewDecoder(fd)
 87	err = dec.Decode(&outbox)
 88	if err != nil {
 89		elog.Fatalf("error parsing json: %s", err)
 90	}
 91	fd.Close()
 92
 93	havetoot := func(xid string) bool {
 94		var id int64
 95		row := stmtFindXonk.QueryRow(user.ID, xid)
 96		err := row.Scan(&id)
 97		if err == nil {
 98			return true
 99		}
100		return false
101	}
102
103	re_tootid := regexp.MustCompile("[^/]+$")
104	for _, item := range outbox.OrderedItems {
105		toot := item
106		if toot.Type != "Create" {
107			continue
108		}
109		if strings.HasSuffix(toot.Id, "/activity") {
110			toot.Id = strings.TrimSuffix(toot.Id, "/activity")
111		}
112		tootid := re_tootid.FindString(toot.Id)
113		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
114		if havetoot(xid) {
115			continue
116		}
117		honk := Honk{
118			UserID:   user.ID,
119			What:     "honk",
120			Honker:   user.URL,
121			XID:      xid,
122			RID:      toot.Object.InReplyTo,
123			Date:     toot.Object.Published,
124			URL:      xid,
125			Audience: append(toot.To, toot.Cc...),
126			Noise:    toot.Object.Content,
127			Convoy:   toot.Object.Conversation,
128			Whofore:  2,
129			Format:   "html",
130			Precis:   toot.Object.Summary,
131		}
132		if honk.RID != "" {
133			honk.What = "tonk"
134		}
135		if !loudandproud(honk.Audience) {
136			honk.Whofore = 3
137		}
138		for _, att := range toot.Object.Attachment {
139			switch att.Type {
140			case "Document":
141				fname := fmt.Sprintf("%s/%s", source, att.Url)
142				data, err := ioutil.ReadFile(fname)
143				if err != nil {
144					elog.Printf("error reading media: %s", fname)
145					continue
146				}
147				u := xfiltrate()
148				name := att.Name
149				desc := name
150				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
151				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
152				if err != nil {
153					elog.Printf("error saving media: %s", fname)
154					continue
155				}
156				donk := &Donk{
157					FileID: fileid,
158				}
159				honk.Donks = append(honk.Donks, donk)
160			}
161		}
162		for _, t := range toot.Object.Tag {
163			switch t.Type {
164			case "Hashtag":
165				honk.Onts = append(honk.Onts, t.Name)
166			}
167		}
168		savehonk(&honk)
169	}
170}
171
172func importTwitter(username, source string) {
173	user, err := butwhatabout(username)
174	if err != nil {
175		elog.Fatal(err)
176	}
177
178	type Tweet struct {
179		ID_str                  string
180		Created_at              string
181		Full_text               string
182		In_reply_to_screen_name string
183		In_reply_to_status_id   string
184		Entities                struct {
185			Hashtags []struct {
186				Text string
187			}
188			Media []struct {
189				Url       string
190				Media_url string
191			}
192			Urls []struct {
193				Url          string
194				Expanded_url string
195			}
196		}
197		date   time.Time
198		convoy string
199	}
200
201	var tweets []*Tweet
202	fd, err := os.Open(source + "/tweet.js")
203	if err != nil {
204		elog.Fatal(err)
205	}
206	// skip past window.YTD.tweet.part0 =
207	fd.Seek(25, 0)
208	dec := json.NewDecoder(fd)
209	err = dec.Decode(&tweets)
210	if err != nil {
211		elog.Fatalf("error parsing json: %s", err)
212	}
213	fd.Close()
214	tweetmap := make(map[string]*Tweet)
215	for _, t := range tweets {
216		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
217		tweetmap[t.ID_str] = t
218	}
219	sort.Slice(tweets, func(i, j int) bool {
220		return tweets[i].date.Before(tweets[j].date)
221	})
222	havetwid := func(xid string) bool {
223		var id int64
224		row := stmtFindXonk.QueryRow(user.ID, xid)
225		err := row.Scan(&id)
226		if err == nil {
227			return true
228		}
229		return false
230	}
231
232	for _, t := range tweets {
233		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
234		if havetwid(xid) {
235			continue
236		}
237		what := "honk"
238		noise := ""
239		if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
240			t.convoy = parent.convoy
241			what = "tonk"
242		} else {
243			t.convoy = "data:,acoustichonkytonk-" + t.ID_str
244			if t.In_reply_to_screen_name != "" {
245				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
246					t.In_reply_to_screen_name, t.In_reply_to_status_id)
247				what = "tonk"
248			}
249		}
250		audience := []string{thewholeworld}
251		honk := Honk{
252			UserID:   user.ID,
253			Username: user.Name,
254			What:     what,
255			Honker:   user.URL,
256			XID:      xid,
257			Date:     t.date,
258			Format:   "markdown",
259			Audience: audience,
260			Convoy:   t.convoy,
261			Public:   true,
262			Whofore:  2,
263		}
264		noise += t.Full_text
265		// unbelievable
266		noise = html.UnescapeString(noise)
267		for _, r := range t.Entities.Urls {
268			noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
269		}
270		for _, m := range t.Entities.Media {
271			u := m.Media_url
272			idx := strings.LastIndexByte(u, '/')
273			u = u[idx+1:]
274			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
275			data, err := ioutil.ReadFile(fname)
276			if err != nil {
277				elog.Printf("error reading media: %s", fname)
278				continue
279			}
280			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
281
282			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
283			if err != nil {
284				elog.Printf("error saving media: %s", fname)
285				continue
286			}
287			donk := &Donk{
288				FileID: fileid,
289			}
290			honk.Donks = append(honk.Donks, donk)
291			noise = strings.Replace(noise, m.Url, "", -1)
292		}
293		for _, ht := range t.Entities.Hashtags {
294			honk.Onts = append(honk.Onts, "#"+ht.Text)
295		}
296		honk.Noise = noise
297		savehonk(&honk)
298	}
299}