all repos — honk @ 41deae42c872cab5b3758e610a0564a5137e8644

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/json"
 20	"fmt"
 21	"html"
 22	"io/ioutil"
 23	"os"
 24	"regexp"
 25	"sort"
 26	"strings"
 27	"time"
 28)
 29
 30func importMain(username, flavor, source string) {
 31	switch flavor {
 32	case "mastodon":
 33		importMastodon(username, source)
 34	case "twitter":
 35		importTwitter(username, source)
 36	default:
 37		elog.Fatal("unknown source flavor")
 38	}
 39}
 40
 41type TootObject struct {
 42	Summary      string
 43	Content      string
 44	InReplyTo    string
 45	Conversation string
 46	Published    time.Time
 47	Tag          []struct {
 48		Type string
 49		Name string
 50	}
 51	Attachment []struct {
 52		Type      string
 53		MediaType string
 54		Url       string
 55		Name      string
 56	}
 57}
 58
 59type PlainTootObject TootObject
 60
 61func (obj *TootObject) UnmarshalJSON(b []byte) error {
 62	p := (*PlainTootObject)(obj)
 63	json.Unmarshal(b, p)
 64	return nil
 65}
 66
 67func importMastodon(username, source string) {
 68	user, err := butwhatabout(username)
 69	if err != nil {
 70		elog.Fatal(err)
 71	}
 72	type Toot struct {
 73		Id     string
 74		Type   string
 75		To     []string
 76		Cc     []string
 77		Object TootObject
 78	}
 79	var outbox struct {
 80		OrderedItems []Toot
 81	}
 82	fd, err := os.Open(source + "/outbox.json")
 83	if err != nil {
 84		elog.Fatal(err)
 85	}
 86	dec := json.NewDecoder(fd)
 87	err = dec.Decode(&outbox)
 88	if err != nil {
 89		elog.Fatalf("error parsing json: %s", err)
 90	}
 91	fd.Close()
 92
 93	havetoot := func(xid string) bool {
 94		var id int64
 95		row := stmtFindXonk.QueryRow(user.ID, xid)
 96		err := row.Scan(&id)
 97		if err == nil {
 98			return true
 99		}
100		return false
101	}
102
103	re_tootid := regexp.MustCompile("[^/]+$")
104	for _, item := range outbox.OrderedItems {
105		toot := item
106		if toot.Type != "Create" {
107			continue
108		}
109		tootid := re_tootid.FindString(toot.Id)
110		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
111		if havetoot(xid) {
112			continue
113		}
114		honk := Honk{
115			UserID:   user.ID,
116			What:     "honk",
117			Honker:   user.URL,
118			XID:      xid,
119			RID:      toot.Object.InReplyTo,
120			Date:     toot.Object.Published,
121			URL:      xid,
122			Audience: append(toot.To, toot.Cc...),
123			Noise:    toot.Object.Content,
124			Convoy:   toot.Object.Conversation,
125			Whofore:  2,
126			Format:   "html",
127			Precis:   toot.Object.Summary,
128		}
129		if honk.RID != "" {
130			honk.What = "tonk"
131		}
132		if !loudandproud(honk.Audience) {
133			honk.Whofore = 3
134		}
135		for _, att := range toot.Object.Attachment {
136			switch att.Type {
137			case "Document":
138				fname := fmt.Sprintf("%s/%s", source, att.Url)
139				data, err := ioutil.ReadFile(fname)
140				if err != nil {
141					elog.Printf("error reading media: %s", fname)
142					continue
143				}
144				u := xfiltrate()
145				name := att.Name
146				desc := name
147				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
148				fileid, err := savefile(name, desc, newurl, att.MediaType, true, data)
149				if err != nil {
150					elog.Printf("error saving media: %s", fname)
151					continue
152				}
153				donk := &Donk{
154					FileID: fileid,
155				}
156				honk.Donks = append(honk.Donks, donk)
157			}
158		}
159		for _, t := range toot.Object.Tag {
160			switch t.Type {
161			case "Hashtag":
162				honk.Onts = append(honk.Onts, t.Name)
163			}
164		}
165		savehonk(&honk)
166	}
167}
168
169func importTwitter(username, source string) {
170	user, err := butwhatabout(username)
171	if err != nil {
172		elog.Fatal(err)
173	}
174
175	type Tweet struct {
176		ID_str                  string
177		Created_at              string
178		Full_text               string
179		In_reply_to_screen_name string
180		In_reply_to_status_id   string
181		Entities                struct {
182			Hashtags []struct {
183				Text string
184			}
185			Media []struct {
186				Url       string
187				Media_url string
188			}
189			Urls []struct {
190				Url          string
191				Expanded_url string
192			}
193		}
194		date   time.Time
195		convoy string
196	}
197
198	var tweets []*Tweet
199	fd, err := os.Open(source + "/tweet.js")
200	if err != nil {
201		elog.Fatal(err)
202	}
203	// skip past window.YTD.tweet.part0 =
204	fd.Seek(25, 0)
205	dec := json.NewDecoder(fd)
206	err = dec.Decode(&tweets)
207	if err != nil {
208		elog.Fatalf("error parsing json: %s", err)
209	}
210	fd.Close()
211	tweetmap := make(map[string]*Tweet)
212	for _, t := range tweets {
213		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
214		tweetmap[t.ID_str] = t
215	}
216	sort.Slice(tweets, func(i, j int) bool {
217		return tweets[i].date.Before(tweets[j].date)
218	})
219	havetwid := func(xid string) bool {
220		var id int64
221		row := stmtFindXonk.QueryRow(user.ID, xid)
222		err := row.Scan(&id)
223		if err == nil {
224			return true
225		}
226		return false
227	}
228
229	for _, t := range tweets {
230		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
231		if havetwid(xid) {
232			continue
233		}
234		what := "honk"
235		noise := ""
236		if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
237			t.convoy = parent.convoy
238			what = "tonk"
239		} else {
240			t.convoy = "data:,acoustichonkytonk-" + t.ID_str
241			if t.In_reply_to_screen_name != "" {
242				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
243					t.In_reply_to_screen_name, t.In_reply_to_status_id)
244				what = "tonk"
245			}
246		}
247		audience := []string{thewholeworld}
248		honk := Honk{
249			UserID:   user.ID,
250			Username: user.Name,
251			What:     what,
252			Honker:   user.URL,
253			XID:      xid,
254			Date:     t.date,
255			Format:   "markdown",
256			Audience: audience,
257			Convoy:   t.convoy,
258			Public:   true,
259			Whofore:  2,
260		}
261		noise += t.Full_text
262		// unbelievable
263		noise = html.UnescapeString(noise)
264		for _, r := range t.Entities.Urls {
265			noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
266		}
267		for _, m := range t.Entities.Media {
268			u := m.Media_url
269			idx := strings.LastIndexByte(u, '/')
270			u = u[idx+1:]
271			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
272			data, err := ioutil.ReadFile(fname)
273			if err != nil {
274				elog.Printf("error reading media: %s", fname)
275				continue
276			}
277			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
278
279			fileid, err := savefile(u, u, newurl, "image/jpg", true, data)
280			if err != nil {
281				elog.Printf("error saving media: %s", fname)
282				continue
283			}
284			donk := &Donk{
285				FileID: fileid,
286			}
287			honk.Donks = append(honk.Donks, donk)
288			noise = strings.Replace(noise, m.Url, "", -1)
289		}
290		for _, ht := range t.Entities.Hashtags {
291			honk.Onts = append(honk.Onts, "#"+ht.Text)
292		}
293		honk.Noise = noise
294		savehonk(&honk)
295	}
296}