libzypp  17.37.5
RepoMirrorList.cc
Go to the documentation of this file.
1 /*---------------------------------------------------------------------\
2 | ____ _ __ __ ___ |
3 | |__ / \ / / . \ . \ |
4 | / / \ V /| _/ _/ |
5 | / /__ | | | | | | |
6 | /_____||_| |_| |_| |
7 | |
8 \---------------------------------------------------------------------*/
13 #include <iostream>
14 #include <fstream>
15 #include <utility>
16 #include <vector>
17 #include <time.h>
19 #include <zypp-curl/parser/MetaLinkParser>
20 #include <zypp/MediaSetAccess.h>
21 #include <zypp/base/LogTools.h>
22 #include <zypp/ZConfig.h>
23 #include <zypp/PathInfo.h>
25 
26 #include <zypp-core/fs/TmpPath.h>
32 
34 #include <zypp/media/MediaNetworkCommonHandler.h> // for the authentication workflow
35 
36 #include <zypp-core/parser/json.h>
37 
38 
40 namespace zypp
41 {
42  namespace repo
44  {
45 
47  namespace
48  {
56  struct RepoMirrorListTempProvider
57  {
58  RepoMirrorListTempProvider()
59  {}
60 
61  RepoMirrorListTempProvider( Pathname localfile_r )
62  : _localfile(std::move( localfile_r ))
63  {}
64 
65  RepoMirrorListTempProvider( const Url & url_r )
66  {
67  if ( url_r.schemeIsDownloading()
69  && url_r.getQueryStringMap().count("mirrorlist") > 0 ) {
70 
71  // Auth will probably never be triggered, but add it for completeness
72  const auto &authCb = [&]( const zypp::Url &, media::TransferSettings &settings, const std::string & availAuthTypes, bool firstTry, bool &canContinue ) {
73  media::CredentialManager cm(media::CredManagerOptions(ZConfig::instance().repoManagerRoot()));
74  if ( media::MediaNetworkCommonHandler::authenticate( url_r, cm, settings, availAuthTypes, firstTry ) ) {
75  canContinue = true;
76  return;
77  }
78  canContinue = false;
79  };
80 
81  internal::MediaNetworkRequestExecutor executor;
82  executor.sigAuthRequired ().connect(authCb);
83 
84  _tmpfile = filesystem::TmpFile();
85  _localfile = _tmpfile->path();
86 
87  // prepare Url and Settings
88  auto url = url_r;
89  auto tSettings = media::TransferSettings();
90  ::internal::prepareSettingsAndUrl( url, tSettings );
91 
92  auto req = std::make_shared<zyppng::NetworkRequest>( url_r, _localfile );
93  req->transferSettings () = tSettings;
94  executor.executeRequest ( req, nullptr );
95 
96  // apply umask
97  if ( ::chmod( _localfile.c_str(), filesystem::applyUmaskTo( 0644 ) ) )
98  {
99  ERR << "Failed to chmod file " << _localfile << endl;
100  }
101 
102  return;
103  }
104 
105  // this will handle traditional media including URL resolver plugins
106  Url abs_url( url_r );
107  abs_url.setPathName( "/" );
108  _access.reset( new MediaSetAccess( std::vector<zypp::media::MediaUrl>{abs_url} ) );
109  _localfile = _access->provideFile( url_r.getPathName() );
110 
111  }
112 
113  const Pathname & localfile() const
114  { return _localfile; }
115  private:
117  Pathname _localfile;
118  std::optional<filesystem::TmpFile> _tmpfile;
119  };
120 
/** Result of the content based mirrorlist format detection. */
enum class RepoMirrorListFormat {
  Error,            ///< the format could not be determined
  Empty,            ///< no file, or nothing but whitespace in it
  MirrorListTxt,    ///< plain text list of URLs
  MirrorListJson,   ///< JSON mirrorlist
  MetaLink          ///< metalink (XML) file
};
128 
129  static RepoMirrorListFormat detectRepoMirrorListFormat( const Pathname &localfile ) {
130  // a file starting with < is most likely a metalink file,
131  // a file starting with [ is most likely a json file,
132  // else we go for txt
133  MIL << "Detecting RepoMirrorlist Format based on file content" << std::endl;
134 
135  if ( localfile.empty () )
136  return RepoMirrorListFormat::Empty;
137 
138  InputStream tmpfstream (localfile);
139  auto &str = tmpfstream.stream();
140  auto c = str.get ();
141 
142  // skip preceding whitespaces
143  while ( !str.eof () && !str.bad() && ( c == ' ' || c == '\t' || c == '\n' || c == '\r') )
144  c = str.get ();
145 
146  if ( str.eof() ) {
147  ERR << "Failed to read RepoMirrorList file, stream hit EOF early." << std::endl;
148  return RepoMirrorListFormat::Empty;
149  }
150 
151  if ( str.bad() ) {
152  ERR << "Failed to read RepoMirrorList file, stream became bad." << std::endl;
154  }
155 
156  switch ( c ) {
157  case '<': {
158  MIL << "Detected Metalink, file starts with <" << std::endl;
159  return RepoMirrorListFormat::MetaLink;
160  }
161  case '[': {
162  MIL << "Detected JSON, file starts with [" << std::endl;
163  return RepoMirrorListFormat::MirrorListJson;
164  }
165  default: {
166  MIL << "Detected TXT, file starts with " << c << std::endl;
167  return RepoMirrorListFormat::MirrorListTxt;
168  }
169  }
170  }
171 
172  inline std::vector<Url> RepoMirrorListParseXML( const Pathname &tmpfile )
173  {
174  try {
175  media::MetaLinkParser metalink;
176  metalink.parse(tmpfile);
177  return metalink.getUrls();
178  } catch (...) {
179  ZYPP_CAUGHT( std::current_exception() );
180  zypp::parser::ParseException ex("Invalid repo metalink format.");
181  ex.remember ( std::current_exception () );
182  ZYPP_THROW(ex);
183  }
184  }
185 
186  inline std::vector<Url> RepoMirrorListParseJSON( const Pathname &tmpfile )
187  {
188  InputStream tmpfstream (tmpfile);
189 
190  try {
191  using namespace zyppng::operators;
192  using zyppng::operators::operator|;
193 
194  json::Parser parser;
195  auto res = parser.parse ( tmpfstream )
196  | and_then([&]( json::Value data ) {
197 
198  std::vector<Url> urls;
199  if ( data.isNull () ) {
200  MIL << "Empty mirrorlist received, no mirrors available." << std::endl;
201  return zyppng::make_expected_success(urls);
202  }
203 
204  if ( data.type() != json::Value::ArrayType ) {
205  MIL << "Unexpected JSON format, top level element must be an array." << std::endl;
206  return zyppng::expected<std::vector<Url>>::error( ZYPP_EXCPT_PTR( zypp::Exception("Unexpected JSON format, top level element must be an array.") ));
207  }
208  const auto &topArray = data.asArray ();
209  for ( const auto &val : topArray ) {
210  if ( val.type () != json::Value::ObjectType ) {
211  MIL << "Unexpected JSON element, array must contain only objects. Ignoring current element" << std::endl;
212  continue;
213  }
214 
215  const auto &obj = val.asObject();
216  for ( const auto &key : obj ) {
217  if ( key.first == "url" ) {
218  const auto &elemValue = key.second;
219  if ( elemValue.type() != json::Value::StringType ) {
220  MIL << "Unexpected JSON element, element \"url\" must contain a string. Ignoring current element" << std::endl;
221  break;
222  }
223  try {
224  MIL << "Trying to parse URL: " << std::string(elemValue.asString()) << std::endl;
225  urls.push_back ( Url( elemValue.asString() ) );
226  } catch ( const url::UrlException &e ) {
227  ZYPP_CAUGHT(e);
228  MIL << "Invalid URL in mirrors file: "<< elemValue.asString() << ", ignoring" << std::endl;
229  }
230  }
231  }
232  }
233  return zyppng::make_expected_success(urls);
234  });
235 
236  if ( !res ) {
237  using zypp::operator<<;
238  MIL << "Error while parsing mirrorlist: (" << res.error() << "), no mirrors available" << std::endl;
239  ZYPP_RETHROW( res.error () );
240  }
241 
242  return *res;
243 
244  } catch (...) {
245  ZYPP_CAUGHT( std::current_exception() );
246  MIL << "Caught exception while parsing json" << std::endl;
247 
248  zypp::parser::ParseException ex("Invalid repo mirror list format, valid JSON was expected.");
249  ex.remember ( std::current_exception () );
250  ZYPP_THROW(ex);
251  }
252  return {};
253  }
254 
255  inline std::vector<Url> RepoMirrorListParseTXT( const Pathname &tmpfile )
256  {
257  InputStream tmpfstream (tmpfile);
258  std::vector<Url> my_urls;
259  std::string tmpurl;
260  while (getline(tmpfstream.stream(), tmpurl))
261  {
262  if ( tmpurl[0] == '#' )
263  continue;
264  try {
265  Url mirrUrl( tmpurl );
266  if ( !mirrUrl.schemeIsDownloading( ) ) {
267  MIL << "Ignoring non downloading URL " << tmpurl << std::endl;
268  }
269  my_urls.push_back(Url(tmpurl));
270  }
271  catch (...)
272  {
273  ZYPP_CAUGHT( std::current_exception() );
274 
275  // fail on invalid URLs
276  ERR << "Invalid URL in mirrorlist file." << std::endl;
277 
278  zypp::parser::ParseException ex("Invalid repo mirror list format, all Urls must be valid in a mirrorlist txt file.");
279  ex.remember ( std::current_exception () );
280  ZYPP_THROW(ex);
281  }
282  }
283  return my_urls;
284  }
285 
287  inline std::vector<Url> RepoMirrorListParse( const Url & url_r, const Pathname & listfile_r )
288  {
289  MIL << "Parsing mirrorlist file: " << listfile_r << " originally received from " << url_r << endl;
290 
291  std::vector<Url> mirrorurls;
292  switch( detectRepoMirrorListFormat (listfile_r) ) {
294  // should not happen, except when the instr goes bad
295  ZYPP_THROW( zypp::parser::ParseException( str::Format("Unable to detect metalink file format for: %1%") % listfile_r ));
296  case RepoMirrorListFormat::Empty:
297  mirrorurls = {};
298  break;
299  case RepoMirrorListFormat::MetaLink:
300  mirrorurls = RepoMirrorListParseXML( listfile_r );
301  break;
302  case RepoMirrorListFormat::MirrorListJson:
303  mirrorurls = RepoMirrorListParseJSON( listfile_r );
304  break;
305  case RepoMirrorListFormat::MirrorListTxt:
306  mirrorurls = RepoMirrorListParseTXT( listfile_r );
307  break;
308  }
309 
310  std::vector<Url> ret;
311  for ( auto & murl : mirrorurls )
312  {
313  if ( murl.getScheme() != "rsync" )
314  {
315  std::string pName = murl.getPathName();
316  size_t delpos = pName.find("repodata/repomd.xml");
317  if( delpos != std::string::npos )
318  {
319  murl.setPathName( pName.erase(delpos) );
320  }
321  ret.push_back( murl );
322  }
323  }
324  return ret;
325  }
326 
327  } // namespace
329 
330  RepoMirrorList::RepoMirrorList( const Url & url_r, const Pathname & metadatapath_r )
331  {
332  PathInfo metaPathInfo( metadatapath_r);
333  std::exception_ptr errors; // we collect errors here
334  try {
335  if ( url_r.getScheme() == "file" )
336  {
337  // never cache for local mirrorlist
338  _urls = RepoMirrorListParse( url_r, url_r.getPathName() );
339  }
340  else if ( !metaPathInfo.isDir() )
341  {
342  // no cachedir or no access
343  RepoMirrorListTempProvider provider( url_r ); // RAII: lifetime of any downloaded files
344  _urls = RepoMirrorListParse( url_r, provider.localfile() );
345  }
346  else
347  {
348  // have cachedir
349  const Pathname cachefile = metadatapath_r / cacheFileName();
350  const Pathname cookiefile = metadatapath_r / cookieFileName();
351  zypp::filesystem::PathInfo cacheinfo( cachefile );
352 
353  bool needRefresh = ( !cacheinfo.isFile()
354  // force a update on a old cache ONLY if the user can write the cache, otherwise we use an already existing cachefile
355  // it makes no sense to continously download the mirrors file if we can't store it
356  || ( cacheinfo.mtime() < time(NULL) - (long) ZConfig::instance().repo_refresh_delay() * 60 && metaPathInfo.userMayRWX () ) )
357  || ( makeCookie( url_r ) != readCookieFile( cookiefile ) );
358 
359  // up to date: try to parse and use the URLs if sucessful
360  // otherwise fetch the URL again
361  if ( !needRefresh ) {
362  MIL << "Mirror cachefile cookie valid and cache is not too old, skipping download (" << cachefile << ")" << std::endl;
363  try {
364  _urls = RepoMirrorListParse( url_r, cachefile );
365  if( _urls.empty() ) {
366  DBG << "Removing Cachefile as it contains no URLs" << endl;
367  zypp::filesystem::unlink( cachefile );
368  }
369  return;
370 
371  } catch ( const zypp::Exception & e ) {
372  ZYPP_CAUGHT(e);
373  auto ex = e;
374  if ( errors )
375  ex.remember(errors);
376  errors = std::make_exception_ptr(ex);
377  MIL << "Invalid mirrorlist cachefile, deleting it and trying to fetch a new one" << std::endl;
378  }
379  }
380 
381  // remove the old cache and its cookie, it's either broken, empty or outdated
382  if( cacheinfo.isFile() ) {
383  filesystem::unlink(cachefile);
384  }
385 
386  if ( zypp::filesystem::PathInfo(cookiefile).isFile() ) {
387  filesystem::unlink(cookiefile);
388  }
389 
390  MIL << "Getting MirrorList from URL: " << url_r << endl;
391  RepoMirrorListTempProvider provider( url_r ); // RAII: lifetime of downloaded file
392  _urls = RepoMirrorListParse( url_r, provider.localfile() );
393 
394  if ( metaPathInfo.userMayRWX() && !_urls.empty() ) {
395  // Create directory, if not existing
396  DBG << "Copy MirrorList file to " << cachefile << endl;
397  zypp::filesystem::assert_dir( metadatapath_r );
398  zypp::filesystem::hardlinkCopy( provider.localfile(), cachefile );
399  saveToCookieFile ( cookiefile, url_r );
400  // NOTE: Now we copied the mirrorlist into the metadata directory, but
401  // in case of refresh going on, new metadata are prepared in a sibling
402  // temp dir. Upon success RefreshContext<>::saveToRawCache() exchanges
403  // temp and metadata dirs. There we move an existing mirrorlist file into
404  // the new metadata dir.
405  }
406  }
407  } catch ( const zypp::Exception &e ) {
408  // Make a more user readable exception
409  ZYPP_CAUGHT(e);
410  parser::ParseException ex( str::Format("Failed to parse/receive mirror information for URL: %1%") % url_r );
411  ex.remember(e);
412  if ( errors ) ex.remember(errors);
413  ZYPP_THROW(ex);
414  }
415  }
416 
418  {
419  static const std::vector<std::string> hosts{
420  "download.opensuse.org",
421  "cdn.opensuse.org"
422  };
423  return ( std::find( hosts.begin(), hosts.end(), str::toLower( url.getHost() )) != hosts.end() );
424  }
425 
426  std::string RepoMirrorList::readCookieFile(const Pathname &path_r)
427  {
428  std::ifstream file( path_r.c_str() );
429  if ( not file ) {
430  WAR << "No cookie file " << path_r << endl;
431  return {};
432  }
433 
434  return str::getline( file );
435  }
436 
440  std::string RepoMirrorList::makeCookie( const Url &url_r )
441  {
443  }
444 
445  void RepoMirrorList::saveToCookieFile(const Pathname &path_r, const Url &url_r )
446  {
447  std::ofstream file(path_r.c_str());
448  if (!file) {
449  ERR << str::Str() << "Can't open " << path_r.asString() << std::endl;
450  return;
451  }
452  MIL << "Saving mirrorlist cookie file " << path_r << std::endl;
453  file << makeCookie(url_r);
454  file.close();
455  }
456 
458  } // namespace repo
461 } // namespace zypp
std::string getScheme() const
Returns the scheme name of the URL.
Definition: Url.cc:551
std::string toLower(const std::string &s)
Return lowercase version of s.
Definition: String.cc:180
#define MIL
Definition: Logger.h:100
static std::string readCookieFile(const Pathname &path_r)
static std::string makeCookie(const zypp::Url &url_r)
Generates the cookie value, currently this is only derived from the Url.
std::string getline(std::istream &str, const Trim trim_r)
Return stream content up to (but not returning) the next newline.
Definition: String.cc:481
int assert_dir(const Pathname &path, unsigned mode)
Like 'mkdir -p'.
Definition: PathInfo.cc:324
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition: Exception.h:459
static ZConfig & instance()
Singleton ctor.
Definition: ZConfig.cc:940
int chmod(const Pathname &path, mode_t mode)
Like 'chmod'.
Definition: PathInfo.cc:1097
time_t mtime() const
Definition: PathInfo.h:384
const char * c_str() const
String representation.
Definition: Pathname.h:112
String related utilities and Regular expression matching.
std::string getline(std::istream &str)
Read one line from stream.
Definition: IOStream.cc:33
Definition: Arch.h:363
static expected< std::decay_t< Type >, Err > make_expected_success(Type &&t)
Definition: expected.h:397
Helper to create and pass std::istream.
Definition: inputstream.h:56
Pathname _localfile
std::vector< Url > _urls
Convenient building of std::string with boost::format.
Definition: String.h:253
bool authenticate(const Url &url, TransferSettings &settings, const std::string &availAuthTypes, bool firstTry)
#define ZYPP_EXCPT_PTR(EXCPT)
Drops a logline and returns Exception as a std::exception_ptr.
Definition: Exception.h:463
int hardlinkCopy(const Pathname &oldpath, const Pathname &newpath)
Create newpath as hardlink or copy of oldpath.
Definition: PathInfo.cc:888
#define ERR
Definition: Logger.h:102
void prepareSettingsAndUrl(zypp::Url &url_r, zypp::media::TransferSettings &s)
Definition: curlhelper.cc:180
void remember(const Exception &old_r)
Store an other Exception as history.
Definition: Exception.cc:154
bool empty() const
Test for an empty path.
Definition: Pathname.h:116
#define ZYPP_RETHROW(EXCPT)
Drops a logline and rethrows, updating the CodeLocation.
Definition: Exception.h:479
static CheckSum sha256FromString(const std::string &input_r)
Definition: CheckSum.h:107
Convenient building of std::string via std::ostringstream Basically a std::ostringstream autoconverti...
Definition: String.h:212
const std::string & asString() const
String representation.
Definition: Pathname.h:93
unsigned repo_refresh_delay() const
Amount of time in minutes that must pass before another refresh.
Definition: ZConfig.cc:1191
static constexpr const char * cacheFileName()
#define WAR
Definition: Logger.h:101
std::string asCompleteString() const
Returns a complete string representation of the Url object.
Definition: Url.cc:523
RepoMirrorList(const Url &url_r, const Pathname &metadatapath_r)
zypp::media::TransferSettings TransferSettings
static void saveToCookieFile(const Pathname &path_r, const zypp::Url &url_r)
int unlink(const Pathname &path)
Like 'unlink'.
Definition: PathInfo.cc:705
std::optional< filesystem::TmpFile > _tmpfile
zypp::Url Url
Definition: url.h:15
static bool urlSupportsMirrorLink(const zypp::Url &url)
#define ZYPP_CAUGHT(EXCPT)
Drops a logline telling the Exception was caught (in order to handle it).
Definition: Exception.h:475
Base class for Exception.
Definition: Exception.h:152
std::string checksum(const Pathname &file, const std::string &algorithm)
Compute a files checksum.
Definition: PathInfo.cc:1056
std::string getPathName(EEncoding eflag=zypp::url::E_DECODED) const
Returns the path name from the URL.
Definition: Url.cc:622
std::string getHost(EEncoding eflag=zypp::url::E_DECODED) const
Returns the hostname or IP from the URL authority.
Definition: Url.cc:606
shared_ptr< MediaSetAccess > _access
static constexpr const char * cookieFileName()
Wrapper class for ::stat/::lstat.
Definition: PathInfo.h:225
ResultType and_then(const expected< T, E > &exp, Function &&f)
Definition: expected.h:423
mode_t applyUmaskTo(mode_t mode_r)
Modify mode_r according to the current umask ( mode_r & ~getUmask() ).
Definition: PathInfo.h:806
Easy-to use interface to the ZYPP dependency resolver.
Definition: Application.cc:19
bool userMayRWX() const
Definition: PathInfo.h:361
Url manipulation class.
Definition: Url.h:92
#define DBG
Definition: Logger.h:99